]> git.proxmox.com Git - ceph.git/blame - ceph/src/rgw/rgw_rados.h
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rgw / rgw_rados.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#ifndef CEPH_RGWRADOS_H
5#define CEPH_RGWRADOS_H
6
7#include <functional>
8
9#include "include/rados/librados.hpp"
10#include "include/Context.h"
3a9019d9 11#include "common/admin_socket.h"
7c673cae
FG
12#include "common/RefCountedObj.h"
13#include "common/RWLock.h"
14#include "common/ceph_time.h"
15#include "common/lru_map.h"
11fdf7f2 16#include "common/ceph_json.h"
7c673cae
FG
17#include "rgw_common.h"
18#include "cls/rgw/cls_rgw_types.h"
19#include "cls/version/cls_version_types.h"
20#include "cls/log/cls_log_types.h"
7c673cae 21#include "cls/timeindex/cls_timeindex_types.h"
11fdf7f2 22#include "cls/otp/cls_otp_types.h"
7c673cae
FG
23#include "rgw_log.h"
24#include "rgw_metadata.h"
25#include "rgw_meta_sync_status.h"
26#include "rgw_period_puller.h"
27#include "rgw_sync_module.h"
b32b8144 28#include "rgw_sync_log_trim.h"
11fdf7f2
TL
29#include "rgw_service.h"
30
31#include "services/svc_rados.h"
32#include "services/svc_zone.h"
7c673cae
FG
33
34class RGWWatcher;
35class SafeTimer;
36class ACLOwner;
37class RGWGC;
38class RGWMetaNotifier;
39class RGWDataNotifier;
40class RGWLC;
41class RGWObjectExpirer;
42class RGWMetaSyncProcessorThread;
43class RGWDataSyncProcessorThread;
44class RGWSyncLogTrimThread;
11fdf7f2 45class RGWSyncTraceManager;
7c673cae
FG
46struct RGWZoneGroup;
47struct RGWZoneParams;
31f18b77
FG
48class RGWReshard;
49class RGWReshardWait;
7c673cae 50
11fdf7f2
TL
51class RGWSysObjectCtx;
52
7c673cae
FG
53/* flags for put_obj_meta() */
54#define PUT_OBJ_CREATE 0x01
55#define PUT_OBJ_EXCL 0x02
56#define PUT_OBJ_CREATE_EXCL (PUT_OBJ_CREATE | PUT_OBJ_EXCL)
57
58#define RGW_OBJ_NS_MULTIPART "multipart"
59#define RGW_OBJ_NS_SHADOW "shadow"
60
61#define RGW_BUCKET_INSTANCE_MD_PREFIX ".bucket.meta."
62
63#define RGW_NO_SHARD -1
64
31f18b77
FG
#define RGW_SHARDS_PRIME_0 7877
#define RGW_SHARDS_PRIME_1 65521

extern const std::string MP_META_SUFFIX;

/*
 * Map a hash value onto a shard number.
 *
 * The hash is first reduced modulo a prime (RGW_SHARDS_PRIME_0 when
 * max_shards does not exceed it, RGW_SHARDS_PRIME_1 otherwise) and the
 * result is then reduced modulo max_shards.
 *
 * only called by rgw_shard_id and rgw_bucket_shard_index
 */
static inline int rgw_shards_mod(unsigned hval, int max_shards)
{
  const unsigned prime =
    (max_shards > RGW_SHARDS_PRIME_0) ? RGW_SHARDS_PRIME_1 : RGW_SHARDS_PRIME_0;
  return hval % prime % max_shards;
}
78
1adf2230
AA
79// used for logging and tagging
80static inline int rgw_shard_id(const string& key, int max_shards)
31f18b77 81{
1adf2230
AA
82 return rgw_shards_mod(ceph_str_hash_linux(key.c_str(), key.size()),
83 max_shards);
84}
85
86// used for bucket indices
87static inline uint32_t rgw_bucket_shard_index(const std::string& key,
88 int num_shards) {
89 uint32_t sid = ceph_str_hash_linux(key.c_str(), key.size());
90 uint32_t sid2 = sid ^ ((sid & 0xFF) << 24);
91 return rgw_shards_mod(sid2, num_shards);
31f18b77
FG
92}
93
94static inline int rgw_shards_max()
95{
96 return RGW_SHARDS_PRIME_1;
97}
7c673cae
FG
98
99static inline void prepend_bucket_marker(const rgw_bucket& bucket, const string& orig_oid, string& oid)
100{
101 if (bucket.marker.empty() || orig_oid.empty()) {
102 oid = orig_oid;
103 } else {
104 oid = bucket.marker;
105 oid.append("_");
106 oid.append(orig_oid);
107 }
108}
109
110static inline void get_obj_bucket_and_oid_loc(const rgw_obj& obj, string& oid, string& locator)
111{
112 const rgw_bucket& bucket = obj.bucket;
113 prepend_bucket_marker(bucket, obj.get_oid(), oid);
114 const string& loc = obj.key.get_loc();
115 if (!loc.empty()) {
116 prepend_bucket_marker(bucket, loc, locator);
117 } else {
118 locator.clear();
119 }
120}
121
7c673cae
FG
122int rgw_policy_from_attrset(CephContext *cct, map<string, bufferlist>& attrset, RGWAccessControlPolicy *policy);
123
124static inline bool rgw_raw_obj_to_obj(const rgw_bucket& bucket, const rgw_raw_obj& raw_obj, rgw_obj *obj)
125{
126 ssize_t pos = raw_obj.oid.find('_');
127 if (pos < 0) {
128 return false;
129 }
130
131 if (!rgw_obj_key::parse_raw_oid(raw_obj.oid.substr(pos + 1), &obj->key)) {
132 return false;
133 }
134 obj->bucket = bucket;
135
136 return true;
137}
138
11fdf7f2 139
7c673cae 140struct rgw_bucket_placement {
11fdf7f2 141 rgw_placement_rule placement_rule;
7c673cae
FG
142 rgw_bucket bucket;
143
144 void dump(Formatter *f) const;
145};
146
147class rgw_obj_select {
11fdf7f2 148 rgw_placement_rule placement_rule;
7c673cae
FG
149 rgw_obj obj;
150 rgw_raw_obj raw_obj;
151 bool is_raw;
152
153public:
154 rgw_obj_select() : is_raw(false) {}
11fdf7f2
TL
155 explicit rgw_obj_select(const rgw_obj& _obj) : obj(_obj), is_raw(false) {}
156 explicit rgw_obj_select(const rgw_raw_obj& _raw_obj) : raw_obj(_raw_obj), is_raw(true) {}
7c673cae 157 rgw_obj_select(const rgw_obj_select& rhs) {
c07f9fc5 158 placement_rule = rhs.placement_rule;
7c673cae
FG
159 is_raw = rhs.is_raw;
160 if (is_raw) {
161 raw_obj = rhs.raw_obj;
162 } else {
163 obj = rhs.obj;
164 }
165 }
166
167 rgw_raw_obj get_raw_obj(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) const;
168 rgw_raw_obj get_raw_obj(RGWRados *store) const;
169
170 rgw_obj_select& operator=(const rgw_obj& rhs) {
171 obj = rhs;
172 is_raw = false;
173 return *this;
174 }
175
176 rgw_obj_select& operator=(const rgw_raw_obj& rhs) {
177 raw_obj = rhs;
178 is_raw = true;
179 return *this;
180 }
181
11fdf7f2 182 void set_placement_rule(const rgw_placement_rule& rule) {
7c673cae
FG
183 placement_rule = rule;
184 }
11fdf7f2 185 void dump(Formatter *f) const;
7c673cae
FG
186};
187
188struct compression_block {
189 uint64_t old_ofs;
190 uint64_t new_ofs;
191 uint64_t len;
192
193 void encode(bufferlist& bl) const {
194 ENCODE_START(1, 1, bl);
11fdf7f2
TL
195 encode(old_ofs, bl);
196 encode(new_ofs, bl);
197 encode(len, bl);
7c673cae
FG
198 ENCODE_FINISH(bl);
199 }
200
11fdf7f2 201 void decode(bufferlist::const_iterator& bl) {
7c673cae 202 DECODE_START(1, bl);
11fdf7f2
TL
203 decode(old_ofs, bl);
204 decode(new_ofs, bl);
205 decode(len, bl);
7c673cae
FG
206 DECODE_FINISH(bl);
207 }
11fdf7f2 208 void dump(Formatter *f) const;
7c673cae
FG
209};
210WRITE_CLASS_ENCODER(compression_block)
211
212struct RGWCompressionInfo {
213 string compression_type;
214 uint64_t orig_size;
215 vector<compression_block> blocks;
216
217 RGWCompressionInfo() : compression_type("none"), orig_size(0) {}
218 RGWCompressionInfo(const RGWCompressionInfo& cs_info) : compression_type(cs_info.compression_type),
219 orig_size(cs_info.orig_size),
220 blocks(cs_info.blocks) {}
221
222 void encode(bufferlist& bl) const {
223 ENCODE_START(1, 1, bl);
11fdf7f2
TL
224 encode(compression_type, bl);
225 encode(orig_size, bl);
226 encode(blocks, bl);
7c673cae
FG
227 ENCODE_FINISH(bl);
228 }
229
11fdf7f2 230 void decode(bufferlist::const_iterator& bl) {
7c673cae 231 DECODE_START(1, bl);
11fdf7f2
TL
232 decode(compression_type, bl);
233 decode(orig_size, bl);
234 decode(blocks, bl);
7c673cae 235 DECODE_FINISH(bl);
11fdf7f2
TL
236 }
237 void dump(Formatter *f) const;
7c673cae
FG
238};
239WRITE_CLASS_ENCODER(RGWCompressionInfo)
240
241int rgw_compression_info_from_attrset(map<string, bufferlist>& attrs, bool& need_decompress, RGWCompressionInfo& cs_info);
242
243struct RGWOLHInfo {
244 rgw_obj target;
245 bool removed;
246
247 RGWOLHInfo() : removed(false) {}
248
249 void encode(bufferlist& bl) const {
250 ENCODE_START(1, 1, bl);
11fdf7f2
TL
251 encode(target, bl);
252 encode(removed, bl);
7c673cae
FG
253 ENCODE_FINISH(bl);
254 }
255
11fdf7f2 256 void decode(bufferlist::const_iterator& bl) {
7c673cae 257 DECODE_START(1, bl);
11fdf7f2
TL
258 decode(target, bl);
259 decode(removed, bl);
7c673cae
FG
260 DECODE_FINISH(bl);
261 }
262 static void generate_test_instances(list<RGWOLHInfo*>& o);
263 void dump(Formatter *f) const;
264};
265WRITE_CLASS_ENCODER(RGWOLHInfo)
266
267struct RGWOLHPendingInfo {
268 ceph::real_time time;
269
270 RGWOLHPendingInfo() {}
271
272 void encode(bufferlist& bl) const {
273 ENCODE_START(1, 1, bl);
11fdf7f2 274 encode(time, bl);
7c673cae
FG
275 ENCODE_FINISH(bl);
276 }
277
11fdf7f2 278 void decode(bufferlist::const_iterator& bl) {
7c673cae 279 DECODE_START(1, bl);
11fdf7f2 280 decode(time, bl);
7c673cae
FG
281 DECODE_FINISH(bl);
282 }
283
284 void dump(Formatter *f) const;
285};
286WRITE_CLASS_ENCODER(RGWOLHPendingInfo)
287
288struct RGWUsageBatch {
289 map<ceph::real_time, rgw_usage_log_entry> m;
290
291 void insert(ceph::real_time& t, rgw_usage_log_entry& entry, bool *account) {
292 bool exists = m.find(t) != m.end();
293 *account = !exists;
294 m[t].aggregate(entry);
295 }
296};
297
298struct RGWUsageIter {
299 string read_iter;
300 uint32_t index;
301
302 RGWUsageIter() : index(0) {}
303};
304
305class RGWGetDataCB {
7c673cae
FG
306public:
307 virtual int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) = 0;
11fdf7f2 308 RGWGetDataCB() {}
7c673cae 309 virtual ~RGWGetDataCB() {}
7c673cae
FG
310};
311
312struct RGWCloneRangeInfo {
313 rgw_obj src;
314 off_t src_ofs;
315 off_t dst_ofs;
316 uint64_t len;
317};
318
319struct RGWObjManifestPart {
320 rgw_obj loc; /* the object where the data is located */
321 uint64_t loc_ofs; /* the offset at that object where the data is located */
322 uint64_t size; /* the part size */
323
324 RGWObjManifestPart() : loc_ofs(0), size(0) {}
325
326 void encode(bufferlist& bl) const {
327 ENCODE_START(2, 2, bl);
11fdf7f2
TL
328 encode(loc, bl);
329 encode(loc_ofs, bl);
330 encode(size, bl);
7c673cae
FG
331 ENCODE_FINISH(bl);
332 }
333
11fdf7f2 334 void decode(bufferlist::const_iterator& bl) {
7c673cae 335 DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl);
11fdf7f2
TL
336 decode(loc, bl);
337 decode(loc_ofs, bl);
338 decode(size, bl);
7c673cae
FG
339 DECODE_FINISH(bl);
340 }
341
342 void dump(Formatter *f) const;
343 static void generate_test_instances(list<RGWObjManifestPart*>& o);
344};
345WRITE_CLASS_ENCODER(RGWObjManifestPart)
346
347/*
348 The manifest defines a set of rules for structuring the object parts.
349 There are a few terms to note:
350 - head: the head part of the object, which is the part that contains
351 the first chunk of data. An object might not have a head (as in the
352 case of multipart-part objects).
353 - stripe: data portion of a single rgw object that resides on a single
354 rados object.
355 - part: a collection of stripes that make a contiguous part of an
356 object. A regular object will only have one part (although might have
357 many stripes), a multipart object might have many parts. Each part
358 has a fixed stripe size, although the last stripe of a part might
359 be smaller than that. Consecutive parts may be merged if their stripe
360 value is the same.
361*/
362
363struct RGWObjManifestRule {
364 uint32_t start_part_num;
365 uint64_t start_ofs;
366 uint64_t part_size; /* each part size, 0 if there's no part size, meaning it's unlimited */
367 uint64_t stripe_max_size; /* underlying obj max size */
368 string override_prefix;
369
370 RGWObjManifestRule() : start_part_num(0), start_ofs(0), part_size(0), stripe_max_size(0) {}
371 RGWObjManifestRule(uint32_t _start_part_num, uint64_t _start_ofs, uint64_t _part_size, uint64_t _stripe_max_size) :
372 start_part_num(_start_part_num), start_ofs(_start_ofs), part_size(_part_size), stripe_max_size(_stripe_max_size) {}
373
374 void encode(bufferlist& bl) const {
375 ENCODE_START(2, 1, bl);
11fdf7f2
TL
376 encode(start_part_num, bl);
377 encode(start_ofs, bl);
378 encode(part_size, bl);
379 encode(stripe_max_size, bl);
380 encode(override_prefix, bl);
7c673cae
FG
381 ENCODE_FINISH(bl);
382 }
383
11fdf7f2 384 void decode(bufferlist::const_iterator& bl) {
7c673cae 385 DECODE_START(2, bl);
11fdf7f2
TL
386 decode(start_part_num, bl);
387 decode(start_ofs, bl);
388 decode(part_size, bl);
389 decode(stripe_max_size, bl);
7c673cae 390 if (struct_v >= 2)
11fdf7f2 391 decode(override_prefix, bl);
7c673cae
FG
392 DECODE_FINISH(bl);
393 }
394 void dump(Formatter *f) const;
395};
396WRITE_CLASS_ENCODER(RGWObjManifestRule)
397
398class RGWObjManifest {
399protected:
400 bool explicit_objs; /* old manifest? */
401 map<uint64_t, RGWObjManifestPart> objs;
402
403 uint64_t obj_size;
404
405 rgw_obj obj;
406 uint64_t head_size;
11fdf7f2 407 rgw_placement_rule head_placement_rule;
7c673cae
FG
408
409 uint64_t max_head_size;
410 string prefix;
411 rgw_bucket_placement tail_placement; /* might be different than the original bucket,
412 as object might have been copied across pools */
413 map<uint64_t, RGWObjManifestRule> rules;
414
415 string tail_instance; /* tail object's instance */
416
417 void convert_to_explicit(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params);
418 int append_explicit(RGWObjManifest& m, const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params);
419 void append_rules(RGWObjManifest& m, map<uint64_t, RGWObjManifestRule>::iterator& iter, string *override_prefix);
420
421 void update_iterators() {
422 begin_iter.seek(0);
423 end_iter.seek(obj_size);
424 }
425public:
426
427 RGWObjManifest() : explicit_objs(false), obj_size(0), head_size(0), max_head_size(0),
428 begin_iter(this), end_iter(this) {}
429 RGWObjManifest(const RGWObjManifest& rhs) {
430 *this = rhs;
431 }
432 RGWObjManifest& operator=(const RGWObjManifest& rhs) {
433 explicit_objs = rhs.explicit_objs;
434 objs = rhs.objs;
435 obj_size = rhs.obj_size;
436 obj = rhs.obj;
437 head_size = rhs.head_size;
438 max_head_size = rhs.max_head_size;
439 prefix = rhs.prefix;
440 tail_placement = rhs.tail_placement;
441 rules = rhs.rules;
442 tail_instance = rhs.tail_instance;
443
444 begin_iter.set_manifest(this);
445 end_iter.set_manifest(this);
446
447 begin_iter.seek(rhs.begin_iter.get_ofs());
448 end_iter.seek(rhs.end_iter.get_ofs());
449
450 return *this;
451 }
452
453 map<uint64_t, RGWObjManifestPart>& get_explicit_objs() {
454 return objs;
455 }
456
457
458 void set_explicit(uint64_t _size, map<uint64_t, RGWObjManifestPart>& _objs) {
459 explicit_objs = true;
460 obj_size = _size;
461 objs.swap(_objs);
462 }
463
464 void get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, string *override_prefix, rgw_obj_select *location);
465
466 void set_trivial_rule(uint64_t tail_ofs, uint64_t stripe_max_size) {
467 RGWObjManifestRule rule(0, tail_ofs, 0, stripe_max_size);
468 rules[0] = rule;
469 max_head_size = tail_ofs;
470 }
471
472 void set_multipart_part_rule(uint64_t stripe_max_size, uint64_t part_num) {
473 RGWObjManifestRule rule(0, 0, 0, stripe_max_size);
474 rule.start_part_num = part_num;
475 rules[0] = rule;
476 max_head_size = 0;
477 }
478
479 void encode(bufferlist& bl) const {
480 ENCODE_START(7, 6, bl);
11fdf7f2
TL
481 encode(obj_size, bl);
482 encode(objs, bl);
483 encode(explicit_objs, bl);
484 encode(obj, bl);
485 encode(head_size, bl);
486 encode(max_head_size, bl);
487 encode(prefix, bl);
488 encode(rules, bl);
7c673cae 489 bool encode_tail_bucket = !(tail_placement.bucket == obj.bucket);
11fdf7f2 490 encode(encode_tail_bucket, bl);
7c673cae 491 if (encode_tail_bucket) {
11fdf7f2 492 encode(tail_placement.bucket, bl);
7c673cae
FG
493 }
494 bool encode_tail_instance = (tail_instance != obj.key.instance);
11fdf7f2 495 encode(encode_tail_instance, bl);
7c673cae 496 if (encode_tail_instance) {
11fdf7f2 497 encode(tail_instance, bl);
7c673cae 498 }
11fdf7f2
TL
499 encode(head_placement_rule, bl);
500 encode(tail_placement.placement_rule, bl);
7c673cae
FG
501 ENCODE_FINISH(bl);
502 }
503
11fdf7f2 504 void decode(bufferlist::const_iterator& bl) {
7c673cae 505 DECODE_START_LEGACY_COMPAT_LEN_32(7, 2, 2, bl);
11fdf7f2
TL
506 decode(obj_size, bl);
507 decode(objs, bl);
7c673cae 508 if (struct_v >= 3) {
11fdf7f2
TL
509 decode(explicit_objs, bl);
510 decode(obj, bl);
511 decode(head_size, bl);
512 decode(max_head_size, bl);
513 decode(prefix, bl);
514 decode(rules, bl);
7c673cae
FG
515 } else {
516 explicit_objs = true;
517 if (!objs.empty()) {
518 map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin();
519 obj = iter->second.loc;
520 head_size = iter->second.size;
521 max_head_size = head_size;
522 }
523 }
524
525 if (explicit_objs && head_size > 0 && !objs.empty()) {
526 /* patch up manifest due to issue 16435:
527 * the first object in the explicit objs list might not be the one we need to access, use the
528 * head object instead if set. This would happen if we had an old object that was created
529 * when the explicit objs manifest was around, and it got copied.
530 */
531 rgw_obj& obj_0 = objs[0].loc;
532 if (!obj_0.get_oid().empty() && obj_0.key.ns.empty()) {
533 objs[0].loc = obj;
534 objs[0].size = head_size;
535 }
536 }
537
538 if (struct_v >= 4) {
539 if (struct_v < 6) {
11fdf7f2 540 decode(tail_placement.bucket, bl);
7c673cae
FG
541 } else {
542 bool need_to_decode;
11fdf7f2 543 decode(need_to_decode, bl);
7c673cae 544 if (need_to_decode) {
11fdf7f2 545 decode(tail_placement.bucket, bl);
7c673cae
FG
546 } else {
547 tail_placement.bucket = obj.bucket;
548 }
549 }
550 }
551
552 if (struct_v >= 5) {
553 if (struct_v < 6) {
11fdf7f2 554 decode(tail_instance, bl);
7c673cae
FG
555 } else {
556 bool need_to_decode;
11fdf7f2 557 decode(need_to_decode, bl);
7c673cae 558 if (need_to_decode) {
11fdf7f2 559 decode(tail_instance, bl);
7c673cae
FG
560 } else {
561 tail_instance = obj.key.instance;
562 }
563 }
564 } else { // old object created before 'tail_instance' field added to manifest
565 tail_instance = obj.key.instance;
566 }
567
568 if (struct_v >= 7) {
11fdf7f2
TL
569 decode(head_placement_rule, bl);
570 decode(tail_placement.placement_rule, bl);
7c673cae
FG
571 }
572
573 update_iterators();
574 DECODE_FINISH(bl);
575 }
576
577 void dump(Formatter *f) const;
578 static void generate_test_instances(list<RGWObjManifest*>& o);
579
11fdf7f2
TL
580 int append(RGWObjManifest& m, const RGWZoneGroup& zonegroup,
581 const RGWZoneParams& zone_params);
582 int append(RGWObjManifest& m, RGWSI_Zone *zone_svc);
7c673cae
FG
583
584 bool get_rule(uint64_t ofs, RGWObjManifestRule *rule);
585
586 bool empty() {
587 if (explicit_objs)
588 return objs.empty();
589 return rules.empty();
590 }
591
592 bool has_explicit_objs() {
593 return explicit_objs;
594 }
595
596 bool has_tail() {
597 if (explicit_objs) {
598 if (objs.size() == 1) {
599 map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin();
600 rgw_obj& o = iter->second.loc;
601 return !(obj == o);
602 }
603 return (objs.size() >= 2);
604 }
605 return (obj_size > head_size);
606 }
607
11fdf7f2 608 void set_head(const rgw_placement_rule& placement_rule, const rgw_obj& _o, uint64_t _s) {
7c673cae
FG
609 head_placement_rule = placement_rule;
610 obj = _o;
611 head_size = _s;
612
613 if (explicit_objs && head_size > 0) {
614 objs[0].loc = obj;
615 objs[0].size = head_size;
616 }
617 }
618
619 const rgw_obj& get_obj() {
620 return obj;
621 }
622
11fdf7f2 623 void set_tail_placement(const rgw_placement_rule& placement_rule, const rgw_bucket& _b) {
7c673cae
FG
624 tail_placement.placement_rule = placement_rule;
625 tail_placement.bucket = _b;
626 }
627
628 const rgw_bucket_placement& get_tail_placement() {
629 return tail_placement;
630 }
631
11fdf7f2 632 const rgw_placement_rule& get_head_placement_rule() {
7c673cae
FG
633 return head_placement_rule;
634 }
635
636 void set_prefix(const string& _p) {
637 prefix = _p;
638 }
639
640 const string& get_prefix() {
641 return prefix;
642 }
643
644 void set_tail_instance(const string& _ti) {
645 tail_instance = _ti;
646 }
647
648 const string& get_tail_instance() {
649 return tail_instance;
650 }
651
652 void set_head_size(uint64_t _s) {
653 head_size = _s;
654 }
655
656 void set_obj_size(uint64_t s) {
657 obj_size = s;
658
659 update_iterators();
660 }
661
662 uint64_t get_obj_size() {
663 return obj_size;
664 }
665
666 uint64_t get_head_size() {
667 return head_size;
668 }
669
7c673cae
FG
670 uint64_t get_max_head_size() {
671 return max_head_size;
672 }
673
674 class obj_iterator {
675 RGWObjManifest *manifest;
676 uint64_t part_ofs; /* where current part starts */
677 uint64_t stripe_ofs; /* where current stripe starts */
678 uint64_t ofs; /* current position within the object */
679 uint64_t stripe_size; /* current part size */
680
681 int cur_part_id;
682 int cur_stripe;
683 string cur_override_prefix;
684
685 rgw_obj_select location;
686
687 map<uint64_t, RGWObjManifestRule>::iterator rule_iter;
688 map<uint64_t, RGWObjManifestRule>::iterator next_rule_iter;
689
690 map<uint64_t, RGWObjManifestPart>::iterator explicit_iter;
691
692 void init() {
693 part_ofs = 0;
694 stripe_ofs = 0;
695 ofs = 0;
696 stripe_size = 0;
697 cur_part_id = 0;
698 cur_stripe = 0;
699 }
700
701 void update_explicit_pos();
702
703
704 protected:
705
706 void set_manifest(RGWObjManifest *m) {
707 manifest = m;
708 }
709
710 public:
711 obj_iterator() : manifest(NULL) {
712 init();
713 }
714 explicit obj_iterator(RGWObjManifest *_m) : manifest(_m) {
715 init();
716 if (!manifest->empty()) {
717 seek(0);
718 }
719 }
720 obj_iterator(RGWObjManifest *_m, uint64_t _ofs) : manifest(_m) {
721 init();
722 if (!manifest->empty()) {
723 seek(_ofs);
724 }
725 }
726 void seek(uint64_t ofs);
727
728 void operator++();
729 bool operator==(const obj_iterator& rhs) {
730 return (ofs == rhs.ofs);
731 }
732 bool operator!=(const obj_iterator& rhs) {
733 return (ofs != rhs.ofs);
734 }
735 const rgw_obj_select& get_location() {
736 return location;
737 }
738
739 /* start of current stripe */
740 uint64_t get_stripe_ofs() {
741 if (manifest->explicit_objs) {
742 return explicit_iter->first;
743 }
744 return stripe_ofs;
745 }
746
747 /* current ofs relative to start of rgw object */
748 uint64_t get_ofs() const {
749 return ofs;
750 }
751
752 /* stripe number */
753 int get_cur_stripe() const {
754 return cur_stripe;
755 }
756
757 /* current stripe size */
758 uint64_t get_stripe_size() {
759 if (manifest->explicit_objs) {
760 return explicit_iter->second.size;
761 }
762 return stripe_size;
763 }
764
765 /* offset where data starts within current stripe */
766 uint64_t location_ofs() {
767 if (manifest->explicit_objs) {
768 return explicit_iter->second.loc_ofs;
769 }
770 return 0; /* all stripes start at zero offset */
771 }
772
773 void update_location();
774
775 friend class RGWObjManifest;
11fdf7f2 776 void dump(Formatter *f) const;
7c673cae
FG
777 };
778
779 const obj_iterator& obj_begin();
780 const obj_iterator& obj_end();
781 obj_iterator obj_find(uint64_t ofs);
782
783 obj_iterator begin_iter;
784 obj_iterator end_iter;
785
786 /*
787 * simple object generator. Using a simple single rule manifest.
788 */
789 class generator {
790 RGWObjManifest *manifest;
791 uint64_t last_ofs;
792 uint64_t cur_part_ofs;
793 int cur_part_id;
794 int cur_stripe;
795 uint64_t cur_stripe_size;
796 string cur_oid;
797
798 string oid_prefix;
799
800 rgw_obj_select cur_obj;
7c673cae
FG
801
802 RGWObjManifestRule rule;
803
804 public:
805 generator() : manifest(NULL), last_ofs(0), cur_part_ofs(0), cur_part_id(0),
806 cur_stripe(0), cur_stripe_size(0) {}
11fdf7f2
TL
807 int create_begin(CephContext *cct, RGWObjManifest *manifest,
808 const rgw_placement_rule& head_placement_rule,
809 const rgw_placement_rule *tail_placement_rule,
810 const rgw_bucket& bucket,
811 const rgw_obj& obj);
7c673cae
FG
812
813 int create_next(uint64_t ofs);
814
815 rgw_raw_obj get_cur_obj(RGWZoneGroup& zonegroup, RGWZoneParams& zone_params) { return cur_obj.get_raw_obj(zonegroup, zone_params); }
11fdf7f2 816 rgw_raw_obj get_cur_obj(RGWRados *store) const { return cur_obj.get_raw_obj(store); }
7c673cae
FG
817
818 /* total max size of current stripe (including head obj) */
11fdf7f2 819 uint64_t cur_stripe_max_size() const {
7c673cae
FG
820 return cur_stripe_size;
821 }
822 };
823};
824WRITE_CLASS_ENCODER(RGWObjManifest)
825
826struct RGWUploadPartInfo {
827 uint32_t num;
828 uint64_t size;
829 uint64_t accounted_size{0};
830 string etag;
831 ceph::real_time modified;
832 RGWObjManifest manifest;
833 RGWCompressionInfo cs_info;
834
835 RGWUploadPartInfo() : num(0), size(0) {}
836
837 void encode(bufferlist& bl) const {
838 ENCODE_START(4, 2, bl);
11fdf7f2
TL
839 encode(num, bl);
840 encode(size, bl);
841 encode(etag, bl);
842 encode(modified, bl);
843 encode(manifest, bl);
844 encode(cs_info, bl);
845 encode(accounted_size, bl);
7c673cae
FG
846 ENCODE_FINISH(bl);
847 }
11fdf7f2 848 void decode(bufferlist::const_iterator& bl) {
7c673cae 849 DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl);
11fdf7f2
TL
850 decode(num, bl);
851 decode(size, bl);
852 decode(etag, bl);
853 decode(modified, bl);
7c673cae 854 if (struct_v >= 3)
11fdf7f2 855 decode(manifest, bl);
7c673cae 856 if (struct_v >= 4) {
11fdf7f2
TL
857 decode(cs_info, bl);
858 decode(accounted_size, bl);
7c673cae
FG
859 } else {
860 accounted_size = size;
861 }
862 DECODE_FINISH(bl);
863 }
864 void dump(Formatter *f) const;
865 static void generate_test_instances(list<RGWUploadPartInfo*>& o);
866};
867WRITE_CLASS_ENCODER(RGWUploadPartInfo)
868
869struct RGWObjState {
870 rgw_obj obj;
871 bool is_atomic;
872 bool has_attrs;
873 bool exists;
874 uint64_t size; //< size of raw object
875 uint64_t accounted_size{0}; //< size before compression, encryption
876 ceph::real_time mtime;
877 uint64_t epoch;
878 bufferlist obj_tag;
181888fb 879 bufferlist tail_tag;
7c673cae
FG
880 string write_tag;
881 bool fake_tag;
882 RGWObjManifest manifest;
883 bool has_manifest;
884 string shadow_obj;
885 bool has_data;
886 bufferlist data;
887 bool prefetch_data;
888 bool keep_tail;
889 bool is_olh;
890 bufferlist olh_tag;
891 uint64_t pg_ver;
892 uint32_t zone_short_id;
893
894 /* important! don't forget to update copy constructor */
895
896 RGWObjVersionTracker objv_tracker;
897
898 map<string, bufferlist> attrset;
899 RGWObjState() : is_atomic(false), has_attrs(0), exists(false),
900 size(0), epoch(0), fake_tag(false), has_manifest(false),
901 has_data(false), prefetch_data(false), keep_tail(false), is_olh(false),
902 pg_ver(0), zone_short_id(0) {}
903 RGWObjState(const RGWObjState& rhs) : obj (rhs.obj) {
904 is_atomic = rhs.is_atomic;
905 has_attrs = rhs.has_attrs;
906 exists = rhs.exists;
907 size = rhs.size;
908 accounted_size = rhs.accounted_size;
909 mtime = rhs.mtime;
910 epoch = rhs.epoch;
911 if (rhs.obj_tag.length()) {
912 obj_tag = rhs.obj_tag;
913 }
181888fb
FG
914 if (rhs.tail_tag.length()) {
915 tail_tag = rhs.tail_tag;
916 }
7c673cae
FG
917 write_tag = rhs.write_tag;
918 fake_tag = rhs.fake_tag;
919 if (rhs.has_manifest) {
920 manifest = rhs.manifest;
921 }
922 has_manifest = rhs.has_manifest;
923 shadow_obj = rhs.shadow_obj;
924 has_data = rhs.has_data;
925 if (rhs.data.length()) {
926 data = rhs.data;
927 }
928 prefetch_data = rhs.prefetch_data;
929 keep_tail = rhs.keep_tail;
930 is_olh = rhs.is_olh;
931 objv_tracker = rhs.objv_tracker;
932 pg_ver = rhs.pg_ver;
933 }
934
935 bool get_attr(string name, bufferlist& dest) {
936 map<string, bufferlist>::iterator iter = attrset.find(name);
937 if (iter != attrset.end()) {
938 dest = iter->second;
939 return true;
940 }
941 return false;
942 }
943};
944
945struct RGWRawObjState {
946 rgw_raw_obj obj;
947 bool has_attrs{false};
948 bool exists{false};
949 uint64_t size{0};
950 ceph::real_time mtime;
11fdf7f2 951 uint64_t epoch{0};
7c673cae
FG
952 bufferlist obj_tag;
953 bool has_data{false};
954 bufferlist data;
955 bool prefetch_data{false};
956 uint64_t pg_ver{0};
957
958 /* important! don't forget to update copy constructor */
959
960 RGWObjVersionTracker objv_tracker;
961
962 map<string, bufferlist> attrset;
963 RGWRawObjState() {}
964 RGWRawObjState(const RGWRawObjState& rhs) : obj (rhs.obj) {
965 has_attrs = rhs.has_attrs;
966 exists = rhs.exists;
967 size = rhs.size;
968 mtime = rhs.mtime;
969 epoch = rhs.epoch;
970 if (rhs.obj_tag.length()) {
971 obj_tag = rhs.obj_tag;
972 }
973 has_data = rhs.has_data;
974 if (rhs.data.length()) {
975 data = rhs.data;
976 }
977 prefetch_data = rhs.prefetch_data;
978 pg_ver = rhs.pg_ver;
979 objv_tracker = rhs.objv_tracker;
980 }
981};
982
983struct RGWPoolIterCtx {
984 librados::IoCtx io_ctx;
985 librados::NObjectIterator iter;
986};
987
988struct RGWListRawObjsCtx {
989 bool initialized;
990 RGWPoolIterCtx iter_ctx;
991
992 RGWListRawObjsCtx() : initialized(false) {}
993};
994
7c673cae
FG
995struct objexp_hint_entry {
996 string tenant;
997 string bucket_name;
998 string bucket_id;
999 rgw_obj_key obj_key;
1000 ceph::real_time exp_time;
1001
1002 void encode(bufferlist& bl) const {
1003 ENCODE_START(2, 1, bl);
11fdf7f2
TL
1004 encode(bucket_name, bl);
1005 encode(bucket_id, bl);
1006 encode(obj_key, bl);
1007 encode(exp_time, bl);
1008 encode(tenant, bl);
7c673cae
FG
1009 ENCODE_FINISH(bl);
1010 }
1011
11fdf7f2 1012 void decode(bufferlist::const_iterator& bl) {
7c673cae
FG
1013 // XXX Do we want DECODE_START_LEGACY_COMPAT_LEN(2, 1, 1, bl); ?
1014 DECODE_START(2, bl);
11fdf7f2
TL
1015 decode(bucket_name, bl);
1016 decode(bucket_id, bl);
1017 decode(obj_key, bl);
1018 decode(exp_time, bl);
7c673cae 1019 if (struct_v >= 2) {
11fdf7f2 1020 decode(tenant, bl);
7c673cae
FG
1021 } else {
1022 tenant.clear();
1023 }
1024 DECODE_FINISH(bl);
1025 }
1026};
1027WRITE_CLASS_ENCODER(objexp_hint_entry)
1028
7c673cae
FG
1029class RGWDataChangesLog;
1030class RGWMetaSyncStatusManager;
1031class RGWDataSyncStatusManager;
7c673cae 1032class RGWCoroutinesManagerRegistry;
7c673cae
FG
1033
1034class RGWGetBucketStats_CB : public RefCountedObject {
1035protected:
1036 rgw_bucket bucket;
1037 map<RGWObjCategory, RGWStorageStats> *stats;
1038public:
224ce89b 1039 explicit RGWGetBucketStats_CB(const rgw_bucket& _bucket) : bucket(_bucket), stats(NULL) {}
7c673cae
FG
1040 ~RGWGetBucketStats_CB() override {}
1041 virtual void handle_response(int r) = 0;
1042 virtual void set_response(map<RGWObjCategory, RGWStorageStats> *_stats) {
1043 stats = _stats;
1044 }
1045};
1046
1047class RGWGetUserStats_CB : public RefCountedObject {
1048protected:
1049 rgw_user user;
1050 RGWStorageStats stats;
1051public:
1052 explicit RGWGetUserStats_CB(const rgw_user& _user) : user(_user) {}
1053 ~RGWGetUserStats_CB() override {}
1054 virtual void handle_response(int r) = 0;
1055 virtual void set_response(RGWStorageStats& _stats) {
1056 stats = _stats;
1057 }
1058};
1059
1060class RGWGetDirHeader_CB;
1061class RGWGetUserHeader_CB;
1062
11fdf7f2
TL
1063class RGWObjectCtx {
1064 RGWRados *store;
1065 RWLock lock{"RGWObjectCtx"};
1066 void *s{nullptr};
7c673cae 1067
11fdf7f2 1068 std::map<rgw_obj, RGWObjState> objs_state;
7c673cae 1069public:
11fdf7f2
TL
1070 explicit RGWObjectCtx(RGWRados *_store) : store(_store) {}
1071 explicit RGWObjectCtx(RGWRados *_store, void *_s) : store(_store), s(_s) {}
7c673cae 1072
11fdf7f2
TL
1073 void *get_private() {
1074 return s;
1075 }
7c673cae 1076
11fdf7f2
TL
1077 RGWRados *get_store() {
1078 return store;
1079 }
7c673cae 1080
11fdf7f2
TL
1081 RGWObjState *get_state(const rgw_obj& obj) {
1082 RGWObjState *result;
1083 typename std::map<rgw_obj, RGWObjState>::iterator iter;
7c673cae
FG
1084 lock.get_read();
1085 assert (!obj.empty());
1086 iter = objs_state.find(obj);
1087 if (iter != objs_state.end()) {
1088 result = &iter->second;
1089 lock.unlock();
1090 } else {
1091 lock.unlock();
1092 lock.get_write();
1093 result = &objs_state[obj];
1094 lock.unlock();
1095 }
1096 return result;
1097 }
1098
11fdf7f2 1099 void set_atomic(rgw_obj& obj) {
7c673cae
FG
1100 RWLock::WLocker wl(lock);
1101 assert (!obj.empty());
1102 objs_state[obj].is_atomic = true;
1103 }
11fdf7f2 1104 void set_prefetch_data(const rgw_obj& obj) {
7c673cae
FG
1105 RWLock::WLocker wl(lock);
1106 assert (!obj.empty());
1107 objs_state[obj].prefetch_data = true;
1108 }
11fdf7f2
TL
1109
1110 void invalidate(const rgw_obj& obj) {
7c673cae
FG
1111 RWLock::WLocker wl(lock);
1112 auto iter = objs_state.find(obj);
1113 if (iter == objs_state.end()) {
1114 return;
1115 }
1116 bool is_atomic = iter->second.is_atomic;
1117 bool prefetch_data = iter->second.prefetch_data;
1118
1119 objs_state.erase(iter);
1120
1121 if (is_atomic || prefetch_data) {
11fdf7f2
TL
1122 auto& state = objs_state[obj];
1123 state.is_atomic = is_atomic;
1124 state.prefetch_data = prefetch_data;
7c673cae
FG
1125 }
1126 }
1127};
1128
7c673cae
FG
1129class RGWAsyncRadosProcessor;
1130
1131template <class T>
1132class RGWChainedCacheImpl;
1133
1134struct bucket_info_entry {
1135 RGWBucketInfo info;
1136 real_time mtime;
1137 map<string, bufferlist> attrs;
1138};
1139
1140struct tombstone_entry {
1141 ceph::real_time mtime;
1142 uint32_t zone_short_id;
1143 uint64_t pg_ver;
1144
1145 tombstone_entry() = default;
11fdf7f2 1146 explicit tombstone_entry(const RGWObjState& state)
7c673cae
FG
1147 : mtime(state.mtime), zone_short_id(state.zone_short_id),
1148 pg_ver(state.pg_ver) {}
1149};
1150
31f18b77
FG
1151class RGWIndexCompletionManager;
1152
3a9019d9 1153class RGWRados : public AdminSocketHook
7c673cae
FG
1154{
1155 friend class RGWGC;
1156 friend class RGWMetaNotifier;
1157 friend class RGWDataNotifier;
1158 friend class RGWLC;
1159 friend class RGWObjectExpirer;
1160 friend class RGWMetaSyncProcessorThread;
1161 friend class RGWDataSyncProcessorThread;
31f18b77
FG
1162 friend class RGWReshard;
1163 friend class RGWBucketReshard;
f64942e4 1164 friend class RGWBucketReshardLock;
31f18b77 1165 friend class BucketIndexLockGuard;
d2e6a577 1166 friend class RGWCompleteMultipart;
7c673cae 1167
11fdf7f2
TL
1168 static constexpr const char* admin_commands[4][3] = {
1169 { "cache list",
1170 "cache list name=filter,type=CephString,req=false",
1171 "cache list [filter_str]: list object cache, possibly matching substrings" },
1172 { "cache inspect",
1173 "cache inspect name=target,type=CephString,req=true",
1174 "cache inspect target: print cache element" },
1175 { "cache erase",
1176 "cache erase name=target,type=CephString,req=true",
1177 "cache erase target: erase element from cache" },
1178 { "cache zap",
1179 "cache zap",
1180 "cache zap: erase all elements from cache" }
1181 };
3a9019d9 1182
7c673cae
FG
1183 /** Open the pool used as root for this gateway */
1184 int open_root_pool_ctx();
1185 int open_gc_pool_ctx();
1186 int open_lc_pool_ctx();
1187 int open_objexp_pool_ctx();
31f18b77 1188 int open_reshard_pool_ctx();
7c673cae 1189
494da23a
TL
1190 int open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx,
1191 bool mostly_omap);
7c673cae
FG
1192 int open_bucket_index_ctx(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx);
1193 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx, string& bucket_oid);
1194 int open_bucket_index_base(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
1195 string& bucket_oid_base);
1196 int open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
1197 const string& obj_key, string *bucket_obj, int *shard_id);
1198 int open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
1199 int shard_id, string *bucket_obj);
1200 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
1201 map<int, string>& bucket_objs, int shard_id = -1, map<int, string> *bucket_instance_ids = NULL);
1202 template<typename T>
1203 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
1204 map<int, string>& oids, map<int, T>& bucket_objs,
1205 int shard_id = -1, map<int, string> *bucket_instance_ids = NULL);
1206 void build_bucket_index_marker(const string& shard_id_str, const string& shard_marker,
1207 string *marker);
1208
1209 void get_bucket_instance_ids(const RGWBucketInfo& bucket_info, int shard_id, map<int, string> *result);
1210
1211 std::atomic<int64_t> max_req_id = { 0 };
1212 Mutex lock;
7c673cae
FG
1213 SafeTimer *timer;
1214
1215 RGWGC *gc;
1216 RGWLC *lc;
1217 RGWObjectExpirer *obj_expirer;
1218 bool use_gc_thread;
1219 bool use_lc_thread;
1220 bool quota_threads;
1221 bool run_sync_thread;
31f18b77 1222 bool run_reshard_thread;
7c673cae
FG
1223
1224 RGWAsyncRadosProcessor* async_rados;
1225
1226 RGWMetaNotifier *meta_notifier;
1227 RGWDataNotifier *data_notifier;
1228 RGWMetaSyncProcessorThread *meta_sync_processor_thread;
11fdf7f2 1229 RGWSyncTraceManager *sync_tracer = nullptr;
7c673cae
FG
1230 map<string, RGWDataSyncProcessorThread *> data_sync_processor_threads;
1231
b32b8144 1232 boost::optional<rgw::BucketTrimManager> bucket_trim;
7c673cae
FG
1233 RGWSyncLogTrimThread *sync_log_trimmer{nullptr};
1234
1235 Mutex meta_sync_thread_lock;
1236 Mutex data_sync_thread_lock;
1237
7c673cae 1238 librados::IoCtx root_pool_ctx; // .rgw
11fdf7f2
TL
1239
1240 double inject_notify_timeout_probability = 0;
1241 unsigned max_notify_retries = 0;
7c673cae
FG
1242
1243 friend class RGWWatcher;
1244
1245 Mutex bucket_id_lock;
1246
1247 // This field represents the number of bucket index object shards
1248 uint32_t bucket_index_max_shards;
1249
1250 int get_obj_head_ioctx(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx *ioctx);
1251 int get_obj_head_ref(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_rados_ref *ref);
224ce89b 1252 int get_system_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
7c673cae
FG
1253 uint64_t max_bucket_id;
1254
1255 int get_olh_target_state(RGWObjectCtx& rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
1256 RGWObjState *olh_state, RGWObjState **target_state);
7c673cae
FG
1257 int get_obj_state_impl(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
1258 bool follow_olh, bool assume_noent = false);
1259 int append_atomic_test(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
1260 librados::ObjectOperation& op, RGWObjState **state);
11fdf7f2 1261 int append_atomic_test(const RGWObjState* astate, librados::ObjectOperation& op);
7c673cae
FG
1262
1263 int update_placement_map();
1264 int store_bucket_info(RGWBucketInfo& info, map<string, bufferlist> *pattrs, RGWObjVersionTracker *objv_tracker, bool exclusive);
1265
1266 void remove_rgw_head_obj(librados::ObjectWriteOperation& op);
1267 void cls_obj_check_prefix_exist(librados::ObjectOperation& op, const string& prefix, bool fail_if_exist);
1268 void cls_obj_check_mtime(librados::ObjectOperation& op, const real_time& mtime, bool high_precision_time, RGWCheckMTimeType type);
1269protected:
1270 CephContext *cct;
1271
494da23a 1272 librados::Rados rados;
7c673cae
FG
1273
1274 using RGWChainedCacheImpl_bucket_info_entry = RGWChainedCacheImpl<bucket_info_entry>;
1275 RGWChainedCacheImpl_bucket_info_entry *binfo_cache;
1276
1277 using tombstone_cache_t = lru_map<rgw_obj, tombstone_entry>;
1278 tombstone_cache_t *obj_tombstone_cache;
1279
1280 librados::IoCtx gc_pool_ctx; // .rgw.gc
1281 librados::IoCtx lc_pool_ctx; // .rgw.lc
1282 librados::IoCtx objexp_pool_ctx;
31f18b77 1283 librados::IoCtx reshard_pool_ctx;
7c673cae 1284
11fdf7f2 1285 bool pools_initialized;
7c673cae 1286
11fdf7f2 1287 RGWQuotaHandler *quota_handler;
7c673cae 1288
11fdf7f2 1289 RGWCoroutinesManagerRegistry *cr_registry;
7c673cae 1290
11fdf7f2
TL
1291 RGWSyncModuleInstanceRef sync_module;
1292 bool writeable_zone{false};
7c673cae 1293
11fdf7f2 1294 RGWIndexCompletionManager *index_completion_manager{nullptr};
7c673cae 1295
11fdf7f2
TL
1296 bool use_cache{false};
1297public:
1298 RGWRados(): lock("rados_timer_lock"), timer(NULL),
1299 gc(NULL), lc(NULL), obj_expirer(NULL), use_gc_thread(false), use_lc_thread(false), quota_threads(false),
1300 run_sync_thread(false), run_reshard_thread(false), async_rados(nullptr), meta_notifier(NULL),
1301 data_notifier(NULL), meta_sync_processor_thread(NULL),
1302 meta_sync_thread_lock("meta_sync_thread_lock"), data_sync_thread_lock("data_sync_thread_lock"),
1303 bucket_id_lock("rados_bucket_id"),
1304 bucket_index_max_shards(0),
1305 max_bucket_id(0), cct(NULL),
11fdf7f2
TL
1306 binfo_cache(NULL), obj_tombstone_cache(nullptr),
1307 pools_initialized(false),
1308 quota_handler(NULL),
1309 cr_registry(NULL),
1310 meta_mgr(NULL), data_log(NULL), reshard(NULL) {}
7c673cae 1311
11fdf7f2
TL
1312 RGWRados& set_use_cache(bool status) {
1313 use_cache = status;
1314 return *this;
7c673cae
FG
1315 }
1316
11fdf7f2
TL
1317 RGWLC *get_lc() {
1318 return lc;
7c673cae
FG
1319 }
1320
11fdf7f2
TL
1321 RGWRados& set_run_gc_thread(bool _use_gc_thread) {
1322 use_gc_thread = _use_gc_thread;
1323 return *this;
7c673cae
FG
1324 }
1325
11fdf7f2
TL
1326 RGWRados& set_run_lc_thread(bool _use_lc_thread) {
1327 use_lc_thread = _use_lc_thread;
1328 return *this;
7c673cae
FG
1329 }
1330
11fdf7f2
TL
1331 RGWRados& set_run_quota_threads(bool _run_quota_threads) {
1332 quota_threads = _run_quota_threads;
1333 return *this;
7c673cae
FG
1334 }
1335
11fdf7f2
TL
1336 RGWRados& set_run_sync_thread(bool _run_sync_thread) {
1337 run_sync_thread = _run_sync_thread;
1338 return *this;
7c673cae
FG
1339 }
1340
11fdf7f2
TL
1341 RGWRados& set_run_reshard_thread(bool _run_reshard_thread) {
1342 run_reshard_thread = _run_reshard_thread;
1343 return *this;
7c673cae
FG
1344 }
1345
11fdf7f2
TL
1346 uint64_t get_new_req_id() {
1347 return ++max_req_id;
7c673cae
FG
1348 }
1349
11fdf7f2
TL
1350 librados::IoCtx* get_lc_pool_ctx() {
1351 return &lc_pool_ctx;
7c673cae 1352 }
11fdf7f2
TL
1353 void set_context(CephContext *_cct) {
1354 cct = _cct;
7c673cae 1355 }
31f18b77 1356
11fdf7f2
TL
1357 RGWServices svc;
1358
1359 /**
1360 * AmazonS3 errors contain a HostId string, but it is an opaque base64 blob; we
1361 * try to be more transparent. This has a wrapper so we can update it when zonegroup/zone are changed.
1362 */
1363 string host_id;
31f18b77 1364
7c673cae
FG
1365 // pulls missing periods for period_history
1366 std::unique_ptr<RGWPeriodPuller> period_puller;
1367 // maintains a connected history of periods
1368 std::unique_ptr<RGWPeriodHistory> period_history;
1369
1370 RGWAsyncRadosProcessor* get_async_rados() const { return async_rados; };
1371
1372 RGWMetadataManager *meta_mgr;
1373
1374 RGWDataChangesLog *data_log;
1375
31f18b77
FG
1376 RGWReshard *reshard;
1377 std::shared_ptr<RGWReshardWait> reshard_wait;
1378
7c673cae
FG
1379 virtual ~RGWRados() = default;
1380
1381 tombstone_cache_t *get_tombstone_cache() {
1382 return obj_tombstone_cache;
1383 }
7c673cae
FG
1384 const RGWSyncModuleInstanceRef& get_sync_module() {
1385 return sync_module;
1386 }
11fdf7f2
TL
1387 RGWSyncTraceManager *get_sync_tracer() {
1388 return sync_tracer;
1389 }
7c673cae
FG
1390
1391 int get_required_alignment(const rgw_pool& pool, uint64_t *alignment);
11fdf7f2
TL
1392 void get_max_aligned_size(uint64_t size, uint64_t alignment, uint64_t *max_size);
1393 int get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size, uint64_t *palignment = nullptr);
1394 int get_max_chunk_size(const rgw_placement_rule& placement_rule, const rgw_obj& obj, uint64_t *max_chunk_size, uint64_t *palignment = nullptr);
7c673cae
FG
1395
1396 uint32_t get_max_bucket_shards() {
31f18b77 1397 return rgw_shards_max();
7c673cae
FG
1398 }
1399
181888fb 1400
224ce89b 1401 int get_raw_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
7c673cae 1402
181888fb
FG
1403 int list_raw_objects_init(const rgw_pool& pool, const string& marker, RGWListRawObjsCtx *ctx);
1404 int list_raw_objects_next(const string& prefix_filter, int max,
1405 RGWListRawObjsCtx& ctx, list<string>& oids,
1406 bool *is_truncated);
7c673cae
FG
1407 int list_raw_objects(const rgw_pool& pool, const string& prefix_filter, int max,
1408 RGWListRawObjsCtx& ctx, list<string>& oids,
1409 bool *is_truncated);
181888fb 1410 string list_raw_objs_get_cursor(RGWListRawObjsCtx& ctx);
7c673cae 1411
7c673cae
FG
1412 CephContext *ctx() { return cct; }
1413 /** do all necessary setup of the storage device */
11fdf7f2 1414 int initialize(CephContext *_cct) {
7c673cae 1415 set_context(_cct);
7c673cae
FG
1416 return initialize();
1417 }
1418 /** Initialize the RADOS instance and prepare to do other ops */
11fdf7f2
TL
1419 int init_svc(bool raw);
1420 int init_rados();
7c673cae 1421 int init_complete();
7c673cae
FG
1422 int initialize();
1423 void finalize();
1424
224ce89b 1425 int register_to_service_map(const string& daemon_type, const map<string, string>& meta);
11fdf7f2 1426 int update_service_map(std::map<std::string, std::string>&& status);
7c673cae
FG
1427
1428 /// list logs
1429 int log_list_init(const string& prefix, RGWAccessHandle *handle);
1430 int log_list_next(RGWAccessHandle handle, string *name);
1431
1432 /// remove log
1433 int log_remove(const string& name);
1434
1435 /// show log
1436 int log_show_init(const string& name, RGWAccessHandle *handle);
1437 int log_show_next(RGWAccessHandle handle, rgw_log_entry *entry);
1438
1439 // log bandwidth info
1440 int log_usage(map<rgw_user_bucket, RGWUsageBatch>& usage_info);
11fdf7f2
TL
1441 int read_usage(const rgw_user& user, const string& bucket_name, uint64_t start_epoch, uint64_t end_epoch,
1442 uint32_t max_entries, bool *is_truncated, RGWUsageIter& read_iter, map<rgw_user_bucket,
1443 rgw_usage_log_entry>& usage);
1444 int trim_usage(const rgw_user& user, const string& bucket_name, uint64_t start_epoch, uint64_t end_epoch);
1445 int clear_usage();
7c673cae
FG
1446
1447 int create_pool(const rgw_pool& pool);
1448
7c673cae 1449 int init_bucket_index(RGWBucketInfo& bucket_info, int num_shards);
f64942e4 1450 int clean_bucket_index(RGWBucketInfo& bucket_info, int num_shards);
7c673cae
FG
1451 void create_bucket_id(string *bucket_id);
1452
11fdf7f2
TL
1453 bool get_obj_data_pool(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_pool *pool);
1454 bool obj_to_raw(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj);
7c673cae 1455
11fdf7f2 1456 int create_bucket(const RGWUserInfo& owner, rgw_bucket& bucket,
7c673cae 1457 const string& zonegroup_id,
11fdf7f2 1458 const rgw_placement_rule& placement_rule,
7c673cae
FG
1459 const string& swift_ver_location,
1460 const RGWQuotaInfo * pquota_info,
1461 map<std::string,bufferlist>& attrs,
1462 RGWBucketInfo& bucket_info,
1463 obj_version *pobjv,
1464 obj_version *pep_objv,
1465 ceph::real_time creation_time,
1466 rgw_bucket *master_bucket,
1467 uint32_t *master_num_shards,
1468 bool exclusive = true);
7c673cae
FG
1469
1470 RGWCoroutinesManagerRegistry *get_cr_registry() { return cr_registry; }
1471
7c673cae
FG
1472 struct BucketShard {
1473 RGWRados *store;
1474 rgw_bucket bucket;
1475 int shard_id;
1476 librados::IoCtx index_ctx;
1477 string bucket_obj;
1478
1479 explicit BucketShard(RGWRados *_store) : store(_store), shard_id(-1) {}
f64942e4
AA
1480 int init(const rgw_bucket& _bucket, const rgw_obj& obj, RGWBucketInfo* out);
1481 int init(const rgw_bucket& _bucket, int sid, RGWBucketInfo* out);
a8e16298 1482 int init(const RGWBucketInfo& bucket_info, const rgw_obj& obj);
b32b8144 1483 int init(const RGWBucketInfo& bucket_info, int sid);
7c673cae
FG
1484 };
1485
1486 class Object {
1487 RGWRados *store;
1488 RGWBucketInfo bucket_info;
1489 RGWObjectCtx& ctx;
1490 rgw_obj obj;
1491
1492 BucketShard bs;
1493
1494 RGWObjState *state;
1495
1496 bool versioning_disabled;
1497
1498 bool bs_initialized;
1499
1500 protected:
1501 int get_state(RGWObjState **pstate, bool follow_olh, bool assume_noent = false);
1502 void invalidate_state();
1503
1504 int prepare_atomic_modification(librados::ObjectWriteOperation& op, bool reset_obj, const string *ptag,
181888fb 1505 const char *ifmatch, const char *ifnomatch, bool removal_op, bool modify_tail);
7c673cae
FG
1506 int complete_atomic_modification();
1507
1508 public:
1509 Object(RGWRados *_store, const RGWBucketInfo& _bucket_info, RGWObjectCtx& _ctx, const rgw_obj& _obj) : store(_store), bucket_info(_bucket_info),
1510 ctx(_ctx), obj(_obj), bs(store),
1511 state(NULL), versioning_disabled(false),
1512 bs_initialized(false) {}
1513
1514 RGWRados *get_store() { return store; }
1515 rgw_obj& get_obj() { return obj; }
1516 RGWObjectCtx& get_ctx() { return ctx; }
1517 RGWBucketInfo& get_bucket_info() { return bucket_info; }
1518 int get_manifest(RGWObjManifest **pmanifest);
1519
1520 int get_bucket_shard(BucketShard **pbs) {
1521 if (!bs_initialized) {
f64942e4
AA
1522 int r =
1523 bs.init(bucket_info.bucket, obj, nullptr /* no RGWBucketInfo */);
7c673cae
FG
1524 if (r < 0) {
1525 return r;
1526 }
1527 bs_initialized = true;
1528 }
1529 *pbs = &bs;
1530 return 0;
1531 }
1532
1533 void set_versioning_disabled(bool status) {
1534 versioning_disabled = status;
1535 }
1536
1537 bool versioning_enabled() {
1538 return (!versioning_disabled && bucket_info.versioning_enabled());
1539 }
1540
1541 struct Read {
1542 RGWRados::Object *source;
1543
1544 struct GetObjState {
11fdf7f2
TL
1545 map<rgw_pool, librados::IoCtx> io_ctxs;
1546 rgw_pool cur_pool;
1547 librados::IoCtx *cur_ioctx{nullptr};
7c673cae
FG
1548 rgw_obj obj;
1549 rgw_raw_obj head_obj;
1550 } state;
1551
1552 struct ConditionParams {
1553 const ceph::real_time *mod_ptr;
1554 const ceph::real_time *unmod_ptr;
1555 bool high_precision_time;
1556 uint32_t mod_zone_id;
1557 uint64_t mod_pg_ver;
1558 const char *if_match;
1559 const char *if_nomatch;
1560
1561 ConditionParams() :
1562 mod_ptr(NULL), unmod_ptr(NULL), high_precision_time(false), mod_zone_id(0), mod_pg_ver(0),
1563 if_match(NULL), if_nomatch(NULL) {}
1564 } conds;
1565
1566 struct Params {
1567 ceph::real_time *lastmod;
1568 uint64_t *obj_size;
1569 map<string, bufferlist> *attrs;
7c673cae 1570
31f18b77 1571 Params() : lastmod(NULL), obj_size(NULL), attrs(NULL) {}
7c673cae
FG
1572 } params;
1573
1574 explicit Read(RGWRados::Object *_source) : source(_source) {}
1575
1576 int prepare();
1577 static int range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end);
1578 int read(int64_t ofs, int64_t end, bufferlist& bl);
1579 int iterate(int64_t ofs, int64_t end, RGWGetDataCB *cb);
1580 int get_attr(const char *name, bufferlist& dest);
1581 };
1582
1583 struct Write {
1584 RGWRados::Object *target;
1585
1586 struct MetaParams {
1587 ceph::real_time *mtime;
1588 map<std::string, bufferlist>* rmattrs;
1589 const bufferlist *data;
1590 RGWObjManifest *manifest;
1591 const string *ptag;
1592 list<rgw_obj_index_key> *remove_objs;
1593 ceph::real_time set_mtime;
1594 rgw_user owner;
1595 RGWObjCategory category;
1596 int flags;
1597 const char *if_match;
1598 const char *if_nomatch;
11fdf7f2 1599 std::optional<uint64_t> olh_epoch;
7c673cae
FG
1600 ceph::real_time delete_at;
1601 bool canceled;
1602 const string *user_data;
31f18b77 1603 rgw_zone_set *zones_trace;
181888fb 1604 bool modify_tail;
3efd9988 1605 bool completeMultipart;
11fdf7f2 1606 bool appendable;
7c673cae
FG
1607
1608 MetaParams() : mtime(NULL), rmattrs(NULL), data(NULL), manifest(NULL), ptag(NULL),
11fdf7f2 1609 remove_objs(NULL), category(RGWObjCategory::Main), flags(0),
91327a77 1610 if_match(NULL), if_nomatch(NULL), canceled(false), user_data(nullptr), zones_trace(nullptr),
11fdf7f2 1611 modify_tail(false), completeMultipart(false), appendable(false) {}
7c673cae
FG
1612 } meta;
1613
1614 explicit Write(RGWRados::Object *_target) : target(_target) {}
1615
1616 int _do_write_meta(uint64_t size, uint64_t accounted_size,
1617 map<std::string, bufferlist>& attrs,
181888fb 1618 bool modify_tail, bool assume_noent,
7c673cae
FG
1619 void *index_op);
1620 int write_meta(uint64_t size, uint64_t accounted_size,
1621 map<std::string, bufferlist>& attrs);
1622 int write_data(const char *data, uint64_t ofs, uint64_t len, bool exclusive);
11fdf7f2
TL
1623 const req_state* get_req_state() {
1624 return (req_state *)target->get_ctx().get_private();
1625 }
7c673cae
FG
1626 };
1627
1628 struct Delete {
1629 RGWRados::Object *target;
1630
1631 struct DeleteParams {
1632 rgw_user bucket_owner;
1633 int versioning_status;
1634 ACLOwner obj_owner; /* needed for creation of deletion marker */
1635 uint64_t olh_epoch;
1636 string marker_version_id;
1637 uint32_t bilog_flags;
1638 list<rgw_obj_index_key> *remove_objs;
1639 ceph::real_time expiration_time;
1640 ceph::real_time unmod_since;
1641 ceph::real_time mtime; /* for setting delete marker mtime */
1642 bool high_precision_time;
31f18b77 1643 rgw_zone_set *zones_trace;
7c673cae 1644
31f18b77 1645 DeleteParams() : versioning_status(0), olh_epoch(0), bilog_flags(0), remove_objs(NULL), high_precision_time(false), zones_trace(nullptr) {}
7c673cae
FG
1646 } params;
1647
1648 struct DeleteResult {
1649 bool delete_marker;
1650 string version_id;
1651
1652 DeleteResult() : delete_marker(false) {}
1653 } result;
1654
1655 explicit Delete(RGWRados::Object *_target) : target(_target) {}
1656
1657 int delete_obj();
1658 };
1659
1660 struct Stat {
1661 RGWRados::Object *source;
1662
1663 struct Result {
1664 rgw_obj obj;
1665 RGWObjManifest manifest;
1666 bool has_manifest;
1667 uint64_t size;
1668 struct timespec mtime;
1669 map<string, bufferlist> attrs;
1670
1671 Result() : has_manifest(false), size(0) {}
1672 } result;
1673
1674 struct State {
1675 librados::IoCtx io_ctx;
1676 librados::AioCompletion *completion;
1677 int ret;
1678
1679 State() : completion(NULL), ret(0) {}
1680 } state;
1681
1682
1683 explicit Stat(RGWRados::Object *_source) : source(_source) {}
1684
1685 int stat_async();
1686 int wait();
1687 int stat();
1688 private:
1689 int finish();
1690 };
1691 };
1692
1693 class Bucket {
1694 RGWRados *store;
1695 RGWBucketInfo bucket_info;
1696 rgw_bucket& bucket;
1697 int shard_id;
1698
1699 public:
1700 Bucket(RGWRados *_store, const RGWBucketInfo& _bucket_info) : store(_store), bucket_info(_bucket_info), bucket(bucket_info.bucket),
1701 shard_id(RGW_NO_SHARD) {}
1702 RGWRados *get_store() { return store; }
1703 rgw_bucket& get_bucket() { return bucket; }
1704 RGWBucketInfo& get_bucket_info() { return bucket_info; }
1705
31f18b77
FG
1706 int update_bucket_id(const string& new_bucket_id);
1707
7c673cae
FG
1708 int get_shard_id() { return shard_id; }
1709 void set_shard_id(int id) {
1710 shard_id = id;
1711 }
1712
1713 class UpdateIndex {
1714 RGWRados::Bucket *target;
1715 string optag;
1716 rgw_obj obj;
1717 uint16_t bilog_flags{0};
1718 BucketShard bs;
1719 bool bs_initialized{false};
1720 bool blind;
1721 bool prepared{false};
31f18b77
FG
1722 rgw_zone_set *zones_trace{nullptr};
1723
1724 int init_bs() {
f64942e4
AA
1725 int r =
1726 bs.init(target->get_bucket(), obj, nullptr /* no RGWBucketInfo */);
31f18b77
FG
1727 if (r < 0) {
1728 return r;
1729 }
1730 bs_initialized = true;
1731 return 0;
1732 }
1733
1734 void invalidate_bs() {
1735 bs_initialized = false;
1736 }
1737
1738 int guard_reshard(BucketShard **pbs, std::function<int(BucketShard *)> call);
7c673cae
FG
1739 public:
1740
1741 UpdateIndex(RGWRados::Bucket *_target, const rgw_obj& _obj) : target(_target), obj(_obj),
1742 bs(target->get_store()) {
1743 blind = (target->get_bucket_info().index_type == RGWBIType_Indexless);
1744 }
1745
1746 int get_bucket_shard(BucketShard **pbs) {
1747 if (!bs_initialized) {
31f18b77 1748 int r = init_bs();
7c673cae
FG
1749 if (r < 0) {
1750 return r;
1751 }
7c673cae
FG
1752 }
1753 *pbs = &bs;
1754 return 0;
1755 }
1756
1757 void set_bilog_flags(uint16_t flags) {
1758 bilog_flags = flags;
1759 }
31f18b77
FG
1760
1761 void set_zones_trace(rgw_zone_set *_zones_trace) {
1762 zones_trace = _zones_trace;
1763 }
7c673cae
FG
1764
1765 int prepare(RGWModifyOp, const string *write_tag);
1766 int complete(int64_t poolid, uint64_t epoch, uint64_t size,
1767 uint64_t accounted_size, ceph::real_time& ut,
1768 const string& etag, const string& content_type,
11fdf7f2 1769 const string& storage_class,
7c673cae 1770 bufferlist *acl_bl, RGWObjCategory category,
11fdf7f2 1771 list<rgw_obj_index_key> *remove_objs, const string *user_data = nullptr, bool appendable = false);
7c673cae
FG
1772 int complete_del(int64_t poolid, uint64_t epoch,
1773 ceph::real_time& removed_mtime, /* mtime of removed object */
1774 list<rgw_obj_index_key> *remove_objs);
1775 int cancel();
1776
1777 const string *get_optag() { return &optag; }
1778
1779 bool is_prepared() { return prepared; }
1adf2230
AA
1780 }; // class UpdateIndex
1781
1782 class List {
1783 protected:
7c673cae 1784
7c673cae
FG
1785 RGWRados::Bucket *target;
1786 rgw_obj_key next_marker;
1787
1adf2230
AA
1788 int list_objects_ordered(int64_t max,
1789 vector<rgw_bucket_dir_entry> *result,
1790 map<string, bool> *common_prefixes,
1791 bool *is_truncated);
1792 int list_objects_unordered(int64_t max,
1793 vector<rgw_bucket_dir_entry> *result,
1794 map<string, bool> *common_prefixes,
1795 bool *is_truncated);
1796
1797 public:
1798
7c673cae
FG
1799 struct Params {
1800 string prefix;
1801 string delim;
1802 rgw_obj_key marker;
1803 rgw_obj_key end_marker;
1804 string ns;
1805 bool enforce_ns;
1806 RGWAccessListFilter *filter;
1807 bool list_versions;
1adf2230
AA
1808 bool allow_unordered;
1809
1810 Params() :
1811 enforce_ns(true),
1812 filter(NULL),
1813 list_versions(false),
1814 allow_unordered(false)
1815 {}
7c673cae
FG
1816 } params;
1817
7c673cae
FG
1818 explicit List(RGWRados::Bucket *_target) : target(_target) {}
1819
1adf2230
AA
1820 int list_objects(int64_t max,
1821 vector<rgw_bucket_dir_entry> *result,
1822 map<string, bool> *common_prefixes,
1823 bool *is_truncated) {
1824 if (params.allow_unordered) {
1825 return list_objects_unordered(max, result, common_prefixes,
1826 is_truncated);
1827 } else {
1828 return list_objects_ordered(max, result, common_prefixes,
1829 is_truncated);
1830 }
1831 }
7c673cae
FG
1832 rgw_obj_key& get_next_marker() {
1833 return next_marker;
1834 }
1adf2230
AA
1835 }; // class List
1836 }; // class Bucket
7c673cae 1837
7c673cae
FG
1838 int on_last_entry_in_listing(RGWBucketInfo& bucket_info,
1839 const std::string& obj_prefix,
1840 const std::string& obj_delim,
1841 std::function<int(const rgw_bucket_dir_entry&)> handler);
1842
1843 bool swift_versioning_enabled(const RGWBucketInfo& bucket_info) const {
1844 return bucket_info.has_swift_versioning() &&
1845 bucket_info.swift_ver_location.size();
1846 }
1847
1848 int swift_versioning_copy(RGWObjectCtx& obj_ctx, /* in/out */
1849 const rgw_user& user, /* in */
1850 RGWBucketInfo& bucket_info, /* in */
1851 rgw_obj& obj); /* in */
11fdf7f2
TL
1852 int swift_versioning_restore(RGWSysObjectCtx& sysobj_ctx,
1853 RGWObjectCtx& obj_ctx, /* in/out */
7c673cae
FG
1854 const rgw_user& user, /* in */
1855 RGWBucketInfo& bucket_info, /* in */
1856 rgw_obj& obj, /* in */
1857 bool& restored); /* out */
1858 int copy_obj_to_remote_dest(RGWObjState *astate,
1859 map<string, bufferlist>& src_attrs,
1860 RGWRados::Object::Read& read_op,
1861 const rgw_user& user_id,
1862 rgw_obj& dest_obj,
1863 ceph::real_time *mtime);
1864
1865 enum AttrsMod {
1866 ATTRSMOD_NONE = 0,
1867 ATTRSMOD_REPLACE = 1,
1868 ATTRSMOD_MERGE = 2
1869 };
1870
11fdf7f2 1871 int rewrite_obj(RGWBucketInfo& dest_bucket_info, const rgw_obj& obj);
7c673cae
FG
1872
1873 int stat_remote_obj(RGWObjectCtx& obj_ctx,
1874 const rgw_user& user_id,
7c673cae
FG
1875 req_info *info,
1876 const string& source_zone,
1877 rgw_obj& src_obj,
1878 RGWBucketInfo& src_bucket_info,
1879 real_time *src_mtime,
1880 uint64_t *psize,
1881 const real_time *mod_ptr,
1882 const real_time *unmod_ptr,
1883 bool high_precision_time,
1884 const char *if_match,
1885 const char *if_nomatch,
1886 map<string, bufferlist> *pattrs,
11fdf7f2 1887 map<string, string> *pheaders,
7c673cae
FG
1888 string *version_id,
1889 string *ptag,
1890 string *petag);
1891
1892 int fetch_remote_obj(RGWObjectCtx& obj_ctx,
1893 const rgw_user& user_id,
7c673cae
FG
1894 req_info *info,
1895 const string& source_zone,
11fdf7f2
TL
1896 const rgw_obj& dest_obj,
1897 const rgw_obj& src_obj,
7c673cae
FG
1898 RGWBucketInfo& dest_bucket_info,
1899 RGWBucketInfo& src_bucket_info,
11fdf7f2 1900 std::optional<rgw_placement_rule> dest_placement,
7c673cae
FG
1901 ceph::real_time *src_mtime,
1902 ceph::real_time *mtime,
1903 const ceph::real_time *mod_ptr,
1904 const ceph::real_time *unmod_ptr,
1905 bool high_precision_time,
1906 const char *if_match,
1907 const char *if_nomatch,
1908 AttrsMod attrs_mod,
1909 bool copy_if_newer,
1910 map<string, bufferlist>& attrs,
1911 RGWObjCategory category,
11fdf7f2 1912 std::optional<uint64_t> olh_epoch,
7c673cae 1913 ceph::real_time delete_at,
7c673cae 1914 string *ptag,
11fdf7f2 1915 string *petag,
7c673cae 1916 void (*progress_cb)(off_t, void *),
31f18b77 1917 void *progress_data,
81eedcae
TL
1918 rgw_zone_set *zones_trace= nullptr,
1919 std::optional<uint64_t>* bytes_transferred = 0);
7c673cae
FG
1920 /**
1921 * Copy an object.
1922 * dest_obj: the object to copy into
1923 * src_obj: the object to copy from
1924 * attrs: usage depends on attrs_mod parameter
1925 * attrs_mod: the modification mode of the attrs, may have the following values:
1926 * ATTRSMOD_NONE - the attributes of the source object will be
1927 * copied without modifications, attrs parameter is ignored;
1928 * ATTRSMOD_REPLACE - new object will have the attributes provided by attrs
1929 * parameter, source object attributes are not copied;
1930 * ATTRSMOD_MERGE - any conflicting meta keys on the source object's attributes
1931 * are overwritten by values contained in attrs parameter.
7c673cae
FG
1932 * Returns: 0 on success, -ERR# otherwise.
1933 */
1934 int copy_obj(RGWObjectCtx& obj_ctx,
1935 const rgw_user& user_id,
7c673cae
FG
1936 req_info *info,
1937 const string& source_zone,
1938 rgw_obj& dest_obj,
1939 rgw_obj& src_obj,
1940 RGWBucketInfo& dest_bucket_info,
1941 RGWBucketInfo& src_bucket_info,
11fdf7f2 1942 const rgw_placement_rule& dest_placement,
7c673cae
FG
1943 ceph::real_time *src_mtime,
1944 ceph::real_time *mtime,
1945 const ceph::real_time *mod_ptr,
1946 const ceph::real_time *unmod_ptr,
1947 bool high_precision_time,
1948 const char *if_match,
1949 const char *if_nomatch,
1950 AttrsMod attrs_mod,
1951 bool copy_if_newer,
1952 map<std::string, bufferlist>& attrs,
1953 RGWObjCategory category,
1954 uint64_t olh_epoch,
1955 ceph::real_time delete_at,
1956 string *version_id,
1957 string *ptag,
11fdf7f2 1958 string *petag,
7c673cae
FG
1959 void (*progress_cb)(off_t, void *),
1960 void *progress_data);
1961
1962 int copy_obj_data(RGWObjectCtx& obj_ctx,
1963 RGWBucketInfo& dest_bucket_info,
11fdf7f2 1964 const rgw_placement_rule& dest_placement,
7c673cae 1965 RGWRados::Object::Read& read_op, off_t end,
11fdf7f2 1966 const rgw_obj& dest_obj,
7c673cae
FG
1967 ceph::real_time *mtime,
1968 ceph::real_time set_mtime,
1969 map<string, bufferlist>& attrs,
7c673cae
FG
1970 uint64_t olh_epoch,
1971 ceph::real_time delete_at,
11fdf7f2 1972 string *petag);
7c673cae 1973
11fdf7f2
TL
1974 int transition_obj(RGWObjectCtx& obj_ctx,
1975 RGWBucketInfo& bucket_info,
1976 rgw_obj& obj,
1977 const rgw_placement_rule& placement_rule,
1978 const real_time& mtime,
1979 uint64_t olh_epoch);
1980
7c673cae
FG
1981 int check_bucket_empty(RGWBucketInfo& bucket_info);
1982
1983 /**
1984 * Delete a bucket.
1985 * bucket: the name of the bucket to delete
1986 * Returns 0 on success, -ERR# otherwise.
1987 */
1988 int delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, bool check_empty = true);
1989
7c673cae
FG
1990 void wakeup_meta_sync_shards(set<int>& shard_ids);
1991 void wakeup_data_sync_shards(const string& source_zone, map<int, set<string> >& shard_ids);
1992
1993 RGWMetaSyncStatusManager* get_meta_sync_manager();
1994 RGWDataSyncStatusManager* get_data_sync_manager(const std::string& source_zone);
1995
1996 int set_bucket_owner(rgw_bucket& bucket, ACLOwner& owner);
1997 int set_buckets_enabled(std::vector<rgw_bucket>& buckets, bool enabled);
1998 int bucket_suspended(rgw_bucket& bucket, bool *suspended);
1999
2000 /** Delete an object.*/
2001 int delete_obj(RGWObjectCtx& obj_ctx,
2002 const RGWBucketInfo& bucket_owner,
2003 const rgw_obj& src_obj,
2004 int versioning_status,
2005 uint16_t bilog_flags = 0,
31f18b77
FG
2006 const ceph::real_time& expiration_time = ceph::real_time(),
2007 rgw_zone_set *zones_trace = nullptr);
7c673cae 2008
7c673cae
FG
2009 int delete_raw_obj(const rgw_raw_obj& obj);
2010
7c673cae 2011 /** Remove an object from the bucket index */
494da23a 2012 int delete_obj_index(const rgw_obj& obj, ceph::real_time mtime);
7c673cae 2013
7c673cae
FG
2014 /**
2015 * Set an attr on an object.
2016 * bucket: name of the bucket holding the object
2017 * obj: name of the object to set the attr on
2018 * name: the attr to set
2019 * bl: the contents of the attr
2020 * Returns: 0 on success, -ERR# otherwise.
2021 */
2022 int set_attr(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj, const char *name, bufferlist& bl);
2023
2024 int set_attrs(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj,
2025 map<string, bufferlist>& attrs,
2026 map<string, bufferlist>* rmattrs);
2027
7c673cae
FG
2028 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
2029 bool follow_olh, bool assume_noent = false);
2030 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state) {
2031 return get_obj_state(rctx, bucket_info, obj, state, true);
2032 }
2033
11fdf7f2
TL
2034 using iterate_obj_cb = int (*)(const rgw_raw_obj&, off_t, off_t,
2035 off_t, bool, RGWObjState*, void*);
2036
2037 int iterate_obj(RGWObjectCtx& ctx, const RGWBucketInfo& bucket_info,
2038 const rgw_obj& obj, off_t ofs, off_t end,
2039 uint64_t max_chunk_size, iterate_obj_cb cb, void *arg);
7c673cae
FG
2040
2041 int flush_read_list(struct get_obj_data *d);
2042
11fdf7f2
TL
2043 int get_obj_iterate_cb(const rgw_raw_obj& read_obj, off_t obj_ofs,
2044 off_t read_ofs, off_t len, bool is_head_obj,
2045 RGWObjState *astate, void *arg);
7c673cae
FG
2046
2047 void get_obj_aio_completion_cb(librados::completion_t cb, void *arg);
2048
2049 /**
2050 * a simple object read without keeping state
2051 */
2052
11fdf7f2
TL
2053 int raw_obj_stat(rgw_raw_obj& obj, uint64_t *psize, ceph::real_time *pmtime, uint64_t *epoch,
2054 map<string, bufferlist> *attrs, bufferlist *first_chunk,
2055 RGWObjVersionTracker *objv_tracker);
7c673cae
FG
2056
2057 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectWriteOperation *op);
2058 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectReadOperation *op);
2059
f64942e4
AA
2060 int guard_reshard(BucketShard *bs,
2061 const rgw_obj& obj_instance,
2062 const RGWBucketInfo& bucket_info,
2063 std::function<int(BucketShard *)> call);
2064 int block_while_resharding(RGWRados::BucketShard *bs,
2065 string *new_bucket_id,
11fdf7f2
TL
2066 const RGWBucketInfo& bucket_info,
2067 optional_yield y);
31f18b77 2068
7c673cae
FG
2069 void bucket_index_guard_olh_op(RGWObjState& olh_state, librados::ObjectOperation& op);
2070 int olh_init_modification(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
2071 int olh_init_modification_impl(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
2072 int bucket_index_link_olh(const RGWBucketInfo& bucket_info, RGWObjState& olh_state,
2073 const rgw_obj& obj_instance, bool delete_marker,
2074 const string& op_tag, struct rgw_bucket_dir_entry_meta *meta,
2075 uint64_t olh_epoch,
91327a77
AA
2076 ceph::real_time unmod_since, bool high_precision_time,
2077 rgw_zone_set *zones_trace = nullptr,
2078 bool log_data_change = false);
31f18b77 2079 int bucket_index_unlink_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj_instance, const string& op_tag, const string& olh_tag, uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
7c673cae
FG
2080 int bucket_index_read_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance, uint64_t ver_marker,
2081 map<uint64_t, vector<rgw_bucket_olh_log_entry> > *log, bool *is_truncated);
2082 int bucket_index_trim_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& obj_state, const rgw_obj& obj_instance, uint64_t ver);
2083 int bucket_index_clear_olh(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance);
2084 int apply_olh_log(RGWObjectCtx& ctx, RGWObjState& obj_state, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
2085 bufferlist& obj_tag, map<uint64_t, vector<rgw_bucket_olh_log_entry> >& log,
31f18b77
FG
2086 uint64_t *plast_ver, rgw_zone_set *zones_trace = nullptr);
2087 int update_olh(RGWObjectCtx& obj_ctx, RGWObjState *state, const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_zone_set *zones_trace = nullptr);
7c673cae 2088 int set_olh(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, bool delete_marker, rgw_bucket_dir_entry_meta *meta,
91327a77
AA
2089 uint64_t olh_epoch, ceph::real_time unmod_since, bool high_precision_time,
2090 rgw_zone_set *zones_trace = nullptr, bool log_data_change = false);
a8e16298
TL
2091 int repair_olh(RGWObjState* state, const RGWBucketInfo& bucket_info,
2092 const rgw_obj& obj);
7c673cae 2093 int unlink_obj_instance(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj,
31f18b77 2094 uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
7c673cae
FG
2095
2096 void check_pending_olh_entries(map<string, bufferlist>& pending_entries, map<string, bufferlist> *rm_pending_entries);
2097 int remove_olh_pending_entries(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, map<string, bufferlist>& pending_attrs);
2098 int follow_olh(const RGWBucketInfo& bucket_info, RGWObjectCtx& ctx, RGWObjState *state, const rgw_obj& olh_obj, rgw_obj *target);
2099 int get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWOLHInfo *olh);
2100
11fdf7f2 2101 void gen_rand_obj_instance_name(rgw_obj_key *target_key);
7c673cae
FG
2102 void gen_rand_obj_instance_name(rgw_obj *target);
2103
7c673cae
FG
2104 int update_containers_stats(map<string, RGWBucketEnt>& m);
2105 int append_async(rgw_raw_obj& obj, size_t size, bufferlist& bl);
2106
11fdf7f2 2107public:
7c673cae
FG
2108 void set_atomic(void *ctx, rgw_obj& obj) {
2109 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
11fdf7f2 2110 rctx->set_atomic(obj);
7c673cae 2111 }
11fdf7f2 2112 void set_prefetch_data(void *ctx, const rgw_obj& obj) {
7c673cae 2113 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
11fdf7f2 2114 rctx->set_prefetch_data(obj);
7c673cae 2115 }
7c673cae
FG
2116 int decode_policy(bufferlist& bl, ACLOwner *owner);
2117 int get_bucket_stats(RGWBucketInfo& bucket_info, int shard_id, string *bucket_ver, string *master_ver,
c07f9fc5 2118 map<RGWObjCategory, RGWStorageStats>& stats, string *max_marker, bool* syncstopped = NULL);
7c673cae
FG
2119 int get_bucket_stats_async(RGWBucketInfo& bucket_info, int shard_id, RGWGetBucketStats_CB *cb);
2120 int get_user_stats(const rgw_user& user, RGWStorageStats& stats);
2121 int get_user_stats_async(const rgw_user& user, RGWGetUserStats_CB *cb);
2122 void get_bucket_instance_obj(const rgw_bucket& bucket, rgw_raw_obj& obj);
2123 void get_bucket_meta_oid(const rgw_bucket& bucket, string& oid);
2124
2125 int put_bucket_entrypoint_info(const string& tenant_name, const string& bucket_name, RGWBucketEntryPoint& entry_point,
2126 bool exclusive, RGWObjVersionTracker& objv_tracker, ceph::real_time mtime,
2127 map<string, bufferlist> *pattrs);
2128 int put_bucket_instance_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, map<string, bufferlist> *pattrs);
11fdf7f2 2129 int get_bucket_entrypoint_info(RGWSysObjectCtx& obj_ctx, const string& tenant_name, const string& bucket_name,
7c673cae 2130 RGWBucketEntryPoint& entry_point, RGWObjVersionTracker *objv_tracker,
b32b8144
FG
2131 ceph::real_time *pmtime, map<string, bufferlist> *pattrs, rgw_cache_entry_info *cache_info = NULL,
2132 boost::optional<obj_version> refresh_version = boost::none);
11fdf7f2
TL
2133 int get_bucket_instance_info(RGWSysObjectCtx& obj_ctx, const string& meta_key, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs);
2134 int get_bucket_instance_info(RGWSysObjectCtx& obj_ctx, const rgw_bucket& bucket, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs);
2135 int get_bucket_instance_from_oid(RGWSysObjectCtx& obj_ctx, const string& oid, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs,
b32b8144
FG
2136 rgw_cache_entry_info *cache_info = NULL,
2137 boost::optional<obj_version> refresh_version = boost::none);
7c673cae 2138
11fdf7f2 2139 int convert_old_bucket_info(RGWSysObjectCtx& obj_ctx, const string& tenant_name, const string& bucket_name);
7c673cae 2140 static void make_bucket_entry_name(const string& tenant_name, const string& bucket_name, string& bucket_entry);
b32b8144
FG
2141
2142
2143private:
11fdf7f2 2144 int _get_bucket_info(RGWSysObjectCtx& obj_ctx, const string& tenant,
b32b8144
FG
2145 const string& bucket_name, RGWBucketInfo& info,
2146 real_time *pmtime,
2147 map<string, bufferlist> *pattrs,
2148 boost::optional<obj_version> refresh_version);
2149public:
2150
11fdf7f2
TL
2151 bool call(std::string_view command, const cmdmap_t& cmdmap,
2152 std::string_view format,
3a9019d9
FG
2153 bufferlist& out) override final;
2154
3a9019d9 2155protected:
3a9019d9
FG
2156 // `call_list` must iterate over all cache entries and call
2157 // `cache_list_dump_helper` with the supplied Formatter on any that
2158 // include `filter` as a substring.
2159 //
11fdf7f2 2160 void call_list(const std::optional<std::string>& filter,
3a9019d9
FG
2161 Formatter* format);
2162 // `call_inspect` must look up the requested target and, if found,
2163 // dump it to the supplied Formatter and return true. If not found,
2164 // it must return false.
2165 //
11fdf7f2 2166 bool call_inspect(const std::string& target, Formatter* format);
3a9019d9
FG
2167
2168 // `call_erase` must erase the requested target and return true. If
2169 // the requested target does not exist, it should return false.
11fdf7f2 2170 bool call_erase(const std::string& target);
3a9019d9
FG
2171
2172 // `call_zap` must erase the cache.
11fdf7f2 2173 void call_zap();
3a9019d9 2174public:
b32b8144 2175
11fdf7f2 2176 int get_bucket_info(RGWSysObjectCtx& obj_ctx,
b32b8144
FG
2177 const string& tenant_name, const string& bucket_name,
2178 RGWBucketInfo& info,
2179 ceph::real_time *pmtime, map<string, bufferlist> *pattrs = NULL);
2180
81eedcae
TL
2181 // Returns 0 on successful refresh. Returns error code if there was
2182 // an error or the version stored on the OSD is the same as that
b32b8144
FG
2183 // presented in the BucketInfo structure.
2184 //
2185 int try_refresh_bucket_info(RGWBucketInfo& info,
2186 ceph::real_time *pmtime,
2187 map<string, bufferlist> *pattrs = nullptr);
2188
7c673cae 2189 int put_linked_bucket_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, obj_version *pep_objv,
b32b8144 2190 map<string, bufferlist> *pattrs, bool create_entry_point);
7c673cae 2191
31f18b77
FG
2192 int cls_obj_prepare_op(BucketShard& bs, RGWModifyOp op, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
2193 int cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModifyOp op, string& tag, int64_t pool, uint64_t epoch,
2194 rgw_bucket_dir_entry& ent, RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
2195 int cls_obj_complete_add(BucketShard& bs, const rgw_obj& obj, string& tag, int64_t pool, uint64_t epoch, rgw_bucket_dir_entry& ent,
2196 RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
7c673cae 2197 int cls_obj_complete_del(BucketShard& bs, string& tag, int64_t pool, uint64_t epoch, rgw_obj& obj,
31f18b77
FG
2198 ceph::real_time& removed_mtime, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
2199 int cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
7c673cae 2200 int cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_t timeout);
1adf2230 2201 int cls_bucket_list_ordered(RGWBucketInfo& bucket_info, int shard_id,
11fdf7f2
TL
2202 const rgw_obj_index_key& start,
2203 const string& prefix,
1adf2230
AA
2204 uint32_t num_entries, bool list_versions,
2205 map<string, rgw_bucket_dir_entry>& m,
2206 bool *is_truncated,
2207 rgw_obj_index_key *last_entry,
2208 bool (*force_check_filter)(const string& name) = nullptr);
2209 int cls_bucket_list_unordered(RGWBucketInfo& bucket_info, int shard_id,
11fdf7f2
TL
2210 const rgw_obj_index_key& start,
2211 const string& prefix,
1adf2230
AA
2212 uint32_t num_entries, bool list_versions,
2213 vector<rgw_bucket_dir_entry>& ent_list,
2214 bool *is_truncated, rgw_obj_index_key *last_entry,
2215 bool (*force_check_filter)(const string& name) = nullptr);
a8e16298 2216 int cls_bucket_head(const RGWBucketInfo& bucket_info, int shard_id, vector<rgw_bucket_dir_header>& headers, map<int, string> *bucket_instance_ids = NULL);
7c673cae
FG
2217 int cls_bucket_head_async(const RGWBucketInfo& bucket_info, int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio);
2218 int list_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, uint32_t max, std::list<rgw_bi_log_entry>& result, bool *truncated);
2219 int trim_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, string& end_marker);
c07f9fc5
FG
2220 int resync_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id);
2221 int stop_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id);
7c673cae
FG
2222 int get_bi_log_status(RGWBucketInfo& bucket_info, int shard_id, map<int, string>& max_marker);
2223
a8e16298
TL
2224 int bi_get_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_dir_entry *dirent);
2225 int bi_get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_olh_entry *olh);
2226 int bi_get(const RGWBucketInfo& bucket_info, const rgw_obj& obj, BIIndexType index_type, rgw_cls_bi_entry *entry);
7c673cae
FG
2227 void bi_put(librados::ObjectWriteOperation& op, BucketShard& bs, rgw_cls_bi_entry& entry);
2228 int bi_put(BucketShard& bs, rgw_cls_bi_entry& entry);
2229 int bi_put(rgw_bucket& bucket, rgw_obj& obj, rgw_cls_bi_entry& entry);
2230 int bi_list(rgw_bucket& bucket, int shard_id, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
2231 int bi_list(BucketShard& bs, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
2232 int bi_list(rgw_bucket& bucket, const string& obj_name, const string& marker, uint32_t max,
2233 list<rgw_cls_bi_entry> *entries, bool *is_truncated);
2234 int bi_remove(BucketShard& bs);
2235
2236 int cls_obj_usage_log_add(const string& oid, rgw_usage_log_info& info);
11fdf7f2
TL
2237 int cls_obj_usage_log_read(const string& oid, const string& user, const string& bucket, uint64_t start_epoch,
2238 uint64_t end_epoch, uint32_t max_entries, string& read_iter, map<rgw_user_bucket,
2239 rgw_usage_log_entry>& usage, bool *is_truncated);
2240 int cls_obj_usage_log_trim(const string& oid, const string& user, const string& bucket, uint64_t start_epoch,
2241 uint64_t end_epoch);
2242 int cls_obj_usage_log_clear(string& oid);
7c673cae
FG
2243
2244 int key_to_shard_id(const string& key, int max_shards);
2245 void shard_name(const string& prefix, unsigned max_shards, const string& key, string& name, int *shard_id);
2246 void shard_name(const string& prefix, unsigned max_shards, const string& section, const string& key, string& name);
2247 void shard_name(const string& prefix, unsigned shard_id, string& name);
2248 int get_target_shard_id(const RGWBucketInfo& bucket_info, const string& obj_key, int *shard_id);
2249 void time_log_prepare_entry(cls_log_entry& entry, const ceph::real_time& ut, const string& section, const string& key, bufferlist& bl);
2250 int time_log_add_init(librados::IoCtx& io_ctx);
2251 int time_log_add(const string& oid, list<cls_log_entry>& entries,
2252 librados::AioCompletion *completion, bool monotonic_inc = true);
2253 int time_log_add(const string& oid, const ceph::real_time& ut, const string& section, const string& key, bufferlist& bl);
2254 int time_log_list(const string& oid, const ceph::real_time& start_time, const ceph::real_time& end_time,
2255 int max_entries, list<cls_log_entry>& entries,
2256 const string& marker, string *out_marker, bool *truncated);
2257 int time_log_info(const string& oid, cls_log_header *header);
2258 int time_log_info_async(librados::IoCtx& io_ctx, const string& oid, cls_log_header *header, librados::AioCompletion *completion);
2259 int time_log_trim(const string& oid, const ceph::real_time& start_time, const ceph::real_time& end_time,
2260 const string& from_marker, const string& to_marker,
2261 librados::AioCompletion *completion = nullptr);
2262
2263 string objexp_hint_get_shardname(int shard_num);
2264 int objexp_key_shard(const rgw_obj_index_key& key);
2265 void objexp_get_shard(int shard_num,
2266 string& shard); /* out */
2267 int objexp_hint_add(const ceph::real_time& delete_at,
2268 const string& tenant_name,
2269 const string& bucket_name,
2270 const string& bucket_id,
2271 const rgw_obj_index_key& obj_key);
2272 int objexp_hint_list(const string& oid,
2273 const ceph::real_time& start_time,
2274 const ceph::real_time& end_time,
2275 const int max_entries,
2276 const string& marker,
2277 list<cls_timeindex_entry>& entries, /* out */
2278 string *out_marker, /* out */
2279 bool *truncated); /* out */
2280 int objexp_hint_parse(cls_timeindex_entry &ti_entry,
2281 objexp_hint_entry& hint_entry); /* out */
2282 int objexp_hint_trim(const string& oid,
2283 const ceph::real_time& start_time,
2284 const ceph::real_time& end_time,
2285 const string& from_marker = std::string(),
2286 const string& to_marker = std::string());
2287
11fdf7f2
TL
2288 int lock_exclusive(const rgw_pool& pool, const string& oid, ceph::timespan& duration, string& zone_id, string& owner_id);
2289 int unlock(const rgw_pool& pool, const string& oid, string& zone_id, string& owner_id);
7c673cae
FG
2290
2291 void update_gc_chain(rgw_obj& head_obj, RGWObjManifest& manifest, cls_rgw_obj_chain *chain);
2292 int send_chain_to_gc(cls_rgw_obj_chain& chain, const string& tag, bool sync);
2293 int gc_operate(string& oid, librados::ObjectWriteOperation *op);
11fdf7f2 2294 int gc_aio_operate(string& oid, librados::ObjectWriteOperation *op, librados::AioCompletion **pc = nullptr);
7c673cae
FG
2295 int gc_operate(string& oid, librados::ObjectReadOperation *op, bufferlist *pbl);
2296
2297 int list_gc_objs(int *index, string& marker, uint32_t max, bool expired_only, std::list<cls_rgw_gc_obj_info>& result, bool *truncated);
11fdf7f2 2298 int process_gc(bool expired_only);
1adf2230 2299 bool process_expire_objects();
7c673cae
FG
2300 int defer_gc(void *ctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj);
2301
2302 int process_lc();
2303 int list_lc_progress(const string& marker, uint32_t max_entries, map<string, int> *progress_map);
2304
2305 int bucket_check_index(RGWBucketInfo& bucket_info,
2306 map<RGWObjCategory, RGWStorageStats> *existing_stats,
2307 map<RGWObjCategory, RGWStorageStats> *calculated_stats);
2308 int bucket_rebuild_index(RGWBucketInfo& bucket_info);
f64942e4 2309 int bucket_set_reshard(const RGWBucketInfo& bucket_info, const cls_rgw_bucket_instance_entry& entry);
7c673cae
FG
2310 int remove_objs_from_index(RGWBucketInfo& bucket_info, list<rgw_obj_index_key>& oid_list);
2311 int move_rados_obj(librados::IoCtx& src_ioctx,
2312 const string& src_oid, const string& src_locator,
2313 librados::IoCtx& dst_ioctx,
2314 const string& dst_oid, const string& dst_locator);
2315 int fix_head_obj_locator(const RGWBucketInfo& bucket_info, bool copy_obj, bool remove_bad, rgw_obj_key& key);
2316 int fix_tail_obj_locator(const RGWBucketInfo& bucket_info, rgw_obj_key& key, bool fix, bool *need_fix);
2317
2318 int cls_user_get_header(const string& user_id, cls_user_header *header);
94b18763 2319 int cls_user_reset_stats(const string& user_id);
7c673cae
FG
2320 int cls_user_get_header_async(const string& user_id, RGWGetUserHeader_CB *ctx);
2321 int cls_user_sync_bucket_stats(rgw_raw_obj& user_obj, const RGWBucketInfo& bucket_info);
2322 int cls_user_list_buckets(rgw_raw_obj& obj,
2323 const string& in_marker,
2324 const string& end_marker,
2325 int max_entries,
2326 list<cls_user_bucket_entry>& entries,
2327 string *out_marker,
2328 bool *truncated);
2329 int cls_user_add_bucket(rgw_raw_obj& obj, const cls_user_bucket_entry& entry);
2330 int cls_user_update_buckets(rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries, bool add);
2331 int cls_user_complete_stats_sync(rgw_raw_obj& obj);
2332 int complete_sync_user_stats(const rgw_user& user_id);
7c673cae 2333 int cls_user_remove_bucket(rgw_raw_obj& obj, const cls_user_bucket& bucket);
c07f9fc5 2334 int cls_user_get_bucket_stats(const rgw_bucket& bucket, cls_user_bucket_entry& entry);
7c673cae
FG
2335
2336 int check_quota(const rgw_user& bucket_owner, rgw_bucket& bucket,
11fdf7f2 2337 RGWQuotaInfo& user_quota, RGWQuotaInfo& bucket_quota, uint64_t obj_size, bool check_size_only = false);
7c673cae 2338
224ce89b 2339 int check_bucket_shards(const RGWBucketInfo& bucket_info, const rgw_bucket& bucket,
31f18b77
FG
2340 RGWQuotaInfo& bucket_quota);
2341
2342 int add_bucket_to_reshard(const RGWBucketInfo& bucket_info, uint32_t new_num_shards);
2343
7c673cae 2344 uint64_t instance_id();
3efd9988 2345
7c673cae
FG
2346 librados::Rados* get_rados_handle();
2347
2348 int delete_raw_obj_aio(const rgw_raw_obj& obj, list<librados::AioCompletion *>& handles);
2349 int delete_obj_aio(const rgw_obj& obj, RGWBucketInfo& info, RGWObjState *astate,
2350 list<librados::AioCompletion *>& handles, bool keep_index_consistent);
11fdf7f2
TL
2351
2352 /* mfa/totp stuff */
2353 private:
2354 void prepare_mfa_write(librados::ObjectWriteOperation *op,
2355 RGWObjVersionTracker *objv_tracker,
2356 const ceph::real_time& mtime);
2357 public:
2358 string get_mfa_oid(const rgw_user& user);
2359 int get_mfa_ref(const rgw_user& user, rgw_rados_ref *ref);
2360 int check_mfa(const rgw_user& user, const string& otp_id, const string& pin);
2361 int create_mfa(const rgw_user& user, const rados::cls::otp::otp_info_t& config,
2362 RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime);
2363 int remove_mfa(const rgw_user& user, const string& id,
2364 RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime);
2365 int get_mfa(const rgw_user& user, const string& id, rados::cls::otp::otp_info_t *result);
2366 int list_mfa(const rgw_user& user, list<rados::cls::otp::otp_info_t> *result);
2367 int otp_get_current_time(const rgw_user& user, ceph::real_time *result);
2368
2369 /* mfa interfaces used by metadata engine */
2370 int set_mfa(const string& oid, const list<rados::cls::otp::otp_info_t>& entries, bool reset_obj,
2371 RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime);
2372 int list_mfa(const string& oid, list<rados::cls::otp::otp_info_t> *result,
2373 RGWObjVersionTracker *objv_tracker, ceph::real_time *pmtime);
7c673cae
FG
2374 private:
2375 /**
2376 * This is a helper method, it generates a list of bucket index objects with the given
2377 * bucket base oid and number of shards.
2378 *
2379 * bucket_oid_base [in] - base name of the bucket index object;
2380 * num_shards [in] - number of bucket index object shards.
2381 * bucket_objs [out] - filled by this method, a list of bucket index objects.
2382 */
2383 void get_bucket_index_objects(const string& bucket_oid_base, uint32_t num_shards,
2384 map<int, string>& bucket_objs, int shard_id = -1);
2385
2386 /**
2387 * Get the bucket index object with the given base bucket index object and object key,
2388 * and the number of bucket index shards.
2389 *
2390 * bucket_oid_base [in] - bucket object base name.
2391 * obj_key [in] - object key.
2392 * num_shards [in] - number of bucket index shards.
2393 * hash_type [in] - type of hash to find the shard ID.
2394 * bucket_obj [out] - the bucket index object for the given object.
2395 *
2396 * Return 0 on success, a failure code otherwise.
2397 */
2398 int get_bucket_index_object(const string& bucket_oid_base, const string& obj_key,
2399 uint32_t num_shards, RGWBucketInfo::BIShardsHashType hash_type, string *bucket_obj, int *shard);
2400
2401 void get_bucket_index_object(const string& bucket_oid_base, uint32_t num_shards,
2402 int shard_id, string *bucket_obj);
2403
2404 /**
2405 * Check the actual on-disk state of the object specified
2406 * by list_state, and fill in the time and size of object.
2407 * Then append any changes to suggested_updates for
2408 * the rgw class' dir_suggest_changes function.
2409 *
2410 * Note that this can maul list_state; don't use it afterwards. Also
2411 * it expects object to already be filled in from list_state; it only
2412 * sets the size and mtime.
2413 *
2414 * Returns 0 on success, -ENOENT if the object doesn't exist on disk,
2415 * and -errno on other failures. (-ENOENT is not a failure, and it
2416 * will encode that info as a suggested update.)
2417 */
2418 int check_disk_state(librados::IoCtx io_ctx,
2419 const RGWBucketInfo& bucket_info,
2420 rgw_bucket_dir_entry& list_state,
2421 rgw_bucket_dir_entry& object,
2422 bufferlist& suggested_updates);
2423
2424 /**
2425 * Init pool iteration
31f18b77 2426 * pool: pool to use for the ctx initialization
7c673cae
FG
2427 * ctx: context object to use for the iteration
2428 * Returns: 0 on success, -ERR# otherwise.
2429 */
2430 int pool_iterate_begin(const rgw_pool& pool, RGWPoolIterCtx& ctx);
31f18b77 2431
181888fb
FG
2432 /**
2433 * Init pool iteration
2434 * pool: pool to use
2435 * cursor: position to start iteration
2436 * ctx: context object to use for the iteration
2437 * Returns: 0 on success, -ERR# otherwise.
2438 */
2439 int pool_iterate_begin(const rgw_pool& pool, const string& cursor, RGWPoolIterCtx& ctx);
2440
2441 /**
2442 * Get pool iteration position
2443 * ctx: context object to use for the iteration
2444 * Returns: string representation of position
2445 */
2446 string pool_iterate_get_cursor(RGWPoolIterCtx& ctx);
2447
7c673cae
FG
2448 /**
2449 * Iterate over pool return object names, use optional filter
2450 * ctx: iteration context, initialized with pool_iterate_begin()
2451 * num: max number of objects to return
2452 * objs: a vector that the results will append into
2453 * is_truncated: if not NULL, will hold true iff more objects remain to be iterated
2454 * filter: if not NULL, will be used to filter returned objects
2455 * Returns: 0 on success, -ERR# otherwise.
2456 */
2457 int pool_iterate(RGWPoolIterCtx& ctx, uint32_t num, vector<rgw_bucket_dir_entry>& objs,
2458 bool *is_truncated, RGWAccessListFilter *filter);
2459
2460 uint64_t next_bucket_id();
2461};
2462
2463class RGWStoreManager {
2464public:
2465 RGWStoreManager() {}
28e407b8
AA
2466 static RGWRados *get_storage(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads,
2467 bool run_sync_thread, bool run_reshard_thread, bool use_cache = true) {
31f18b77 2468 RGWRados *store = init_storage_provider(cct, use_gc_thread, use_lc_thread, quota_threads, run_sync_thread,
28e407b8 2469 run_reshard_thread, use_cache);
7c673cae
FG
2470 return store;
2471 }
2472 static RGWRados *get_raw_storage(CephContext *cct) {
2473 RGWRados *store = init_raw_storage_provider(cct);
2474 return store;
2475 }
28e407b8 2476 static RGWRados *init_storage_provider(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads, bool run_sync_thread, bool run_reshard_thread, bool use_metadata_cache);
7c673cae
FG
2477 static RGWRados *init_raw_storage_provider(CephContext *cct);
2478 static void close_storage(RGWRados *store);
2479
2480};
2481
7c673cae
FG
2482class RGWMPObj {
2483 string oid;
2484 string prefix;
2485 string meta;
2486 string upload_id;
2487public:
2488 RGWMPObj() {}
2489 RGWMPObj(const string& _oid, const string& _upload_id) {
2490 init(_oid, _upload_id, _upload_id);
2491 }
2492 void init(const string& _oid, const string& _upload_id) {
2493 init(_oid, _upload_id, _upload_id);
2494 }
2495 void init(const string& _oid, const string& _upload_id, const string& part_unique_str) {
2496 if (_oid.empty()) {
2497 clear();
2498 return;
2499 }
2500 oid = _oid;
2501 upload_id = _upload_id;
2502 prefix = oid + ".";
2503 meta = prefix + upload_id + MP_META_SUFFIX;
2504 prefix.append(part_unique_str);
2505 }
11fdf7f2
TL
2506 const string& get_meta() const { return meta; }
2507 string get_part(int num) const {
7c673cae
FG
2508 char buf[16];
2509 snprintf(buf, 16, ".%d", num);
2510 string s = prefix;
2511 s.append(buf);
2512 return s;
2513 }
11fdf7f2 2514 string get_part(const string& part) const {
7c673cae
FG
2515 string s = prefix;
2516 s.append(".");
2517 s.append(part);
2518 return s;
2519 }
11fdf7f2 2520 const string& get_upload_id() const {
7c673cae
FG
2521 return upload_id;
2522 }
11fdf7f2 2523 const string& get_key() const {
7c673cae
FG
2524 return oid;
2525 }
2526 bool from_meta(string& meta) {
2527 int end_pos = meta.rfind('.'); // search for ".meta"
2528 if (end_pos < 0)
2529 return false;
2530 int mid_pos = meta.rfind('.', end_pos - 1); // <key>.<upload_id>
2531 if (mid_pos < 0)
2532 return false;
2533 oid = meta.substr(0, mid_pos);
2534 upload_id = meta.substr(mid_pos + 1, end_pos - mid_pos - 1);
2535 init(oid, upload_id, upload_id);
2536 return true;
2537 }
2538 void clear() {
2539 oid = "";
2540 prefix = "";
2541 meta = "";
2542 upload_id = "";
2543 }
11fdf7f2 2544}; // class RGWMPObj
7c673cae 2545
11fdf7f2
TL
2546
2547class RGWRadosThread {
2548 class Worker : public Thread {
2549 CephContext *cct;
2550 RGWRadosThread *processor;
2551 Mutex lock;
2552 Cond cond;
2553
2554 void wait() {
2555 Mutex::Locker l(lock);
2556 cond.Wait(lock);
2557 };
2558
2559 void wait_interval(const utime_t& wait_time) {
2560 Mutex::Locker l(lock);
2561 cond.WaitInterval(lock, wait_time);
2562 }
2563
2564 public:
2565 Worker(CephContext *_cct, RGWRadosThread *_p) : cct(_cct), processor(_p), lock("RGWRadosThread::Worker") {}
2566 void *entry() override;
2567 void signal() {
2568 Mutex::Locker l(lock);
2569 cond.Signal();
2570 }
2571 };
2572
2573 Worker *worker;
7c673cae
FG
2574
2575protected:
11fdf7f2
TL
2576 CephContext *cct;
2577 RGWRados *store;
2578
2579 std::atomic<bool> down_flag = { false };
2580
2581 string thread_name;
2582
2583 virtual uint64_t interval_msec() = 0;
2584 virtual void stop_process() {}
7c673cae 2585public:
11fdf7f2
TL
2586 RGWRadosThread(RGWRados *_store, const string& thread_name = "radosgw")
2587 : worker(NULL), cct(_store->ctx()), store(_store), thread_name(thread_name) {}
2588 virtual ~RGWRadosThread() {
2589 stop();
2590 }
2591
2592 virtual int init() { return 0; }
2593 virtual int process() = 0;
2594
2595 bool going_down() { return down_flag; }
2596
2597 void start();
2598 void stop();
2599
2600 void signal() {
2601 if (worker) {
2602 worker->signal();
2603 }
2604 }
2605};
2606
7c673cae 2607#endif