]> git.proxmox.com Git - ceph.git/blame - ceph/src/rgw/rgw_rados.h
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / rgw / rgw_rados.h
CommitLineData
7c673cae
FG
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#ifndef CEPH_RGWRADOS_H
#define CEPH_RGWRADOS_H

#include <functional>
#include <string>

#include "include/rados/librados.hpp"
#include "include/Context.h"
#include "common/admin_socket.h"
#include "common/RefCountedObj.h"
#include "common/RWLock.h"
#include "common/ceph_time.h"
#include "common/lru_map.h"
#include "common/ceph_json.h"
#include "rgw_common.h"
#include "cls/rgw/cls_rgw_types.h"
#include "cls/version/cls_version_types.h"
#include "cls/log/cls_log_types.h"
#include "cls/timeindex/cls_timeindex_types.h"
#include "cls/otp/cls_otp_types.h"
#include "rgw_log.h"
#include "rgw_metadata.h"
#include "rgw_meta_sync_status.h"
#include "rgw_period_puller.h"
#include "rgw_sync_module.h"
#include "rgw_sync_log_trim.h"
#include "rgw_service.h"

#include "services/svc_rados.h"
#include "services/svc_zone.h"
33
/* Forward declarations; the definitions live outside this part of the header. */
class RGWWatcher;
class SafeTimer;
class ACLOwner;
class RGWGC;
class RGWMetaNotifier;
class RGWDataNotifier;
class RGWLC;
class RGWObjectExpirer;
class RGWMetaSyncProcessorThread;
class RGWDataSyncProcessorThread;
class RGWSyncLogTrimThread;
class RGWSyncTraceManager;
struct RGWZoneGroup;
struct RGWZoneParams;
class RGWReshard;
class RGWReshardWait;

class RGWSysObjectCtx;

/* flags for put_obj_meta() */
#define PUT_OBJ_CREATE      0x01
#define PUT_OBJ_EXCL        0x02
/* both of the above bits set */
#define PUT_OBJ_CREATE_EXCL (PUT_OBJ_CREATE | PUT_OBJ_EXCL)

/* object namespace names */
#define RGW_OBJ_NS_MULTIPART "multipart"
#define RGW_OBJ_NS_SHADOW    "shadow"

#define RGW_BUCKET_INSTANCE_MD_PREFIX ".bucket.meta."

/* NOTE(review): presumably the shard id meaning "no specific shard" -- confirm against callers */
#define RGW_NO_SHARD -1

/* primes used by rgw_shards_mod(); the larger one is also what rgw_shards_max() returns */
#define RGW_SHARDS_PRIME_0 7877
#define RGW_SHARDS_PRIME_1 65521

extern const std::string MP_META_SUFFIX;

/**
 * Map a hash value onto a shard number.
 *
 * The hash is first reduced modulo a prime (RGW_SHARDS_PRIME_0 when
 * max_shards fits below it, RGW_SHARDS_PRIME_1 otherwise) and then modulo
 * max_shards.
 *
 * @param hval        hash value to reduce
 * @param max_shards  number of shards; 0 yields shard 0 instead of an
 *                    integer modulo by zero
 * @return the selected shard number
 */
// only called by rgw_shard_id and rgw_bucket_shard_index
static inline int rgw_shards_mod(unsigned hval, int max_shards)
{
  if (max_shards == 0) {
    /* nothing to distribute over; avoid '% 0' */
    return 0;
  }
  if (max_shards <= RGW_SHARDS_PRIME_0) {
    return hval % RGW_SHARDS_PRIME_0 % max_shards;
  }
  return hval % RGW_SHARDS_PRIME_1 % max_shards;
}

1adf2230
AA
79// used for logging and tagging
80static inline int rgw_shard_id(const string& key, int max_shards)
31f18b77 81{
1adf2230
AA
82 return rgw_shards_mod(ceph_str_hash_linux(key.c_str(), key.size()),
83 max_shards);
84}
85
86// used for bucket indices
87static inline uint32_t rgw_bucket_shard_index(const std::string& key,
88 int num_shards) {
89 uint32_t sid = ceph_str_hash_linux(key.c_str(), key.size());
90 uint32_t sid2 = sid ^ ((sid & 0xFF) << 24);
91 return rgw_shards_mod(sid2, num_shards);
31f18b77
FG
92}
93
94static inline int rgw_shards_max()
95{
96 return RGW_SHARDS_PRIME_1;
97}
7c673cae
FG
98
99static inline void prepend_bucket_marker(const rgw_bucket& bucket, const string& orig_oid, string& oid)
100{
101 if (bucket.marker.empty() || orig_oid.empty()) {
102 oid = orig_oid;
103 } else {
104 oid = bucket.marker;
105 oid.append("_");
106 oid.append(orig_oid);
107 }
108}
109
110static inline void get_obj_bucket_and_oid_loc(const rgw_obj& obj, string& oid, string& locator)
111{
112 const rgw_bucket& bucket = obj.bucket;
113 prepend_bucket_marker(bucket, obj.get_oid(), oid);
114 const string& loc = obj.key.get_loc();
115 if (!loc.empty()) {
116 prepend_bucket_marker(bucket, loc, locator);
117 } else {
118 locator.clear();
119 }
120}
121
122int rgw_init_ioctx(librados::Rados *rados, const rgw_pool& pool, librados::IoCtx& ioctx, bool create = false);
123
124int rgw_policy_from_attrset(CephContext *cct, map<string, bufferlist>& attrset, RGWAccessControlPolicy *policy);
125
126static inline bool rgw_raw_obj_to_obj(const rgw_bucket& bucket, const rgw_raw_obj& raw_obj, rgw_obj *obj)
127{
128 ssize_t pos = raw_obj.oid.find('_');
129 if (pos < 0) {
130 return false;
131 }
132
133 if (!rgw_obj_key::parse_raw_oid(raw_obj.oid.substr(pos + 1), &obj->key)) {
134 return false;
135 }
136 obj->bucket = bucket;
137
138 return true;
139}
140
11fdf7f2 141
7c673cae 142struct rgw_bucket_placement {
11fdf7f2 143 rgw_placement_rule placement_rule;
7c673cae
FG
144 rgw_bucket bucket;
145
146 void dump(Formatter *f) const;
147};
148
149class rgw_obj_select {
11fdf7f2 150 rgw_placement_rule placement_rule;
7c673cae
FG
151 rgw_obj obj;
152 rgw_raw_obj raw_obj;
153 bool is_raw;
154
155public:
156 rgw_obj_select() : is_raw(false) {}
11fdf7f2
TL
157 explicit rgw_obj_select(const rgw_obj& _obj) : obj(_obj), is_raw(false) {}
158 explicit rgw_obj_select(const rgw_raw_obj& _raw_obj) : raw_obj(_raw_obj), is_raw(true) {}
7c673cae 159 rgw_obj_select(const rgw_obj_select& rhs) {
c07f9fc5 160 placement_rule = rhs.placement_rule;
7c673cae
FG
161 is_raw = rhs.is_raw;
162 if (is_raw) {
163 raw_obj = rhs.raw_obj;
164 } else {
165 obj = rhs.obj;
166 }
167 }
168
169 rgw_raw_obj get_raw_obj(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) const;
170 rgw_raw_obj get_raw_obj(RGWRados *store) const;
171
172 rgw_obj_select& operator=(const rgw_obj& rhs) {
173 obj = rhs;
174 is_raw = false;
175 return *this;
176 }
177
178 rgw_obj_select& operator=(const rgw_raw_obj& rhs) {
179 raw_obj = rhs;
180 is_raw = true;
181 return *this;
182 }
183
11fdf7f2 184 void set_placement_rule(const rgw_placement_rule& rule) {
7c673cae
FG
185 placement_rule = rule;
186 }
11fdf7f2 187 void dump(Formatter *f) const;
7c673cae
FG
188};
189
190struct compression_block {
191 uint64_t old_ofs;
192 uint64_t new_ofs;
193 uint64_t len;
194
195 void encode(bufferlist& bl) const {
196 ENCODE_START(1, 1, bl);
11fdf7f2
TL
197 encode(old_ofs, bl);
198 encode(new_ofs, bl);
199 encode(len, bl);
7c673cae
FG
200 ENCODE_FINISH(bl);
201 }
202
11fdf7f2 203 void decode(bufferlist::const_iterator& bl) {
7c673cae 204 DECODE_START(1, bl);
11fdf7f2
TL
205 decode(old_ofs, bl);
206 decode(new_ofs, bl);
207 decode(len, bl);
7c673cae
FG
208 DECODE_FINISH(bl);
209 }
11fdf7f2 210 void dump(Formatter *f) const;
7c673cae
FG
211};
212WRITE_CLASS_ENCODER(compression_block)
213
214struct RGWCompressionInfo {
215 string compression_type;
216 uint64_t orig_size;
217 vector<compression_block> blocks;
218
219 RGWCompressionInfo() : compression_type("none"), orig_size(0) {}
220 RGWCompressionInfo(const RGWCompressionInfo& cs_info) : compression_type(cs_info.compression_type),
221 orig_size(cs_info.orig_size),
222 blocks(cs_info.blocks) {}
223
224 void encode(bufferlist& bl) const {
225 ENCODE_START(1, 1, bl);
11fdf7f2
TL
226 encode(compression_type, bl);
227 encode(orig_size, bl);
228 encode(blocks, bl);
7c673cae
FG
229 ENCODE_FINISH(bl);
230 }
231
11fdf7f2 232 void decode(bufferlist::const_iterator& bl) {
7c673cae 233 DECODE_START(1, bl);
11fdf7f2
TL
234 decode(compression_type, bl);
235 decode(orig_size, bl);
236 decode(blocks, bl);
7c673cae 237 DECODE_FINISH(bl);
11fdf7f2
TL
238 }
239 void dump(Formatter *f) const;
7c673cae
FG
240};
241WRITE_CLASS_ENCODER(RGWCompressionInfo)
242
243int rgw_compression_info_from_attrset(map<string, bufferlist>& attrs, bool& need_decompress, RGWCompressionInfo& cs_info);
244
245struct RGWOLHInfo {
246 rgw_obj target;
247 bool removed;
248
249 RGWOLHInfo() : removed(false) {}
250
251 void encode(bufferlist& bl) const {
252 ENCODE_START(1, 1, bl);
11fdf7f2
TL
253 encode(target, bl);
254 encode(removed, bl);
7c673cae
FG
255 ENCODE_FINISH(bl);
256 }
257
11fdf7f2 258 void decode(bufferlist::const_iterator& bl) {
7c673cae 259 DECODE_START(1, bl);
11fdf7f2
TL
260 decode(target, bl);
261 decode(removed, bl);
7c673cae
FG
262 DECODE_FINISH(bl);
263 }
264 static void generate_test_instances(list<RGWOLHInfo*>& o);
265 void dump(Formatter *f) const;
266};
267WRITE_CLASS_ENCODER(RGWOLHInfo)
268
269struct RGWOLHPendingInfo {
270 ceph::real_time time;
271
272 RGWOLHPendingInfo() {}
273
274 void encode(bufferlist& bl) const {
275 ENCODE_START(1, 1, bl);
11fdf7f2 276 encode(time, bl);
7c673cae
FG
277 ENCODE_FINISH(bl);
278 }
279
11fdf7f2 280 void decode(bufferlist::const_iterator& bl) {
7c673cae 281 DECODE_START(1, bl);
11fdf7f2 282 decode(time, bl);
7c673cae
FG
283 DECODE_FINISH(bl);
284 }
285
286 void dump(Formatter *f) const;
287};
288WRITE_CLASS_ENCODER(RGWOLHPendingInfo)
289
290struct RGWUsageBatch {
291 map<ceph::real_time, rgw_usage_log_entry> m;
292
293 void insert(ceph::real_time& t, rgw_usage_log_entry& entry, bool *account) {
294 bool exists = m.find(t) != m.end();
295 *account = !exists;
296 m[t].aggregate(entry);
297 }
298};
299
/* Usage iteration state: a read marker string and an index that starts at 0. */
struct RGWUsageIter {
  string read_iter;
  uint32_t index;

  RGWUsageIter() : index(0) {}
};

307class RGWGetDataCB {
7c673cae
FG
308public:
309 virtual int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) = 0;
11fdf7f2 310 RGWGetDataCB() {}
7c673cae 311 virtual ~RGWGetDataCB() {}
7c673cae
FG
312};
313
314struct RGWCloneRangeInfo {
315 rgw_obj src;
316 off_t src_ofs;
317 off_t dst_ofs;
318 uint64_t len;
319};
320
321struct RGWObjManifestPart {
322 rgw_obj loc; /* the object where the data is located */
323 uint64_t loc_ofs; /* the offset at that object where the data is located */
324 uint64_t size; /* the part size */
325
326 RGWObjManifestPart() : loc_ofs(0), size(0) {}
327
328 void encode(bufferlist& bl) const {
329 ENCODE_START(2, 2, bl);
11fdf7f2
TL
330 encode(loc, bl);
331 encode(loc_ofs, bl);
332 encode(size, bl);
7c673cae
FG
333 ENCODE_FINISH(bl);
334 }
335
11fdf7f2 336 void decode(bufferlist::const_iterator& bl) {
7c673cae 337 DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl);
11fdf7f2
TL
338 decode(loc, bl);
339 decode(loc_ofs, bl);
340 decode(size, bl);
7c673cae
FG
341 DECODE_FINISH(bl);
342 }
343
344 void dump(Formatter *f) const;
345 static void generate_test_instances(list<RGWObjManifestPart*>& o);
346};
347WRITE_CLASS_ENCODER(RGWObjManifestPart)
348
349/*
350 The manifest defines a set of rules for structuring the object parts.
351 There are a few terms to note:
352 - head: the head part of the object, which is the part that contains
353 the first chunk of data. An object might not have a head (as in the
354 case of multipart-part objects).
355 - stripe: data portion of a single rgw object that resides on a single
356 rados object.
357 - part: a collection of stripes that make a contiguous part of an
358 object. A regular object will only have one part (although might have
359 many stripes), a multipart object might have many parts. Each part
360 has a fixed stripe size, although the last stripe of a part might
361 be smaller than that. Consecutive parts may be merged if their stripe
362 value is the same.
363*/
364
365struct RGWObjManifestRule {
366 uint32_t start_part_num;
367 uint64_t start_ofs;
368 uint64_t part_size; /* each part size, 0 if there's no part size, meaning it's unlimited */
369 uint64_t stripe_max_size; /* underlying obj max size */
370 string override_prefix;
371
372 RGWObjManifestRule() : start_part_num(0), start_ofs(0), part_size(0), stripe_max_size(0) {}
373 RGWObjManifestRule(uint32_t _start_part_num, uint64_t _start_ofs, uint64_t _part_size, uint64_t _stripe_max_size) :
374 start_part_num(_start_part_num), start_ofs(_start_ofs), part_size(_part_size), stripe_max_size(_stripe_max_size) {}
375
376 void encode(bufferlist& bl) const {
377 ENCODE_START(2, 1, bl);
11fdf7f2
TL
378 encode(start_part_num, bl);
379 encode(start_ofs, bl);
380 encode(part_size, bl);
381 encode(stripe_max_size, bl);
382 encode(override_prefix, bl);
7c673cae
FG
383 ENCODE_FINISH(bl);
384 }
385
11fdf7f2 386 void decode(bufferlist::const_iterator& bl) {
7c673cae 387 DECODE_START(2, bl);
11fdf7f2
TL
388 decode(start_part_num, bl);
389 decode(start_ofs, bl);
390 decode(part_size, bl);
391 decode(stripe_max_size, bl);
7c673cae 392 if (struct_v >= 2)
11fdf7f2 393 decode(override_prefix, bl);
7c673cae
FG
394 DECODE_FINISH(bl);
395 }
396 void dump(Formatter *f) const;
397};
398WRITE_CLASS_ENCODER(RGWObjManifestRule)
399
400class RGWObjManifest {
401protected:
402 bool explicit_objs; /* old manifest? */
403 map<uint64_t, RGWObjManifestPart> objs;
404
405 uint64_t obj_size;
406
407 rgw_obj obj;
408 uint64_t head_size;
11fdf7f2 409 rgw_placement_rule head_placement_rule;
7c673cae
FG
410
411 uint64_t max_head_size;
412 string prefix;
413 rgw_bucket_placement tail_placement; /* might be different than the original bucket,
414 as object might have been copied across pools */
415 map<uint64_t, RGWObjManifestRule> rules;
416
417 string tail_instance; /* tail object's instance */
418
419 void convert_to_explicit(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params);
420 int append_explicit(RGWObjManifest& m, const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params);
421 void append_rules(RGWObjManifest& m, map<uint64_t, RGWObjManifestRule>::iterator& iter, string *override_prefix);
422
423 void update_iterators() {
424 begin_iter.seek(0);
425 end_iter.seek(obj_size);
426 }
427public:
428
429 RGWObjManifest() : explicit_objs(false), obj_size(0), head_size(0), max_head_size(0),
430 begin_iter(this), end_iter(this) {}
431 RGWObjManifest(const RGWObjManifest& rhs) {
432 *this = rhs;
433 }
434 RGWObjManifest& operator=(const RGWObjManifest& rhs) {
435 explicit_objs = rhs.explicit_objs;
436 objs = rhs.objs;
437 obj_size = rhs.obj_size;
438 obj = rhs.obj;
439 head_size = rhs.head_size;
440 max_head_size = rhs.max_head_size;
441 prefix = rhs.prefix;
442 tail_placement = rhs.tail_placement;
443 rules = rhs.rules;
444 tail_instance = rhs.tail_instance;
445
446 begin_iter.set_manifest(this);
447 end_iter.set_manifest(this);
448
449 begin_iter.seek(rhs.begin_iter.get_ofs());
450 end_iter.seek(rhs.end_iter.get_ofs());
451
452 return *this;
453 }
454
455 map<uint64_t, RGWObjManifestPart>& get_explicit_objs() {
456 return objs;
457 }
458
459
460 void set_explicit(uint64_t _size, map<uint64_t, RGWObjManifestPart>& _objs) {
461 explicit_objs = true;
462 obj_size = _size;
463 objs.swap(_objs);
464 }
465
466 void get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, string *override_prefix, rgw_obj_select *location);
467
468 void set_trivial_rule(uint64_t tail_ofs, uint64_t stripe_max_size) {
469 RGWObjManifestRule rule(0, tail_ofs, 0, stripe_max_size);
470 rules[0] = rule;
471 max_head_size = tail_ofs;
472 }
473
474 void set_multipart_part_rule(uint64_t stripe_max_size, uint64_t part_num) {
475 RGWObjManifestRule rule(0, 0, 0, stripe_max_size);
476 rule.start_part_num = part_num;
477 rules[0] = rule;
478 max_head_size = 0;
479 }
480
481 void encode(bufferlist& bl) const {
482 ENCODE_START(7, 6, bl);
11fdf7f2
TL
483 encode(obj_size, bl);
484 encode(objs, bl);
485 encode(explicit_objs, bl);
486 encode(obj, bl);
487 encode(head_size, bl);
488 encode(max_head_size, bl);
489 encode(prefix, bl);
490 encode(rules, bl);
7c673cae 491 bool encode_tail_bucket = !(tail_placement.bucket == obj.bucket);
11fdf7f2 492 encode(encode_tail_bucket, bl);
7c673cae 493 if (encode_tail_bucket) {
11fdf7f2 494 encode(tail_placement.bucket, bl);
7c673cae
FG
495 }
496 bool encode_tail_instance = (tail_instance != obj.key.instance);
11fdf7f2 497 encode(encode_tail_instance, bl);
7c673cae 498 if (encode_tail_instance) {
11fdf7f2 499 encode(tail_instance, bl);
7c673cae 500 }
11fdf7f2
TL
501 encode(head_placement_rule, bl);
502 encode(tail_placement.placement_rule, bl);
7c673cae
FG
503 ENCODE_FINISH(bl);
504 }
505
11fdf7f2 506 void decode(bufferlist::const_iterator& bl) {
7c673cae 507 DECODE_START_LEGACY_COMPAT_LEN_32(7, 2, 2, bl);
11fdf7f2
TL
508 decode(obj_size, bl);
509 decode(objs, bl);
7c673cae 510 if (struct_v >= 3) {
11fdf7f2
TL
511 decode(explicit_objs, bl);
512 decode(obj, bl);
513 decode(head_size, bl);
514 decode(max_head_size, bl);
515 decode(prefix, bl);
516 decode(rules, bl);
7c673cae
FG
517 } else {
518 explicit_objs = true;
519 if (!objs.empty()) {
520 map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin();
521 obj = iter->second.loc;
522 head_size = iter->second.size;
523 max_head_size = head_size;
524 }
525 }
526
527 if (explicit_objs && head_size > 0 && !objs.empty()) {
528 /* patch up manifest due to issue 16435:
529 * the first object in the explicit objs list might not be the one we need to access, use the
530 * head object instead if set. This would happen if we had an old object that was created
531 * when the explicit objs manifest was around, and it got copied.
532 */
533 rgw_obj& obj_0 = objs[0].loc;
534 if (!obj_0.get_oid().empty() && obj_0.key.ns.empty()) {
535 objs[0].loc = obj;
536 objs[0].size = head_size;
537 }
538 }
539
540 if (struct_v >= 4) {
541 if (struct_v < 6) {
11fdf7f2 542 decode(tail_placement.bucket, bl);
7c673cae
FG
543 } else {
544 bool need_to_decode;
11fdf7f2 545 decode(need_to_decode, bl);
7c673cae 546 if (need_to_decode) {
11fdf7f2 547 decode(tail_placement.bucket, bl);
7c673cae
FG
548 } else {
549 tail_placement.bucket = obj.bucket;
550 }
551 }
552 }
553
554 if (struct_v >= 5) {
555 if (struct_v < 6) {
11fdf7f2 556 decode(tail_instance, bl);
7c673cae
FG
557 } else {
558 bool need_to_decode;
11fdf7f2 559 decode(need_to_decode, bl);
7c673cae 560 if (need_to_decode) {
11fdf7f2 561 decode(tail_instance, bl);
7c673cae
FG
562 } else {
563 tail_instance = obj.key.instance;
564 }
565 }
566 } else { // old object created before 'tail_instance' field added to manifest
567 tail_instance = obj.key.instance;
568 }
569
570 if (struct_v >= 7) {
11fdf7f2
TL
571 decode(head_placement_rule, bl);
572 decode(tail_placement.placement_rule, bl);
7c673cae
FG
573 }
574
575 update_iterators();
576 DECODE_FINISH(bl);
577 }
578
579 void dump(Formatter *f) const;
580 static void generate_test_instances(list<RGWObjManifest*>& o);
581
11fdf7f2
TL
582 int append(RGWObjManifest& m, const RGWZoneGroup& zonegroup,
583 const RGWZoneParams& zone_params);
584 int append(RGWObjManifest& m, RGWSI_Zone *zone_svc);
7c673cae
FG
585
586 bool get_rule(uint64_t ofs, RGWObjManifestRule *rule);
587
588 bool empty() {
589 if (explicit_objs)
590 return objs.empty();
591 return rules.empty();
592 }
593
594 bool has_explicit_objs() {
595 return explicit_objs;
596 }
597
598 bool has_tail() {
599 if (explicit_objs) {
600 if (objs.size() == 1) {
601 map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin();
602 rgw_obj& o = iter->second.loc;
603 return !(obj == o);
604 }
605 return (objs.size() >= 2);
606 }
607 return (obj_size > head_size);
608 }
609
11fdf7f2 610 void set_head(const rgw_placement_rule& placement_rule, const rgw_obj& _o, uint64_t _s) {
7c673cae
FG
611 head_placement_rule = placement_rule;
612 obj = _o;
613 head_size = _s;
614
615 if (explicit_objs && head_size > 0) {
616 objs[0].loc = obj;
617 objs[0].size = head_size;
618 }
619 }
620
621 const rgw_obj& get_obj() {
622 return obj;
623 }
624
11fdf7f2 625 void set_tail_placement(const rgw_placement_rule& placement_rule, const rgw_bucket& _b) {
7c673cae
FG
626 tail_placement.placement_rule = placement_rule;
627 tail_placement.bucket = _b;
628 }
629
630 const rgw_bucket_placement& get_tail_placement() {
631 return tail_placement;
632 }
633
11fdf7f2 634 const rgw_placement_rule& get_head_placement_rule() {
7c673cae
FG
635 return head_placement_rule;
636 }
637
638 void set_prefix(const string& _p) {
639 prefix = _p;
640 }
641
642 const string& get_prefix() {
643 return prefix;
644 }
645
646 void set_tail_instance(const string& _ti) {
647 tail_instance = _ti;
648 }
649
650 const string& get_tail_instance() {
651 return tail_instance;
652 }
653
654 void set_head_size(uint64_t _s) {
655 head_size = _s;
656 }
657
658 void set_obj_size(uint64_t s) {
659 obj_size = s;
660
661 update_iterators();
662 }
663
664 uint64_t get_obj_size() {
665 return obj_size;
666 }
667
668 uint64_t get_head_size() {
669 return head_size;
670 }
671
7c673cae
FG
672 uint64_t get_max_head_size() {
673 return max_head_size;
674 }
675
676 class obj_iterator {
677 RGWObjManifest *manifest;
678 uint64_t part_ofs; /* where current part starts */
679 uint64_t stripe_ofs; /* where current stripe starts */
680 uint64_t ofs; /* current position within the object */
681 uint64_t stripe_size; /* current part size */
682
683 int cur_part_id;
684 int cur_stripe;
685 string cur_override_prefix;
686
687 rgw_obj_select location;
688
689 map<uint64_t, RGWObjManifestRule>::iterator rule_iter;
690 map<uint64_t, RGWObjManifestRule>::iterator next_rule_iter;
691
692 map<uint64_t, RGWObjManifestPart>::iterator explicit_iter;
693
694 void init() {
695 part_ofs = 0;
696 stripe_ofs = 0;
697 ofs = 0;
698 stripe_size = 0;
699 cur_part_id = 0;
700 cur_stripe = 0;
701 }
702
703 void update_explicit_pos();
704
705
706 protected:
707
708 void set_manifest(RGWObjManifest *m) {
709 manifest = m;
710 }
711
712 public:
713 obj_iterator() : manifest(NULL) {
714 init();
715 }
716 explicit obj_iterator(RGWObjManifest *_m) : manifest(_m) {
717 init();
718 if (!manifest->empty()) {
719 seek(0);
720 }
721 }
722 obj_iterator(RGWObjManifest *_m, uint64_t _ofs) : manifest(_m) {
723 init();
724 if (!manifest->empty()) {
725 seek(_ofs);
726 }
727 }
728 void seek(uint64_t ofs);
729
730 void operator++();
731 bool operator==(const obj_iterator& rhs) {
732 return (ofs == rhs.ofs);
733 }
734 bool operator!=(const obj_iterator& rhs) {
735 return (ofs != rhs.ofs);
736 }
737 const rgw_obj_select& get_location() {
738 return location;
739 }
740
741 /* start of current stripe */
742 uint64_t get_stripe_ofs() {
743 if (manifest->explicit_objs) {
744 return explicit_iter->first;
745 }
746 return stripe_ofs;
747 }
748
749 /* current ofs relative to start of rgw object */
750 uint64_t get_ofs() const {
751 return ofs;
752 }
753
754 /* stripe number */
755 int get_cur_stripe() const {
756 return cur_stripe;
757 }
758
759 /* current stripe size */
760 uint64_t get_stripe_size() {
761 if (manifest->explicit_objs) {
762 return explicit_iter->second.size;
763 }
764 return stripe_size;
765 }
766
767 /* offset where data starts within current stripe */
768 uint64_t location_ofs() {
769 if (manifest->explicit_objs) {
770 return explicit_iter->second.loc_ofs;
771 }
772 return 0; /* all stripes start at zero offset */
773 }
774
775 void update_location();
776
777 friend class RGWObjManifest;
11fdf7f2 778 void dump(Formatter *f) const;
7c673cae
FG
779 };
780
781 const obj_iterator& obj_begin();
782 const obj_iterator& obj_end();
783 obj_iterator obj_find(uint64_t ofs);
784
785 obj_iterator begin_iter;
786 obj_iterator end_iter;
787
788 /*
789 * simple object generator. Using a simple single rule manifest.
790 */
791 class generator {
792 RGWObjManifest *manifest;
793 uint64_t last_ofs;
794 uint64_t cur_part_ofs;
795 int cur_part_id;
796 int cur_stripe;
797 uint64_t cur_stripe_size;
798 string cur_oid;
799
800 string oid_prefix;
801
802 rgw_obj_select cur_obj;
7c673cae
FG
803
804 RGWObjManifestRule rule;
805
806 public:
807 generator() : manifest(NULL), last_ofs(0), cur_part_ofs(0), cur_part_id(0),
808 cur_stripe(0), cur_stripe_size(0) {}
11fdf7f2
TL
809 int create_begin(CephContext *cct, RGWObjManifest *manifest,
810 const rgw_placement_rule& head_placement_rule,
811 const rgw_placement_rule *tail_placement_rule,
812 const rgw_bucket& bucket,
813 const rgw_obj& obj);
7c673cae
FG
814
815 int create_next(uint64_t ofs);
816
817 rgw_raw_obj get_cur_obj(RGWZoneGroup& zonegroup, RGWZoneParams& zone_params) { return cur_obj.get_raw_obj(zonegroup, zone_params); }
11fdf7f2 818 rgw_raw_obj get_cur_obj(RGWRados *store) const { return cur_obj.get_raw_obj(store); }
7c673cae
FG
819
820 /* total max size of current stripe (including head obj) */
11fdf7f2 821 uint64_t cur_stripe_max_size() const {
7c673cae
FG
822 return cur_stripe_size;
823 }
824 };
825};
826WRITE_CLASS_ENCODER(RGWObjManifest)
827
828struct RGWUploadPartInfo {
829 uint32_t num;
830 uint64_t size;
831 uint64_t accounted_size{0};
832 string etag;
833 ceph::real_time modified;
834 RGWObjManifest manifest;
835 RGWCompressionInfo cs_info;
836
837 RGWUploadPartInfo() : num(0), size(0) {}
838
839 void encode(bufferlist& bl) const {
840 ENCODE_START(4, 2, bl);
11fdf7f2
TL
841 encode(num, bl);
842 encode(size, bl);
843 encode(etag, bl);
844 encode(modified, bl);
845 encode(manifest, bl);
846 encode(cs_info, bl);
847 encode(accounted_size, bl);
7c673cae
FG
848 ENCODE_FINISH(bl);
849 }
11fdf7f2 850 void decode(bufferlist::const_iterator& bl) {
7c673cae 851 DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl);
11fdf7f2
TL
852 decode(num, bl);
853 decode(size, bl);
854 decode(etag, bl);
855 decode(modified, bl);
7c673cae 856 if (struct_v >= 3)
11fdf7f2 857 decode(manifest, bl);
7c673cae 858 if (struct_v >= 4) {
11fdf7f2
TL
859 decode(cs_info, bl);
860 decode(accounted_size, bl);
7c673cae
FG
861 } else {
862 accounted_size = size;
863 }
864 DECODE_FINISH(bl);
865 }
866 void dump(Formatter *f) const;
867 static void generate_test_instances(list<RGWUploadPartInfo*>& o);
868};
869WRITE_CLASS_ENCODER(RGWUploadPartInfo)
870
871struct RGWObjState {
872 rgw_obj obj;
873 bool is_atomic;
874 bool has_attrs;
875 bool exists;
876 uint64_t size; //< size of raw object
877 uint64_t accounted_size{0}; //< size before compression, encryption
878 ceph::real_time mtime;
879 uint64_t epoch;
880 bufferlist obj_tag;
181888fb 881 bufferlist tail_tag;
7c673cae
FG
882 string write_tag;
883 bool fake_tag;
884 RGWObjManifest manifest;
885 bool has_manifest;
886 string shadow_obj;
887 bool has_data;
888 bufferlist data;
889 bool prefetch_data;
890 bool keep_tail;
891 bool is_olh;
892 bufferlist olh_tag;
893 uint64_t pg_ver;
894 uint32_t zone_short_id;
895
896 /* important! don't forget to update copy constructor */
897
898 RGWObjVersionTracker objv_tracker;
899
900 map<string, bufferlist> attrset;
901 RGWObjState() : is_atomic(false), has_attrs(0), exists(false),
902 size(0), epoch(0), fake_tag(false), has_manifest(false),
903 has_data(false), prefetch_data(false), keep_tail(false), is_olh(false),
904 pg_ver(0), zone_short_id(0) {}
905 RGWObjState(const RGWObjState& rhs) : obj (rhs.obj) {
906 is_atomic = rhs.is_atomic;
907 has_attrs = rhs.has_attrs;
908 exists = rhs.exists;
909 size = rhs.size;
910 accounted_size = rhs.accounted_size;
911 mtime = rhs.mtime;
912 epoch = rhs.epoch;
913 if (rhs.obj_tag.length()) {
914 obj_tag = rhs.obj_tag;
915 }
181888fb
FG
916 if (rhs.tail_tag.length()) {
917 tail_tag = rhs.tail_tag;
918 }
7c673cae
FG
919 write_tag = rhs.write_tag;
920 fake_tag = rhs.fake_tag;
921 if (rhs.has_manifest) {
922 manifest = rhs.manifest;
923 }
924 has_manifest = rhs.has_manifest;
925 shadow_obj = rhs.shadow_obj;
926 has_data = rhs.has_data;
927 if (rhs.data.length()) {
928 data = rhs.data;
929 }
930 prefetch_data = rhs.prefetch_data;
931 keep_tail = rhs.keep_tail;
932 is_olh = rhs.is_olh;
933 objv_tracker = rhs.objv_tracker;
934 pg_ver = rhs.pg_ver;
935 }
936
937 bool get_attr(string name, bufferlist& dest) {
938 map<string, bufferlist>::iterator iter = attrset.find(name);
939 if (iter != attrset.end()) {
940 dest = iter->second;
941 return true;
942 }
943 return false;
944 }
945};
946
947struct RGWRawObjState {
948 rgw_raw_obj obj;
949 bool has_attrs{false};
950 bool exists{false};
951 uint64_t size{0};
952 ceph::real_time mtime;
11fdf7f2 953 uint64_t epoch{0};
7c673cae
FG
954 bufferlist obj_tag;
955 bool has_data{false};
956 bufferlist data;
957 bool prefetch_data{false};
958 uint64_t pg_ver{0};
959
960 /* important! don't forget to update copy constructor */
961
962 RGWObjVersionTracker objv_tracker;
963
964 map<string, bufferlist> attrset;
965 RGWRawObjState() {}
966 RGWRawObjState(const RGWRawObjState& rhs) : obj (rhs.obj) {
967 has_attrs = rhs.has_attrs;
968 exists = rhs.exists;
969 size = rhs.size;
970 mtime = rhs.mtime;
971 epoch = rhs.epoch;
972 if (rhs.obj_tag.length()) {
973 obj_tag = rhs.obj_tag;
974 }
975 has_data = rhs.has_data;
976 if (rhs.data.length()) {
977 data = rhs.data;
978 }
979 prefetch_data = rhs.prefetch_data;
980 pg_ver = rhs.pg_ver;
981 objv_tracker = rhs.objv_tracker;
982 }
983};
984
985struct RGWPoolIterCtx {
986 librados::IoCtx io_ctx;
987 librados::NObjectIterator iter;
988};
989
990struct RGWListRawObjsCtx {
991 bool initialized;
992 RGWPoolIterCtx iter_ctx;
993
994 RGWListRawObjsCtx() : initialized(false) {}
995};
996
7c673cae
FG
997struct objexp_hint_entry {
998 string tenant;
999 string bucket_name;
1000 string bucket_id;
1001 rgw_obj_key obj_key;
1002 ceph::real_time exp_time;
1003
1004 void encode(bufferlist& bl) const {
1005 ENCODE_START(2, 1, bl);
11fdf7f2
TL
1006 encode(bucket_name, bl);
1007 encode(bucket_id, bl);
1008 encode(obj_key, bl);
1009 encode(exp_time, bl);
1010 encode(tenant, bl);
7c673cae
FG
1011 ENCODE_FINISH(bl);
1012 }
1013
11fdf7f2 1014 void decode(bufferlist::const_iterator& bl) {
7c673cae
FG
1015 // XXX Do we want DECODE_START_LEGACY_COMPAT_LEN(2, 1, 1, bl); ?
1016 DECODE_START(2, bl);
11fdf7f2
TL
1017 decode(bucket_name, bl);
1018 decode(bucket_id, bl);
1019 decode(obj_key, bl);
1020 decode(exp_time, bl);
7c673cae 1021 if (struct_v >= 2) {
11fdf7f2 1022 decode(tenant, bl);
7c673cae
FG
1023 } else {
1024 tenant.clear();
1025 }
1026 DECODE_FINISH(bl);
1027 }
1028};
1029WRITE_CLASS_ENCODER(objexp_hint_entry)
1030
7c673cae
FG
1031class RGWDataChangesLog;
1032class RGWMetaSyncStatusManager;
1033class RGWDataSyncStatusManager;
7c673cae 1034class RGWCoroutinesManagerRegistry;
7c673cae
FG
1035
1036class RGWGetBucketStats_CB : public RefCountedObject {
1037protected:
1038 rgw_bucket bucket;
1039 map<RGWObjCategory, RGWStorageStats> *stats;
1040public:
224ce89b 1041 explicit RGWGetBucketStats_CB(const rgw_bucket& _bucket) : bucket(_bucket), stats(NULL) {}
7c673cae
FG
1042 ~RGWGetBucketStats_CB() override {}
1043 virtual void handle_response(int r) = 0;
1044 virtual void set_response(map<RGWObjCategory, RGWStorageStats> *_stats) {
1045 stats = _stats;
1046 }
1047};
1048
1049class RGWGetUserStats_CB : public RefCountedObject {
1050protected:
1051 rgw_user user;
1052 RGWStorageStats stats;
1053public:
1054 explicit RGWGetUserStats_CB(const rgw_user& _user) : user(_user) {}
1055 ~RGWGetUserStats_CB() override {}
1056 virtual void handle_response(int r) = 0;
1057 virtual void set_response(RGWStorageStats& _stats) {
1058 stats = _stats;
1059 }
1060};
1061
1062class RGWGetDirHeader_CB;
1063class RGWGetUserHeader_CB;
1064
11fdf7f2
TL
1065class RGWObjectCtx {
1066 RGWRados *store;
1067 RWLock lock{"RGWObjectCtx"};
1068 void *s{nullptr};
7c673cae 1069
11fdf7f2 1070 std::map<rgw_obj, RGWObjState> objs_state;
7c673cae 1071public:
11fdf7f2
TL
1072 explicit RGWObjectCtx(RGWRados *_store) : store(_store) {}
1073 explicit RGWObjectCtx(RGWRados *_store, void *_s) : store(_store), s(_s) {}
7c673cae 1074
11fdf7f2
TL
1075 void *get_private() {
1076 return s;
1077 }
7c673cae 1078
11fdf7f2
TL
1079 RGWRados *get_store() {
1080 return store;
1081 }
7c673cae 1082
11fdf7f2
TL
1083 RGWObjState *get_state(const rgw_obj& obj) {
1084 RGWObjState *result;
1085 typename std::map<rgw_obj, RGWObjState>::iterator iter;
7c673cae
FG
1086 lock.get_read();
1087 assert (!obj.empty());
1088 iter = objs_state.find(obj);
1089 if (iter != objs_state.end()) {
1090 result = &iter->second;
1091 lock.unlock();
1092 } else {
1093 lock.unlock();
1094 lock.get_write();
1095 result = &objs_state[obj];
1096 lock.unlock();
1097 }
1098 return result;
1099 }
1100
11fdf7f2 1101 void set_atomic(rgw_obj& obj) {
7c673cae
FG
1102 RWLock::WLocker wl(lock);
1103 assert (!obj.empty());
1104 objs_state[obj].is_atomic = true;
1105 }
11fdf7f2 1106 void set_prefetch_data(const rgw_obj& obj) {
7c673cae
FG
1107 RWLock::WLocker wl(lock);
1108 assert (!obj.empty());
1109 objs_state[obj].prefetch_data = true;
1110 }
11fdf7f2
TL
1111
1112 void invalidate(const rgw_obj& obj) {
7c673cae
FG
1113 RWLock::WLocker wl(lock);
1114 auto iter = objs_state.find(obj);
1115 if (iter == objs_state.end()) {
1116 return;
1117 }
1118 bool is_atomic = iter->second.is_atomic;
1119 bool prefetch_data = iter->second.prefetch_data;
1120
1121 objs_state.erase(iter);
1122
1123 if (is_atomic || prefetch_data) {
11fdf7f2
TL
1124 auto& state = objs_state[obj];
1125 state.is_atomic = is_atomic;
1126 state.prefetch_data = prefetch_data;
7c673cae
FG
1127 }
1128 }
1129};
1130
7c673cae
FG
1131class RGWAsyncRadosProcessor;
1132
1133template <class T>
1134class RGWChainedCacheImpl;
1135
1136struct bucket_info_entry {
1137 RGWBucketInfo info;
1138 real_time mtime;
1139 map<string, bufferlist> attrs;
1140};
1141
1142struct tombstone_entry {
1143 ceph::real_time mtime;
1144 uint32_t zone_short_id;
1145 uint64_t pg_ver;
1146
1147 tombstone_entry() = default;
11fdf7f2 1148 explicit tombstone_entry(const RGWObjState& state)
7c673cae
FG
1149 : mtime(state.mtime), zone_short_id(state.zone_short_id),
1150 pg_ver(state.pg_ver) {}
1151};
1152
31f18b77
FG
1153class RGWIndexCompletionManager;
1154
3a9019d9 1155class RGWRados : public AdminSocketHook
7c673cae
FG
1156{
1157 friend class RGWGC;
1158 friend class RGWMetaNotifier;
1159 friend class RGWDataNotifier;
1160 friend class RGWLC;
1161 friend class RGWObjectExpirer;
1162 friend class RGWMetaSyncProcessorThread;
1163 friend class RGWDataSyncProcessorThread;
31f18b77
FG
1164 friend class RGWReshard;
1165 friend class RGWBucketReshard;
f64942e4 1166 friend class RGWBucketReshardLock;
31f18b77 1167 friend class BucketIndexLockGuard;
d2e6a577 1168 friend class RGWCompleteMultipart;
7c673cae 1169
11fdf7f2
TL
1170 static constexpr const char* admin_commands[4][3] = {
1171 { "cache list",
1172 "cache list name=filter,type=CephString,req=false",
1173 "cache list [filter_str]: list object cache, possibly matching substrings" },
1174 { "cache inspect",
1175 "cache inspect name=target,type=CephString,req=true",
1176 "cache inspect target: print cache element" },
1177 { "cache erase",
1178 "cache erase name=target,type=CephString,req=true",
1179 "cache erase target: erase element from cache" },
1180 { "cache zap",
1181 "cache zap",
1182 "cache zap: erase all elements from cache" }
1183 };
3a9019d9 1184
7c673cae
FG
1185 /** Open the pool used as root for this gateway */
1186 int open_root_pool_ctx();
1187 int open_gc_pool_ctx();
1188 int open_lc_pool_ctx();
1189 int open_objexp_pool_ctx();
31f18b77 1190 int open_reshard_pool_ctx();
7c673cae
FG
1191
1192 int open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx);
1193 int open_bucket_index_ctx(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx);
1194 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx, string& bucket_oid);
1195 int open_bucket_index_base(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
1196 string& bucket_oid_base);
1197 int open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
1198 const string& obj_key, string *bucket_obj, int *shard_id);
1199 int open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
1200 int shard_id, string *bucket_obj);
1201 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
1202 map<int, string>& bucket_objs, int shard_id = -1, map<int, string> *bucket_instance_ids = NULL);
1203 template<typename T>
1204 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
1205 map<int, string>& oids, map<int, T>& bucket_objs,
1206 int shard_id = -1, map<int, string> *bucket_instance_ids = NULL);
1207 void build_bucket_index_marker(const string& shard_id_str, const string& shard_marker,
1208 string *marker);
1209
1210 void get_bucket_instance_ids(const RGWBucketInfo& bucket_info, int shard_id, map<int, string> *result);
1211
1212 std::atomic<int64_t> max_req_id = { 0 };
1213 Mutex lock;
7c673cae
FG
1214 SafeTimer *timer;
1215
1216 RGWGC *gc;
1217 RGWLC *lc;
1218 RGWObjectExpirer *obj_expirer;
1219 bool use_gc_thread;
1220 bool use_lc_thread;
1221 bool quota_threads;
1222 bool run_sync_thread;
31f18b77 1223 bool run_reshard_thread;
7c673cae
FG
1224
1225 RGWAsyncRadosProcessor* async_rados;
1226
1227 RGWMetaNotifier *meta_notifier;
1228 RGWDataNotifier *data_notifier;
1229 RGWMetaSyncProcessorThread *meta_sync_processor_thread;
11fdf7f2 1230 RGWSyncTraceManager *sync_tracer = nullptr;
7c673cae
FG
1231 map<string, RGWDataSyncProcessorThread *> data_sync_processor_threads;
1232
b32b8144 1233 boost::optional<rgw::BucketTrimManager> bucket_trim;
7c673cae
FG
1234 RGWSyncLogTrimThread *sync_log_trimmer{nullptr};
1235
1236 Mutex meta_sync_thread_lock;
1237 Mutex data_sync_thread_lock;
1238
7c673cae 1239 librados::IoCtx root_pool_ctx; // .rgw
11fdf7f2
TL
1240
1241 double inject_notify_timeout_probability = 0;
1242 unsigned max_notify_retries = 0;
7c673cae
FG
1243
1244 friend class RGWWatcher;
1245
1246 Mutex bucket_id_lock;
1247
1248 // This field represents the number of bucket index object shards
1249 uint32_t bucket_index_max_shards;
1250
1251 int get_obj_head_ioctx(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx *ioctx);
1252 int get_obj_head_ref(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_rados_ref *ref);
224ce89b 1253 int get_system_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
7c673cae
FG
1254 uint64_t max_bucket_id;
1255
1256 int get_olh_target_state(RGWObjectCtx& rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
1257 RGWObjState *olh_state, RGWObjState **target_state);
7c673cae
FG
1258 int get_obj_state_impl(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
1259 bool follow_olh, bool assume_noent = false);
1260 int append_atomic_test(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
1261 librados::ObjectOperation& op, RGWObjState **state);
11fdf7f2 1262 int append_atomic_test(const RGWObjState* astate, librados::ObjectOperation& op);
7c673cae
FG
1263
1264 int update_placement_map();
1265 int store_bucket_info(RGWBucketInfo& info, map<string, bufferlist> *pattrs, RGWObjVersionTracker *objv_tracker, bool exclusive);
1266
1267 void remove_rgw_head_obj(librados::ObjectWriteOperation& op);
1268 void cls_obj_check_prefix_exist(librados::ObjectOperation& op, const string& prefix, bool fail_if_exist);
1269 void cls_obj_check_mtime(librados::ObjectOperation& op, const real_time& mtime, bool high_precision_time, RGWCheckMTimeType type);
1270protected:
1271 CephContext *cct;
1272
1273 std::vector<librados::Rados> rados;
1274 uint32_t next_rados_handle;
1275 RWLock handle_lock;
1276 std::map<pthread_t, int> rados_map;
1277
1278 using RGWChainedCacheImpl_bucket_info_entry = RGWChainedCacheImpl<bucket_info_entry>;
1279 RGWChainedCacheImpl_bucket_info_entry *binfo_cache;
1280
1281 using tombstone_cache_t = lru_map<rgw_obj, tombstone_entry>;
1282 tombstone_cache_t *obj_tombstone_cache;
1283
1284 librados::IoCtx gc_pool_ctx; // .rgw.gc
1285 librados::IoCtx lc_pool_ctx; // .rgw.lc
1286 librados::IoCtx objexp_pool_ctx;
31f18b77 1287 librados::IoCtx reshard_pool_ctx;
7c673cae 1288
11fdf7f2 1289 bool pools_initialized;
7c673cae 1290
11fdf7f2 1291 RGWQuotaHandler *quota_handler;
7c673cae 1292
11fdf7f2 1293 RGWCoroutinesManagerRegistry *cr_registry;
7c673cae 1294
11fdf7f2
TL
1295 RGWSyncModuleInstanceRef sync_module;
1296 bool writeable_zone{false};
7c673cae 1297
11fdf7f2 1298 RGWIndexCompletionManager *index_completion_manager{nullptr};
7c673cae 1299
11fdf7f2
TL
1300 bool use_cache{false};
1301public:
1302 RGWRados(): lock("rados_timer_lock"), timer(NULL),
1303 gc(NULL), lc(NULL), obj_expirer(NULL), use_gc_thread(false), use_lc_thread(false), quota_threads(false),
1304 run_sync_thread(false), run_reshard_thread(false), async_rados(nullptr), meta_notifier(NULL),
1305 data_notifier(NULL), meta_sync_processor_thread(NULL),
1306 meta_sync_thread_lock("meta_sync_thread_lock"), data_sync_thread_lock("data_sync_thread_lock"),
1307 bucket_id_lock("rados_bucket_id"),
1308 bucket_index_max_shards(0),
1309 max_bucket_id(0), cct(NULL),
1310 next_rados_handle(0),
1311 handle_lock("rados_handle_lock"),
1312 binfo_cache(NULL), obj_tombstone_cache(nullptr),
1313 pools_initialized(false),
1314 quota_handler(NULL),
1315 cr_registry(NULL),
1316 meta_mgr(NULL), data_log(NULL), reshard(NULL) {}
7c673cae 1317
11fdf7f2
TL
1318 RGWRados& set_use_cache(bool status) {
1319 use_cache = status;
1320 return *this;
7c673cae
FG
1321 }
1322
11fdf7f2
TL
1323 RGWLC *get_lc() {
1324 return lc;
7c673cae
FG
1325 }
1326
11fdf7f2
TL
1327 RGWRados& set_run_gc_thread(bool _use_gc_thread) {
1328 use_gc_thread = _use_gc_thread;
1329 return *this;
7c673cae
FG
1330 }
1331
11fdf7f2
TL
1332 RGWRados& set_run_lc_thread(bool _use_lc_thread) {
1333 use_lc_thread = _use_lc_thread;
1334 return *this;
7c673cae
FG
1335 }
1336
11fdf7f2
TL
1337 RGWRados& set_run_quota_threads(bool _run_quota_threads) {
1338 quota_threads = _run_quota_threads;
1339 return *this;
7c673cae
FG
1340 }
1341
11fdf7f2
TL
1342 RGWRados& set_run_sync_thread(bool _run_sync_thread) {
1343 run_sync_thread = _run_sync_thread;
1344 return *this;
7c673cae
FG
1345 }
1346
11fdf7f2
TL
1347 RGWRados& set_run_reshard_thread(bool _run_reshard_thread) {
1348 run_reshard_thread = _run_reshard_thread;
1349 return *this;
7c673cae
FG
1350 }
1351
11fdf7f2
TL
1352 uint64_t get_new_req_id() {
1353 return ++max_req_id;
7c673cae
FG
1354 }
1355
11fdf7f2
TL
1356 librados::IoCtx* get_lc_pool_ctx() {
1357 return &lc_pool_ctx;
7c673cae 1358 }
11fdf7f2
TL
1359 void set_context(CephContext *_cct) {
1360 cct = _cct;
7c673cae 1361 }
31f18b77 1362
11fdf7f2
TL
1363 RGWServices svc;
1364
1365 /**
1366 * AmazonS3 errors contain a HostId string, but it is an opaque base64 blob; we
1367 * try to be more transparent. This has a wrapper so we can update it when the zonegroup/zone changes.
1368 */
1369 string host_id;
31f18b77 1370
7c673cae
FG
1371 // pulls missing periods for period_history
1372 std::unique_ptr<RGWPeriodPuller> period_puller;
1373 // maintains a connected history of periods
1374 std::unique_ptr<RGWPeriodHistory> period_history;
1375
1376 RGWAsyncRadosProcessor* get_async_rados() const { return async_rados; };
1377
1378 RGWMetadataManager *meta_mgr;
1379
1380 RGWDataChangesLog *data_log;
1381
31f18b77
FG
1382 RGWReshard *reshard;
1383 std::shared_ptr<RGWReshardWait> reshard_wait;
1384
7c673cae
FG
1385 virtual ~RGWRados() = default;
1386
1387 tombstone_cache_t *get_tombstone_cache() {
1388 return obj_tombstone_cache;
1389 }
7c673cae
FG
1390 const RGWSyncModuleInstanceRef& get_sync_module() {
1391 return sync_module;
1392 }
11fdf7f2
TL
1393 RGWSyncTraceManager *get_sync_tracer() {
1394 return sync_tracer;
1395 }
7c673cae
FG
1396
1397 int get_required_alignment(const rgw_pool& pool, uint64_t *alignment);
11fdf7f2
TL
1398 void get_max_aligned_size(uint64_t size, uint64_t alignment, uint64_t *max_size);
1399 int get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size, uint64_t *palignment = nullptr);
1400 int get_max_chunk_size(const rgw_placement_rule& placement_rule, const rgw_obj& obj, uint64_t *max_chunk_size, uint64_t *palignment = nullptr);
7c673cae
FG
1401
1402 uint32_t get_max_bucket_shards() {
31f18b77 1403 return rgw_shards_max();
7c673cae
FG
1404 }
1405
181888fb 1406
224ce89b 1407 int get_raw_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
7c673cae 1408
181888fb
FG
1409 int list_raw_objects_init(const rgw_pool& pool, const string& marker, RGWListRawObjsCtx *ctx);
1410 int list_raw_objects_next(const string& prefix_filter, int max,
1411 RGWListRawObjsCtx& ctx, list<string>& oids,
1412 bool *is_truncated);
7c673cae
FG
1413 int list_raw_objects(const rgw_pool& pool, const string& prefix_filter, int max,
1414 RGWListRawObjsCtx& ctx, list<string>& oids,
1415 bool *is_truncated);
181888fb 1416 string list_raw_objs_get_cursor(RGWListRawObjsCtx& ctx);
7c673cae 1417
7c673cae
FG
1418 CephContext *ctx() { return cct; }
1419 /** do all necessary setup of the storage device */
11fdf7f2 1420 int initialize(CephContext *_cct) {
7c673cae 1421 set_context(_cct);
7c673cae
FG
1422 return initialize();
1423 }
1424 /** Initialize the RADOS instance and prepare to do other ops */
11fdf7f2
TL
1425 int init_svc(bool raw);
1426 int init_rados();
7c673cae 1427 int init_complete();
7c673cae
FG
1428 int initialize();
1429 void finalize();
1430
224ce89b 1431 int register_to_service_map(const string& daemon_type, const map<string, string>& meta);
11fdf7f2 1432 int update_service_map(std::map<std::string, std::string>&& status);
7c673cae
FG
1433
1434 /// list logs
1435 int log_list_init(const string& prefix, RGWAccessHandle *handle);
1436 int log_list_next(RGWAccessHandle handle, string *name);
1437
1438 /// remove log
1439 int log_remove(const string& name);
1440
1441 /// show log
1442 int log_show_init(const string& name, RGWAccessHandle *handle);
1443 int log_show_next(RGWAccessHandle handle, rgw_log_entry *entry);
1444
1445 // log bandwidth info
1446 int log_usage(map<rgw_user_bucket, RGWUsageBatch>& usage_info);
11fdf7f2
TL
1447 int read_usage(const rgw_user& user, const string& bucket_name, uint64_t start_epoch, uint64_t end_epoch,
1448 uint32_t max_entries, bool *is_truncated, RGWUsageIter& read_iter, map<rgw_user_bucket,
1449 rgw_usage_log_entry>& usage);
1450 int trim_usage(const rgw_user& user, const string& bucket_name, uint64_t start_epoch, uint64_t end_epoch);
1451 int clear_usage();
7c673cae
FG
1452
1453 int create_pool(const rgw_pool& pool);
1454
7c673cae 1455 int init_bucket_index(RGWBucketInfo& bucket_info, int num_shards);
f64942e4 1456 int clean_bucket_index(RGWBucketInfo& bucket_info, int num_shards);
7c673cae
FG
1457 void create_bucket_id(string *bucket_id);
1458
11fdf7f2
TL
1459 bool get_obj_data_pool(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_pool *pool);
1460 bool obj_to_raw(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj);
7c673cae 1461
11fdf7f2 1462 int create_bucket(const RGWUserInfo& owner, rgw_bucket& bucket,
7c673cae 1463 const string& zonegroup_id,
11fdf7f2 1464 const rgw_placement_rule& placement_rule,
7c673cae
FG
1465 const string& swift_ver_location,
1466 const RGWQuotaInfo * pquota_info,
1467 map<std::string,bufferlist>& attrs,
1468 RGWBucketInfo& bucket_info,
1469 obj_version *pobjv,
1470 obj_version *pep_objv,
1471 ceph::real_time creation_time,
1472 rgw_bucket *master_bucket,
1473 uint32_t *master_num_shards,
1474 bool exclusive = true);
7c673cae
FG
1475
1476 RGWCoroutinesManagerRegistry *get_cr_registry() { return cr_registry; }
1477
7c673cae
FG
1478 struct BucketShard {
1479 RGWRados *store;
1480 rgw_bucket bucket;
1481 int shard_id;
1482 librados::IoCtx index_ctx;
1483 string bucket_obj;
1484
1485 explicit BucketShard(RGWRados *_store) : store(_store), shard_id(-1) {}
f64942e4
AA
1486 int init(const rgw_bucket& _bucket, const rgw_obj& obj, RGWBucketInfo* out);
1487 int init(const rgw_bucket& _bucket, int sid, RGWBucketInfo* out);
a8e16298 1488 int init(const RGWBucketInfo& bucket_info, const rgw_obj& obj);
b32b8144 1489 int init(const RGWBucketInfo& bucket_info, int sid);
7c673cae
FG
1490 };
1491
1492 class Object {
1493 RGWRados *store;
1494 RGWBucketInfo bucket_info;
1495 RGWObjectCtx& ctx;
1496 rgw_obj obj;
1497
1498 BucketShard bs;
1499
1500 RGWObjState *state;
1501
1502 bool versioning_disabled;
1503
1504 bool bs_initialized;
1505
1506 protected:
1507 int get_state(RGWObjState **pstate, bool follow_olh, bool assume_noent = false);
1508 void invalidate_state();
1509
1510 int prepare_atomic_modification(librados::ObjectWriteOperation& op, bool reset_obj, const string *ptag,
181888fb 1511 const char *ifmatch, const char *ifnomatch, bool removal_op, bool modify_tail);
7c673cae
FG
1512 int complete_atomic_modification();
1513
1514 public:
1515 Object(RGWRados *_store, const RGWBucketInfo& _bucket_info, RGWObjectCtx& _ctx, const rgw_obj& _obj) : store(_store), bucket_info(_bucket_info),
1516 ctx(_ctx), obj(_obj), bs(store),
1517 state(NULL), versioning_disabled(false),
1518 bs_initialized(false) {}
1519
1520 RGWRados *get_store() { return store; }
1521 rgw_obj& get_obj() { return obj; }
1522 RGWObjectCtx& get_ctx() { return ctx; }
1523 RGWBucketInfo& get_bucket_info() { return bucket_info; }
1524 int get_manifest(RGWObjManifest **pmanifest);
1525
1526 int get_bucket_shard(BucketShard **pbs) {
1527 if (!bs_initialized) {
f64942e4
AA
1528 int r =
1529 bs.init(bucket_info.bucket, obj, nullptr /* no RGWBucketInfo */);
7c673cae
FG
1530 if (r < 0) {
1531 return r;
1532 }
1533 bs_initialized = true;
1534 }
1535 *pbs = &bs;
1536 return 0;
1537 }
1538
1539 void set_versioning_disabled(bool status) {
1540 versioning_disabled = status;
1541 }
1542
1543 bool versioning_enabled() {
1544 return (!versioning_disabled && bucket_info.versioning_enabled());
1545 }
1546
1547 struct Read {
1548 RGWRados::Object *source;
1549
1550 struct GetObjState {
11fdf7f2
TL
1551 map<rgw_pool, librados::IoCtx> io_ctxs;
1552 rgw_pool cur_pool;
1553 librados::IoCtx *cur_ioctx{nullptr};
7c673cae
FG
1554 rgw_obj obj;
1555 rgw_raw_obj head_obj;
1556 } state;
1557
1558 struct ConditionParams {
1559 const ceph::real_time *mod_ptr;
1560 const ceph::real_time *unmod_ptr;
1561 bool high_precision_time;
1562 uint32_t mod_zone_id;
1563 uint64_t mod_pg_ver;
1564 const char *if_match;
1565 const char *if_nomatch;
1566
1567 ConditionParams() :
1568 mod_ptr(NULL), unmod_ptr(NULL), high_precision_time(false), mod_zone_id(0), mod_pg_ver(0),
1569 if_match(NULL), if_nomatch(NULL) {}
1570 } conds;
1571
1572 struct Params {
1573 ceph::real_time *lastmod;
1574 uint64_t *obj_size;
1575 map<string, bufferlist> *attrs;
7c673cae 1576
31f18b77 1577 Params() : lastmod(NULL), obj_size(NULL), attrs(NULL) {}
7c673cae
FG
1578 } params;
1579
1580 explicit Read(RGWRados::Object *_source) : source(_source) {}
1581
1582 int prepare();
1583 static int range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end);
1584 int read(int64_t ofs, int64_t end, bufferlist& bl);
1585 int iterate(int64_t ofs, int64_t end, RGWGetDataCB *cb);
1586 int get_attr(const char *name, bufferlist& dest);
1587 };
1588
1589 struct Write {
1590 RGWRados::Object *target;
1591
1592 struct MetaParams {
1593 ceph::real_time *mtime;
1594 map<std::string, bufferlist>* rmattrs;
1595 const bufferlist *data;
1596 RGWObjManifest *manifest;
1597 const string *ptag;
1598 list<rgw_obj_index_key> *remove_objs;
1599 ceph::real_time set_mtime;
1600 rgw_user owner;
1601 RGWObjCategory category;
1602 int flags;
1603 const char *if_match;
1604 const char *if_nomatch;
11fdf7f2 1605 std::optional<uint64_t> olh_epoch;
7c673cae
FG
1606 ceph::real_time delete_at;
1607 bool canceled;
1608 const string *user_data;
31f18b77 1609 rgw_zone_set *zones_trace;
181888fb 1610 bool modify_tail;
3efd9988 1611 bool completeMultipart;
11fdf7f2 1612 bool appendable;
7c673cae
FG
1613
1614 MetaParams() : mtime(NULL), rmattrs(NULL), data(NULL), manifest(NULL), ptag(NULL),
11fdf7f2 1615 remove_objs(NULL), category(RGWObjCategory::Main), flags(0),
91327a77 1616 if_match(NULL), if_nomatch(NULL), canceled(false), user_data(nullptr), zones_trace(nullptr),
11fdf7f2 1617 modify_tail(false), completeMultipart(false), appendable(false) {}
7c673cae
FG
1618 } meta;
1619
1620 explicit Write(RGWRados::Object *_target) : target(_target) {}
1621
1622 int _do_write_meta(uint64_t size, uint64_t accounted_size,
1623 map<std::string, bufferlist>& attrs,
181888fb 1624 bool modify_tail, bool assume_noent,
7c673cae
FG
1625 void *index_op);
1626 int write_meta(uint64_t size, uint64_t accounted_size,
1627 map<std::string, bufferlist>& attrs);
1628 int write_data(const char *data, uint64_t ofs, uint64_t len, bool exclusive);
11fdf7f2
TL
1629 const req_state* get_req_state() {
1630 return (req_state *)target->get_ctx().get_private();
1631 }
7c673cae
FG
1632 };
1633
1634 struct Delete {
1635 RGWRados::Object *target;
1636
1637 struct DeleteParams {
1638 rgw_user bucket_owner;
1639 int versioning_status;
1640 ACLOwner obj_owner; /* needed for creation of deletion marker */
1641 uint64_t olh_epoch;
1642 string marker_version_id;
1643 uint32_t bilog_flags;
1644 list<rgw_obj_index_key> *remove_objs;
1645 ceph::real_time expiration_time;
1646 ceph::real_time unmod_since;
1647 ceph::real_time mtime; /* for setting delete marker mtime */
1648 bool high_precision_time;
31f18b77 1649 rgw_zone_set *zones_trace;
7c673cae 1650
31f18b77 1651 DeleteParams() : versioning_status(0), olh_epoch(0), bilog_flags(0), remove_objs(NULL), high_precision_time(false), zones_trace(nullptr) {}
7c673cae
FG
1652 } params;
1653
1654 struct DeleteResult {
1655 bool delete_marker;
1656 string version_id;
1657
1658 DeleteResult() : delete_marker(false) {}
1659 } result;
1660
1661 explicit Delete(RGWRados::Object *_target) : target(_target) {}
1662
1663 int delete_obj();
1664 };
1665
1666 struct Stat {
1667 RGWRados::Object *source;
1668
1669 struct Result {
1670 rgw_obj obj;
1671 RGWObjManifest manifest;
1672 bool has_manifest;
1673 uint64_t size;
1674 struct timespec mtime;
1675 map<string, bufferlist> attrs;
1676
1677 Result() : has_manifest(false), size(0) {}
1678 } result;
1679
1680 struct State {
1681 librados::IoCtx io_ctx;
1682 librados::AioCompletion *completion;
1683 int ret;
1684
1685 State() : completion(NULL), ret(0) {}
1686 } state;
1687
1688
1689 explicit Stat(RGWRados::Object *_source) : source(_source) {}
1690
1691 int stat_async();
1692 int wait();
1693 int stat();
1694 private:
1695 int finish();
1696 };
1697 };
1698
1699 class Bucket {
1700 RGWRados *store;
1701 RGWBucketInfo bucket_info;
1702 rgw_bucket& bucket;
1703 int shard_id;
1704
1705 public:
1706 Bucket(RGWRados *_store, const RGWBucketInfo& _bucket_info) : store(_store), bucket_info(_bucket_info), bucket(bucket_info.bucket),
1707 shard_id(RGW_NO_SHARD) {}
1708 RGWRados *get_store() { return store; }
1709 rgw_bucket& get_bucket() { return bucket; }
1710 RGWBucketInfo& get_bucket_info() { return bucket_info; }
1711
31f18b77
FG
1712 int update_bucket_id(const string& new_bucket_id);
1713
7c673cae
FG
1714 int get_shard_id() { return shard_id; }
1715 void set_shard_id(int id) {
1716 shard_id = id;
1717 }
1718
1719 class UpdateIndex {
1720 RGWRados::Bucket *target;
1721 string optag;
1722 rgw_obj obj;
1723 uint16_t bilog_flags{0};
1724 BucketShard bs;
1725 bool bs_initialized{false};
1726 bool blind;
1727 bool prepared{false};
31f18b77
FG
1728 rgw_zone_set *zones_trace{nullptr};
1729
1730 int init_bs() {
f64942e4
AA
1731 int r =
1732 bs.init(target->get_bucket(), obj, nullptr /* no RGWBucketInfo */);
31f18b77
FG
1733 if (r < 0) {
1734 return r;
1735 }
1736 bs_initialized = true;
1737 return 0;
1738 }
1739
1740 void invalidate_bs() {
1741 bs_initialized = false;
1742 }
1743
1744 int guard_reshard(BucketShard **pbs, std::function<int(BucketShard *)> call);
7c673cae
FG
1745 public:
1746
1747 UpdateIndex(RGWRados::Bucket *_target, const rgw_obj& _obj) : target(_target), obj(_obj),
1748 bs(target->get_store()) {
1749 blind = (target->get_bucket_info().index_type == RGWBIType_Indexless);
1750 }
1751
1752 int get_bucket_shard(BucketShard **pbs) {
1753 if (!bs_initialized) {
31f18b77 1754 int r = init_bs();
7c673cae
FG
1755 if (r < 0) {
1756 return r;
1757 }
7c673cae
FG
1758 }
1759 *pbs = &bs;
1760 return 0;
1761 }
1762
1763 void set_bilog_flags(uint16_t flags) {
1764 bilog_flags = flags;
1765 }
31f18b77
FG
1766
1767 void set_zones_trace(rgw_zone_set *_zones_trace) {
1768 zones_trace = _zones_trace;
1769 }
7c673cae
FG
1770
1771 int prepare(RGWModifyOp, const string *write_tag);
1772 int complete(int64_t poolid, uint64_t epoch, uint64_t size,
1773 uint64_t accounted_size, ceph::real_time& ut,
1774 const string& etag, const string& content_type,
11fdf7f2 1775 const string& storage_class,
7c673cae 1776 bufferlist *acl_bl, RGWObjCategory category,
11fdf7f2 1777 list<rgw_obj_index_key> *remove_objs, const string *user_data = nullptr, bool appendable = false);
7c673cae
FG
1778 int complete_del(int64_t poolid, uint64_t epoch,
1779 ceph::real_time& removed_mtime, /* mtime of removed object */
1780 list<rgw_obj_index_key> *remove_objs);
1781 int cancel();
1782
1783 const string *get_optag() { return &optag; }
1784
1785 bool is_prepared() { return prepared; }
1adf2230
AA
1786 }; // class UpdateIndex
1787
1788 class List {
1789 protected:
7c673cae 1790
7c673cae
FG
1791 RGWRados::Bucket *target;
1792 rgw_obj_key next_marker;
1793
1adf2230
AA
1794 int list_objects_ordered(int64_t max,
1795 vector<rgw_bucket_dir_entry> *result,
1796 map<string, bool> *common_prefixes,
1797 bool *is_truncated);
1798 int list_objects_unordered(int64_t max,
1799 vector<rgw_bucket_dir_entry> *result,
1800 map<string, bool> *common_prefixes,
1801 bool *is_truncated);
1802
1803 public:
1804
7c673cae
FG
1805 struct Params {
1806 string prefix;
1807 string delim;
1808 rgw_obj_key marker;
1809 rgw_obj_key end_marker;
1810 string ns;
1811 bool enforce_ns;
1812 RGWAccessListFilter *filter;
1813 bool list_versions;
1adf2230
AA
1814 bool allow_unordered;
1815
1816 Params() :
1817 enforce_ns(true),
1818 filter(NULL),
1819 list_versions(false),
1820 allow_unordered(false)
1821 {}
7c673cae
FG
1822 } params;
1823
7c673cae
FG
1824 explicit List(RGWRados::Bucket *_target) : target(_target) {}
1825
1adf2230
AA
1826 int list_objects(int64_t max,
1827 vector<rgw_bucket_dir_entry> *result,
1828 map<string, bool> *common_prefixes,
1829 bool *is_truncated) {
1830 if (params.allow_unordered) {
1831 return list_objects_unordered(max, result, common_prefixes,
1832 is_truncated);
1833 } else {
1834 return list_objects_ordered(max, result, common_prefixes,
1835 is_truncated);
1836 }
1837 }
7c673cae
FG
1838 rgw_obj_key& get_next_marker() {
1839 return next_marker;
1840 }
1adf2230
AA
1841 }; // class List
1842 }; // class Bucket
7c673cae 1843
7c673cae
FG
1844 int on_last_entry_in_listing(RGWBucketInfo& bucket_info,
1845 const std::string& obj_prefix,
1846 const std::string& obj_delim,
1847 std::function<int(const rgw_bucket_dir_entry&)> handler);
1848
1849 bool swift_versioning_enabled(const RGWBucketInfo& bucket_info) const {
1850 return bucket_info.has_swift_versioning() &&
1851 bucket_info.swift_ver_location.size();
1852 }
1853
1854 int swift_versioning_copy(RGWObjectCtx& obj_ctx, /* in/out */
1855 const rgw_user& user, /* in */
1856 RGWBucketInfo& bucket_info, /* in */
1857 rgw_obj& obj); /* in */
11fdf7f2
TL
1858 int swift_versioning_restore(RGWSysObjectCtx& sysobj_ctx,
1859 RGWObjectCtx& obj_ctx, /* in/out */
7c673cae
FG
1860 const rgw_user& user, /* in */
1861 RGWBucketInfo& bucket_info, /* in */
1862 rgw_obj& obj, /* in */
1863 bool& restored); /* out */
1864 int copy_obj_to_remote_dest(RGWObjState *astate,
1865 map<string, bufferlist>& src_attrs,
1866 RGWRados::Object::Read& read_op,
1867 const rgw_user& user_id,
1868 rgw_obj& dest_obj,
1869 ceph::real_time *mtime);
1870
/// How the `attrs` argument of the copy/fetch calls is combined with the
/// source object's attributes (see the copy_obj() comment for details).
enum AttrsMod {
  ATTRSMOD_NONE    = 0,  // keep source attributes, ignore `attrs`
  ATTRSMOD_REPLACE = 1,  // use `attrs` only
  ATTRSMOD_MERGE   = 2   // source attributes, overridden by `attrs` on conflict
};
1876
11fdf7f2 1877 int rewrite_obj(RGWBucketInfo& dest_bucket_info, const rgw_obj& obj);
7c673cae
FG
1878
1879 int stat_remote_obj(RGWObjectCtx& obj_ctx,
1880 const rgw_user& user_id,
7c673cae
FG
1881 req_info *info,
1882 const string& source_zone,
1883 rgw_obj& src_obj,
1884 RGWBucketInfo& src_bucket_info,
1885 real_time *src_mtime,
1886 uint64_t *psize,
1887 const real_time *mod_ptr,
1888 const real_time *unmod_ptr,
1889 bool high_precision_time,
1890 const char *if_match,
1891 const char *if_nomatch,
1892 map<string, bufferlist> *pattrs,
11fdf7f2 1893 map<string, string> *pheaders,
7c673cae
FG
1894 string *version_id,
1895 string *ptag,
1896 string *petag);
1897
1898 int fetch_remote_obj(RGWObjectCtx& obj_ctx,
1899 const rgw_user& user_id,
7c673cae
FG
1900 req_info *info,
1901 const string& source_zone,
11fdf7f2
TL
1902 const rgw_obj& dest_obj,
1903 const rgw_obj& src_obj,
7c673cae
FG
1904 RGWBucketInfo& dest_bucket_info,
1905 RGWBucketInfo& src_bucket_info,
11fdf7f2 1906 std::optional<rgw_placement_rule> dest_placement,
7c673cae
FG
1907 ceph::real_time *src_mtime,
1908 ceph::real_time *mtime,
1909 const ceph::real_time *mod_ptr,
1910 const ceph::real_time *unmod_ptr,
1911 bool high_precision_time,
1912 const char *if_match,
1913 const char *if_nomatch,
1914 AttrsMod attrs_mod,
1915 bool copy_if_newer,
1916 map<string, bufferlist>& attrs,
1917 RGWObjCategory category,
11fdf7f2 1918 std::optional<uint64_t> olh_epoch,
7c673cae 1919 ceph::real_time delete_at,
7c673cae 1920 string *ptag,
11fdf7f2 1921 string *petag,
7c673cae 1922 void (*progress_cb)(off_t, void *),
31f18b77
FG
1923 void *progress_data,
1924 rgw_zone_set *zones_trace= nullptr);
7c673cae
FG
1925 /**
1926 * Copy an object.
1927 * dest_obj: the object to copy into
1928 * src_obj: the object to copy from
1929 * attrs: usage depends on attrs_mod parameter
1930 * attrs_mod: the modification mode of the attrs, may have the following values:
1931 * ATTRSMOD_NONE - the attributes of the source object will be
1932 * copied without modifications, attrs parameter is ignored;
1933 * ATTRSMOD_REPLACE - new object will have the attributes provided by attrs
1934 * parameter, source object attributes are not copied;
1935 * ATTRSMOD_MERGE - any conflicting meta keys on the source object's attributes
1936 * are overwritten by values contained in attrs parameter.
7c673cae
FG
1937 * Returns: 0 on success, -ERR# otherwise.
1938 */
1939 int copy_obj(RGWObjectCtx& obj_ctx,
1940 const rgw_user& user_id,
7c673cae
FG
1941 req_info *info,
1942 const string& source_zone,
1943 rgw_obj& dest_obj,
1944 rgw_obj& src_obj,
1945 RGWBucketInfo& dest_bucket_info,
1946 RGWBucketInfo& src_bucket_info,
11fdf7f2 1947 const rgw_placement_rule& dest_placement,
7c673cae
FG
1948 ceph::real_time *src_mtime,
1949 ceph::real_time *mtime,
1950 const ceph::real_time *mod_ptr,
1951 const ceph::real_time *unmod_ptr,
1952 bool high_precision_time,
1953 const char *if_match,
1954 const char *if_nomatch,
1955 AttrsMod attrs_mod,
1956 bool copy_if_newer,
1957 map<std::string, bufferlist>& attrs,
1958 RGWObjCategory category,
1959 uint64_t olh_epoch,
1960 ceph::real_time delete_at,
1961 string *version_id,
1962 string *ptag,
11fdf7f2 1963 string *petag,
7c673cae
FG
1964 void (*progress_cb)(off_t, void *),
1965 void *progress_data);
1966
1967 int copy_obj_data(RGWObjectCtx& obj_ctx,
1968 RGWBucketInfo& dest_bucket_info,
11fdf7f2 1969 const rgw_placement_rule& dest_placement,
7c673cae 1970 RGWRados::Object::Read& read_op, off_t end,
11fdf7f2 1971 const rgw_obj& dest_obj,
7c673cae
FG
1972 ceph::real_time *mtime,
1973 ceph::real_time set_mtime,
1974 map<string, bufferlist>& attrs,
7c673cae
FG
1975 uint64_t olh_epoch,
1976 ceph::real_time delete_at,
11fdf7f2 1977 string *petag);
7c673cae 1978
11fdf7f2
TL
1979 int transition_obj(RGWObjectCtx& obj_ctx,
1980 RGWBucketInfo& bucket_info,
1981 rgw_obj& obj,
1982 const rgw_placement_rule& placement_rule,
1983 const real_time& mtime,
1984 uint64_t olh_epoch);
1985
7c673cae
FG
1986 int check_bucket_empty(RGWBucketInfo& bucket_info);
1987
1988 /**
1989 * Delete a bucket.
1990 * bucket: the name of the bucket to delete
1991 * Returns 0 on success, -ERR# otherwise.
1992 */
1993 int delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, bool check_empty = true);
1994
7c673cae
FG
1995 void wakeup_meta_sync_shards(set<int>& shard_ids);
1996 void wakeup_data_sync_shards(const string& source_zone, map<int, set<string> >& shard_ids);
1997
1998 RGWMetaSyncStatusManager* get_meta_sync_manager();
1999 RGWDataSyncStatusManager* get_data_sync_manager(const std::string& source_zone);
2000
2001 int set_bucket_owner(rgw_bucket& bucket, ACLOwner& owner);
2002 int set_buckets_enabled(std::vector<rgw_bucket>& buckets, bool enabled);
2003 int bucket_suspended(rgw_bucket& bucket, bool *suspended);
2004
2005 /** Delete an object.*/
2006 int delete_obj(RGWObjectCtx& obj_ctx,
2007 const RGWBucketInfo& bucket_owner,
2008 const rgw_obj& src_obj,
2009 int versioning_status,
2010 uint16_t bilog_flags = 0,
31f18b77
FG
2011 const ceph::real_time& expiration_time = ceph::real_time(),
2012 rgw_zone_set *zones_trace = nullptr);
7c673cae 2013
7c673cae
FG
2014 int delete_raw_obj(const rgw_raw_obj& obj);
2015
7c673cae
FG
2016 /** Remove an object from the bucket index */
2017 int delete_obj_index(const rgw_obj& obj);
2018
7c673cae
FG
2019 /**
2020 * Set an attr on an object.
2021 * bucket_info: info of the bucket holding the object
2022 * obj: the object to set the attr on
2023 * name: the attr to set
2024 * bl: the contents of the attr
2025 * Returns: 0 on success, -ERR# otherwise.
2026 */
2027 int set_attr(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj, const char *name, bufferlist& bl);
2028
2029 int set_attrs(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj,
2030 map<string, bufferlist>& attrs,
2031 map<string, bufferlist>* rmattrs);
2032
7c673cae
FG
2033 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
2034 bool follow_olh, bool assume_noent = false);
2035 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state) {
2036 return get_obj_state(rctx, bucket_info, obj, state, true);
2037 }
2038
11fdf7f2
TL
2039 using iterate_obj_cb = int (*)(const rgw_raw_obj&, off_t, off_t,
2040 off_t, bool, RGWObjState*, void*);
2041
2042 int iterate_obj(RGWObjectCtx& ctx, const RGWBucketInfo& bucket_info,
2043 const rgw_obj& obj, off_t ofs, off_t end,
2044 uint64_t max_chunk_size, iterate_obj_cb cb, void *arg);
7c673cae
FG
2045
2046 int flush_read_list(struct get_obj_data *d);
2047
11fdf7f2
TL
2048 int get_obj_iterate_cb(const rgw_raw_obj& read_obj, off_t obj_ofs,
2049 off_t read_ofs, off_t len, bool is_head_obj,
2050 RGWObjState *astate, void *arg);
7c673cae
FG
2051
2052 void get_obj_aio_completion_cb(librados::completion_t cb, void *arg);
2053
2054 /**
2055 * a simple object read without keeping state
2056 */
2057
11fdf7f2
TL
2058 int raw_obj_stat(rgw_raw_obj& obj, uint64_t *psize, ceph::real_time *pmtime, uint64_t *epoch,
2059 map<string, bufferlist> *attrs, bufferlist *first_chunk,
2060 RGWObjVersionTracker *objv_tracker);
7c673cae
FG
2061
2062 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectWriteOperation *op);
2063 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectReadOperation *op);
2064
f64942e4
AA
2065 int guard_reshard(BucketShard *bs,
2066 const rgw_obj& obj_instance,
2067 const RGWBucketInfo& bucket_info,
2068 std::function<int(BucketShard *)> call);
2069 int block_while_resharding(RGWRados::BucketShard *bs,
2070 string *new_bucket_id,
11fdf7f2
TL
2071 const RGWBucketInfo& bucket_info,
2072 optional_yield y);
31f18b77 2073
7c673cae
FG
2074 void bucket_index_guard_olh_op(RGWObjState& olh_state, librados::ObjectOperation& op);
2075 int olh_init_modification(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
2076 int olh_init_modification_impl(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
2077 int bucket_index_link_olh(const RGWBucketInfo& bucket_info, RGWObjState& olh_state,
2078 const rgw_obj& obj_instance, bool delete_marker,
2079 const string& op_tag, struct rgw_bucket_dir_entry_meta *meta,
2080 uint64_t olh_epoch,
91327a77
AA
2081 ceph::real_time unmod_since, bool high_precision_time,
2082 rgw_zone_set *zones_trace = nullptr,
2083 bool log_data_change = false);
31f18b77 2084 int bucket_index_unlink_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj_instance, const string& op_tag, const string& olh_tag, uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
7c673cae
FG
2085 int bucket_index_read_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance, uint64_t ver_marker,
2086 map<uint64_t, vector<rgw_bucket_olh_log_entry> > *log, bool *is_truncated);
2087 int bucket_index_trim_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& obj_state, const rgw_obj& obj_instance, uint64_t ver);
2088 int bucket_index_clear_olh(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance);
2089 int apply_olh_log(RGWObjectCtx& ctx, RGWObjState& obj_state, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
2090 bufferlist& obj_tag, map<uint64_t, vector<rgw_bucket_olh_log_entry> >& log,
31f18b77
FG
2091 uint64_t *plast_ver, rgw_zone_set *zones_trace = nullptr);
2092 int update_olh(RGWObjectCtx& obj_ctx, RGWObjState *state, const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_zone_set *zones_trace = nullptr);
7c673cae 2093 int set_olh(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, bool delete_marker, rgw_bucket_dir_entry_meta *meta,
91327a77
AA
2094 uint64_t olh_epoch, ceph::real_time unmod_since, bool high_precision_time,
2095 rgw_zone_set *zones_trace = nullptr, bool log_data_change = false);
a8e16298
TL
2096 int repair_olh(RGWObjState* state, const RGWBucketInfo& bucket_info,
2097 const rgw_obj& obj);
7c673cae 2098 int unlink_obj_instance(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj,
31f18b77 2099 uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
7c673cae
FG
2100
2101 void check_pending_olh_entries(map<string, bufferlist>& pending_entries, map<string, bufferlist> *rm_pending_entries);
2102 int remove_olh_pending_entries(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, map<string, bufferlist>& pending_attrs);
2103 int follow_olh(const RGWBucketInfo& bucket_info, RGWObjectCtx& ctx, RGWObjState *state, const rgw_obj& olh_obj, rgw_obj *target);
2104 int get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWOLHInfo *olh);
2105
11fdf7f2 2106 void gen_rand_obj_instance_name(rgw_obj_key *target_key);
7c673cae
FG
2107 void gen_rand_obj_instance_name(rgw_obj *target);
2108
7c673cae
FG
2109 int update_containers_stats(map<string, RGWBucketEnt>& m);
2110 int append_async(rgw_raw_obj& obj, size_t size, bufferlist& bl);
2111
11fdf7f2 2112public:
7c673cae
FG
2113 void set_atomic(void *ctx, rgw_obj& obj) {
2114 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
11fdf7f2 2115 rctx->set_atomic(obj);
7c673cae 2116 }
11fdf7f2 2117 void set_prefetch_data(void *ctx, const rgw_obj& obj) {
7c673cae 2118 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
11fdf7f2 2119 rctx->set_prefetch_data(obj);
7c673cae 2120 }
7c673cae
FG
2121 int decode_policy(bufferlist& bl, ACLOwner *owner);
2122 int get_bucket_stats(RGWBucketInfo& bucket_info, int shard_id, string *bucket_ver, string *master_ver,
c07f9fc5 2123 map<RGWObjCategory, RGWStorageStats>& stats, string *max_marker, bool* syncstopped = NULL);
7c673cae
FG
2124 int get_bucket_stats_async(RGWBucketInfo& bucket_info, int shard_id, RGWGetBucketStats_CB *cb);
2125 int get_user_stats(const rgw_user& user, RGWStorageStats& stats);
2126 int get_user_stats_async(const rgw_user& user, RGWGetUserStats_CB *cb);
2127 void get_bucket_instance_obj(const rgw_bucket& bucket, rgw_raw_obj& obj);
2128 void get_bucket_meta_oid(const rgw_bucket& bucket, string& oid);
2129
2130 int put_bucket_entrypoint_info(const string& tenant_name, const string& bucket_name, RGWBucketEntryPoint& entry_point,
2131 bool exclusive, RGWObjVersionTracker& objv_tracker, ceph::real_time mtime,
2132 map<string, bufferlist> *pattrs);
2133 int put_bucket_instance_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, map<string, bufferlist> *pattrs);
11fdf7f2 2134 int get_bucket_entrypoint_info(RGWSysObjectCtx& obj_ctx, const string& tenant_name, const string& bucket_name,
7c673cae 2135 RGWBucketEntryPoint& entry_point, RGWObjVersionTracker *objv_tracker,
b32b8144
FG
2136 ceph::real_time *pmtime, map<string, bufferlist> *pattrs, rgw_cache_entry_info *cache_info = NULL,
2137 boost::optional<obj_version> refresh_version = boost::none);
11fdf7f2
TL
2138 int get_bucket_instance_info(RGWSysObjectCtx& obj_ctx, const string& meta_key, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs);
2139 int get_bucket_instance_info(RGWSysObjectCtx& obj_ctx, const rgw_bucket& bucket, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs);
2140 int get_bucket_instance_from_oid(RGWSysObjectCtx& obj_ctx, const string& oid, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs,
b32b8144
FG
2141 rgw_cache_entry_info *cache_info = NULL,
2142 boost::optional<obj_version> refresh_version = boost::none);
7c673cae 2143
11fdf7f2 2144 int convert_old_bucket_info(RGWSysObjectCtx& obj_ctx, const string& tenant_name, const string& bucket_name);
7c673cae 2145 static void make_bucket_entry_name(const string& tenant_name, const string& bucket_name, string& bucket_entry);
b32b8144
FG
2146
2147
2148private:
11fdf7f2 2149 int _get_bucket_info(RGWSysObjectCtx& obj_ctx, const string& tenant,
b32b8144
FG
2150 const string& bucket_name, RGWBucketInfo& info,
2151 real_time *pmtime,
2152 map<string, bufferlist> *pattrs,
2153 boost::optional<obj_version> refresh_version);
2154public:
2155
11fdf7f2
TL
2156 bool call(std::string_view command, const cmdmap_t& cmdmap,
2157 std::string_view format,
3a9019d9
FG
2158 bufferlist& out) override final;
2159
3a9019d9 2160protected:
3a9019d9
FG
2161 // `call_list` must iterate over all cache entries and call
2162 // `cache_list_dump_helper` with the supplied Formatter on any that
2163 // include `filter` as a substring.
2164 //
11fdf7f2 2165 void call_list(const std::optional<std::string>& filter,
3a9019d9
FG
2166 Formatter* format);
2167 // `call_inspect` must look up the requested target and, if found,
2168 // dump it to the supplied Formatter and return true. If not found,
2169 // it must return false.
2170 //
11fdf7f2 2171 bool call_inspect(const std::string& target, Formatter* format);
3a9019d9
FG
2172
2173 // `call_erase` must erase the requested target and return true. If
2174 // the requested target does not exist, it should return false.
11fdf7f2 2175 bool call_erase(const std::string& target);
3a9019d9
FG
2176
2177 // `call_zap` must erase the cache.
11fdf7f2 2178 void call_zap();
3a9019d9 2179public:
b32b8144 2180
11fdf7f2 2181 int get_bucket_info(RGWSysObjectCtx& obj_ctx,
b32b8144
FG
2182 const string& tenant_name, const string& bucket_name,
2183 RGWBucketInfo& info,
2184 ceph::real_time *pmtime, map<string, bufferlist> *pattrs = NULL);
2185
2186 // Returns true on successful refresh. Returns false if there was an
2187 // error or the version stored on the OSD is the same as that
2188 // presented in the BucketInfo structure.
2189 //
2190 int try_refresh_bucket_info(RGWBucketInfo& info,
2191 ceph::real_time *pmtime,
2192 map<string, bufferlist> *pattrs = nullptr);
2193
7c673cae 2194 int put_linked_bucket_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, obj_version *pep_objv,
b32b8144 2195 map<string, bufferlist> *pattrs, bool create_entry_point);
7c673cae 2196
31f18b77
FG
2197 int cls_obj_prepare_op(BucketShard& bs, RGWModifyOp op, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
2198 int cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModifyOp op, string& tag, int64_t pool, uint64_t epoch,
2199 rgw_bucket_dir_entry& ent, RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
2200 int cls_obj_complete_add(BucketShard& bs, const rgw_obj& obj, string& tag, int64_t pool, uint64_t epoch, rgw_bucket_dir_entry& ent,
2201 RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
7c673cae 2202 int cls_obj_complete_del(BucketShard& bs, string& tag, int64_t pool, uint64_t epoch, rgw_obj& obj,
31f18b77
FG
2203 ceph::real_time& removed_mtime, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
2204 int cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
7c673cae 2205 int cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_t timeout);
1adf2230 2206 int cls_bucket_list_ordered(RGWBucketInfo& bucket_info, int shard_id,
11fdf7f2
TL
2207 const rgw_obj_index_key& start,
2208 const string& prefix,
1adf2230
AA
2209 uint32_t num_entries, bool list_versions,
2210 map<string, rgw_bucket_dir_entry>& m,
2211 bool *is_truncated,
2212 rgw_obj_index_key *last_entry,
2213 bool (*force_check_filter)(const string& name) = nullptr);
2214 int cls_bucket_list_unordered(RGWBucketInfo& bucket_info, int shard_id,
11fdf7f2
TL
2215 const rgw_obj_index_key& start,
2216 const string& prefix,
1adf2230
AA
2217 uint32_t num_entries, bool list_versions,
2218 vector<rgw_bucket_dir_entry>& ent_list,
2219 bool *is_truncated, rgw_obj_index_key *last_entry,
2220 bool (*force_check_filter)(const string& name) = nullptr);
a8e16298 2221 int cls_bucket_head(const RGWBucketInfo& bucket_info, int shard_id, vector<rgw_bucket_dir_header>& headers, map<int, string> *bucket_instance_ids = NULL);
7c673cae
FG
2222 int cls_bucket_head_async(const RGWBucketInfo& bucket_info, int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio);
2223 int list_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, uint32_t max, std::list<rgw_bi_log_entry>& result, bool *truncated);
2224 int trim_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, string& end_marker);
c07f9fc5
FG
2225 int resync_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id);
2226 int stop_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id);
7c673cae
FG
2227 int get_bi_log_status(RGWBucketInfo& bucket_info, int shard_id, map<int, string>& max_marker);
2228
a8e16298
TL
2229 int bi_get_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_dir_entry *dirent);
2230 int bi_get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_olh_entry *olh);
2231 int bi_get(const RGWBucketInfo& bucket_info, const rgw_obj& obj, BIIndexType index_type, rgw_cls_bi_entry *entry);
7c673cae
FG
2232 void bi_put(librados::ObjectWriteOperation& op, BucketShard& bs, rgw_cls_bi_entry& entry);
2233 int bi_put(BucketShard& bs, rgw_cls_bi_entry& entry);
2234 int bi_put(rgw_bucket& bucket, rgw_obj& obj, rgw_cls_bi_entry& entry);
2235 int bi_list(rgw_bucket& bucket, int shard_id, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
2236 int bi_list(BucketShard& bs, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
2237 int bi_list(rgw_bucket& bucket, const string& obj_name, const string& marker, uint32_t max,
2238 list<rgw_cls_bi_entry> *entries, bool *is_truncated);
2239 int bi_remove(BucketShard& bs);
2240
2241 int cls_obj_usage_log_add(const string& oid, rgw_usage_log_info& info);
11fdf7f2
TL
2242 int cls_obj_usage_log_read(const string& oid, const string& user, const string& bucket, uint64_t start_epoch,
2243 uint64_t end_epoch, uint32_t max_entries, string& read_iter, map<rgw_user_bucket,
2244 rgw_usage_log_entry>& usage, bool *is_truncated);
2245 int cls_obj_usage_log_trim(const string& oid, const string& user, const string& bucket, uint64_t start_epoch,
2246 uint64_t end_epoch);
2247 int cls_obj_usage_log_clear(string& oid);
7c673cae
FG
2248
2249 int key_to_shard_id(const string& key, int max_shards);
2250 void shard_name(const string& prefix, unsigned max_shards, const string& key, string& name, int *shard_id);
2251 void shard_name(const string& prefix, unsigned max_shards, const string& section, const string& key, string& name);
2252 void shard_name(const string& prefix, unsigned shard_id, string& name);
2253 int get_target_shard_id(const RGWBucketInfo& bucket_info, const string& obj_key, int *shard_id);
2254 void time_log_prepare_entry(cls_log_entry& entry, const ceph::real_time& ut, const string& section, const string& key, bufferlist& bl);
2255 int time_log_add_init(librados::IoCtx& io_ctx);
2256 int time_log_add(const string& oid, list<cls_log_entry>& entries,
2257 librados::AioCompletion *completion, bool monotonic_inc = true);
2258 int time_log_add(const string& oid, const ceph::real_time& ut, const string& section, const string& key, bufferlist& bl);
2259 int time_log_list(const string& oid, const ceph::real_time& start_time, const ceph::real_time& end_time,
2260 int max_entries, list<cls_log_entry>& entries,
2261 const string& marker, string *out_marker, bool *truncated);
2262 int time_log_info(const string& oid, cls_log_header *header);
2263 int time_log_info_async(librados::IoCtx& io_ctx, const string& oid, cls_log_header *header, librados::AioCompletion *completion);
2264 int time_log_trim(const string& oid, const ceph::real_time& start_time, const ceph::real_time& end_time,
2265 const string& from_marker, const string& to_marker,
2266 librados::AioCompletion *completion = nullptr);
2267
2268 string objexp_hint_get_shardname(int shard_num);
2269 int objexp_key_shard(const rgw_obj_index_key& key);
2270 void objexp_get_shard(int shard_num,
2271 string& shard); /* out */
2272 int objexp_hint_add(const ceph::real_time& delete_at,
2273 const string& tenant_name,
2274 const string& bucket_name,
2275 const string& bucket_id,
2276 const rgw_obj_index_key& obj_key);
2277 int objexp_hint_list(const string& oid,
2278 const ceph::real_time& start_time,
2279 const ceph::real_time& end_time,
2280 const int max_entries,
2281 const string& marker,
2282 list<cls_timeindex_entry>& entries, /* out */
2283 string *out_marker, /* out */
2284 bool *truncated); /* out */
2285 int objexp_hint_parse(cls_timeindex_entry &ti_entry,
2286 objexp_hint_entry& hint_entry); /* out */
2287 int objexp_hint_trim(const string& oid,
2288 const ceph::real_time& start_time,
2289 const ceph::real_time& end_time,
2290 const string& from_marker = std::string(),
2291 const string& to_marker = std::string());
2292
11fdf7f2
TL
2293 int lock_exclusive(const rgw_pool& pool, const string& oid, ceph::timespan& duration, string& zone_id, string& owner_id);
2294 int unlock(const rgw_pool& pool, const string& oid, string& zone_id, string& owner_id);
7c673cae
FG
2295
2296 void update_gc_chain(rgw_obj& head_obj, RGWObjManifest& manifest, cls_rgw_obj_chain *chain);
2297 int send_chain_to_gc(cls_rgw_obj_chain& chain, const string& tag, bool sync);
2298 int gc_operate(string& oid, librados::ObjectWriteOperation *op);
11fdf7f2 2299 int gc_aio_operate(string& oid, librados::ObjectWriteOperation *op, librados::AioCompletion **pc = nullptr);
7c673cae
FG
2300 int gc_operate(string& oid, librados::ObjectReadOperation *op, bufferlist *pbl);
2301
2302 int list_gc_objs(int *index, string& marker, uint32_t max, bool expired_only, std::list<cls_rgw_gc_obj_info>& result, bool *truncated);
11fdf7f2 2303 int process_gc(bool expired_only);
1adf2230 2304 bool process_expire_objects();
7c673cae
FG
2305 int defer_gc(void *ctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj);
2306
2307 int process_lc();
2308 int list_lc_progress(const string& marker, uint32_t max_entries, map<string, int> *progress_map);
2309
2310 int bucket_check_index(RGWBucketInfo& bucket_info,
2311 map<RGWObjCategory, RGWStorageStats> *existing_stats,
2312 map<RGWObjCategory, RGWStorageStats> *calculated_stats);
2313 int bucket_rebuild_index(RGWBucketInfo& bucket_info);
f64942e4 2314 int bucket_set_reshard(const RGWBucketInfo& bucket_info, const cls_rgw_bucket_instance_entry& entry);
7c673cae
FG
2315 int remove_objs_from_index(RGWBucketInfo& bucket_info, list<rgw_obj_index_key>& oid_list);
2316 int move_rados_obj(librados::IoCtx& src_ioctx,
2317 const string& src_oid, const string& src_locator,
2318 librados::IoCtx& dst_ioctx,
2319 const string& dst_oid, const string& dst_locator);
2320 int fix_head_obj_locator(const RGWBucketInfo& bucket_info, bool copy_obj, bool remove_bad, rgw_obj_key& key);
2321 int fix_tail_obj_locator(const RGWBucketInfo& bucket_info, rgw_obj_key& key, bool fix, bool *need_fix);
2322
2323 int cls_user_get_header(const string& user_id, cls_user_header *header);
94b18763 2324 int cls_user_reset_stats(const string& user_id);
7c673cae
FG
2325 int cls_user_get_header_async(const string& user_id, RGWGetUserHeader_CB *ctx);
2326 int cls_user_sync_bucket_stats(rgw_raw_obj& user_obj, const RGWBucketInfo& bucket_info);
2327 int cls_user_list_buckets(rgw_raw_obj& obj,
2328 const string& in_marker,
2329 const string& end_marker,
2330 int max_entries,
2331 list<cls_user_bucket_entry>& entries,
2332 string *out_marker,
2333 bool *truncated);
2334 int cls_user_add_bucket(rgw_raw_obj& obj, const cls_user_bucket_entry& entry);
2335 int cls_user_update_buckets(rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries, bool add);
2336 int cls_user_complete_stats_sync(rgw_raw_obj& obj);
2337 int complete_sync_user_stats(const rgw_user& user_id);
7c673cae 2338 int cls_user_remove_bucket(rgw_raw_obj& obj, const cls_user_bucket& bucket);
c07f9fc5 2339 int cls_user_get_bucket_stats(const rgw_bucket& bucket, cls_user_bucket_entry& entry);
7c673cae
FG
2340
2341 int check_quota(const rgw_user& bucket_owner, rgw_bucket& bucket,
11fdf7f2 2342 RGWQuotaInfo& user_quota, RGWQuotaInfo& bucket_quota, uint64_t obj_size, bool check_size_only = false);
7c673cae 2343
224ce89b 2344 int check_bucket_shards(const RGWBucketInfo& bucket_info, const rgw_bucket& bucket,
31f18b77
FG
2345 RGWQuotaInfo& bucket_quota);
2346
2347 int add_bucket_to_reshard(const RGWBucketInfo& bucket_info, uint32_t new_num_shards);
2348
7c673cae 2349 uint64_t instance_id();
3efd9988 2350
7c673cae
FG
2351 librados::Rados* get_rados_handle();
2352
2353 int delete_raw_obj_aio(const rgw_raw_obj& obj, list<librados::AioCompletion *>& handles);
2354 int delete_obj_aio(const rgw_obj& obj, RGWBucketInfo& info, RGWObjState *astate,
2355 list<librados::AioCompletion *>& handles, bool keep_index_consistent);
11fdf7f2
TL
2356
2357 /* mfa/totp stuff */
2358 private:
2359 void prepare_mfa_write(librados::ObjectWriteOperation *op,
2360 RGWObjVersionTracker *objv_tracker,
2361 const ceph::real_time& mtime);
2362 public:
2363 string get_mfa_oid(const rgw_user& user);
2364 int get_mfa_ref(const rgw_user& user, rgw_rados_ref *ref);
2365 int check_mfa(const rgw_user& user, const string& otp_id, const string& pin);
2366 int create_mfa(const rgw_user& user, const rados::cls::otp::otp_info_t& config,
2367 RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime);
2368 int remove_mfa(const rgw_user& user, const string& id,
2369 RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime);
2370 int get_mfa(const rgw_user& user, const string& id, rados::cls::otp::otp_info_t *result);
2371 int list_mfa(const rgw_user& user, list<rados::cls::otp::otp_info_t> *result);
2372 int otp_get_current_time(const rgw_user& user, ceph::real_time *result);
2373
2374 /* mfa interfaces used by metadata engine */
2375 int set_mfa(const string& oid, const list<rados::cls::otp::otp_info_t>& entries, bool reset_obj,
2376 RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime);
2377 int list_mfa(const string& oid, list<rados::cls::otp::otp_info_t> *result,
2378 RGWObjVersionTracker *objv_tracker, ceph::real_time *pmtime);
7c673cae
FG
2379 private:
2380 /**
2381 * This is a helper method, it generates a list of bucket index objects with the given
2382 * bucket base oid and number of shards.
2383 *
2384 * bucket_oid_base [in] - base name of the bucket index object;
2385 * num_shards [in] - number of bucket index object shards.
2386 * bucket_objs [out] - filled by this method, a list of bucket index objects.
2387 */
2388 void get_bucket_index_objects(const string& bucket_oid_base, uint32_t num_shards,
2389 map<int, string>& bucket_objs, int shard_id = -1);
2390
2391 /**
2392 * Get the bucket index object with the given base bucket index object and object key,
2393 * and the number of bucket index shards.
2394 *
2395 * bucket_oid_base [in] - bucket object base name.
2396 * obj_key [in] - object key.
2397 * num_shards [in] - number of bucket index shards.
2398 * hash_type [in] - type of hash to find the shard ID.
2399 * bucket_obj [out] - the bucket index object for the given object.
2400 *
2401 * Return 0 on success, a failure code otherwise.
2402 */
2403 int get_bucket_index_object(const string& bucket_oid_base, const string& obj_key,
2404 uint32_t num_shards, RGWBucketInfo::BIShardsHashType hash_type, string *bucket_obj, int *shard);
2405
2406 void get_bucket_index_object(const string& bucket_oid_base, uint32_t num_shards,
2407 int shard_id, string *bucket_obj);
2408
2409 /**
2410 * Check the actual on-disk state of the object specified
2411 * by list_state, and fill in the time and size of object.
2412 * Then append any changes to suggested_updates for
2413 * the rgw class' dir_suggest_changes function.
2414 *
2415 * Note that this can maul list_state; don't use it afterwards. Also
2416 * it expects object to already be filled in from list_state; it only
2417 * sets the size and mtime.
2418 *
2419 * Returns 0 on success, -ENOENT if the object doesn't exist on disk,
2420 * and -errno on other failures. (-ENOENT is not a failure, and it
2421 * will encode that info as a suggested update.)
2422 */
2423 int check_disk_state(librados::IoCtx io_ctx,
2424 const RGWBucketInfo& bucket_info,
2425 rgw_bucket_dir_entry& list_state,
2426 rgw_bucket_dir_entry& object,
2427 bufferlist& suggested_updates);
2428
2429 /**
2430 * Init pool iteration
31f18b77 2431 * pool: pool to use for the ctx initialization
7c673cae
FG
2432 * ctx: context object to use for the iteration
2433 * Returns: 0 on success, -ERR# otherwise.
2434 */
2435 int pool_iterate_begin(const rgw_pool& pool, RGWPoolIterCtx& ctx);
31f18b77 2436
181888fb
FG
2437 /**
2438 * Init pool iteration
2439 * pool: pool to use
2440 * cursor: position to start iteration
2441 * ctx: context object to use for the iteration
2442 * Returns: 0 on success, -ERR# otherwise.
2443 */
2444 int pool_iterate_begin(const rgw_pool& pool, const string& cursor, RGWPoolIterCtx& ctx);
2445
2446 /**
2447 * Get pool iteration position
2448 * ctx: context object to use for the iteration
2449 * Returns: string representation of position
2450 */
2451 string pool_iterate_get_cursor(RGWPoolIterCtx& ctx);
2452
7c673cae
FG
2453 /**
2454 * Iterate over the pool and return object names, optionally applying a filter
2455 * ctx: iteration context, initialized with pool_iterate_begin()
2456 * num: max number of objects to return
2457 * objs: a vector that the results will append into
2458 * is_truncated: if not NULL, will hold true iff the iteration is not yet complete (more objects remain)
2459 * filter: if not NULL, will be used to filter returned objects
2460 * Returns: 0 on success, -ERR# otherwise.
2461 */
2462 int pool_iterate(RGWPoolIterCtx& ctx, uint32_t num, vector<rgw_bucket_dir_entry>& objs,
2463 bool *is_truncated, RGWAccessListFilter *filter);
2464
2465 uint64_t next_bucket_id();
2466};
2467
2468class RGWStoreManager {
2469public:
2470 RGWStoreManager() {}
28e407b8
AA
2471 static RGWRados *get_storage(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads,
2472 bool run_sync_thread, bool run_reshard_thread, bool use_cache = true) {
31f18b77 2473 RGWRados *store = init_storage_provider(cct, use_gc_thread, use_lc_thread, quota_threads, run_sync_thread,
28e407b8 2474 run_reshard_thread, use_cache);
7c673cae
FG
2475 return store;
2476 }
2477 static RGWRados *get_raw_storage(CephContext *cct) {
2478 RGWRados *store = init_raw_storage_provider(cct);
2479 return store;
2480 }
28e407b8 2481 static RGWRados *init_storage_provider(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads, bool run_sync_thread, bool run_reshard_thread, bool use_metadata_cache);
7c673cae
FG
2482 static RGWRados *init_raw_storage_provider(CephContext *cct);
2483 static void close_storage(RGWRados *store);
2484
2485};
2486
7c673cae
FG
2487class RGWMPObj {
2488 string oid;
2489 string prefix;
2490 string meta;
2491 string upload_id;
2492public:
2493 RGWMPObj() {}
2494 RGWMPObj(const string& _oid, const string& _upload_id) {
2495 init(_oid, _upload_id, _upload_id);
2496 }
2497 void init(const string& _oid, const string& _upload_id) {
2498 init(_oid, _upload_id, _upload_id);
2499 }
2500 void init(const string& _oid, const string& _upload_id, const string& part_unique_str) {
2501 if (_oid.empty()) {
2502 clear();
2503 return;
2504 }
2505 oid = _oid;
2506 upload_id = _upload_id;
2507 prefix = oid + ".";
2508 meta = prefix + upload_id + MP_META_SUFFIX;
2509 prefix.append(part_unique_str);
2510 }
11fdf7f2
TL
2511 const string& get_meta() const { return meta; }
2512 string get_part(int num) const {
7c673cae
FG
2513 char buf[16];
2514 snprintf(buf, 16, ".%d", num);
2515 string s = prefix;
2516 s.append(buf);
2517 return s;
2518 }
11fdf7f2 2519 string get_part(const string& part) const {
7c673cae
FG
2520 string s = prefix;
2521 s.append(".");
2522 s.append(part);
2523 return s;
2524 }
11fdf7f2 2525 const string& get_upload_id() const {
7c673cae
FG
2526 return upload_id;
2527 }
11fdf7f2 2528 const string& get_key() const {
7c673cae
FG
2529 return oid;
2530 }
2531 bool from_meta(string& meta) {
2532 int end_pos = meta.rfind('.'); // search for ".meta"
2533 if (end_pos < 0)
2534 return false;
2535 int mid_pos = meta.rfind('.', end_pos - 1); // <key>.<upload_id>
2536 if (mid_pos < 0)
2537 return false;
2538 oid = meta.substr(0, mid_pos);
2539 upload_id = meta.substr(mid_pos + 1, end_pos - mid_pos - 1);
2540 init(oid, upload_id, upload_id);
2541 return true;
2542 }
2543 void clear() {
2544 oid = "";
2545 prefix = "";
2546 meta = "";
2547 upload_id = "";
2548 }
11fdf7f2 2549}; // class RGWMPObj
7c673cae 2550
11fdf7f2
TL
2551
2552class RGWRadosThread {
2553 class Worker : public Thread {
2554 CephContext *cct;
2555 RGWRadosThread *processor;
2556 Mutex lock;
2557 Cond cond;
2558
2559 void wait() {
2560 Mutex::Locker l(lock);
2561 cond.Wait(lock);
2562 };
2563
2564 void wait_interval(const utime_t& wait_time) {
2565 Mutex::Locker l(lock);
2566 cond.WaitInterval(lock, wait_time);
2567 }
2568
2569 public:
2570 Worker(CephContext *_cct, RGWRadosThread *_p) : cct(_cct), processor(_p), lock("RGWRadosThread::Worker") {}
2571 void *entry() override;
2572 void signal() {
2573 Mutex::Locker l(lock);
2574 cond.Signal();
2575 }
2576 };
2577
2578 Worker *worker;
7c673cae
FG
2579
2580protected:
11fdf7f2
TL
2581 CephContext *cct;
2582 RGWRados *store;
2583
2584 std::atomic<bool> down_flag = { false };
2585
2586 string thread_name;
2587
2588 virtual uint64_t interval_msec() = 0;
2589 virtual void stop_process() {}
7c673cae 2590public:
11fdf7f2
TL
2591 RGWRadosThread(RGWRados *_store, const string& thread_name = "radosgw")
2592 : worker(NULL), cct(_store->ctx()), store(_store), thread_name(thread_name) {}
2593 virtual ~RGWRadosThread() {
2594 stop();
2595 }
2596
2597 virtual int init() { return 0; }
2598 virtual int process() = 0;
2599
2600 bool going_down() { return down_flag; }
2601
2602 void start();
2603 void stop();
2604
2605 void signal() {
2606 if (worker) {
2607 worker->signal();
2608 }
2609 }
2610};
2611
7c673cae 2612#endif