]> git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/rgw_rados.h
8a5ee7cfeefe5c1f0c664cfcec73b9ab4022df98
[ceph.git] / ceph / src / rgw / rgw_rados.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab ft=cpp
3
4 #ifndef CEPH_RGWRADOS_H
5 #define CEPH_RGWRADOS_H
6
7 #include <functional>
8 #include <boost/container/flat_map.hpp>
9
10 #include "include/rados/librados.hpp"
11 #include "include/Context.h"
12 #include "include/random.h"
13 #include "common/RefCountedObj.h"
14 #include "common/RWLock.h"
15 #include "common/ceph_time.h"
16 #include "rgw_common.h"
17 #include "cls/rgw/cls_rgw_types.h"
18 #include "cls/version/cls_version_types.h"
19 #include "cls/log/cls_log_types.h"
20 #include "cls/timeindex/cls_timeindex_types.h"
21 #include "cls/otp/cls_otp_types.h"
22 #include "rgw_log.h"
23 #include "rgw_metadata.h"
24 #include "rgw_meta_sync_status.h"
25 #include "rgw_period_puller.h"
26 #include "rgw_obj_manifest.h"
27 #include "rgw_sync_module.h"
28 #include "rgw_trim_bilog.h"
29 #include "rgw_service.h"
30
31 #include "services/svc_rados.h"
32 #include "services/svc_bi_rados.h"
33
34 class RGWWatcher;
35 class SafeTimer;
36 class ACLOwner;
37 class RGWGC;
38 class RGWMetaNotifier;
39 class RGWDataNotifier;
40 class RGWLC;
41 class RGWObjectExpirer;
42 class RGWMetaSyncProcessorThread;
43 class RGWDataSyncProcessorThread;
44 class RGWSyncLogTrimThread;
45 class RGWSyncTraceManager;
46 struct RGWZoneGroup;
47 struct RGWZoneParams;
48 class RGWReshard;
49 class RGWReshardWait;
50
51 class RGWSysObjectCtx;
52
53 /* flags for put_obj_meta() */
54 #define PUT_OBJ_CREATE 0x01
55 #define PUT_OBJ_EXCL 0x02
56 #define PUT_OBJ_CREATE_EXCL (PUT_OBJ_CREATE | PUT_OBJ_EXCL)
57
58 #define RGW_OBJ_NS_MULTIPART "multipart"
59 #define RGW_OBJ_NS_SHADOW "shadow"
60
61 static inline void prepend_bucket_marker(const rgw_bucket& bucket, const string& orig_oid, string& oid)
62 {
63 if (bucket.marker.empty() || orig_oid.empty()) {
64 oid = orig_oid;
65 } else {
66 oid = bucket.marker;
67 oid.append("_");
68 oid.append(orig_oid);
69 }
70 }
71
72 static inline void get_obj_bucket_and_oid_loc(const rgw_obj& obj, string& oid, string& locator)
73 {
74 const rgw_bucket& bucket = obj.bucket;
75 prepend_bucket_marker(bucket, obj.get_oid(), oid);
76 const string& loc = obj.key.get_loc();
77 if (!loc.empty()) {
78 prepend_bucket_marker(bucket, loc, locator);
79 } else {
80 locator.clear();
81 }
82 }
83
84 int rgw_policy_from_attrset(CephContext *cct, map<string, bufferlist>& attrset, RGWAccessControlPolicy *policy);
85
86 struct RGWOLHInfo {
87 rgw_obj target;
88 bool removed;
89
90 RGWOLHInfo() : removed(false) {}
91
92 void encode(bufferlist& bl) const {
93 ENCODE_START(1, 1, bl);
94 encode(target, bl);
95 encode(removed, bl);
96 ENCODE_FINISH(bl);
97 }
98
99 void decode(bufferlist::const_iterator& bl) {
100 DECODE_START(1, bl);
101 decode(target, bl);
102 decode(removed, bl);
103 DECODE_FINISH(bl);
104 }
105 static void generate_test_instances(list<RGWOLHInfo*>& o);
106 void dump(Formatter *f) const;
107 };
108 WRITE_CLASS_ENCODER(RGWOLHInfo)
109
110 struct RGWOLHPendingInfo {
111 ceph::real_time time;
112
113 RGWOLHPendingInfo() {}
114
115 void encode(bufferlist& bl) const {
116 ENCODE_START(1, 1, bl);
117 encode(time, bl);
118 ENCODE_FINISH(bl);
119 }
120
121 void decode(bufferlist::const_iterator& bl) {
122 DECODE_START(1, bl);
123 decode(time, bl);
124 DECODE_FINISH(bl);
125 }
126
127 void dump(Formatter *f) const;
128 };
129 WRITE_CLASS_ENCODER(RGWOLHPendingInfo)
130
131 struct RGWUsageBatch {
132 map<ceph::real_time, rgw_usage_log_entry> m;
133
134 void insert(ceph::real_time& t, rgw_usage_log_entry& entry, bool *account) {
135 bool exists = m.find(t) != m.end();
136 *account = !exists;
137 m[t].aggregate(entry);
138 }
139 };
140
/// Iteration state for usage reads: a string cursor plus a numeric index.
struct RGWUsageIter {
  std::string read_iter;
  uint32_t index{0};   // starts at zero

  RGWUsageIter() {}
};
147
148 class RGWGetDataCB {
149 public:
150 virtual int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) = 0;
151 RGWGetDataCB() {}
152 virtual ~RGWGetDataCB() {}
153 };
154
155 struct RGWCloneRangeInfo {
156 rgw_obj src;
157 off_t src_ofs;
158 off_t dst_ofs;
159 uint64_t len;
160 };
161
162 struct RGWObjState {
163 rgw_obj obj;
164 bool is_atomic{false};
165 bool has_attrs{false};
166 bool exists{false};
167 uint64_t size{0}; //< size of raw object
168 uint64_t accounted_size{0}; //< size before compression, encryption
169 ceph::real_time mtime;
170 uint64_t epoch{0};
171 bufferlist obj_tag;
172 bufferlist tail_tag;
173 string write_tag;
174 bool fake_tag{false};
175 std::optional<RGWObjManifest> manifest;
176 string shadow_obj;
177 bool has_data{false};
178 bufferlist data;
179 bool prefetch_data{false};
180 bool keep_tail{false};
181 bool is_olh{false};
182 bufferlist olh_tag;
183 uint64_t pg_ver{false};
184 uint32_t zone_short_id{0};
185
186 /* important! don't forget to update copy constructor */
187
188 RGWObjVersionTracker objv_tracker;
189
190 map<string, bufferlist> attrset;
191
192 RGWObjState();
193 RGWObjState(const RGWObjState& rhs);
194 ~RGWObjState();
195
196 bool get_attr(string name, bufferlist& dest) {
197 map<string, bufferlist>::iterator iter = attrset.find(name);
198 if (iter != attrset.end()) {
199 dest = iter->second;
200 return true;
201 }
202 return false;
203 }
204 };
205
206 class RGWFetchObjFilter {
207 public:
208 virtual ~RGWFetchObjFilter() {}
209
210 virtual int filter(CephContext *cct,
211 const rgw_obj_key& source_key,
212 const RGWBucketInfo& dest_bucket_info,
213 std::optional<rgw_placement_rule> dest_placement_rule,
214 const map<string, bufferlist>& obj_attrs,
215 std::optional<rgw_user> *poverride_owner,
216 const rgw_placement_rule **prule) = 0;
217 };
218
219 class RGWFetchObjFilter_Default : public RGWFetchObjFilter {
220 protected:
221 rgw_placement_rule dest_rule;
222 public:
223 RGWFetchObjFilter_Default() {}
224
225 int filter(CephContext *cct,
226 const rgw_obj_key& source_key,
227 const RGWBucketInfo& dest_bucket_info,
228 std::optional<rgw_placement_rule> dest_placement_rule,
229 const map<string, bufferlist>& obj_attrs,
230 std::optional<rgw_user> *poverride_owner,
231 const rgw_placement_rule **prule) override;
232 };
233
234 class RGWObjectCtx {
235 rgw::sal::RGWRadosStore *store;
236 ceph::shared_mutex lock = ceph::make_shared_mutex("RGWObjectCtx");
237 void *s{nullptr};
238
239 std::map<rgw_obj, RGWObjState> objs_state;
240 public:
241 explicit RGWObjectCtx(rgw::sal::RGWRadosStore *_store) : store(_store) {}
242 explicit RGWObjectCtx(rgw::sal::RGWRadosStore *_store, void *_s) : store(_store), s(_s) {}
243
244 void *get_private() {
245 return s;
246 }
247
248 rgw::sal::RGWRadosStore *get_store() {
249 return store;
250 }
251
252 RGWObjState *get_state(const rgw_obj& obj);
253
254 void set_atomic(rgw_obj& obj);
255 void set_prefetch_data(const rgw_obj& obj);
256 void invalidate(const rgw_obj& obj);
257 };
258
259
260 struct RGWRawObjState {
261 rgw_raw_obj obj;
262 bool has_attrs{false};
263 bool exists{false};
264 uint64_t size{0};
265 ceph::real_time mtime;
266 uint64_t epoch{0};
267 bufferlist obj_tag;
268 bool has_data{false};
269 bufferlist data;
270 bool prefetch_data{false};
271 uint64_t pg_ver{0};
272
273 /* important! don't forget to update copy constructor */
274
275 RGWObjVersionTracker objv_tracker;
276
277 map<string, bufferlist> attrset;
278 RGWRawObjState() {}
279 RGWRawObjState(const RGWRawObjState& rhs) : obj (rhs.obj) {
280 has_attrs = rhs.has_attrs;
281 exists = rhs.exists;
282 size = rhs.size;
283 mtime = rhs.mtime;
284 epoch = rhs.epoch;
285 if (rhs.obj_tag.length()) {
286 obj_tag = rhs.obj_tag;
287 }
288 has_data = rhs.has_data;
289 if (rhs.data.length()) {
290 data = rhs.data;
291 }
292 prefetch_data = rhs.prefetch_data;
293 pg_ver = rhs.pg_ver;
294 objv_tracker = rhs.objv_tracker;
295 }
296 };
297
298 struct RGWPoolIterCtx {
299 librados::IoCtx io_ctx;
300 librados::NObjectIterator iter;
301 };
302
303 struct RGWListRawObjsCtx {
304 bool initialized;
305 RGWPoolIterCtx iter_ctx;
306
307 RGWListRawObjsCtx() : initialized(false) {}
308 };
309
310 struct objexp_hint_entry {
311 string tenant;
312 string bucket_name;
313 string bucket_id;
314 rgw_obj_key obj_key;
315 ceph::real_time exp_time;
316
317 void encode(bufferlist& bl) const {
318 ENCODE_START(2, 1, bl);
319 encode(bucket_name, bl);
320 encode(bucket_id, bl);
321 encode(obj_key, bl);
322 encode(exp_time, bl);
323 encode(tenant, bl);
324 ENCODE_FINISH(bl);
325 }
326
327 void decode(bufferlist::const_iterator& bl) {
328 // XXX Do we want DECODE_START_LEGACY_COMPAT_LEN(2, 1, 1, bl); ?
329 DECODE_START(2, bl);
330 decode(bucket_name, bl);
331 decode(bucket_id, bl);
332 decode(obj_key, bl);
333 decode(exp_time, bl);
334 if (struct_v >= 2) {
335 decode(tenant, bl);
336 } else {
337 tenant.clear();
338 }
339 DECODE_FINISH(bl);
340 }
341
342 void dump(Formatter *f) const;
343 static void generate_test_instances(list<objexp_hint_entry*>& o);
344 };
345 WRITE_CLASS_ENCODER(objexp_hint_entry)
346
347 class RGWDataChangesLog;
348 class RGWMetaSyncStatusManager;
349 class RGWDataSyncStatusManager;
350 class RGWCoroutinesManagerRegistry;
351
352 class RGWGetBucketStats_CB : public RefCountedObject {
353 protected:
354 rgw_bucket bucket;
355 map<RGWObjCategory, RGWStorageStats> *stats;
356 public:
357 explicit RGWGetBucketStats_CB(const rgw_bucket& _bucket) : bucket(_bucket), stats(NULL) {}
358 ~RGWGetBucketStats_CB() override {}
359 virtual void handle_response(int r) = 0;
360 virtual void set_response(map<RGWObjCategory, RGWStorageStats> *_stats) {
361 stats = _stats;
362 }
363 };
364
365 class RGWGetUserStats_CB : public RefCountedObject {
366 protected:
367 rgw_user user;
368 RGWStorageStats stats;
369 public:
370 explicit RGWGetUserStats_CB(const rgw_user& _user) : user(_user) {}
371 ~RGWGetUserStats_CB() override {}
372 virtual void handle_response(int r) = 0;
373 virtual void set_response(RGWStorageStats& _stats) {
374 stats = _stats;
375 }
376 };
377
378 class RGWGetDirHeader_CB;
379 class RGWGetUserHeader_CB;
380 namespace rgw { namespace sal { class RGWRadosStore; } }
381
382 class RGWAsyncRadosProcessor;
383
384 template <class T>
385 class RGWChainedCacheImpl;
386
387 struct bucket_info_entry {
388 RGWBucketInfo info;
389 real_time mtime;
390 map<string, bufferlist> attrs;
391 };
392
393 struct tombstone_entry;
394
395 template <class K, class V>
396 class lru_map;
397 using tombstone_cache_t = lru_map<rgw_obj, tombstone_entry>;
398
399 class RGWIndexCompletionManager;
400
401 class RGWRados
402 {
403 friend class RGWGC;
404 friend class RGWMetaNotifier;
405 friend class RGWDataNotifier;
406 friend class RGWLC;
407 friend class RGWObjectExpirer;
408 friend class RGWMetaSyncProcessorThread;
409 friend class RGWDataSyncProcessorThread;
410 friend class RGWReshard;
411 friend class RGWBucketReshard;
412 friend class RGWBucketReshardLock;
413 friend class BucketIndexLockGuard;
414 friend class RGWCompleteMultipart;
415
416 /** Open the pool used as root for this gateway */
417 int open_root_pool_ctx();
418 int open_gc_pool_ctx();
419 int open_lc_pool_ctx();
420 int open_objexp_pool_ctx();
421 int open_reshard_pool_ctx();
422
423 int open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx,
424 bool mostly_omap);
425
426
427 ceph::mutex lock = ceph::make_mutex("rados_timer_lock");
428 SafeTimer *timer;
429
430 rgw::sal::RGWRadosStore *store;
431 RGWGC *gc;
432 RGWLC *lc;
433 RGWObjectExpirer *obj_expirer;
434 bool use_gc_thread;
435 bool use_lc_thread;
436 bool quota_threads;
437 bool run_sync_thread;
438 bool run_reshard_thread;
439
440 RGWMetaNotifier *meta_notifier;
441 RGWDataNotifier *data_notifier;
442 RGWMetaSyncProcessorThread *meta_sync_processor_thread;
443 RGWSyncTraceManager *sync_tracer = nullptr;
444 map<rgw_zone_id, RGWDataSyncProcessorThread *> data_sync_processor_threads;
445
446 boost::optional<rgw::BucketTrimManager> bucket_trim;
447 RGWSyncLogTrimThread *sync_log_trimmer{nullptr};
448
449 ceph::mutex meta_sync_thread_lock = ceph::make_mutex("meta_sync_thread_lock");
450 ceph::mutex data_sync_thread_lock = ceph::make_mutex("data_sync_thread_lock");
451
452 librados::IoCtx root_pool_ctx; // .rgw
453
454 double inject_notify_timeout_probability = 0;
455 unsigned max_notify_retries = 0;
456
457 friend class RGWWatcher;
458
459 ceph::mutex bucket_id_lock = ceph::make_mutex("rados_bucket_id");
460
461 // This field represents the number of bucket index object shards
462 uint32_t bucket_index_max_shards;
463
464 int get_obj_head_ioctx(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx *ioctx);
465 int get_obj_head_ref(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_rados_ref *ref);
466 int get_system_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
467 uint64_t max_bucket_id;
468
469 int get_olh_target_state(RGWObjectCtx& rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
470 RGWObjState *olh_state, RGWObjState **target_state, optional_yield y);
471 int get_obj_state_impl(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
472 bool follow_olh, optional_yield y, bool assume_noent = false);
473 int append_atomic_test(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
474 librados::ObjectOperation& op, RGWObjState **state, optional_yield y);
475 int append_atomic_test(const RGWObjState* astate, librados::ObjectOperation& op);
476
477 int update_placement_map();
478 int store_bucket_info(RGWBucketInfo& info, map<string, bufferlist> *pattrs, RGWObjVersionTracker *objv_tracker, bool exclusive);
479
480 void remove_rgw_head_obj(librados::ObjectWriteOperation& op);
481 void cls_obj_check_prefix_exist(librados::ObjectOperation& op, const string& prefix, bool fail_if_exist);
482 void cls_obj_check_mtime(librados::ObjectOperation& op, const real_time& mtime, bool high_precision_time, RGWCheckMTimeType type);
483 protected:
484 CephContext *cct;
485
486 librados::Rados rados;
487
488 using RGWChainedCacheImpl_bucket_info_entry = RGWChainedCacheImpl<bucket_info_entry>;
489 RGWChainedCacheImpl_bucket_info_entry *binfo_cache;
490
491 tombstone_cache_t *obj_tombstone_cache;
492
493 librados::IoCtx gc_pool_ctx; // .rgw.gc
494 librados::IoCtx lc_pool_ctx; // .rgw.lc
495 librados::IoCtx objexp_pool_ctx;
496 librados::IoCtx reshard_pool_ctx;
497
498 bool pools_initialized;
499
500 RGWQuotaHandler *quota_handler;
501
502 RGWCoroutinesManagerRegistry *cr_registry;
503
504 RGWSyncModuleInstanceRef sync_module;
505 bool writeable_zone{false};
506
507 RGWIndexCompletionManager *index_completion_manager{nullptr};
508
509 bool use_cache{false};
510 public:
511 RGWRados(): timer(NULL),
512 gc(NULL), lc(NULL), obj_expirer(NULL), use_gc_thread(false), use_lc_thread(false), quota_threads(false),
513 run_sync_thread(false), run_reshard_thread(false), meta_notifier(NULL),
514 data_notifier(NULL), meta_sync_processor_thread(NULL),
515 bucket_index_max_shards(0),
516 max_bucket_id(0), cct(NULL),
517 binfo_cache(NULL), obj_tombstone_cache(nullptr),
518 pools_initialized(false),
519 quota_handler(NULL),
520 cr_registry(NULL),
521 pctl(&ctl),
522 reshard(NULL) {}
523
524 RGWRados& set_use_cache(bool status) {
525 use_cache = status;
526 return *this;
527 }
528
529 RGWLC *get_lc() {
530 return lc;
531 }
532
533 RGWRados& set_run_gc_thread(bool _use_gc_thread) {
534 use_gc_thread = _use_gc_thread;
535 return *this;
536 }
537
538 RGWRados& set_run_lc_thread(bool _use_lc_thread) {
539 use_lc_thread = _use_lc_thread;
540 return *this;
541 }
542
543 RGWRados& set_run_quota_threads(bool _run_quota_threads) {
544 quota_threads = _run_quota_threads;
545 return *this;
546 }
547
548 RGWRados& set_run_sync_thread(bool _run_sync_thread) {
549 run_sync_thread = _run_sync_thread;
550 return *this;
551 }
552
553 RGWRados& set_run_reshard_thread(bool _run_reshard_thread) {
554 run_reshard_thread = _run_reshard_thread;
555 return *this;
556 }
557
558 uint64_t get_new_req_id() {
559 return ceph::util::generate_random_number<uint64_t>();
560 }
561
562 librados::IoCtx* get_lc_pool_ctx() {
563 return &lc_pool_ctx;
564 }
565 void set_context(CephContext *_cct) {
566 cct = _cct;
567 }
568 void set_store(rgw::sal::RGWRadosStore *_store) {
569 store = _store;
570 }
571
572 RGWServices svc;
573 RGWCtl ctl;
574
575 RGWCtl *pctl{nullptr};
576
577 /**
578 * AmazonS3 errors contain a HostId string, but it is an opaque base64 blob; we
579 * try to be more transparent. This has a wrapper so we can update it when the zonegroup/zone is changed.
580 */
581 string host_id;
582
583 RGWReshard *reshard;
584 std::shared_ptr<RGWReshardWait> reshard_wait;
585
586 virtual ~RGWRados() = default;
587
588 tombstone_cache_t *get_tombstone_cache() {
589 return obj_tombstone_cache;
590 }
591 const RGWSyncModuleInstanceRef& get_sync_module() {
592 return sync_module;
593 }
594 RGWSyncTraceManager *get_sync_tracer() {
595 return sync_tracer;
596 }
597
598 int get_required_alignment(const rgw_pool& pool, uint64_t *alignment);
599 void get_max_aligned_size(uint64_t size, uint64_t alignment, uint64_t *max_size);
600 int get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size, uint64_t *palignment = nullptr);
601 int get_max_chunk_size(const rgw_placement_rule& placement_rule, const rgw_obj& obj, uint64_t *max_chunk_size, uint64_t *palignment = nullptr);
602
603 uint32_t get_max_bucket_shards() {
604 return RGWSI_BucketIndex_RADOS::shards_max();
605 }
606
607
608 int get_raw_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
609
610 int list_raw_objects_init(const rgw_pool& pool, const string& marker, RGWListRawObjsCtx *ctx);
611 int list_raw_objects_next(const string& prefix_filter, int max,
612 RGWListRawObjsCtx& ctx, list<string>& oids,
613 bool *is_truncated);
614 int list_raw_objects(const rgw_pool& pool, const string& prefix_filter, int max,
615 RGWListRawObjsCtx& ctx, list<string>& oids,
616 bool *is_truncated);
617 string list_raw_objs_get_cursor(RGWListRawObjsCtx& ctx);
618
619 CephContext *ctx() { return cct; }
620 /** do all necessary setup of the storage device */
621 int initialize(CephContext *_cct) {
622 set_context(_cct);
623 return initialize();
624 }
625 /** Initialize the RADOS instance and prepare to do other ops */
626 int init_svc(bool raw);
627 int init_ctl();
628 int init_rados();
629 int init_complete();
630 int initialize();
631 void finalize();
632
633 int register_to_service_map(const string& daemon_type, const map<string, string>& meta);
634 int update_service_map(std::map<std::string, std::string>&& status);
635
636 /// list logs
637 int log_list_init(const string& prefix, RGWAccessHandle *handle);
638 int log_list_next(RGWAccessHandle handle, string *name);
639
640 /// remove log
641 int log_remove(const string& name);
642
643 /// show log
644 int log_show_init(const string& name, RGWAccessHandle *handle);
645 int log_show_next(RGWAccessHandle handle, rgw_log_entry *entry);
646
647 // log bandwidth info
648 int log_usage(map<rgw_user_bucket, RGWUsageBatch>& usage_info);
649 int read_usage(const rgw_user& user, const string& bucket_name, uint64_t start_epoch, uint64_t end_epoch,
650 uint32_t max_entries, bool *is_truncated, RGWUsageIter& read_iter, map<rgw_user_bucket,
651 rgw_usage_log_entry>& usage);
652 int trim_usage(const rgw_user& user, const string& bucket_name, uint64_t start_epoch, uint64_t end_epoch);
653 int clear_usage();
654
655 int create_pool(const rgw_pool& pool);
656
657 void create_bucket_id(string *bucket_id);
658
659 bool get_obj_data_pool(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_pool *pool);
660 bool obj_to_raw(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj);
661
662 int create_bucket(const RGWUserInfo& owner, rgw_bucket& bucket,
663 const string& zonegroup_id,
664 const rgw_placement_rule& placement_rule,
665 const string& swift_ver_location,
666 const RGWQuotaInfo * pquota_info,
667 map<std::string,bufferlist>& attrs,
668 RGWBucketInfo& bucket_info,
669 obj_version *pobjv,
670 obj_version *pep_objv,
671 ceph::real_time creation_time,
672 rgw_bucket *master_bucket,
673 uint32_t *master_num_shards,
674 bool exclusive = true);
675
676 RGWCoroutinesManagerRegistry *get_cr_registry() { return cr_registry; }
677
678 struct BucketShard {
679 RGWRados *store;
680 rgw_bucket bucket;
681 int shard_id;
682 RGWSI_RADOS::Obj bucket_obj;
683
684 explicit BucketShard(RGWRados *_store) : store(_store), shard_id(-1) {}
685 int init(const rgw_bucket& _bucket, const rgw_obj& obj, RGWBucketInfo* out);
686 int init(const rgw_bucket& _bucket, int sid, RGWBucketInfo* out);
687 int init(const RGWBucketInfo& bucket_info, const rgw_obj& obj);
688 int init(const RGWBucketInfo& bucket_info, int sid);
689 };
690
691 class Object {
692 RGWRados *store;
693 RGWBucketInfo bucket_info;
694 RGWObjectCtx& ctx;
695 rgw_obj obj;
696
697 BucketShard bs;
698
699 RGWObjState *state;
700
701 bool versioning_disabled;
702
703 bool bs_initialized;
704
705 protected:
706 int get_state(RGWObjState **pstate, bool follow_olh, optional_yield y, bool assume_noent = false);
707 void invalidate_state();
708
709 int prepare_atomic_modification(librados::ObjectWriteOperation& op, bool reset_obj, const string *ptag,
710 const char *ifmatch, const char *ifnomatch, bool removal_op, bool modify_tail, optional_yield y);
711 int complete_atomic_modification();
712
713 public:
714 Object(RGWRados *_store, const RGWBucketInfo& _bucket_info, RGWObjectCtx& _ctx, const rgw_obj& _obj) : store(_store), bucket_info(_bucket_info),
715 ctx(_ctx), obj(_obj), bs(store),
716 state(NULL), versioning_disabled(false),
717 bs_initialized(false) {}
718
719 RGWRados *get_store() { return store; }
720 rgw_obj& get_obj() { return obj; }
721 RGWObjectCtx& get_ctx() { return ctx; }
722 RGWBucketInfo& get_bucket_info() { return bucket_info; }
723 int get_manifest(RGWObjManifest **pmanifest, optional_yield y);
724
725 int get_bucket_shard(BucketShard **pbs) {
726 if (!bs_initialized) {
727 int r =
728 bs.init(bucket_info.bucket, obj, nullptr /* no RGWBucketInfo */);
729 if (r < 0) {
730 return r;
731 }
732 bs_initialized = true;
733 }
734 *pbs = &bs;
735 return 0;
736 }
737
738 void set_versioning_disabled(bool status) {
739 versioning_disabled = status;
740 }
741
742 bool versioning_enabled() {
743 return (!versioning_disabled && bucket_info.versioning_enabled());
744 }
745
746 struct Read {
747 RGWRados::Object *source;
748
749 struct GetObjState {
750 map<rgw_pool, librados::IoCtx> io_ctxs;
751 rgw_pool cur_pool;
752 librados::IoCtx *cur_ioctx{nullptr};
753 rgw_obj obj;
754 rgw_raw_obj head_obj;
755 } state;
756
757 struct ConditionParams {
758 const ceph::real_time *mod_ptr;
759 const ceph::real_time *unmod_ptr;
760 bool high_precision_time;
761 uint32_t mod_zone_id;
762 uint64_t mod_pg_ver;
763 const char *if_match;
764 const char *if_nomatch;
765
766 ConditionParams() :
767 mod_ptr(NULL), unmod_ptr(NULL), high_precision_time(false), mod_zone_id(0), mod_pg_ver(0),
768 if_match(NULL), if_nomatch(NULL) {}
769 } conds;
770
771 struct Params {
772 ceph::real_time *lastmod;
773 uint64_t *obj_size;
774 map<string, bufferlist> *attrs;
775 rgw_obj *target_obj;
776
777 Params() : lastmod(nullptr), obj_size(nullptr), attrs(nullptr),
778 target_obj(nullptr) {}
779 } params;
780
781 explicit Read(RGWRados::Object *_source) : source(_source) {}
782
783 int prepare(optional_yield y);
784 static int range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end);
785 int read(int64_t ofs, int64_t end, bufferlist& bl, optional_yield y);
786 int iterate(int64_t ofs, int64_t end, RGWGetDataCB *cb, optional_yield y);
787 int get_attr(const char *name, bufferlist& dest, optional_yield y);
788 };
789
790 struct Write {
791 RGWRados::Object *target;
792
793 struct MetaParams {
794 ceph::real_time *mtime;
795 map<std::string, bufferlist>* rmattrs;
796 const bufferlist *data;
797 RGWObjManifest *manifest;
798 const string *ptag;
799 list<rgw_obj_index_key> *remove_objs;
800 ceph::real_time set_mtime;
801 rgw_user owner;
802 RGWObjCategory category;
803 int flags;
804 const char *if_match;
805 const char *if_nomatch;
806 std::optional<uint64_t> olh_epoch;
807 ceph::real_time delete_at;
808 bool canceled;
809 const string *user_data;
810 rgw_zone_set *zones_trace;
811 bool modify_tail;
812 bool completeMultipart;
813 bool appendable;
814
815 MetaParams() : mtime(NULL), rmattrs(NULL), data(NULL), manifest(NULL), ptag(NULL),
816 remove_objs(NULL), category(RGWObjCategory::Main), flags(0),
817 if_match(NULL), if_nomatch(NULL), canceled(false), user_data(nullptr), zones_trace(nullptr),
818 modify_tail(false), completeMultipart(false), appendable(false) {}
819 } meta;
820
821 explicit Write(RGWRados::Object *_target) : target(_target) {}
822
823 int _do_write_meta(uint64_t size, uint64_t accounted_size,
824 map<std::string, bufferlist>& attrs,
825 bool modify_tail, bool assume_noent,
826 void *index_op, optional_yield y);
827 int write_meta(uint64_t size, uint64_t accounted_size,
828 map<std::string, bufferlist>& attrs, optional_yield y);
829 int write_data(const char *data, uint64_t ofs, uint64_t len, bool exclusive);
830 const req_state* get_req_state() {
831 return (req_state *)target->get_ctx().get_private();
832 }
833 };
834
835 struct Delete {
836 RGWRados::Object *target;
837
838 struct DeleteParams {
839 rgw_user bucket_owner;
840 int versioning_status;
841 ACLOwner obj_owner; /* needed for creation of deletion marker */
842 uint64_t olh_epoch;
843 string marker_version_id;
844 uint32_t bilog_flags;
845 list<rgw_obj_index_key> *remove_objs;
846 ceph::real_time expiration_time;
847 ceph::real_time unmod_since;
848 ceph::real_time mtime; /* for setting delete marker mtime */
849 bool high_precision_time;
850 rgw_zone_set *zones_trace;
851 bool abortmp;
852 uint64_t parts_accounted_size;
853
854 DeleteParams() : versioning_status(0), olh_epoch(0), bilog_flags(0), remove_objs(NULL), high_precision_time(false), zones_trace(nullptr), abortmp(false), parts_accounted_size(0) {}
855 } params;
856
857 struct DeleteResult {
858 bool delete_marker;
859 string version_id;
860
861 DeleteResult() : delete_marker(false) {}
862 } result;
863
864 explicit Delete(RGWRados::Object *_target) : target(_target) {}
865
866 int delete_obj(optional_yield y);
867 };
868
869 struct Stat {
870 RGWRados::Object *source;
871
872 struct Result {
873 rgw_obj obj;
874 std::optional<RGWObjManifest> manifest;
875 uint64_t size{0};
876 struct timespec mtime {};
877 map<string, bufferlist> attrs;
878 } result;
879
880 struct State {
881 librados::IoCtx io_ctx;
882 librados::AioCompletion *completion;
883 int ret;
884
885 State() : completion(NULL), ret(0) {}
886 } state;
887
888
889 explicit Stat(RGWRados::Object *_source) : source(_source) {}
890
891 int stat_async();
892 int wait();
893 int stat();
894 private:
895 int finish();
896 };
897 };
898
899 class Bucket {
900 RGWRados *store;
901 RGWBucketInfo bucket_info;
902 rgw_bucket& bucket;
903 int shard_id;
904
905 public:
906 Bucket(RGWRados *_store, const RGWBucketInfo& _bucket_info) : store(_store), bucket_info(_bucket_info), bucket(bucket_info.bucket),
907 shard_id(RGW_NO_SHARD) {}
908 RGWRados *get_store() { return store; }
909 rgw_bucket& get_bucket() { return bucket; }
910 RGWBucketInfo& get_bucket_info() { return bucket_info; }
911
912 int update_bucket_id(const string& new_bucket_id);
913
914 int get_shard_id() { return shard_id; }
915 void set_shard_id(int id) {
916 shard_id = id;
917 }
918
919 class UpdateIndex {
920 RGWRados::Bucket *target;
921 string optag;
922 rgw_obj obj;
923 uint16_t bilog_flags{0};
924 BucketShard bs;
925 bool bs_initialized{false};
926 bool blind;
927 bool prepared{false};
928 rgw_zone_set *zones_trace{nullptr};
929
930 int init_bs() {
931 int r =
932 bs.init(target->get_bucket(), obj, nullptr /* no RGWBucketInfo */);
933 if (r < 0) {
934 return r;
935 }
936 bs_initialized = true;
937 return 0;
938 }
939
940 void invalidate_bs() {
941 bs_initialized = false;
942 }
943
944 int guard_reshard(BucketShard **pbs, std::function<int(BucketShard *)> call);
945 public:
946
947 UpdateIndex(RGWRados::Bucket *_target, const rgw_obj& _obj) : target(_target), obj(_obj),
948 bs(target->get_store()) {
949 blind = (target->get_bucket_info().index_type == RGWBIType_Indexless);
950 }
951
952 int get_bucket_shard(BucketShard **pbs) {
953 if (!bs_initialized) {
954 int r = init_bs();
955 if (r < 0) {
956 return r;
957 }
958 }
959 *pbs = &bs;
960 return 0;
961 }
962
963 void set_bilog_flags(uint16_t flags) {
964 bilog_flags = flags;
965 }
966
967 void set_zones_trace(rgw_zone_set *_zones_trace) {
968 zones_trace = _zones_trace;
969 }
970
971 int prepare(RGWModifyOp, const string *write_tag, optional_yield y);
972 int complete(int64_t poolid, uint64_t epoch, uint64_t size,
973 uint64_t accounted_size, ceph::real_time& ut,
974 const string& etag, const string& content_type,
975 const string& storage_class,
976 bufferlist *acl_bl, RGWObjCategory category,
977 list<rgw_obj_index_key> *remove_objs, const string *user_data = nullptr, bool appendable = false);
978 int complete_del(int64_t poolid, uint64_t epoch,
979 ceph::real_time& removed_mtime, /* mtime of removed object */
980 list<rgw_obj_index_key> *remove_objs);
981 int cancel();
982
983 const string *get_optag() { return &optag; }
984
985 bool is_prepared() { return prepared; }
986 }; // class UpdateIndex
987
988 class List {
989 protected:
990 // absolute maximum number of objects that
991 // list_objects_(un)ordered can return
992 static constexpr int64_t bucket_list_objects_absolute_max = 25000;
993
994 RGWRados::Bucket *target;
995 rgw_obj_key next_marker;
996
997 int list_objects_ordered(int64_t max,
998 vector<rgw_bucket_dir_entry> *result,
999 map<string, bool> *common_prefixes,
1000 bool *is_truncated,
1001 optional_yield y);
1002 int list_objects_unordered(int64_t max,
1003 vector<rgw_bucket_dir_entry> *result,
1004 map<string, bool> *common_prefixes,
1005 bool *is_truncated,
1006 optional_yield y);
1007
1008 public:
1009
1010 struct Params {
1011 string prefix;
1012 string delim;
1013 rgw_obj_key marker;
1014 rgw_obj_key end_marker;
1015 string ns;
1016 bool enforce_ns;
1017 RGWAccessListFilter *filter;
1018 bool list_versions;
1019 bool allow_unordered;
1020
1021 Params() :
1022 enforce_ns(true),
1023 filter(NULL),
1024 list_versions(false),
1025 allow_unordered(false)
1026 {}
1027 } params;
1028
1029 explicit List(RGWRados::Bucket *_target) : target(_target) {}
1030
1031 int list_objects(int64_t max,
1032 vector<rgw_bucket_dir_entry> *result,
1033 map<string, bool> *common_prefixes,
1034 bool *is_truncated,
1035 optional_yield y) {
1036 if (params.allow_unordered) {
1037 return list_objects_unordered(max, result, common_prefixes,
1038 is_truncated, y);
1039 } else {
1040 return list_objects_ordered(max, result, common_prefixes,
1041 is_truncated, y);
1042 }
1043 }
1044 rgw_obj_key& get_next_marker() {
1045 return next_marker;
1046 }
1047 }; // class List
1048 }; // class Bucket
1049
1050 int on_last_entry_in_listing(RGWBucketInfo& bucket_info,
1051 const std::string& obj_prefix,
1052 const std::string& obj_delim,
1053 std::function<int(const rgw_bucket_dir_entry&)> handler);
1054
1055 bool swift_versioning_enabled(const RGWBucketInfo& bucket_info) const {
1056 return bucket_info.has_swift_versioning() &&
1057 bucket_info.swift_ver_location.size();
1058 }
1059
// swift_versioning_* entry points (declarations only). The trailing
// in / out / in-out tags on each parameter are the original author's
// annotations and are kept verbatim.
1060 int swift_versioning_copy(RGWObjectCtx& obj_ctx, /* in/out */
1061 const rgw_user& user, /* in */
1062 RGWBucketInfo& bucket_info, /* in */
1063 rgw_obj& obj, /* in */
1064 const DoutPrefixProvider *dpp, /* in/out */
1065 optional_yield y); /* in */
// `restored` is tagged as an output by the author.
// NOTE(review): presumably reports whether a previous version was put back —
// confirm in the definition.
1066 int swift_versioning_restore(RGWObjectCtx& obj_ctx, /* in/out */
1067 const rgw_user& user, /* in */
1068 RGWBucketInfo& bucket_info, /* in */
1069 rgw_obj& obj, /* in */
1070 bool& restored, /* out */
1071 const DoutPrefixProvider *dpp); /* in/out */
// Declaration only; takes an already-constructed Object::Read for the source.
// NOTE(review): name suggests the destination lives in another zone — confirm.
1072 int copy_obj_to_remote_dest(RGWObjState *astate,
1073 map<string, bufferlist>& src_attrs,
1074 RGWRados::Object::Read& read_op,
1075 const rgw_user& user_id,
1076 rgw_obj& dest_obj,
1077 ceph::real_time *mtime);
1078
// How the caller-supplied `attrs` map is applied when copying an object
// (meanings summarised from the copy_obj() comment in this header).
1079 enum AttrsMod {
1080 ATTRSMOD_NONE = 0, // source attributes copied unmodified; attrs is ignored
1081 ATTRSMOD_REPLACE = 1, // new object gets attrs; source attributes are not copied
1082 ATTRSMOD_MERGE = 2 // conflicting source meta keys are overwritten by attrs
1083 };
1084
// Declaration only.
// NOTE(review): presumably re-writes `obj` in place within dest_bucket_info's
// bucket — confirm in the definition.
1085 int rewrite_obj(RGWBucketInfo& dest_bucket_info, const rgw_obj& obj, const DoutPrefixProvider *dpp, optional_yield y);
1086
// Declaration only. Everything from src_mtime through petag is a pointer;
// mod_ptr / unmod_ptr / if_match / if_nomatch are pointers to const.
// NOTE(review): the pointer-to-non-const parameters look like optional
// outputs and the const ones like optional preconditions; whether null is
// accepted for each must be checked in the definition.
1087 int stat_remote_obj(RGWObjectCtx& obj_ctx,
1088 const rgw_user& user_id,
1089 req_info *info,
1090 const rgw_zone_id& source_zone,
1091 rgw_obj& src_obj,
1092 const RGWBucketInfo *src_bucket_info,
1093 real_time *src_mtime,
1094 uint64_t *psize,
1095 const real_time *mod_ptr,
1096 const real_time *unmod_ptr,
1097 bool high_precision_time,
1098 const char *if_match,
1099 const char *if_nomatch,
1100 map<string, bufferlist> *pattrs,
1101 map<string, string> *pheaders,
1102 string *version_id,
1103 string *ptag,
1104 string *petag);
1105
1106 int fetch_remote_obj(RGWObjectCtx& obj_ctx,
1107 const rgw_user& user_id,
1108 req_info *info,
1109 const rgw_zone_id& source_zone,
1110 const rgw_obj& dest_obj,
1111 const rgw_obj& src_obj,
1112 const RGWBucketInfo& dest_bucket_info,
1113 const RGWBucketInfo *src_bucket_info,
1114 std::optional<rgw_placement_rule> dest_placement,
1115 ceph::real_time *src_mtime,
1116 ceph::real_time *mtime,
1117 const ceph::real_time *mod_ptr,
1118 const ceph::real_time *unmod_ptr,
1119 bool high_precision_time,
1120 const char *if_match,
1121 const char *if_nomatch,
1122 AttrsMod attrs_mod,
1123 bool copy_if_newer,
1124 map<string, bufferlist>& attrs,
1125 RGWObjCategory category,
1126 std::optional<uint64_t> olh_epoch,
1127 ceph::real_time delete_at,
1128 string *ptag,
1129 string *petag,
1130 void (*progress_cb)(off_t, void *),
1131 void *progress_data,
1132 const DoutPrefixProvider *dpp,
1133 RGWFetchObjFilter *filter,
1134 rgw_zone_set *zones_trace= nullptr,
1135 std::optional<uint64_t>* bytes_transferred = 0);
1136 /**
1137 * Copy an object.
1138 * dest_obj: the object to copy into
1139 * src_obj: the object to copy from
1140 * attrs: usage depends on attrs_mod parameter
1141 * attrs_mod: the modification mode of the attrs, may have the following values:
1142 * ATTRSMOD_NONE - the attributes of the source object will be
1143 * copied without modifications, attrs parameter is ignored;
1144 * ATTRSMOD_REPLACE - new object will have the attributes provided by attrs
1145 * parameter, source object attributes are not copied;
1146 * ATTRSMOD_MERGE - any conflicting meta keys on the source object's attributes
1147 * are overwritten by values contained in attrs parameter.
 * progress_cb, progress_data: callback taking (off_t, void *) and the opaque
 * pointer declared alongside it.
 * The remaining parameters are not described here; consult the definition
 * before relying on null-ability or in/out direction of the pointer
 * arguments.
1148 * Returns: 0 on success, -ERR# otherwise.
1149 */
1150 int copy_obj(RGWObjectCtx& obj_ctx,
1151 const rgw_user& user_id,
1152 req_info *info,
1153 const rgw_zone_id& source_zone,
1154 rgw_obj& dest_obj,
1155 rgw_obj& src_obj,
1156 RGWBucketInfo& dest_bucket_info,
1157 RGWBucketInfo& src_bucket_info,
1158 const rgw_placement_rule& dest_placement,
1159 ceph::real_time *src_mtime,
1160 ceph::real_time *mtime,
1161 const ceph::real_time *mod_ptr,
1162 const ceph::real_time *unmod_ptr,
1163 bool high_precision_time,
1164 const char *if_match,
1165 const char *if_nomatch,
1166 AttrsMod attrs_mod,
1167 bool copy_if_newer,
1168 map<std::string, bufferlist>& attrs,
1169 RGWObjCategory category,
1170 uint64_t olh_epoch,
1171 ceph::real_time delete_at,
1172 string *version_id,
1173 string *ptag,
1174 string *petag,
1175 void (*progress_cb)(off_t, void *),
1176 void *progress_data,
1177 const DoutPrefixProvider *dpp,
1178 optional_yield y);
1179
// Declaration only. Reads through an existing Object::Read (`read_op`) up to
// offset `end` and targets dest_obj.
// NOTE(review): `mtime` (pointer) looks like an output and `set_mtime`
// (by value) like the timestamp to apply — confirm in the definition.
1180 int copy_obj_data(RGWObjectCtx& obj_ctx,
1181 RGWBucketInfo& dest_bucket_info,
1182 const rgw_placement_rule& dest_placement,
1183 RGWRados::Object::Read& read_op, off_t end,
1184 const rgw_obj& dest_obj,
1185 ceph::real_time *mtime,
1186 ceph::real_time set_mtime,
1187 map<string, bufferlist>& attrs,
1188 uint64_t olh_epoch,
1189 ceph::real_time delete_at,
1190 string *petag,
1191 const DoutPrefixProvider *dpp,
1192 optional_yield y);
1193
// Declaration only.
// NOTE(review): presumably moves `obj` to the given placement_rule (storage
// class transition) — confirm in the definition.
1194 int transition_obj(RGWObjectCtx& obj_ctx,
1195 RGWBucketInfo& bucket_info,
1196 rgw_obj& obj,
1197 const rgw_placement_rule& placement_rule,
1198 const real_time& mtime,
1199 uint64_t olh_epoch,
1200 const DoutPrefixProvider *dpp,
1201 optional_yield y);
1202
// Declaration only; the int result's meaning (e.g. which error signals a
// non-empty bucket) is not visible from this header.
1203 int check_bucket_empty(RGWBucketInfo& bucket_info, optional_yield y);
1204
1205 /**
1206 * Delete a bucket.
1207 * bucket_info: the bucket to delete (this comment previously described a
 * "bucket" name parameter, which the signature does not have)
 * check_empty: defaults to true. NOTE(review): presumably controls whether
 * the bucket must be empty before it is removed — confirm in the definition.
1208 * Returns 0 on success, -ERR# otherwise.
1209 */
1210 int delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, optional_yield y, bool check_empty = true);
1211
// Declarations only. Both take their shard-id containers by non-const
// reference.
// NOTE(review): presumably these nudge the meta/data sync threads for the
// listed shards — confirm in the definition.
1212 void wakeup_meta_sync_shards(set<int>& shard_ids);
1213 void wakeup_data_sync_shards(const rgw_zone_id& source_zone, map<int, set<string> >& shard_ids);
1214
// Accessors returning raw pointers; ownership and null-ability are not
// visible from this header.
1215 RGWMetaSyncStatusManager* get_meta_sync_manager();
1216 RGWDataSyncStatusManager* get_data_sync_manager(const rgw_zone_id& source_zone);
1217
// Bucket ownership / enablement (declarations only). bucket_suspended()
// reports through the `suspended` pointer; the int is the status code.
1218 int set_bucket_owner(rgw_bucket& bucket, ACLOwner& owner);
1219 int set_buckets_enabled(std::vector<rgw_bucket>& buckets, bool enabled);
1220 int bucket_suspended(rgw_bucket& bucket, bool *suspended);
1221
1222 /** Delete an object.*/
// Defaults: bilog_flags = 0, expiration_time = default-constructed real_time,
// zones_trace = nullptr.
// NOTE(review): the second parameter is an RGWBucketInfo even though it is
// named `bucket_owner` — the name looks stale; confirm against the definition.
1223 int delete_obj(RGWObjectCtx& obj_ctx,
1224 const RGWBucketInfo& bucket_owner,
1225 const rgw_obj& src_obj,
1226 int versioning_status,
1227 uint16_t bilog_flags = 0,
1228 const ceph::real_time& expiration_time = ceph::real_time(),
1229 rgw_zone_set *zones_trace = nullptr);
1230
// Declaration only; operates on a raw (rgw_raw_obj) object rather than an
// rgw_obj.
1231 int delete_raw_obj(const rgw_raw_obj& obj);
1232
1233 /** Remove an object from the bucket index */
1234 int delete_obj_index(const rgw_obj& obj, ceph::real_time mtime);
1235
1236 /**
1237 * Set an attr on an object.
 * ctx: opaque context pointer. NOTE(review): set_atomic() in this class
 * casts its void* ctx to RGWObjectCtx — presumably the same type is
 * expected here; confirm in the definition.
1238 * bucket_info: info for the bucket holding the object
1239 * obj: the object to set the attr on
1240 * name: the attr to set
1241 * bl: the contents of the attr
1242 * Returns: 0 on success, -ERR# otherwise.
1243 */
1244 int set_attr(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj, const char *name, bufferlist& bl);
1245
// Multi-attribute variant (declaration only). `attrs` is passed by reference
// and `rmattrs` by pointer.
// NOTE(review): presumably `attrs` are set and `rmattrs`, when non-null, are
// removed — confirm in the definition.
1246 int set_attrs(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj,
1247 map<string, bufferlist>& attrs,
1248 map<string, bufferlist>* rmattrs,
1249 optional_yield y);
1250
// Full form (declaration only). The result is delivered through `state`
// (pointer-to-pointer); `assume_noent` defaults to false.
// NOTE(review): `follow_olh` presumably decides whether an object-logical-head
// is resolved to its target instance — confirm in the definition.
1251 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
1252 bool follow_olh, optional_yield y, bool assume_noent = false);
1253 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state, optional_yield y) {
1254 return get_obj_state(rctx, bucket_info, obj, state, true, y);
1255 }
1256
// Plain function-pointer type for per-chunk callbacks: returns int and takes
// (const rgw_raw_obj&, off_t, off_t, off_t, bool, RGWObjState*, void*).
1257 using iterate_obj_cb = int (*)(const rgw_raw_obj&, off_t, off_t,
1258 off_t, bool, RGWObjState*, void*);
1259
// Declaration only. `cb` is an iterate_obj_cb and `arg` an opaque pointer
// declared next to it.
// NOTE(review): presumably walks [ofs, end] of `obj` in pieces of at most
// max_chunk_size and hands `arg` to each callback — confirm in the definition.
1260 int iterate_obj(RGWObjectCtx& ctx, const RGWBucketInfo& bucket_info,
1261 const rgw_obj& obj, off_t ofs, off_t end,
1262 uint64_t max_chunk_size, iterate_obj_cb cb, void *arg,
1263 optional_yield y);
1264
// Has the same parameter list as iterate_obj_cb, but as declared here it is a
// non-static member, so it is not directly usable as that pointer type.
1265 int get_obj_iterate_cb(const rgw_raw_obj& read_obj, off_t obj_ofs,
1266 off_t read_ofs, off_t len, bool is_head_obj,
1267 RGWObjState *astate, void *arg);
1268
// Declaration only; receives a librados completion handle plus an opaque
// pointer.
1269 void get_obj_aio_completion_cb(librados::completion_t cb, void *arg);
1270
1271 /**
1272 * a simple object read without keeping state
 *
 * All parameters after `obj` except `y` are pointers (psize, pmtime, epoch,
 * attrs, first_chunk, objv_tracker).
 * NOTE(review): these look like optional outputs; whether null is accepted
 * for each must be confirmed in the definition.
1273 */
1274
1275 int raw_obj_stat(rgw_raw_obj& obj, uint64_t *psize, ceph::real_time *pmtime, uint64_t *epoch,
1276 map<string, bufferlist> *attrs, bufferlist *first_chunk,
1277 RGWObjVersionTracker *objv_tracker, optional_yield y);
1278
// Two overloads that differ only in the librados operation type: one takes an
// ObjectWriteOperation*, the other an ObjectReadOperation*.
1279 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectWriteOperation *op);
1280 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectReadOperation *op);
1281
// Declaration only. `call` is a std::function taking a BucketShard* and
// returning int.
// NOTE(review): presumably `call` is retried while the bucket is being
// resharded — confirm in the definition.
1282 int guard_reshard(BucketShard *bs,
1283 const rgw_obj& obj_instance,
1284 const RGWBucketInfo& bucket_info,
1285 std::function<int(BucketShard *)> call);
// Declaration only; `new_bucket_id` is a pointer to a string.
// NOTE(review): likely an output receiving the post-reshard bucket id —
// confirm in the definition.
1286 int block_while_resharding(RGWRados::BucketShard *bs,
1287 string *new_bucket_id,
1288 const RGWBucketInfo& bucket_info,
1289 optional_yield y);
1290
// OLH-related declarations. NOTE(review): "olh" presumably stands for object
// logical head (versioned-object head pointer) — confirm. No bodies are
// visible here, so only signatures and defaults are described.
1291 void bucket_index_guard_olh_op(RGWObjState& olh_state, librados::ObjectOperation& op);
// olh_init_modification and its _impl variant share an identical signature.
1292 int olh_init_modification(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
1293 int olh_init_modification_impl(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
// Defaults: zones_trace = nullptr, log_data_change = false.
1294 int bucket_index_link_olh(const RGWBucketInfo& bucket_info, RGWObjState& olh_state,
1295 const rgw_obj& obj_instance, bool delete_marker,
1296 const string& op_tag, struct rgw_bucket_dir_entry_meta *meta,
1297 uint64_t olh_epoch,
1298 ceph::real_time unmod_since, bool high_precision_time,
1299 rgw_zone_set *zones_trace = nullptr,
1300 bool log_data_change = false);
// Default: zones_trace = nullptr.
1301 int bucket_index_unlink_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj_instance, const string& op_tag, const string& olh_tag, uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
// `log` maps a uint64_t key to a vector of olh log entries; `log` and
// `is_truncated` are pointers.
1302 int bucket_index_read_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance, uint64_t ver_marker,
1303 map<uint64_t, vector<rgw_bucket_olh_log_entry> > *log, bool *is_truncated);
1304 int bucket_index_trim_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& obj_state, const rgw_obj& obj_instance, uint64_t ver);
1305 int bucket_index_clear_olh(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance);
// Takes the same map shape that bucket_index_read_olh_log() fills, here by
// reference. Default: zones_trace = nullptr.
1306 int apply_olh_log(RGWObjectCtx& ctx, RGWObjState& obj_state, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
1307 bufferlist& obj_tag, map<uint64_t, vector<rgw_bucket_olh_log_entry> >& log,
1308 uint64_t *plast_ver, rgw_zone_set *zones_trace = nullptr);
1309 int update_olh(RGWObjectCtx& obj_ctx, RGWObjState *state, const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_zone_set *zones_trace = nullptr);
// Defaults: zones_trace = nullptr, log_data_change = false.
1310 int set_olh(RGWObjectCtx& obj_ctx, const RGWBucketInfo& bucket_info, const rgw_obj& target_obj, bool delete_marker, rgw_bucket_dir_entry_meta *meta,
1311 uint64_t olh_epoch, ceph::real_time unmod_since, bool high_precision_time,
1312 optional_yield y, rgw_zone_set *zones_trace = nullptr, bool log_data_change = false);
1313 int repair_olh(RGWObjState* state, const RGWBucketInfo& bucket_info,
1314 const rgw_obj& obj);
// Default: zones_trace = nullptr.
1315 int unlink_obj_instance(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj,
1316 uint64_t olh_epoch, optional_yield y, rgw_zone_set *zones_trace = nullptr);
1317
// Declarations only. check_pending_olh_entries() takes the pending map by
// reference and a second map by pointer.
// NOTE(review): the pointer presumably collects entries to remove — confirm.
1318 void check_pending_olh_entries(map<string, bufferlist>& pending_entries, map<string, bufferlist> *rm_pending_entries);
1319 int remove_olh_pending_entries(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, map<string, bufferlist>& pending_attrs);
// `target` / `olh` are pointers and look like the result slots — confirm in
// the definitions.
1320 int follow_olh(const RGWBucketInfo& bucket_info, RGWObjectCtx& ctx, RGWObjState *state, const rgw_obj& olh_obj, rgw_obj *target);
1321 int get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWOLHInfo *olh);
1322
// Two overloads: one targets an rgw_obj_key, the other an rgw_obj.
1323 void gen_rand_obj_instance_name(rgw_obj_key *target_key);
1324 void gen_rand_obj_instance_name(rgw_obj *target);
1325
// Declarations only; `m` is a string -> RGWBucketEnt map passed by non-const
// reference.
1326 int update_containers_stats(map<string, RGWBucketEnt>& m);
1327 int append_async(rgw_raw_obj& obj, size_t size, bufferlist& bl);
1328
1329 public:
1330 void set_atomic(void *ctx, rgw_obj& obj) {
1331 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
1332 rctx->set_atomic(obj);
1333 }
1334 void set_prefetch_data(void *ctx, const rgw_obj& obj) {
1335 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
1336 rctx->set_prefetch_data(obj);
1337 }
// Declaration only; `owner` is a pointer to an ACLOwner.
// NOTE(review): presumably decodes an ACL policy from `bl` and reports its
// owner through `owner` — confirm in the definition.
1338 int decode_policy(bufferlist& bl, ACLOwner *owner);
1339 int get_bucket_stats(RGWBucketInfo& bucket_info, int shard_id, string *bucket_ver, string *master_ver,
1340 map<RGWObjCategory, RGWStorageStats>& stats, string *max_marker, bool* syncstopped = NULL);
// Asynchronous counterpart by name; the result is delivered through the
// RGWGetBucketStats_CB object rather than out-parameters.
1341 int get_bucket_stats_async(RGWBucketInfo& bucket_info, int shard_id, RGWGetBucketStats_CB *cb);
1342
// Declaration only; `pattrs` is a pointer to an attribute map.
1343 int put_bucket_instance_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, map<string, bufferlist> *pattrs);
1344 /* xxx dang obj_ctx -> svc */
// Two lookups that differ only in how the instance is identified: by
// meta_key string or by rgw_bucket. Both fill `info`.
1345 int get_bucket_instance_info(RGWSysObjectCtx& obj_ctx, const string& meta_key, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs, optional_yield y);
1346 int get_bucket_instance_info(RGWSysObjectCtx& obj_ctx, const rgw_bucket& bucket, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs, optional_yield y);
1347
// Static helper; writes its result into `bucket_entry`. The exact format is
// defined outside this header.
1348 static void make_bucket_entry_name(const string& tenant_name, const string& bucket_name, string& bucket_entry);
1349
1350 int get_bucket_info(RGWServices *svc,
1351 const string& tenant_name, const string& bucket_name,
1352 RGWBucketInfo& info,
1353 ceph::real_time *pmtime, optional_yield y, map<string, bufferlist> *pattrs = NULL);
1354
1355 // Returns 0 on successful refresh. Returns error code if there was
1356 // an error or the version stored on the OSD is the same as that
1357 // presented in the BucketInfo structure.
1358 //
// `pattrs` is optional (defaults to nullptr); `pmtime` has no default.
1359 int try_refresh_bucket_info(RGWBucketInfo& info,
1360 ceph::real_time *pmtime,
1361 map<string, bufferlist> *pattrs = nullptr);
1362
// Declaration only.
// NOTE(review): `pep_objv` presumably carries the entry point's object
// version and `create_entry_point` whether the entry point is written too —
// confirm in the definition.
1363 int put_linked_bucket_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, obj_version *pep_objv,
1364 map<string, bufferlist> *pattrs, bool create_entry_point);
1365
// Bucket-index operation declarations working on a BucketShard. Every
// function that has a zones_trace parameter defaults it to nullptr.
// NOTE(review): prepare / complete / cancel presumably form a two-phase
// index update — confirm in the definitions.
1366 int cls_obj_prepare_op(BucketShard& bs, RGWModifyOp op, string& tag, rgw_obj& obj, uint16_t bilog_flags, optional_yield y, rgw_zone_set *zones_trace = nullptr);
1367 int cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModifyOp op, string& tag, int64_t pool, uint64_t epoch,
1368 rgw_bucket_dir_entry& ent, RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
1369 int cls_obj_complete_add(BucketShard& bs, const rgw_obj& obj, string& tag, int64_t pool, uint64_t epoch, rgw_bucket_dir_entry& ent,
1370 RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
1371 int cls_obj_complete_del(BucketShard& bs, string& tag, int64_t pool, uint64_t epoch, rgw_obj& obj,
1372 ceph::real_time& removed_mtime, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
1373 int cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
// The unit of `timeout` is not visible from this header.
1374 int cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_t timeout);
1375
// Result container for ordered listing: a boost flat_map from std::string to
// rgw_bucket_dir_entry.
1376 using ent_map_t =
1377 boost::container::flat_map<std::string, rgw_bucket_dir_entry>;
1378
// Predicate over a std::string, as a plain function pointer.
1379 using check_filter_t = bool (*)(const std::string&);
1380
// Declaration only. Results go into `m`; is_truncated, cls_filtered and
// last_entry are pointers. force_check_filter is optional and defaults to
// nullptr.
1381 int cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
1382 const int shard_id,
1383 const rgw_obj_index_key& start_after,
1384 const string& prefix,
1385 const string& delimiter,
1386 const uint32_t num_entries,
1387 const bool list_versions,
1388 const uint16_t exp_factor, // 0 means ignore
1389 ent_map_t& m,
1390 bool* is_truncated,
1391 bool* cls_filtered,
1392 rgw_obj_index_key *last_entry,
1393 optional_yield y,
1394 check_filter_t force_check_filter = nullptr);
1395 int cls_bucket_list_unordered(RGWBucketInfo& bucket_info,
1396 int shard_id,
1397 const rgw_obj_index_key& start_after,
1398 const string& prefix,
1399 uint32_t num_entries,
1400 bool list_versions,
1401 vector<rgw_bucket_dir_entry>& ent_list,
1402 bool *is_truncated,
1403 rgw_obj_index_key *last_entry,
1404 optional_yield y,
1405 check_filter_t = nullptr);
1406 int cls_bucket_head(const RGWBucketInfo& bucket_info, int shard_id, vector<rgw_bucket_dir_header>& headers, map<int, string> *bucket_instance_ids = NULL);
// Asynchronous form by name: takes a RGWGetDirHeader_CB and an int pointer
// `num_aio`.
// NOTE(review): num_aio presumably reports how many requests were issued —
// confirm in the definition.
1407 int cls_bucket_head_async(const RGWBucketInfo& bucket_info, int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio);
1408
// bi_* declarations: bucket-index entry access. The getters write through
// their final pointer argument.
1409 int bi_get_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_dir_entry *dirent);
1410 int bi_get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_olh_entry *olh);
1411 int bi_get(const RGWBucketInfo& bucket_info, const rgw_obj& obj, BIIndexType index_type, rgw_cls_bi_entry *entry);
// Three bi_put overloads: the void one takes an ObjectWriteOperation to work
// with, the int ones take a shard or a bucket/object pair.
1412 void bi_put(librados::ObjectWriteOperation& op, BucketShard& bs, rgw_cls_bi_entry& entry);
1413 int bi_put(BucketShard& bs, rgw_cls_bi_entry& entry);
1414 int bi_put(rgw_bucket& bucket, rgw_obj& obj, rgw_cls_bi_entry& entry);
// Three bi_list overloads: by bucket + shard id, by BucketShard, and by
// bucket + object name. All return entries through `entries` and report
// through `is_truncated`.
1415 int bi_list(rgw_bucket& bucket, int shard_id, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
1416 int bi_list(BucketShard& bs, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
1417 int bi_list(rgw_bucket& bucket, const string& obj_name, const string& marker, uint32_t max,
1418 list<rgw_cls_bi_entry> *entries, bool *is_truncated);
1419 int bi_remove(BucketShard& bs);
1420
// Usage-log declarations, all keyed by an object id string `oid`.
1421 int cls_obj_usage_log_add(const string& oid, rgw_usage_log_info& info);
// `read_iter` and `usage` are non-const references; `is_truncated` is a
// pointer.
// NOTE(review): read_iter presumably carries the continuation position
// between calls — confirm in the definition.
1422 int cls_obj_usage_log_read(const string& oid, const string& user, const string& bucket, uint64_t start_epoch,
1423 uint64_t end_epoch, uint32_t max_entries, string& read_iter, map<rgw_user_bucket,
1424 rgw_usage_log_entry>& usage, bool *is_truncated);
1425 int cls_obj_usage_log_trim(const string& oid, const string& user, const string& bucket, uint64_t start_epoch,
1426 uint64_t end_epoch);
// Unlike its siblings this one takes `oid` by non-const reference.
1427 int cls_obj_usage_log_clear(string& oid);
1428
// The computed shard is written through `shard_id`; the int return is the
// status code.
1429 int get_target_shard_id(const RGWBucketInfo& bucket_info, const string& obj_key, int *shard_id);
1430
// Lock helpers on (pool, oid). lock_exclusive additionally takes a duration;
// zone_id and owner_id are non-const references in both.
1431 int lock_exclusive(const rgw_pool& pool, const string& oid, ceph::timespan& duration, rgw_zone_id& zone_id, string& owner_id);
1432 int unlock(const rgw_pool& pool, const string& oid, rgw_zone_id& zone_id, string& owner_id);
1433
// Garbage-collection declarations.
// NOTE(review): "gc" is read as garbage collection and "chain" as the list of
// tail objects to reclaim — confirm against the definitions.
1434 void update_gc_chain(rgw_obj& head_obj, RGWObjManifest& manifest, cls_rgw_obj_chain *chain);
1435 int send_chain_to_gc(cls_rgw_obj_chain& chain, const string& tag);
1436 void delete_objs_inline(cls_rgw_obj_chain& chain, const string& tag);
// gc_operate has a write and a read overload; the read one also takes an
// output bufferlist pointer. gc_aio_operate takes an AioCompletion.
1437 int gc_operate(string& oid, librados::ObjectWriteOperation *op);
1438 int gc_aio_operate(const std::string& oid, librados::AioCompletion *c,
1439 librados::ObjectWriteOperation *op);
1440 int gc_operate(string& oid, librados::ObjectReadOperation *op, bufferlist *pbl);
1441
// `index` and `truncated` are pointers; `marker` and `processing_queue` are
// non-const references.
1442 int list_gc_objs(int *index, string& marker, uint32_t max, bool expired_only, std::list<cls_rgw_gc_obj_info>& result, bool *truncated, bool& processing_queue);
1443 int process_gc(bool expired_only);
// Returns bool rather than an int status, unlike its neighbours.
1444 bool process_expire_objects();
1445 int defer_gc(void *ctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, optional_yield y);
1446
// NOTE(review): "lc" presumably means lifecycle — confirm.
1447 int process_lc();
1448 int list_lc_progress(string& marker, uint32_t max_entries,
1449 vector<cls_rgw_lc_entry>& progress_map, int& index);
1450
// Index maintenance declarations. bucket_check_index() reports two per-
// category stats maps through pointers.
1451 int bucket_check_index(RGWBucketInfo& bucket_info,
1452 map<RGWObjCategory, RGWStorageStats> *existing_stats,
1453 map<RGWObjCategory, RGWStorageStats> *calculated_stats);
1454 int bucket_rebuild_index(RGWBucketInfo& bucket_info);
1455 int bucket_set_reshard(const RGWBucketInfo& bucket_info, const cls_rgw_bucket_instance_entry& entry);
1456 int remove_objs_from_index(RGWBucketInfo& bucket_info, list<rgw_obj_index_key>& oid_list);
// Works directly on librados IoCtx handles with explicit oid/locator pairs
// for source and destination.
1457 int move_rados_obj(librados::IoCtx& src_ioctx,
1458 const string& src_oid, const string& src_locator,
1459 librados::IoCtx& dst_ioctx,
1460 const string& dst_oid, const string& dst_locator);
// Locator repair helpers; the tail variant reports through `need_fix`.
1461 int fix_head_obj_locator(const RGWBucketInfo& bucket_info, bool copy_obj, bool remove_bad, rgw_obj_key& key);
1462 int fix_tail_obj_locator(const RGWBucketInfo& bucket_info, rgw_obj_key& key, bool fix, bool *need_fix, optional_yield y);
1463
// check_size_only defaults to false.
1464 int check_quota(const rgw_user& bucket_owner, rgw_bucket& bucket,
1465 RGWQuotaInfo& user_quota, RGWQuotaInfo& bucket_quota, uint64_t obj_size, bool check_size_only = false);
1466
// Declaration only.
// NOTE(review): presumably decides from num_objs whether the bucket should be
// resharded — confirm in the definition.
1467 int check_bucket_shards(const RGWBucketInfo& bucket_info, const rgw_bucket& bucket,
1468 uint64_t num_objs);
1469
1470 int add_bucket_to_reshard(const RGWBucketInfo& bucket_info, uint32_t new_num_shards);
1471
// Declaration only; returns a 64-bit id. What the id identifies is defined
// outside this header.
1472 uint64_t instance_id();
1473
// Returns a raw pointer to a librados::Rados; ownership is not visible here.
1474 librados::Rados* get_rados_handle();
1475
// Asynchronous deletes. Both take a list of AioCompletion pointers by
// non-const reference.
// NOTE(review): presumably the issued completions are appended to `handles`
// for the caller to wait on — confirm in the definitions.
1476 int delete_raw_obj_aio(const rgw_raw_obj& obj, list<librados::AioCompletion *>& handles);
1477 int delete_obj_aio(const rgw_obj& obj, RGWBucketInfo& info, RGWObjState *astate,
1478 list<librados::AioCompletion *>& handles, bool keep_index_consistent,
1479 optional_yield y);
1480
1481 private:
1482 /**
1483 * Check the actual on-disk state of the object specified
1484 * by list_state, and fill in the time and size of object.
1485 * Then append any changes to suggested_updates for
1486 * the rgw class' dir_suggest_changes function.
1487 *
1488 * Note that this can maul list_state; don't use it afterwards. Also
1489 * it expects object to already be filled in from list_state; it only
1490 * sets the size and mtime.
1491 *
 * io_ctx is taken by value (a copy of the caller's IoCtx handle);
 * list_state, object and suggested_updates are non-const references.
 *
1492 * Returns 0 on success, -ENOENT if the object doesn't exist on disk,
1493 * and -errno on other failures. (-ENOENT is not a failure, and it
1494 * will encode that info as a suggested update.)
1495 */
1496 int check_disk_state(librados::IoCtx io_ctx,
1497 const RGWBucketInfo& bucket_info,
1498 rgw_bucket_dir_entry& list_state,
1499 rgw_bucket_dir_entry& object,
1500 bufferlist& suggested_updates,
1501 optional_yield y);
1502
1503 /**
1504 * Init pool iteration
1505 * pool: pool to use for the ctx initialization
1506 * ctx: context object to use for the iteration
1507 * Returns: 0 on success, -ERR# otherwise.
1508 */
1509 int pool_iterate_begin(const rgw_pool& pool, RGWPoolIterCtx& ctx);
1510
1511 /**
1512 * Init pool iteration, starting at the given cursor position
1513 * pool: pool to use
1514 * cursor: position to start iteration
 * (a string; pool_iterate_get_cursor() returns a string position,
 * NOTE(review): presumably in the same format — confirm)
1515 * ctx: context object to use for the iteration
1516 * Returns: 0 on success, -ERR# otherwise.
1517 */
1518 int pool_iterate_begin(const rgw_pool& pool, const string& cursor, RGWPoolIterCtx& ctx);
1519
1520 /**
1521 * Get pool iteration position
1522 * ctx: context object to use for the iteration
1523 * Returns: string representation of position
1524 */
1525 string pool_iterate_get_cursor(RGWPoolIterCtx& ctx);
1526
1527 /**
1528 * Iterate over pool return object names, use optional filter
1529 * ctx: iteration context, initialized with pool_iterate_begin()
1530 * num: max number of objects to return
1531 * objs: a vector that the results will append into
1532 * is_truncated: if not NULL, receives the truncation flag for this call.
 * NOTE(review): this line used to read "will hold true iff iteration is
 * complete", which is the opposite of what "truncated" normally means
 * (more objects remain). Confirm against the definition before relying
 * on either reading.
1533 * filter: if not NULL, will be used to filter returned objects
1534 * Returns: 0 on success, -ERR# otherwise.
1535 */
1536 int pool_iterate(RGWPoolIterCtx& ctx, uint32_t num, vector<rgw_bucket_dir_entry>& objs,
1537 bool *is_truncated, RGWAccessListFilter *filter);
1538
// Declaration only; returns a 64-bit value.
// NOTE(review): presumably hands out a fresh bucket id on each call —
// confirm in the definition.
1539 uint64_t next_bucket_id();
1540
1541 /**
1542 * This is broken out to facilitate unit testing.
 *
 * Static: depends only on its two arguments as far as the signature shows.
 * NOTE(review): by its name it computes how many entries to request from
 * each shard for an ordered listing of num_entries across num_shards —
 * confirm the formula in the definition.
1543 */
1544 static uint32_t calc_ordered_bucket_list_per_shard(uint32_t num_entries,
1545 uint32_t num_shards);
1546 };
1547
1548 #endif