]> git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/rgw_rados.h
e19b8d44811e63265c31c9ca79f7c85bbae9f57c
[ceph.git] / ceph / src / rgw / rgw_rados.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab ft=cpp
3
4 #ifndef CEPH_RGWRADOS_H
5 #define CEPH_RGWRADOS_H
6
7 #include <functional>
8 #include <boost/container/flat_map.hpp>
9
10 #include "include/rados/librados.hpp"
11 #include "include/Context.h"
12 #include "common/RefCountedObj.h"
13 #include "common/RWLock.h"
14 #include "common/ceph_time.h"
15 #include "rgw_common.h"
16 #include "cls/rgw/cls_rgw_types.h"
17 #include "cls/version/cls_version_types.h"
18 #include "cls/log/cls_log_types.h"
19 #include "cls/timeindex/cls_timeindex_types.h"
20 #include "cls/otp/cls_otp_types.h"
21 #include "rgw_log.h"
22 #include "rgw_metadata.h"
23 #include "rgw_meta_sync_status.h"
24 #include "rgw_period_puller.h"
25 #include "rgw_obj_manifest.h"
26 #include "rgw_sync_module.h"
27 #include "rgw_trim_bilog.h"
28 #include "rgw_service.h"
29
30 #include "services/svc_rados.h"
31 #include "services/svc_bi_rados.h"
32
33 class RGWWatcher;
34 class SafeTimer;
35 class ACLOwner;
36 class RGWGC;
37 class RGWMetaNotifier;
38 class RGWDataNotifier;
39 class RGWLC;
40 class RGWObjectExpirer;
41 class RGWMetaSyncProcessorThread;
42 class RGWDataSyncProcessorThread;
43 class RGWSyncLogTrimThread;
44 class RGWSyncTraceManager;
45 struct RGWZoneGroup;
46 struct RGWZoneParams;
47 class RGWReshard;
48 class RGWReshardWait;
49
50 class RGWSysObjectCtx;
51
/* Flag bits accepted by put_obj_meta(). */
#define PUT_OBJ_CREATE      0x01
#define PUT_OBJ_EXCL        0x02
/* Both of the bits above combined. */
#define PUT_OBJ_CREATE_EXCL (PUT_OBJ_CREATE | PUT_OBJ_EXCL)

/* RGW_OBJ_NS_* string constants (presumably object namespace names -- confirm against users). */
#define RGW_OBJ_NS_MULTIPART "multipart"
#define RGW_OBJ_NS_SHADOW    "shadow"
59
60 static inline void prepend_bucket_marker(const rgw_bucket& bucket, const string& orig_oid, string& oid)
61 {
62 if (bucket.marker.empty() || orig_oid.empty()) {
63 oid = orig_oid;
64 } else {
65 oid = bucket.marker;
66 oid.append("_");
67 oid.append(orig_oid);
68 }
69 }
70
71 static inline void get_obj_bucket_and_oid_loc(const rgw_obj& obj, string& oid, string& locator)
72 {
73 const rgw_bucket& bucket = obj.bucket;
74 prepend_bucket_marker(bucket, obj.get_oid(), oid);
75 const string& loc = obj.key.get_loc();
76 if (!loc.empty()) {
77 prepend_bucket_marker(bucket, loc, locator);
78 } else {
79 locator.clear();
80 }
81 }
82
83 int rgw_policy_from_attrset(CephContext *cct, map<string, bufferlist>& attrset, RGWAccessControlPolicy *policy);
84
85 struct RGWOLHInfo {
86 rgw_obj target;
87 bool removed;
88
89 RGWOLHInfo() : removed(false) {}
90
91 void encode(bufferlist& bl) const {
92 ENCODE_START(1, 1, bl);
93 encode(target, bl);
94 encode(removed, bl);
95 ENCODE_FINISH(bl);
96 }
97
98 void decode(bufferlist::const_iterator& bl) {
99 DECODE_START(1, bl);
100 decode(target, bl);
101 decode(removed, bl);
102 DECODE_FINISH(bl);
103 }
104 static void generate_test_instances(list<RGWOLHInfo*>& o);
105 void dump(Formatter *f) const;
106 };
107 WRITE_CLASS_ENCODER(RGWOLHInfo)
108
109 struct RGWOLHPendingInfo {
110 ceph::real_time time;
111
112 RGWOLHPendingInfo() {}
113
114 void encode(bufferlist& bl) const {
115 ENCODE_START(1, 1, bl);
116 encode(time, bl);
117 ENCODE_FINISH(bl);
118 }
119
120 void decode(bufferlist::const_iterator& bl) {
121 DECODE_START(1, bl);
122 decode(time, bl);
123 DECODE_FINISH(bl);
124 }
125
126 void dump(Formatter *f) const;
127 };
128 WRITE_CLASS_ENCODER(RGWOLHPendingInfo)
129
130 struct RGWUsageBatch {
131 map<ceph::real_time, rgw_usage_log_entry> m;
132
133 void insert(ceph::real_time& t, rgw_usage_log_entry& entry, bool *account) {
134 bool exists = m.find(t) != m.end();
135 *account = !exists;
136 m[t].aggregate(entry);
137 }
138 };
139
/**
 * Iteration cursor: a string marker plus a numeric index that starts at 0.
 */
struct RGWUsageIter {
  string read_iter;
  uint32_t index{0};

  RGWUsageIter() = default;
};
146
147 class RGWGetDataCB {
148 public:
149 virtual int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) = 0;
150 RGWGetDataCB() {}
151 virtual ~RGWGetDataCB() {}
152 };
153
154 struct RGWCloneRangeInfo {
155 rgw_obj src;
156 off_t src_ofs;
157 off_t dst_ofs;
158 uint64_t len;
159 };
160
161 struct RGWObjState {
162 rgw_obj obj;
163 bool is_atomic{false};
164 bool has_attrs{false};
165 bool exists{false};
166 uint64_t size{0}; //< size of raw object
167 uint64_t accounted_size{0}; //< size before compression, encryption
168 ceph::real_time mtime;
169 uint64_t epoch{0};
170 bufferlist obj_tag;
171 bufferlist tail_tag;
172 string write_tag;
173 bool fake_tag{false};
174 std::optional<RGWObjManifest> manifest;
175 string shadow_obj;
176 bool has_data{false};
177 bufferlist data;
178 bool prefetch_data{false};
179 bool keep_tail{false};
180 bool is_olh{false};
181 bufferlist olh_tag;
182 uint64_t pg_ver{false};
183 uint32_t zone_short_id{0};
184
185 /* important! don't forget to update copy constructor */
186
187 RGWObjVersionTracker objv_tracker;
188
189 map<string, bufferlist> attrset;
190
191 RGWObjState();
192 RGWObjState(const RGWObjState& rhs);
193 ~RGWObjState();
194
195 bool get_attr(string name, bufferlist& dest) {
196 map<string, bufferlist>::iterator iter = attrset.find(name);
197 if (iter != attrset.end()) {
198 dest = iter->second;
199 return true;
200 }
201 return false;
202 }
203 };
204
205 class RGWFetchObjFilter {
206 public:
207 virtual ~RGWFetchObjFilter() {}
208
209 virtual int filter(CephContext *cct,
210 const rgw_obj_key& source_key,
211 const RGWBucketInfo& dest_bucket_info,
212 std::optional<rgw_placement_rule> dest_placement_rule,
213 const map<string, bufferlist>& obj_attrs,
214 std::optional<rgw_user> *poverride_owner,
215 const rgw_placement_rule **prule) = 0;
216 };
217
218 class RGWFetchObjFilter_Default : public RGWFetchObjFilter {
219 protected:
220 rgw_placement_rule dest_rule;
221 public:
222 RGWFetchObjFilter_Default() {}
223
224 int filter(CephContext *cct,
225 const rgw_obj_key& source_key,
226 const RGWBucketInfo& dest_bucket_info,
227 std::optional<rgw_placement_rule> dest_placement_rule,
228 const map<string, bufferlist>& obj_attrs,
229 std::optional<rgw_user> *poverride_owner,
230 const rgw_placement_rule **prule) override;
231 };
232
233 class RGWObjectCtx {
234 rgw::sal::RGWRadosStore *store;
235 ceph::shared_mutex lock = ceph::make_shared_mutex("RGWObjectCtx");
236 void *s{nullptr};
237
238 std::map<rgw_obj, RGWObjState> objs_state;
239 public:
240 explicit RGWObjectCtx(rgw::sal::RGWRadosStore *_store) : store(_store) {}
241 explicit RGWObjectCtx(rgw::sal::RGWRadosStore *_store, void *_s) : store(_store), s(_s) {}
242
243 void *get_private() {
244 return s;
245 }
246
247 rgw::sal::RGWRadosStore *get_store() {
248 return store;
249 }
250
251 RGWObjState *get_state(const rgw_obj& obj);
252
253 void set_atomic(rgw_obj& obj);
254 void set_prefetch_data(const rgw_obj& obj);
255 void invalidate(const rgw_obj& obj);
256 };
257
258
259 struct RGWRawObjState {
260 rgw_raw_obj obj;
261 bool has_attrs{false};
262 bool exists{false};
263 uint64_t size{0};
264 ceph::real_time mtime;
265 uint64_t epoch{0};
266 bufferlist obj_tag;
267 bool has_data{false};
268 bufferlist data;
269 bool prefetch_data{false};
270 uint64_t pg_ver{0};
271
272 /* important! don't forget to update copy constructor */
273
274 RGWObjVersionTracker objv_tracker;
275
276 map<string, bufferlist> attrset;
277 RGWRawObjState() {}
278 RGWRawObjState(const RGWRawObjState& rhs) : obj (rhs.obj) {
279 has_attrs = rhs.has_attrs;
280 exists = rhs.exists;
281 size = rhs.size;
282 mtime = rhs.mtime;
283 epoch = rhs.epoch;
284 if (rhs.obj_tag.length()) {
285 obj_tag = rhs.obj_tag;
286 }
287 has_data = rhs.has_data;
288 if (rhs.data.length()) {
289 data = rhs.data;
290 }
291 prefetch_data = rhs.prefetch_data;
292 pg_ver = rhs.pg_ver;
293 objv_tracker = rhs.objv_tracker;
294 }
295 };
296
297 struct RGWPoolIterCtx {
298 librados::IoCtx io_ctx;
299 librados::NObjectIterator iter;
300 };
301
302 struct RGWListRawObjsCtx {
303 bool initialized;
304 RGWPoolIterCtx iter_ctx;
305
306 RGWListRawObjsCtx() : initialized(false) {}
307 };
308
309 struct objexp_hint_entry {
310 string tenant;
311 string bucket_name;
312 string bucket_id;
313 rgw_obj_key obj_key;
314 ceph::real_time exp_time;
315
316 void encode(bufferlist& bl) const {
317 ENCODE_START(2, 1, bl);
318 encode(bucket_name, bl);
319 encode(bucket_id, bl);
320 encode(obj_key, bl);
321 encode(exp_time, bl);
322 encode(tenant, bl);
323 ENCODE_FINISH(bl);
324 }
325
326 void decode(bufferlist::const_iterator& bl) {
327 // XXX Do we want DECODE_START_LEGACY_COMPAT_LEN(2, 1, 1, bl); ?
328 DECODE_START(2, bl);
329 decode(bucket_name, bl);
330 decode(bucket_id, bl);
331 decode(obj_key, bl);
332 decode(exp_time, bl);
333 if (struct_v >= 2) {
334 decode(tenant, bl);
335 } else {
336 tenant.clear();
337 }
338 DECODE_FINISH(bl);
339 }
340
341 void dump(Formatter *f) const;
342 static void generate_test_instances(list<objexp_hint_entry*>& o);
343 };
344 WRITE_CLASS_ENCODER(objexp_hint_entry)
345
346 class RGWDataChangesLog;
347 class RGWMetaSyncStatusManager;
348 class RGWDataSyncStatusManager;
349 class RGWCoroutinesManagerRegistry;
350
351 class RGWGetBucketStats_CB : public RefCountedObject {
352 protected:
353 rgw_bucket bucket;
354 map<RGWObjCategory, RGWStorageStats> *stats;
355 public:
356 explicit RGWGetBucketStats_CB(const rgw_bucket& _bucket) : bucket(_bucket), stats(NULL) {}
357 ~RGWGetBucketStats_CB() override {}
358 virtual void handle_response(int r) = 0;
359 virtual void set_response(map<RGWObjCategory, RGWStorageStats> *_stats) {
360 stats = _stats;
361 }
362 };
363
364 class RGWGetUserStats_CB : public RefCountedObject {
365 protected:
366 rgw_user user;
367 RGWStorageStats stats;
368 public:
369 explicit RGWGetUserStats_CB(const rgw_user& _user) : user(_user) {}
370 ~RGWGetUserStats_CB() override {}
371 virtual void handle_response(int r) = 0;
372 virtual void set_response(RGWStorageStats& _stats) {
373 stats = _stats;
374 }
375 };
376
377 class RGWGetDirHeader_CB;
378 class RGWGetUserHeader_CB;
379 namespace rgw { namespace sal { class RGWRadosStore; } }
380
381 class RGWAsyncRadosProcessor;
382
383 template <class T>
384 class RGWChainedCacheImpl;
385
386 struct bucket_info_entry {
387 RGWBucketInfo info;
388 real_time mtime;
389 map<string, bufferlist> attrs;
390 };
391
392 struct tombstone_entry;
393
394 template <class K, class V>
395 class lru_map;
396 using tombstone_cache_t = lru_map<rgw_obj, tombstone_entry>;
397
398 class RGWIndexCompletionManager;
399
400 class RGWRados
401 {
402 friend class RGWGC;
403 friend class RGWMetaNotifier;
404 friend class RGWDataNotifier;
405 friend class RGWLC;
406 friend class RGWObjectExpirer;
407 friend class RGWMetaSyncProcessorThread;
408 friend class RGWDataSyncProcessorThread;
409 friend class RGWReshard;
410 friend class RGWBucketReshard;
411 friend class RGWBucketReshardLock;
412 friend class BucketIndexLockGuard;
413 friend class RGWCompleteMultipart;
414
415 /** Open the pool used as root for this gateway */
416 int open_root_pool_ctx();
417 int open_gc_pool_ctx();
418 int open_lc_pool_ctx();
419 int open_objexp_pool_ctx();
420 int open_reshard_pool_ctx();
421
422 int open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx,
423 bool mostly_omap);
424
425 std::atomic<int64_t> max_req_id = { 0 };
426 ceph::mutex lock = ceph::make_mutex("rados_timer_lock");
427 SafeTimer *timer;
428
429 rgw::sal::RGWRadosStore *store;
430 RGWGC *gc;
431 RGWLC *lc;
432 RGWObjectExpirer *obj_expirer;
433 bool use_gc_thread;
434 bool use_lc_thread;
435 bool quota_threads;
436 bool run_sync_thread;
437 bool run_reshard_thread;
438
439 RGWMetaNotifier *meta_notifier;
440 RGWDataNotifier *data_notifier;
441 RGWMetaSyncProcessorThread *meta_sync_processor_thread;
442 RGWSyncTraceManager *sync_tracer = nullptr;
443 map<rgw_zone_id, RGWDataSyncProcessorThread *> data_sync_processor_threads;
444
445 boost::optional<rgw::BucketTrimManager> bucket_trim;
446 RGWSyncLogTrimThread *sync_log_trimmer{nullptr};
447
448 ceph::mutex meta_sync_thread_lock = ceph::make_mutex("meta_sync_thread_lock");
449 ceph::mutex data_sync_thread_lock = ceph::make_mutex("data_sync_thread_lock");
450
451 librados::IoCtx root_pool_ctx; // .rgw
452
453 double inject_notify_timeout_probability = 0;
454 unsigned max_notify_retries = 0;
455
456 friend class RGWWatcher;
457
458 ceph::mutex bucket_id_lock = ceph::make_mutex("rados_bucket_id");
459
460 // This field represents the number of bucket index object shards
461 uint32_t bucket_index_max_shards;
462
463 int get_obj_head_ioctx(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx *ioctx);
464 int get_obj_head_ref(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_rados_ref *ref);
465 int get_system_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
466 uint64_t max_bucket_id;
467
468 int get_olh_target_state(RGWObjectCtx& rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
469 RGWObjState *olh_state, RGWObjState **target_state, optional_yield y);
470 int get_obj_state_impl(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
471 bool follow_olh, optional_yield y, bool assume_noent = false);
472 int append_atomic_test(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
473 librados::ObjectOperation& op, RGWObjState **state, optional_yield y);
474 int append_atomic_test(const RGWObjState* astate, librados::ObjectOperation& op);
475
476 int update_placement_map();
477 int store_bucket_info(RGWBucketInfo& info, map<string, bufferlist> *pattrs, RGWObjVersionTracker *objv_tracker, bool exclusive);
478
479 void remove_rgw_head_obj(librados::ObjectWriteOperation& op);
480 void cls_obj_check_prefix_exist(librados::ObjectOperation& op, const string& prefix, bool fail_if_exist);
481 void cls_obj_check_mtime(librados::ObjectOperation& op, const real_time& mtime, bool high_precision_time, RGWCheckMTimeType type);
482 protected:
483 CephContext *cct;
484
485 librados::Rados rados;
486
487 using RGWChainedCacheImpl_bucket_info_entry = RGWChainedCacheImpl<bucket_info_entry>;
488 RGWChainedCacheImpl_bucket_info_entry *binfo_cache;
489
490 tombstone_cache_t *obj_tombstone_cache;
491
492 librados::IoCtx gc_pool_ctx; // .rgw.gc
493 librados::IoCtx lc_pool_ctx; // .rgw.lc
494 librados::IoCtx objexp_pool_ctx;
495 librados::IoCtx reshard_pool_ctx;
496
497 bool pools_initialized;
498
499 RGWQuotaHandler *quota_handler;
500
501 RGWCoroutinesManagerRegistry *cr_registry;
502
503 RGWSyncModuleInstanceRef sync_module;
504 bool writeable_zone{false};
505
506 RGWIndexCompletionManager *index_completion_manager{nullptr};
507
508 bool use_cache{false};
509 public:
510 RGWRados(): timer(NULL),
511 gc(NULL), lc(NULL), obj_expirer(NULL), use_gc_thread(false), use_lc_thread(false), quota_threads(false),
512 run_sync_thread(false), run_reshard_thread(false), meta_notifier(NULL),
513 data_notifier(NULL), meta_sync_processor_thread(NULL),
514 bucket_index_max_shards(0),
515 max_bucket_id(0), cct(NULL),
516 binfo_cache(NULL), obj_tombstone_cache(nullptr),
517 pools_initialized(false),
518 quota_handler(NULL),
519 cr_registry(NULL),
520 pctl(&ctl),
521 reshard(NULL) {}
522
523 RGWRados& set_use_cache(bool status) {
524 use_cache = status;
525 return *this;
526 }
527
528 RGWLC *get_lc() {
529 return lc;
530 }
531
532 RGWRados& set_run_gc_thread(bool _use_gc_thread) {
533 use_gc_thread = _use_gc_thread;
534 return *this;
535 }
536
537 RGWRados& set_run_lc_thread(bool _use_lc_thread) {
538 use_lc_thread = _use_lc_thread;
539 return *this;
540 }
541
542 RGWRados& set_run_quota_threads(bool _run_quota_threads) {
543 quota_threads = _run_quota_threads;
544 return *this;
545 }
546
547 RGWRados& set_run_sync_thread(bool _run_sync_thread) {
548 run_sync_thread = _run_sync_thread;
549 return *this;
550 }
551
552 RGWRados& set_run_reshard_thread(bool _run_reshard_thread) {
553 run_reshard_thread = _run_reshard_thread;
554 return *this;
555 }
556
557 uint64_t get_new_req_id() {
558 return ++max_req_id;
559 }
560
561 librados::IoCtx* get_lc_pool_ctx() {
562 return &lc_pool_ctx;
563 }
564 void set_context(CephContext *_cct) {
565 cct = _cct;
566 }
567 void set_store(rgw::sal::RGWRadosStore *_store) {
568 store = _store;
569 }
570
571 RGWServices svc;
572 RGWCtl ctl;
573
574 RGWCtl *pctl{nullptr};
575
576 /**
577 * AmazonS3 errors contain a HostId string, but it is an opaque base64 blob; we
578 * try to be more transparent. This has a wrapper so we can update it when zonegroup/zone are changed.
579 */
580 string host_id;
581
582 RGWReshard *reshard;
583 std::shared_ptr<RGWReshardWait> reshard_wait;
584
585 virtual ~RGWRados() = default;
586
587 tombstone_cache_t *get_tombstone_cache() {
588 return obj_tombstone_cache;
589 }
590 const RGWSyncModuleInstanceRef& get_sync_module() {
591 return sync_module;
592 }
593 RGWSyncTraceManager *get_sync_tracer() {
594 return sync_tracer;
595 }
596
597 int get_required_alignment(const rgw_pool& pool, uint64_t *alignment);
598 void get_max_aligned_size(uint64_t size, uint64_t alignment, uint64_t *max_size);
599 int get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size, uint64_t *palignment = nullptr);
600 int get_max_chunk_size(const rgw_placement_rule& placement_rule, const rgw_obj& obj, uint64_t *max_chunk_size, uint64_t *palignment = nullptr);
601
602 uint32_t get_max_bucket_shards() {
603 return RGWSI_BucketIndex_RADOS::shards_max();
604 }
605
606
607 int get_raw_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
608
609 int list_raw_objects_init(const rgw_pool& pool, const string& marker, RGWListRawObjsCtx *ctx);
610 int list_raw_objects_next(const string& prefix_filter, int max,
611 RGWListRawObjsCtx& ctx, list<string>& oids,
612 bool *is_truncated);
613 int list_raw_objects(const rgw_pool& pool, const string& prefix_filter, int max,
614 RGWListRawObjsCtx& ctx, list<string>& oids,
615 bool *is_truncated);
616 string list_raw_objs_get_cursor(RGWListRawObjsCtx& ctx);
617
618 CephContext *ctx() { return cct; }
619 /** do all necessary setup of the storage device */
620 int initialize(CephContext *_cct) {
621 set_context(_cct);
622 return initialize();
623 }
624 /** Initialize the RADOS instance and prepare to do other ops */
625 int init_svc(bool raw);
626 int init_ctl();
627 int init_rados();
628 int init_complete();
629 int initialize();
630 void finalize();
631
632 int register_to_service_map(const string& daemon_type, const map<string, string>& meta);
633 int update_service_map(std::map<std::string, std::string>&& status);
634
635 /// list logs
636 int log_list_init(const string& prefix, RGWAccessHandle *handle);
637 int log_list_next(RGWAccessHandle handle, string *name);
638
639 /// remove log
640 int log_remove(const string& name);
641
642 /// show log
643 int log_show_init(const string& name, RGWAccessHandle *handle);
644 int log_show_next(RGWAccessHandle handle, rgw_log_entry *entry);
645
646 // log bandwidth info
647 int log_usage(map<rgw_user_bucket, RGWUsageBatch>& usage_info);
648 int read_usage(const rgw_user& user, const string& bucket_name, uint64_t start_epoch, uint64_t end_epoch,
649 uint32_t max_entries, bool *is_truncated, RGWUsageIter& read_iter, map<rgw_user_bucket,
650 rgw_usage_log_entry>& usage);
651 int trim_usage(const rgw_user& user, const string& bucket_name, uint64_t start_epoch, uint64_t end_epoch);
652 int clear_usage();
653
654 int create_pool(const rgw_pool& pool);
655
656 void create_bucket_id(string *bucket_id);
657
658 bool get_obj_data_pool(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_pool *pool);
659 bool obj_to_raw(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj);
660
661 int create_bucket(const RGWUserInfo& owner, rgw_bucket& bucket,
662 const string& zonegroup_id,
663 const rgw_placement_rule& placement_rule,
664 const string& swift_ver_location,
665 const RGWQuotaInfo * pquota_info,
666 map<std::string,bufferlist>& attrs,
667 RGWBucketInfo& bucket_info,
668 obj_version *pobjv,
669 obj_version *pep_objv,
670 ceph::real_time creation_time,
671 rgw_bucket *master_bucket,
672 uint32_t *master_num_shards,
673 bool exclusive = true);
674
675 RGWCoroutinesManagerRegistry *get_cr_registry() { return cr_registry; }
676
677 struct BucketShard {
678 RGWRados *store;
679 rgw_bucket bucket;
680 int shard_id;
681 RGWSI_RADOS::Obj bucket_obj;
682
683 explicit BucketShard(RGWRados *_store) : store(_store), shard_id(-1) {}
684 int init(const rgw_bucket& _bucket, const rgw_obj& obj, RGWBucketInfo* out);
685 int init(const rgw_bucket& _bucket, int sid, RGWBucketInfo* out);
686 int init(const RGWBucketInfo& bucket_info, const rgw_obj& obj);
687 int init(const RGWBucketInfo& bucket_info, int sid);
688 };
689
690 class Object {
691 RGWRados *store;
692 RGWBucketInfo bucket_info;
693 RGWObjectCtx& ctx;
694 rgw_obj obj;
695
696 BucketShard bs;
697
698 RGWObjState *state;
699
700 bool versioning_disabled;
701
702 bool bs_initialized;
703
704 protected:
705 int get_state(RGWObjState **pstate, bool follow_olh, optional_yield y, bool assume_noent = false);
706 void invalidate_state();
707
708 int prepare_atomic_modification(librados::ObjectWriteOperation& op, bool reset_obj, const string *ptag,
709 const char *ifmatch, const char *ifnomatch, bool removal_op, bool modify_tail, optional_yield y);
710 int complete_atomic_modification();
711
712 public:
713 Object(RGWRados *_store, const RGWBucketInfo& _bucket_info, RGWObjectCtx& _ctx, const rgw_obj& _obj) : store(_store), bucket_info(_bucket_info),
714 ctx(_ctx), obj(_obj), bs(store),
715 state(NULL), versioning_disabled(false),
716 bs_initialized(false) {}
717
718 RGWRados *get_store() { return store; }
719 rgw_obj& get_obj() { return obj; }
720 RGWObjectCtx& get_ctx() { return ctx; }
721 RGWBucketInfo& get_bucket_info() { return bucket_info; }
722 int get_manifest(RGWObjManifest **pmanifest, optional_yield y);
723
724 int get_bucket_shard(BucketShard **pbs) {
725 if (!bs_initialized) {
726 int r =
727 bs.init(bucket_info.bucket, obj, nullptr /* no RGWBucketInfo */);
728 if (r < 0) {
729 return r;
730 }
731 bs_initialized = true;
732 }
733 *pbs = &bs;
734 return 0;
735 }
736
737 void set_versioning_disabled(bool status) {
738 versioning_disabled = status;
739 }
740
741 bool versioning_enabled() {
742 return (!versioning_disabled && bucket_info.versioning_enabled());
743 }
744
745 struct Read {
746 RGWRados::Object *source;
747
748 struct GetObjState {
749 map<rgw_pool, librados::IoCtx> io_ctxs;
750 rgw_pool cur_pool;
751 librados::IoCtx *cur_ioctx{nullptr};
752 rgw_obj obj;
753 rgw_raw_obj head_obj;
754 } state;
755
756 struct ConditionParams {
757 const ceph::real_time *mod_ptr;
758 const ceph::real_time *unmod_ptr;
759 bool high_precision_time;
760 uint32_t mod_zone_id;
761 uint64_t mod_pg_ver;
762 const char *if_match;
763 const char *if_nomatch;
764
765 ConditionParams() :
766 mod_ptr(NULL), unmod_ptr(NULL), high_precision_time(false), mod_zone_id(0), mod_pg_ver(0),
767 if_match(NULL), if_nomatch(NULL) {}
768 } conds;
769
770 struct Params {
771 ceph::real_time *lastmod;
772 uint64_t *obj_size;
773 map<string, bufferlist> *attrs;
774 rgw_obj *target_obj;
775
776 Params() : lastmod(nullptr), obj_size(nullptr), attrs(nullptr),
777 target_obj(nullptr) {}
778 } params;
779
780 explicit Read(RGWRados::Object *_source) : source(_source) {}
781
782 int prepare(optional_yield y);
783 static int range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end);
784 int read(int64_t ofs, int64_t end, bufferlist& bl, optional_yield y);
785 int iterate(int64_t ofs, int64_t end, RGWGetDataCB *cb, optional_yield y);
786 int get_attr(const char *name, bufferlist& dest, optional_yield y);
787 };
788
789 struct Write {
790 RGWRados::Object *target;
791
792 struct MetaParams {
793 ceph::real_time *mtime;
794 map<std::string, bufferlist>* rmattrs;
795 const bufferlist *data;
796 RGWObjManifest *manifest;
797 const string *ptag;
798 list<rgw_obj_index_key> *remove_objs;
799 ceph::real_time set_mtime;
800 rgw_user owner;
801 RGWObjCategory category;
802 int flags;
803 const char *if_match;
804 const char *if_nomatch;
805 std::optional<uint64_t> olh_epoch;
806 ceph::real_time delete_at;
807 bool canceled;
808 const string *user_data;
809 rgw_zone_set *zones_trace;
810 bool modify_tail;
811 bool completeMultipart;
812 bool appendable;
813
814 MetaParams() : mtime(NULL), rmattrs(NULL), data(NULL), manifest(NULL), ptag(NULL),
815 remove_objs(NULL), category(RGWObjCategory::Main), flags(0),
816 if_match(NULL), if_nomatch(NULL), canceled(false), user_data(nullptr), zones_trace(nullptr),
817 modify_tail(false), completeMultipart(false), appendable(false) {}
818 } meta;
819
820 explicit Write(RGWRados::Object *_target) : target(_target) {}
821
822 int _do_write_meta(uint64_t size, uint64_t accounted_size,
823 map<std::string, bufferlist>& attrs,
824 bool modify_tail, bool assume_noent,
825 void *index_op, optional_yield y);
826 int write_meta(uint64_t size, uint64_t accounted_size,
827 map<std::string, bufferlist>& attrs, optional_yield y);
828 int write_data(const char *data, uint64_t ofs, uint64_t len, bool exclusive);
829 const req_state* get_req_state() {
830 return (req_state *)target->get_ctx().get_private();
831 }
832 };
833
834 struct Delete {
835 RGWRados::Object *target;
836
837 struct DeleteParams {
838 rgw_user bucket_owner;
839 int versioning_status;
840 ACLOwner obj_owner; /* needed for creation of deletion marker */
841 uint64_t olh_epoch;
842 string marker_version_id;
843 uint32_t bilog_flags;
844 list<rgw_obj_index_key> *remove_objs;
845 ceph::real_time expiration_time;
846 ceph::real_time unmod_since;
847 ceph::real_time mtime; /* for setting delete marker mtime */
848 bool high_precision_time;
849 rgw_zone_set *zones_trace;
850 bool abortmp;
851 uint64_t parts_accounted_size;
852
853 DeleteParams() : versioning_status(0), olh_epoch(0), bilog_flags(0), remove_objs(NULL), high_precision_time(false), zones_trace(nullptr), abortmp(false), parts_accounted_size(0) {}
854 } params;
855
856 struct DeleteResult {
857 bool delete_marker;
858 string version_id;
859
860 DeleteResult() : delete_marker(false) {}
861 } result;
862
863 explicit Delete(RGWRados::Object *_target) : target(_target) {}
864
865 int delete_obj(optional_yield y);
866 };
867
868 struct Stat {
869 RGWRados::Object *source;
870
871 struct Result {
872 rgw_obj obj;
873 std::optional<RGWObjManifest> manifest;
874 uint64_t size{0};
875 struct timespec mtime {};
876 map<string, bufferlist> attrs;
877 } result;
878
879 struct State {
880 librados::IoCtx io_ctx;
881 librados::AioCompletion *completion;
882 int ret;
883
884 State() : completion(NULL), ret(0) {}
885 } state;
886
887
888 explicit Stat(RGWRados::Object *_source) : source(_source) {}
889
890 int stat_async();
891 int wait();
892 int stat();
893 private:
894 int finish();
895 };
896 };
897
898 class Bucket {
899 RGWRados *store;
900 RGWBucketInfo bucket_info;
901 rgw_bucket& bucket;
902 int shard_id;
903
904 public:
905 Bucket(RGWRados *_store, const RGWBucketInfo& _bucket_info) : store(_store), bucket_info(_bucket_info), bucket(bucket_info.bucket),
906 shard_id(RGW_NO_SHARD) {}
907 RGWRados *get_store() { return store; }
908 rgw_bucket& get_bucket() { return bucket; }
909 RGWBucketInfo& get_bucket_info() { return bucket_info; }
910
911 int update_bucket_id(const string& new_bucket_id);
912
913 int get_shard_id() { return shard_id; }
914 void set_shard_id(int id) {
915 shard_id = id;
916 }
917
918 class UpdateIndex {
919 RGWRados::Bucket *target;
920 string optag;
921 rgw_obj obj;
922 uint16_t bilog_flags{0};
923 BucketShard bs;
924 bool bs_initialized{false};
925 bool blind;
926 bool prepared{false};
927 rgw_zone_set *zones_trace{nullptr};
928
929 int init_bs() {
930 int r =
931 bs.init(target->get_bucket(), obj, nullptr /* no RGWBucketInfo */);
932 if (r < 0) {
933 return r;
934 }
935 bs_initialized = true;
936 return 0;
937 }
938
939 void invalidate_bs() {
940 bs_initialized = false;
941 }
942
943 int guard_reshard(BucketShard **pbs, std::function<int(BucketShard *)> call);
944 public:
945
946 UpdateIndex(RGWRados::Bucket *_target, const rgw_obj& _obj) : target(_target), obj(_obj),
947 bs(target->get_store()) {
948 blind = (target->get_bucket_info().index_type == RGWBIType_Indexless);
949 }
950
951 int get_bucket_shard(BucketShard **pbs) {
952 if (!bs_initialized) {
953 int r = init_bs();
954 if (r < 0) {
955 return r;
956 }
957 }
958 *pbs = &bs;
959 return 0;
960 }
961
962 void set_bilog_flags(uint16_t flags) {
963 bilog_flags = flags;
964 }
965
966 void set_zones_trace(rgw_zone_set *_zones_trace) {
967 zones_trace = _zones_trace;
968 }
969
970 int prepare(RGWModifyOp, const string *write_tag, optional_yield y);
971 int complete(int64_t poolid, uint64_t epoch, uint64_t size,
972 uint64_t accounted_size, ceph::real_time& ut,
973 const string& etag, const string& content_type,
974 const string& storage_class,
975 bufferlist *acl_bl, RGWObjCategory category,
976 list<rgw_obj_index_key> *remove_objs, const string *user_data = nullptr, bool appendable = false);
977 int complete_del(int64_t poolid, uint64_t epoch,
978 ceph::real_time& removed_mtime, /* mtime of removed object */
979 list<rgw_obj_index_key> *remove_objs);
980 int cancel();
981
982 const string *get_optag() { return &optag; }
983
984 bool is_prepared() { return prepared; }
985 }; // class UpdateIndex
986
987 class List {
988 protected:
989 // absolute maximum number of objects that
990 // list_objects_(un)ordered can return
991 static constexpr int64_t bucket_list_objects_absolute_max = 25000;
992
993 RGWRados::Bucket *target;
994 rgw_obj_key next_marker;
995
996 int list_objects_ordered(int64_t max,
997 vector<rgw_bucket_dir_entry> *result,
998 map<string, bool> *common_prefixes,
999 bool *is_truncated,
1000 optional_yield y);
1001 int list_objects_unordered(int64_t max,
1002 vector<rgw_bucket_dir_entry> *result,
1003 map<string, bool> *common_prefixes,
1004 bool *is_truncated,
1005 optional_yield y);
1006
1007 public:
1008
1009 struct Params {
1010 string prefix;
1011 string delim;
1012 rgw_obj_key marker;
1013 rgw_obj_key end_marker;
1014 string ns;
1015 bool enforce_ns;
1016 RGWAccessListFilter *filter;
1017 bool list_versions;
1018 bool allow_unordered;
1019
1020 Params() :
1021 enforce_ns(true),
1022 filter(NULL),
1023 list_versions(false),
1024 allow_unordered(false)
1025 {}
1026 } params;
1027
1028 explicit List(RGWRados::Bucket *_target) : target(_target) {}
1029
1030 int list_objects(int64_t max,
1031 vector<rgw_bucket_dir_entry> *result,
1032 map<string, bool> *common_prefixes,
1033 bool *is_truncated,
1034 optional_yield y) {
1035 if (params.allow_unordered) {
1036 return list_objects_unordered(max, result, common_prefixes,
1037 is_truncated, y);
1038 } else {
1039 return list_objects_ordered(max, result, common_prefixes,
1040 is_truncated, y);
1041 }
1042 }
1043 rgw_obj_key& get_next_marker() {
1044 return next_marker;
1045 }
1046 }; // class List
1047 }; // class Bucket
1048
1049 int on_last_entry_in_listing(RGWBucketInfo& bucket_info,
1050 const std::string& obj_prefix,
1051 const std::string& obj_delim,
1052 std::function<int(const rgw_bucket_dir_entry&)> handler);
1053
1054 bool swift_versioning_enabled(const RGWBucketInfo& bucket_info) const {
1055 return bucket_info.has_swift_versioning() &&
1056 bucket_info.swift_ver_location.size();
1057 }
1058
1059 int swift_versioning_copy(RGWObjectCtx& obj_ctx, /* in/out */
1060 const rgw_user& user, /* in */
1061 RGWBucketInfo& bucket_info, /* in */
1062 rgw_obj& obj, /* in */
1063 const DoutPrefixProvider *dpp, /* in/out */
1064 optional_yield y); /* in */
1065 int swift_versioning_restore(RGWObjectCtx& obj_ctx, /* in/out */
1066 const rgw_user& user, /* in */
1067 RGWBucketInfo& bucket_info, /* in */
1068 rgw_obj& obj, /* in */
1069 bool& restored, /* out */
1070 const DoutPrefixProvider *dpp); /* in/out */
1071 int copy_obj_to_remote_dest(RGWObjState *astate,
1072 map<string, bufferlist>& src_attrs,
1073 RGWRados::Object::Read& read_op,
1074 const rgw_user& user_id,
1075 rgw_obj& dest_obj,
1076 ceph::real_time *mtime);
1077
// Selects how a caller-supplied `attrs` map is combined with the source
// object's attributes. Taken as a parameter by copy_obj() and
// fetch_remote_obj(); the per-value meanings below follow the copy_obj()
// documentation.
1078 enum AttrsMod {
1079 ATTRSMOD_NONE = 0, // source attributes are copied unmodified; attrs is ignored
1080 ATTRSMOD_REPLACE = 1, // new object gets the attributes in attrs; source attributes are not copied
1081 ATTRSMOD_MERGE = 2 // conflicting keys on the source attributes are overwritten by values from attrs
1082 };
1083
1084 int rewrite_obj(RGWBucketInfo& dest_bucket_info, const rgw_obj& obj, const DoutPrefixProvider *dpp, optional_yield y);
1085
1086 int stat_remote_obj(RGWObjectCtx& obj_ctx,
1087 const rgw_user& user_id,
1088 req_info *info,
1089 const rgw_zone_id& source_zone,
1090 rgw_obj& src_obj,
1091 const RGWBucketInfo *src_bucket_info,
1092 real_time *src_mtime,
1093 uint64_t *psize,
1094 const real_time *mod_ptr,
1095 const real_time *unmod_ptr,
1096 bool high_precision_time,
1097 const char *if_match,
1098 const char *if_nomatch,
1099 map<string, bufferlist> *pattrs,
1100 map<string, string> *pheaders,
1101 string *version_id,
1102 string *ptag,
1103 string *petag);
1104
1105 int fetch_remote_obj(RGWObjectCtx& obj_ctx,
1106 const rgw_user& user_id,
1107 req_info *info,
1108 const rgw_zone_id& source_zone,
1109 const rgw_obj& dest_obj,
1110 const rgw_obj& src_obj,
1111 const RGWBucketInfo& dest_bucket_info,
1112 const RGWBucketInfo *src_bucket_info,
1113 std::optional<rgw_placement_rule> dest_placement,
1114 ceph::real_time *src_mtime,
1115 ceph::real_time *mtime,
1116 const ceph::real_time *mod_ptr,
1117 const ceph::real_time *unmod_ptr,
1118 bool high_precision_time,
1119 const char *if_match,
1120 const char *if_nomatch,
1121 AttrsMod attrs_mod,
1122 bool copy_if_newer,
1123 map<string, bufferlist>& attrs,
1124 RGWObjCategory category,
1125 std::optional<uint64_t> olh_epoch,
1126 ceph::real_time delete_at,
1127 string *ptag,
1128 string *petag,
1129 void (*progress_cb)(off_t, void *),
1130 void *progress_data,
1131 const DoutPrefixProvider *dpp,
1132 RGWFetchObjFilter *filter,
1133 rgw_zone_set *zones_trace= nullptr,
1134 std::optional<uint64_t>* bytes_transferred = 0);
1135 /**
1136 * Copy an object.
1137 * dest_obj: the object to copy into
1138 * src_obj: the object to copy from
1139 * attrs: usage depends on attrs_mod parameter
1140 * attrs_mod: the modification mode of the attrs, may have the following values:
1141 * ATTRSMOD_NONE - the attributes of the source object will be
1142 * copied without modifications, attrs parameter is ignored;
1143 * ATTRSMOD_REPLACE - new object will have the attributes provided by attrs
1144 * parameter, source object attributes are not copied;
1145 * ATTRSMOD_MERGE - any conflicting meta keys on the source object's attributes
1146 * are overwritten by values contained in attrs parameter.
1147 * Returns: 0 on success, -ERR# otherwise.
1148 */
1149 int copy_obj(RGWObjectCtx& obj_ctx,
1150 const rgw_user& user_id,
1151 req_info *info,
1152 const rgw_zone_id& source_zone,
1153 rgw_obj& dest_obj,
1154 rgw_obj& src_obj,
1155 RGWBucketInfo& dest_bucket_info,
1156 RGWBucketInfo& src_bucket_info,
1157 const rgw_placement_rule& dest_placement,
1158 ceph::real_time *src_mtime,
1159 ceph::real_time *mtime,
1160 const ceph::real_time *mod_ptr,
1161 const ceph::real_time *unmod_ptr,
1162 bool high_precision_time,
1163 const char *if_match,
1164 const char *if_nomatch,
1165 AttrsMod attrs_mod,
1166 bool copy_if_newer,
1167 map<std::string, bufferlist>& attrs,
1168 RGWObjCategory category,
1169 uint64_t olh_epoch,
1170 ceph::real_time delete_at,
1171 string *version_id,
1172 string *ptag,
1173 string *petag,
1174 void (*progress_cb)(off_t, void *),
1175 void *progress_data,
1176 const DoutPrefixProvider *dpp,
1177 optional_yield y);
1178
1179 int copy_obj_data(RGWObjectCtx& obj_ctx,
1180 RGWBucketInfo& dest_bucket_info,
1181 const rgw_placement_rule& dest_placement,
1182 RGWRados::Object::Read& read_op, off_t end,
1183 const rgw_obj& dest_obj,
1184 ceph::real_time *mtime,
1185 ceph::real_time set_mtime,
1186 map<string, bufferlist>& attrs,
1187 uint64_t olh_epoch,
1188 ceph::real_time delete_at,
1189 string *petag,
1190 const DoutPrefixProvider *dpp,
1191 optional_yield y);
1192
1193 int transition_obj(RGWObjectCtx& obj_ctx,
1194 RGWBucketInfo& bucket_info,
1195 rgw_obj& obj,
1196 const rgw_placement_rule& placement_rule,
1197 const real_time& mtime,
1198 uint64_t olh_epoch,
1199 const DoutPrefixProvider *dpp,
1200 optional_yield y);
1201
1202 int check_bucket_empty(RGWBucketInfo& bucket_info, optional_yield y);
1203
1204 /**
1205 * Delete a bucket.
1206 * bucket_info: info of the bucket to delete
 * objv_tracker: object version tracker passed by reference; how it is used is
 *   not visible from this declaration
 * y: optional_yield forwarded to the implementation
 * check_empty: defaults to true; presumably controls whether the bucket is
 *   verified to be empty before deletion (cf. check_bucket_empty()) -- TODO confirm
1207 * Returns 0 on success, -ERR# otherwise.
1208 */
1209 int delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, optional_yield y, bool check_empty = true);
1210
1211 void wakeup_meta_sync_shards(set<int>& shard_ids);
1212 void wakeup_data_sync_shards(const rgw_zone_id& source_zone, map<int, set<string> >& shard_ids);
1213
1214 RGWMetaSyncStatusManager* get_meta_sync_manager();
1215 RGWDataSyncStatusManager* get_data_sync_manager(const rgw_zone_id& source_zone);
1216
1217 int set_bucket_owner(rgw_bucket& bucket, ACLOwner& owner);
1218 int set_buckets_enabled(std::vector<rgw_bucket>& buckets, bool enabled);
1219 int bucket_suspended(rgw_bucket& bucket, bool *suspended);
1220
1221 /** Delete an object.
 * NOTE(review): the second parameter is named bucket_owner but its type is
 * RGWBucketInfo, i.e. it carries bucket info rather than an owner id; the
 * name looks stale -- confirm against the definition before renaming.
 *
 * The trailing parameters are optional: bilog_flags defaults to 0,
 * expiration_time to a default-constructed ceph::real_time, and zones_trace
 * to nullptr.
 */
1222 int delete_obj(RGWObjectCtx& obj_ctx,
1223 const RGWBucketInfo& bucket_owner,
1224 const rgw_obj& src_obj,
1225 int versioning_status,
1226 uint16_t bilog_flags = 0,
1227 const ceph::real_time& expiration_time = ceph::real_time(),
1228 rgw_zone_set *zones_trace = nullptr);
1229
1230 int delete_raw_obj(const rgw_raw_obj& obj);
1231
1232 /** Remove an object from the bucket index */
1233 int delete_obj_index(const rgw_obj& obj, ceph::real_time mtime);
1234
1235 /**
1236 * Set an attr on an object.
 * ctx: opaque context pointer (void *); presumably an RGWObjectCtx, as in
 *   set_atomic() -- TODO confirm
1237 * bucket_info: info of the bucket holding the object
1238 * obj: the object to set the attr on
1239 * name: the attr to set
1240 * bl: the contents of the attr
1241 * Returns: 0 on success, -ERR# otherwise.
1242 */
1243 int set_attr(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj, const char *name, bufferlist& bl);
1244
1245 int set_attrs(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj,
1246 map<string, bufferlist>& attrs,
1247 map<string, bufferlist>* rmattrs,
1248 optional_yield y);
1249
1250 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
1251 bool follow_olh, optional_yield y, bool assume_noent = false);
1252 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state, optional_yield y) {
1253 return get_obj_state(rctx, bucket_info, obj, state, true, y);
1254 }
1255
1256 using iterate_obj_cb = int (*)(const rgw_raw_obj&, off_t, off_t,
1257 off_t, bool, RGWObjState*, void*);
1258
1259 int iterate_obj(RGWObjectCtx& ctx, const RGWBucketInfo& bucket_info,
1260 const rgw_obj& obj, off_t ofs, off_t end,
1261 uint64_t max_chunk_size, iterate_obj_cb cb, void *arg,
1262 optional_yield y);
1263
1264 int get_obj_iterate_cb(const rgw_raw_obj& read_obj, off_t obj_ofs,
1265 off_t read_ofs, off_t len, bool is_head_obj,
1266 RGWObjState *astate, void *arg);
1267
1268 void get_obj_aio_completion_cb(librados::completion_t cb, void *arg);
1269
1270 /**
1271 * a simple object read without keeping state
1272 */
1273
1274 int raw_obj_stat(rgw_raw_obj& obj, uint64_t *psize, ceph::real_time *pmtime, uint64_t *epoch,
1275 map<string, bufferlist> *attrs, bufferlist *first_chunk,
1276 RGWObjVersionTracker *objv_tracker, optional_yield y);
1277
1278 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectWriteOperation *op);
1279 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectReadOperation *op);
1280
1281 int guard_reshard(BucketShard *bs,
1282 const rgw_obj& obj_instance,
1283 const RGWBucketInfo& bucket_info,
1284 std::function<int(BucketShard *)> call);
1285 int block_while_resharding(RGWRados::BucketShard *bs,
1286 string *new_bucket_id,
1287 const RGWBucketInfo& bucket_info,
1288 optional_yield y);
1289
1290 void bucket_index_guard_olh_op(RGWObjState& olh_state, librados::ObjectOperation& op);
1291 int olh_init_modification(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
1292 int olh_init_modification_impl(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
1293 int bucket_index_link_olh(const RGWBucketInfo& bucket_info, RGWObjState& olh_state,
1294 const rgw_obj& obj_instance, bool delete_marker,
1295 const string& op_tag, struct rgw_bucket_dir_entry_meta *meta,
1296 uint64_t olh_epoch,
1297 ceph::real_time unmod_since, bool high_precision_time,
1298 rgw_zone_set *zones_trace = nullptr,
1299 bool log_data_change = false);
1300 int bucket_index_unlink_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj_instance, const string& op_tag, const string& olh_tag, uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
1301 int bucket_index_read_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance, uint64_t ver_marker,
1302 map<uint64_t, vector<rgw_bucket_olh_log_entry> > *log, bool *is_truncated);
1303 int bucket_index_trim_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& obj_state, const rgw_obj& obj_instance, uint64_t ver);
1304 int bucket_index_clear_olh(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance);
1305 int apply_olh_log(RGWObjectCtx& ctx, RGWObjState& obj_state, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
1306 bufferlist& obj_tag, map<uint64_t, vector<rgw_bucket_olh_log_entry> >& log,
1307 uint64_t *plast_ver, rgw_zone_set *zones_trace = nullptr);
1308 int update_olh(RGWObjectCtx& obj_ctx, RGWObjState *state, const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_zone_set *zones_trace = nullptr);
1309 int set_olh(RGWObjectCtx& obj_ctx, const RGWBucketInfo& bucket_info, const rgw_obj& target_obj, bool delete_marker, rgw_bucket_dir_entry_meta *meta,
1310 uint64_t olh_epoch, ceph::real_time unmod_since, bool high_precision_time,
1311 optional_yield y, rgw_zone_set *zones_trace = nullptr, bool log_data_change = false);
1312 int repair_olh(RGWObjState* state, const RGWBucketInfo& bucket_info,
1313 const rgw_obj& obj);
1314 int unlink_obj_instance(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj,
1315 uint64_t olh_epoch, optional_yield y, rgw_zone_set *zones_trace = nullptr);
1316
1317 void check_pending_olh_entries(map<string, bufferlist>& pending_entries, map<string, bufferlist> *rm_pending_entries);
1318 int remove_olh_pending_entries(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, map<string, bufferlist>& pending_attrs);
1319 int follow_olh(const RGWBucketInfo& bucket_info, RGWObjectCtx& ctx, RGWObjState *state, const rgw_obj& olh_obj, rgw_obj *target);
1320 int get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWOLHInfo *olh);
1321
1322 void gen_rand_obj_instance_name(rgw_obj_key *target_key);
1323 void gen_rand_obj_instance_name(rgw_obj *target);
1324
1325 int update_containers_stats(map<string, RGWBucketEnt>& m);
1326 int append_async(rgw_raw_obj& obj, size_t size, bufferlist& bl);
1327
1328 public:
1329 void set_atomic(void *ctx, rgw_obj& obj) {
1330 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
1331 rctx->set_atomic(obj);
1332 }
1333 void set_prefetch_data(void *ctx, const rgw_obj& obj) {
1334 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
1335 rctx->set_prefetch_data(obj);
1336 }
1337 int decode_policy(bufferlist& bl, ACLOwner *owner);
1338 int get_bucket_stats(RGWBucketInfo& bucket_info, int shard_id, string *bucket_ver, string *master_ver,
1339 map<RGWObjCategory, RGWStorageStats>& stats, string *max_marker, bool* syncstopped = NULL);
1340 int get_bucket_stats_async(RGWBucketInfo& bucket_info, int shard_id, RGWGetBucketStats_CB *cb);
1341
1342 int put_bucket_instance_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, map<string, bufferlist> *pattrs);
1343 /* xxx dang obj_ctx -> svc */
1344 int get_bucket_instance_info(RGWSysObjectCtx& obj_ctx, const string& meta_key, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs, optional_yield y);
1345 int get_bucket_instance_info(RGWSysObjectCtx& obj_ctx, const rgw_bucket& bucket, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs, optional_yield y);
1346
1347 static void make_bucket_entry_name(const string& tenant_name, const string& bucket_name, string& bucket_entry);
1348
1349 int get_bucket_info(RGWServices *svc,
1350 const string& tenant_name, const string& bucket_name,
1351 RGWBucketInfo& info,
1352 ceph::real_time *pmtime, optional_yield y, map<string, bufferlist> *pattrs = NULL);
1353
1354 // Returns 0 on successful refresh. Returns error code if there was
1355 // an error or the version stored on the OSD is the same as that
1356 // presented in the BucketInfo structure.
1357 //
1358 int try_refresh_bucket_info(RGWBucketInfo& info,
1359 ceph::real_time *pmtime,
1360 map<string, bufferlist> *pattrs = nullptr);
1361
1362 int put_linked_bucket_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, obj_version *pep_objv,
1363 map<string, bufferlist> *pattrs, bool create_entry_point);
1364
1365 int cls_obj_prepare_op(BucketShard& bs, RGWModifyOp op, string& tag, rgw_obj& obj, uint16_t bilog_flags, optional_yield y, rgw_zone_set *zones_trace = nullptr);
1366 int cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModifyOp op, string& tag, int64_t pool, uint64_t epoch,
1367 rgw_bucket_dir_entry& ent, RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
1368 int cls_obj_complete_add(BucketShard& bs, const rgw_obj& obj, string& tag, int64_t pool, uint64_t epoch, rgw_bucket_dir_entry& ent,
1369 RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
1370 int cls_obj_complete_del(BucketShard& bs, string& tag, int64_t pool, uint64_t epoch, rgw_obj& obj,
1371 ceph::real_time& removed_mtime, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
1372 int cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
1373 int cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_t timeout);
1374
1375 using ent_map_t =
1376 boost::container::flat_map<std::string, rgw_bucket_dir_entry>;
1377
1378 using check_filter_t = bool (*)(const std::string&);
1379
1380 int cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
1381 const int shard_id,
1382 const rgw_obj_index_key& start_after,
1383 const string& prefix,
1384 const string& delimiter,
1385 const uint32_t num_entries,
1386 const bool list_versions,
1387 const uint16_t exp_factor, // 0 means ignore
1388 ent_map_t& m,
1389 bool* is_truncated,
1390 bool* cls_filtered,
1391 rgw_obj_index_key *last_entry,
1392 optional_yield y,
1393 check_filter_t force_check_filter = nullptr);
1394 int cls_bucket_list_unordered(RGWBucketInfo& bucket_info,
1395 int shard_id,
1396 const rgw_obj_index_key& start_after,
1397 const string& prefix,
1398 uint32_t num_entries,
1399 bool list_versions,
1400 vector<rgw_bucket_dir_entry>& ent_list,
1401 bool *is_truncated,
1402 rgw_obj_index_key *last_entry,
1403 optional_yield y,
1404 check_filter_t = nullptr);
1405 int cls_bucket_head(const RGWBucketInfo& bucket_info, int shard_id, vector<rgw_bucket_dir_header>& headers, map<int, string> *bucket_instance_ids = NULL);
1406 int cls_bucket_head_async(const RGWBucketInfo& bucket_info, int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio);
1407
1408 int bi_get_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_dir_entry *dirent);
1409 int bi_get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_olh_entry *olh);
1410 int bi_get(const RGWBucketInfo& bucket_info, const rgw_obj& obj, BIIndexType index_type, rgw_cls_bi_entry *entry);
1411 void bi_put(librados::ObjectWriteOperation& op, BucketShard& bs, rgw_cls_bi_entry& entry);
1412 int bi_put(BucketShard& bs, rgw_cls_bi_entry& entry);
1413 int bi_put(rgw_bucket& bucket, rgw_obj& obj, rgw_cls_bi_entry& entry);
1414 int bi_list(rgw_bucket& bucket, int shard_id, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
1415 int bi_list(BucketShard& bs, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
1416 int bi_list(rgw_bucket& bucket, const string& obj_name, const string& marker, uint32_t max,
1417 list<rgw_cls_bi_entry> *entries, bool *is_truncated);
1418 int bi_remove(BucketShard& bs);
1419
1420 int cls_obj_usage_log_add(const string& oid, rgw_usage_log_info& info);
1421 int cls_obj_usage_log_read(const string& oid, const string& user, const string& bucket, uint64_t start_epoch,
1422 uint64_t end_epoch, uint32_t max_entries, string& read_iter, map<rgw_user_bucket,
1423 rgw_usage_log_entry>& usage, bool *is_truncated);
1424 int cls_obj_usage_log_trim(const string& oid, const string& user, const string& bucket, uint64_t start_epoch,
1425 uint64_t end_epoch);
1426 int cls_obj_usage_log_clear(string& oid);
1427
1428 int get_target_shard_id(const RGWBucketInfo& bucket_info, const string& obj_key, int *shard_id);
1429
1430 int lock_exclusive(const rgw_pool& pool, const string& oid, ceph::timespan& duration, rgw_zone_id& zone_id, string& owner_id);
1431 int unlock(const rgw_pool& pool, const string& oid, rgw_zone_id& zone_id, string& owner_id);
1432
1433 void update_gc_chain(rgw_obj& head_obj, RGWObjManifest& manifest, cls_rgw_obj_chain *chain);
1434 int send_chain_to_gc(cls_rgw_obj_chain& chain, const string& tag);
1435 void delete_objs_inline(cls_rgw_obj_chain& chain, const string& tag);
1436 int gc_operate(string& oid, librados::ObjectWriteOperation *op);
1437 int gc_aio_operate(const std::string& oid, librados::AioCompletion *c,
1438 librados::ObjectWriteOperation *op);
1439 int gc_operate(string& oid, librados::ObjectReadOperation *op, bufferlist *pbl);
1440
1441 int list_gc_objs(int *index, string& marker, uint32_t max, bool expired_only, std::list<cls_rgw_gc_obj_info>& result, bool *truncated, bool& processing_queue);
1442 int process_gc(bool expired_only);
1443 bool process_expire_objects();
1444 int defer_gc(void *ctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, optional_yield y);
1445
1446 int process_lc();
1447 int list_lc_progress(const string& marker, uint32_t max_entries, map<string, int> *progress_map);
1448
1449 int bucket_check_index(RGWBucketInfo& bucket_info,
1450 map<RGWObjCategory, RGWStorageStats> *existing_stats,
1451 map<RGWObjCategory, RGWStorageStats> *calculated_stats);
1452 int bucket_rebuild_index(RGWBucketInfo& bucket_info);
1453 int bucket_set_reshard(const RGWBucketInfo& bucket_info, const cls_rgw_bucket_instance_entry& entry);
1454 int remove_objs_from_index(RGWBucketInfo& bucket_info, list<rgw_obj_index_key>& oid_list);
1455 int move_rados_obj(librados::IoCtx& src_ioctx,
1456 const string& src_oid, const string& src_locator,
1457 librados::IoCtx& dst_ioctx,
1458 const string& dst_oid, const string& dst_locator);
1459 int fix_head_obj_locator(const RGWBucketInfo& bucket_info, bool copy_obj, bool remove_bad, rgw_obj_key& key);
1460 int fix_tail_obj_locator(const RGWBucketInfo& bucket_info, rgw_obj_key& key, bool fix, bool *need_fix, optional_yield y);
1461
1462 int check_quota(const rgw_user& bucket_owner, rgw_bucket& bucket,
1463 RGWQuotaInfo& user_quota, RGWQuotaInfo& bucket_quota, uint64_t obj_size, bool check_size_only = false);
1464
1465 int check_bucket_shards(const RGWBucketInfo& bucket_info, const rgw_bucket& bucket,
1466 uint64_t num_objs);
1467
1468 int add_bucket_to_reshard(const RGWBucketInfo& bucket_info, uint32_t new_num_shards);
1469
1470 uint64_t instance_id();
1471
1472 librados::Rados* get_rados_handle();
1473
1474 int delete_raw_obj_aio(const rgw_raw_obj& obj, list<librados::AioCompletion *>& handles);
1475 int delete_obj_aio(const rgw_obj& obj, RGWBucketInfo& info, RGWObjState *astate,
1476 list<librados::AioCompletion *>& handles, bool keep_index_consistent,
1477 optional_yield y);
1478
1479 private:
1480 /**
1481 * Check the actual on-disk state of the object specified
1482 * by list_state, and fill in the time and size of object.
1483 * Then append any changes to suggested_updates for
1484 * the rgw class' dir_suggest_changes function.
1485 *
1486 * Note that this can maul list_state; don't use it afterwards. Also
1487 * it expects object to already be filled in from list_state; it only
1488 * sets the size and mtime.
1489 *
1490 * Returns 0 on success, -ENOENT if the object doesn't exist on disk,
1491 * and -errno on other failures. (-ENOENT is not a failure, and it
1492 * will encode that info as a suggested update.)
1493 */
1494 int check_disk_state(librados::IoCtx io_ctx,
1495 const RGWBucketInfo& bucket_info,
1496 rgw_bucket_dir_entry& list_state,
1497 rgw_bucket_dir_entry& object,
1498 bufferlist& suggested_updates,
1499 optional_yield y);
1500
1501 /**
1502 * Init pool iteration
1503 * pool: pool to use for the ctx initialization
1504 * ctx: context object to use for the iteration
1505 * Returns: 0 on success, -ERR# otherwise.
1506 */
1507 int pool_iterate_begin(const rgw_pool& pool, RGWPoolIterCtx& ctx);
1508
1509 /**
1510 * Init pool iteration
1511 * pool: pool to use
1512 * cursor: position to start iteration
1513 * ctx: context object to use for the iteration
1514 * Returns: 0 on success, -ERR# otherwise.
1515 */
1516 int pool_iterate_begin(const rgw_pool& pool, const string& cursor, RGWPoolIterCtx& ctx);
1517
1518 /**
1519 * Get pool iteration position
1520 * ctx: context object to use for the iteration
1521 * Returns: string representation of position
1522 */
1523 string pool_iterate_get_cursor(RGWPoolIterCtx& ctx);
1524
1525 /**
1526 * Iterate over a pool and return object names, applying an optional filter
1527 * ctx: iteration context, initialized with pool_iterate_begin()
1528 * num: max number of objects to return
1529 * objs: a vector that the results will be appended to
1530 * is_truncated: if not NULL, receives the truncation flag.
 *   NOTE(review): this comment used to say "true iff iteration is complete",
 *   which contradicts the parameter name; presumably true means more objects
 *   remain -- confirm against the implementation.
1531 * filter: if not NULL, will be used to filter returned objects
1532 * Returns: 0 on success, -ERR# otherwise.
1533 */
1534 int pool_iterate(RGWPoolIterCtx& ctx, uint32_t num, vector<rgw_bucket_dir_entry>& objs,
1535 bool *is_truncated, RGWAccessListFilter *filter);
1536
1537 uint64_t next_bucket_id();
1538
1539 /**
1540 * This is broken out to facilitate unit testing.
1541 */
1542 static uint32_t calc_ordered_bucket_list_per_shard(uint32_t num_entries,
1543 uint32_t num_shards);
1544 };
1545
1546 #endif