]> git.proxmox.com Git - ceph.git/blame - ceph/src/rgw/driver/rados/rgw_rados.h
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / rgw / driver / rados / rgw_rados.h
CommitLineData
7c673cae 1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
9f95a23c 2// vim: ts=8 sw=2 smarttab ft=cpp
7c673cae 3
1e59de90 4#pragma once
7c673cae 5
1e59de90 6#include <iostream>
7c673cae 7#include <functional>
9f95a23c 8#include <boost/container/flat_map.hpp>
1e59de90 9#include <boost/container/flat_set.hpp>
7c673cae
FG
10
11#include "include/rados/librados.hpp"
12#include "include/Context.h"
a4b75251 13#include "include/random.h"
7c673cae 14#include "common/RefCountedObj.h"
7c673cae 15#include "common/ceph_time.h"
a4b75251 16#include "common/Timer.h"
7c673cae
FG
17#include "rgw_common.h"
18#include "cls/rgw/cls_rgw_types.h"
19#include "cls/version/cls_version_types.h"
20#include "cls/log/cls_log_types.h"
7c673cae 21#include "cls/timeindex/cls_timeindex_types.h"
11fdf7f2 22#include "cls/otp/cls_otp_types.h"
1e59de90 23#include "rgw_quota.h"
7c673cae
FG
24#include "rgw_log.h"
25#include "rgw_metadata.h"
26#include "rgw_meta_sync_status.h"
27#include "rgw_period_puller.h"
9f95a23c 28#include "rgw_obj_manifest.h"
7c673cae 29#include "rgw_sync_module.h"
9f95a23c 30#include "rgw_trim_bilog.h"
11fdf7f2 31#include "rgw_service.h"
f67539c2 32#include "rgw_sal.h"
20effc67
TL
33#include "rgw_aio.h"
34#include "rgw_d3n_cacherequest.h"
11fdf7f2
TL
35
36#include "services/svc_rados.h"
9f95a23c 37#include "services/svc_bi_rados.h"
20effc67
TL
38#include "common/Throttle.h"
39#include "common/ceph_mutex.h"
40#include "rgw_cache.h"
1e59de90 41#include "rgw_sal_fwd.h"
20effc67
TL
42
43struct D3nDataCache;
7c673cae
FG
44
45class RGWWatcher;
7c673cae
FG
46class ACLOwner;
47class RGWGC;
48class RGWMetaNotifier;
49class RGWDataNotifier;
50class RGWLC;
51class RGWObjectExpirer;
52class RGWMetaSyncProcessorThread;
53class RGWDataSyncProcessorThread;
54class RGWSyncLogTrimThread;
11fdf7f2 55class RGWSyncTraceManager;
7c673cae
FG
56struct RGWZoneGroup;
57struct RGWZoneParams;
31f18b77
FG
58class RGWReshard;
59class RGWReshardWait;
7c673cae 60
20effc67 61struct get_obj_data;
11fdf7f2 62
7c673cae
FG
63/* flags for put_obj_meta() */
64#define PUT_OBJ_CREATE 0x01
65#define PUT_OBJ_EXCL 0x02
66#define PUT_OBJ_CREATE_EXCL (PUT_OBJ_CREATE | PUT_OBJ_EXCL)
67
20effc67 68static inline void prepend_bucket_marker(const rgw_bucket& bucket, const std::string& orig_oid, std::string& oid)
7c673cae
FG
69{
70 if (bucket.marker.empty() || orig_oid.empty()) {
71 oid = orig_oid;
72 } else {
73 oid = bucket.marker;
74 oid.append("_");
75 oid.append(orig_oid);
76 }
77}
78
20effc67 79static inline void get_obj_bucket_and_oid_loc(const rgw_obj& obj, std::string& oid, std::string& locator)
7c673cae
FG
80{
81 const rgw_bucket& bucket = obj.bucket;
82 prepend_bucket_marker(bucket, obj.get_oid(), oid);
20effc67 83 const std::string& loc = obj.key.get_loc();
7c673cae
FG
84 if (!loc.empty()) {
85 prepend_bucket_marker(bucket, loc, locator);
86 } else {
87 locator.clear();
88 }
89}
90
7c673cae
FG
91struct RGWOLHInfo {
92 rgw_obj target;
93 bool removed;
94
95 RGWOLHInfo() : removed(false) {}
96
97 void encode(bufferlist& bl) const {
98 ENCODE_START(1, 1, bl);
11fdf7f2
TL
99 encode(target, bl);
100 encode(removed, bl);
7c673cae
FG
101 ENCODE_FINISH(bl);
102 }
103
11fdf7f2 104 void decode(bufferlist::const_iterator& bl) {
7c673cae 105 DECODE_START(1, bl);
11fdf7f2
TL
106 decode(target, bl);
107 decode(removed, bl);
7c673cae
FG
108 DECODE_FINISH(bl);
109 }
20effc67 110 static void generate_test_instances(std::list<RGWOLHInfo*>& o);
7c673cae
FG
111 void dump(Formatter *f) const;
112};
113WRITE_CLASS_ENCODER(RGWOLHInfo)
114
115struct RGWOLHPendingInfo {
116 ceph::real_time time;
117
118 RGWOLHPendingInfo() {}
119
120 void encode(bufferlist& bl) const {
121 ENCODE_START(1, 1, bl);
11fdf7f2 122 encode(time, bl);
7c673cae
FG
123 ENCODE_FINISH(bl);
124 }
125
11fdf7f2 126 void decode(bufferlist::const_iterator& bl) {
7c673cae 127 DECODE_START(1, bl);
11fdf7f2 128 decode(time, bl);
7c673cae
FG
129 DECODE_FINISH(bl);
130 }
131
132 void dump(Formatter *f) const;
133};
134WRITE_CLASS_ENCODER(RGWOLHPendingInfo)
135
136struct RGWUsageBatch {
20effc67 137 std::map<ceph::real_time, rgw_usage_log_entry> m;
7c673cae
FG
138
139 void insert(ceph::real_time& t, rgw_usage_log_entry& entry, bool *account) {
140 bool exists = m.find(t) != m.end();
141 *account = !exists;
142 m[t].aggregate(entry);
143 }
144};
145
7c673cae
FG
146struct RGWCloneRangeInfo {
147 rgw_obj src;
148 off_t src_ofs;
149 off_t dst_ofs;
150 uint64_t len;
151};
152
9f95a23c
TL
153class RGWFetchObjFilter {
154public:
155 virtual ~RGWFetchObjFilter() {}
156
157 virtual int filter(CephContext *cct,
158 const rgw_obj_key& source_key,
159 const RGWBucketInfo& dest_bucket_info,
160 std::optional<rgw_placement_rule> dest_placement_rule,
20effc67 161 const std::map<std::string, bufferlist>& obj_attrs,
9f95a23c
TL
162 std::optional<rgw_user> *poverride_owner,
163 const rgw_placement_rule **prule) = 0;
164};
165
166class RGWFetchObjFilter_Default : public RGWFetchObjFilter {
167protected:
168 rgw_placement_rule dest_rule;
169public:
170 RGWFetchObjFilter_Default() {}
171
172 int filter(CephContext *cct,
173 const rgw_obj_key& source_key,
174 const RGWBucketInfo& dest_bucket_info,
175 std::optional<rgw_placement_rule> dest_placement_rule,
20effc67 176 const std::map<std::string, bufferlist>& obj_attrs,
9f95a23c
TL
177 std::optional<rgw_user> *poverride_owner,
178 const rgw_placement_rule **prule) override;
179};
180
1e59de90
TL
181struct RGWObjStateManifest {
182 RGWObjState state;
183 std::optional<RGWObjManifest> manifest;
184};
185
9f95a23c 186class RGWObjectCtx {
1e59de90 187 rgw::sal::Driver* driver;
9f95a23c 188 ceph::shared_mutex lock = ceph::make_shared_mutex("RGWObjectCtx");
9f95a23c 189
1e59de90 190 std::map<rgw_obj, RGWObjStateManifest> objs_state;
9f95a23c 191public:
1e59de90
TL
192 explicit RGWObjectCtx(rgw::sal::Driver* _driver) : driver(_driver) {}
193 RGWObjectCtx(RGWObjectCtx& _o) {
194 std::unique_lock wl{lock};
195 this->driver = _o.driver;
196 this->objs_state = _o.objs_state;
9f95a23c
TL
197 }
198
1e59de90
TL
199 rgw::sal::Driver* get_driver() {
200 return driver;
9f95a23c
TL
201 }
202
1e59de90 203 RGWObjStateManifest *get_state(const rgw_obj& obj);
9f95a23c 204
20effc67 205 void set_compressed(const rgw_obj& obj);
1e59de90 206 void set_atomic(const rgw_obj& obj);
9f95a23c
TL
207 void set_prefetch_data(const rgw_obj& obj);
208 void invalidate(const rgw_obj& obj);
209};
210
211
7c673cae
FG
212struct RGWRawObjState {
213 rgw_raw_obj obj;
214 bool has_attrs{false};
215 bool exists{false};
216 uint64_t size{0};
217 ceph::real_time mtime;
11fdf7f2 218 uint64_t epoch{0};
7c673cae
FG
219 bufferlist obj_tag;
220 bool has_data{false};
221 bufferlist data;
222 bool prefetch_data{false};
223 uint64_t pg_ver{0};
224
225 /* important! don't forget to update copy constructor */
226
227 RGWObjVersionTracker objv_tracker;
228
20effc67 229 std::map<std::string, bufferlist> attrset;
7c673cae
FG
230 RGWRawObjState() {}
231 RGWRawObjState(const RGWRawObjState& rhs) : obj (rhs.obj) {
232 has_attrs = rhs.has_attrs;
233 exists = rhs.exists;
234 size = rhs.size;
235 mtime = rhs.mtime;
236 epoch = rhs.epoch;
237 if (rhs.obj_tag.length()) {
238 obj_tag = rhs.obj_tag;
239 }
240 has_data = rhs.has_data;
241 if (rhs.data.length()) {
242 data = rhs.data;
243 }
244 prefetch_data = rhs.prefetch_data;
245 pg_ver = rhs.pg_ver;
246 objv_tracker = rhs.objv_tracker;
247 }
248};
249
250struct RGWPoolIterCtx {
251 librados::IoCtx io_ctx;
252 librados::NObjectIterator iter;
253};
254
255struct RGWListRawObjsCtx {
256 bool initialized;
257 RGWPoolIterCtx iter_ctx;
258
259 RGWListRawObjsCtx() : initialized(false) {}
260};
261
7c673cae 262struct objexp_hint_entry {
20effc67
TL
263 std::string tenant;
264 std::string bucket_name;
265 std::string bucket_id;
7c673cae
FG
266 rgw_obj_key obj_key;
267 ceph::real_time exp_time;
268
269 void encode(bufferlist& bl) const {
270 ENCODE_START(2, 1, bl);
11fdf7f2
TL
271 encode(bucket_name, bl);
272 encode(bucket_id, bl);
273 encode(obj_key, bl);
274 encode(exp_time, bl);
275 encode(tenant, bl);
7c673cae
FG
276 ENCODE_FINISH(bl);
277 }
278
11fdf7f2 279 void decode(bufferlist::const_iterator& bl) {
7c673cae
FG
280 // XXX Do we want DECODE_START_LEGACY_COMPAT_LEN(2, 1, 1, bl); ?
281 DECODE_START(2, bl);
11fdf7f2
TL
282 decode(bucket_name, bl);
283 decode(bucket_id, bl);
284 decode(obj_key, bl);
285 decode(exp_time, bl);
7c673cae 286 if (struct_v >= 2) {
11fdf7f2 287 decode(tenant, bl);
7c673cae
FG
288 } else {
289 tenant.clear();
290 }
291 DECODE_FINISH(bl);
292 }
9f95a23c
TL
293
294 void dump(Formatter *f) const;
20effc67 295 static void generate_test_instances(std::list<objexp_hint_entry*>& o);
7c673cae
FG
296};
297WRITE_CLASS_ENCODER(objexp_hint_entry)
298
7c673cae
FG
299class RGWMetaSyncStatusManager;
300class RGWDataSyncStatusManager;
7c673cae 301class RGWCoroutinesManagerRegistry;
7c673cae 302
7c673cae
FG
303class RGWGetDirHeader_CB;
304class RGWGetUserHeader_CB;
f67539c2 305namespace rgw { namespace sal {
20effc67 306 class RadosStore;
f67539c2
TL
307 class MPRadosSerializer;
308 class LCRadosSerializer;
309} }
7c673cae 310
7c673cae
FG
311class RGWAsyncRadosProcessor;
312
313template <class T>
314class RGWChainedCacheImpl;
315
316struct bucket_info_entry {
317 RGWBucketInfo info;
318 real_time mtime;
20effc67 319 std::map<std::string, bufferlist> attrs;
7c673cae
FG
320};
321
9f95a23c 322struct tombstone_entry;
7c673cae 323
9f95a23c
TL
324template <class K, class V>
325class lru_map;
326using tombstone_cache_t = lru_map<rgw_obj, tombstone_entry>;
7c673cae 327
31f18b77
FG
328class RGWIndexCompletionManager;
329
9f95a23c 330class RGWRados
7c673cae
FG
331{
332 friend class RGWGC;
333 friend class RGWMetaNotifier;
334 friend class RGWDataNotifier;
7c673cae
FG
335 friend class RGWObjectExpirer;
336 friend class RGWMetaSyncProcessorThread;
337 friend class RGWDataSyncProcessorThread;
31f18b77
FG
338 friend class RGWReshard;
339 friend class RGWBucketReshard;
f64942e4 340 friend class RGWBucketReshardLock;
31f18b77 341 friend class BucketIndexLockGuard;
f67539c2
TL
342 friend class rgw::sal::MPRadosSerializer;
343 friend class rgw::sal::LCRadosSerializer;
20effc67 344 friend class rgw::sal::RadosStore;
7c673cae
FG
345
346 /** Open the pool used as root for this gateway */
b3b6e05e
TL
347 int open_root_pool_ctx(const DoutPrefixProvider *dpp);
348 int open_gc_pool_ctx(const DoutPrefixProvider *dpp);
349 int open_lc_pool_ctx(const DoutPrefixProvider *dpp);
350 int open_objexp_pool_ctx(const DoutPrefixProvider *dpp);
351 int open_reshard_pool_ctx(const DoutPrefixProvider *dpp);
352 int open_notif_pool_ctx(const DoutPrefixProvider *dpp);
353
354 int open_pool_ctx(const DoutPrefixProvider *dpp, const rgw_pool& pool, librados::IoCtx& io_ctx,
05a536ef 355 bool mostly_omap, bool bulk);
7c673cae 356
a4b75251 357
9f95a23c 358 ceph::mutex lock = ceph::make_mutex("rados_timer_lock");
7c673cae
FG
359 SafeTimer *timer;
360
1e59de90 361 rgw::sal::RadosStore* driver = nullptr;
522d829b 362 RGWGC *gc = nullptr;
7c673cae
FG
363 RGWLC *lc;
364 RGWObjectExpirer *obj_expirer;
365 bool use_gc_thread;
366 bool use_lc_thread;
367 bool quota_threads;
368 bool run_sync_thread;
31f18b77 369 bool run_reshard_thread;
7c673cae 370
7c673cae
FG
371 RGWMetaNotifier *meta_notifier;
372 RGWDataNotifier *data_notifier;
373 RGWMetaSyncProcessorThread *meta_sync_processor_thread;
11fdf7f2 374 RGWSyncTraceManager *sync_tracer = nullptr;
20effc67 375 std::map<rgw_zone_id, RGWDataSyncProcessorThread *> data_sync_processor_threads;
7c673cae 376
b32b8144 377 boost::optional<rgw::BucketTrimManager> bucket_trim;
7c673cae
FG
378 RGWSyncLogTrimThread *sync_log_trimmer{nullptr};
379
9f95a23c
TL
380 ceph::mutex meta_sync_thread_lock = ceph::make_mutex("meta_sync_thread_lock");
381 ceph::mutex data_sync_thread_lock = ceph::make_mutex("data_sync_thread_lock");
7c673cae 382
7c673cae 383 librados::IoCtx root_pool_ctx; // .rgw
11fdf7f2 384
aee94f69 385 ceph::mutex bucket_id_lock{ceph::make_mutex("rados_bucket_id")};
7c673cae
FG
386
387 // This field represents the number of bucket index object shards
388 uint32_t bucket_index_max_shards;
389
20effc67
TL
390 std::string get_cluster_fsid(const DoutPrefixProvider *dpp, optional_yield y);
391
392 int get_obj_head_ref(const DoutPrefixProvider *dpp, const rgw_placement_rule& target_placement_rule, const rgw_obj& obj, rgw_rados_ref *ref);
b3b6e05e
TL
393 int get_obj_head_ref(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_rados_ref *ref);
394 int get_system_obj_ref(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, rgw_rados_ref *ref);
7c673cae
FG
395 uint64_t max_bucket_id;
396
05a536ef
TL
397 int clear_olh(const DoutPrefixProvider *dpp,
398 RGWObjectCtx& obj_ctx,
399 const rgw_obj& obj,
400 RGWBucketInfo& bucket_info,
401 rgw_rados_ref& ref,
402 const std::string& tag,
403 const uint64_t ver,
404 optional_yield y);
405
1e59de90
TL
406 int get_olh_target_state(const DoutPrefixProvider *dpp, RGWObjectCtx& rctx,
407 RGWBucketInfo& bucket_info, const rgw_obj& obj,
408 RGWObjState *olh_state, RGWObjState **target_state,
409 RGWObjManifest **target_manifest, optional_yield y);
410 int get_obj_state_impl(const DoutPrefixProvider *dpp, RGWObjectCtx *rctx, RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state, RGWObjManifest** manifest,
9f95a23c 411 bool follow_olh, optional_yield y, bool assume_noent = false);
1e59de90
TL
412 int append_atomic_test(const DoutPrefixProvider *dpp, RGWObjectCtx* rctx, RGWBucketInfo& bucket_info, const rgw_obj& obj,
413 librados::ObjectOperation& op, RGWObjState **state,
414 RGWObjManifest** pmanifest, optional_yield y);
415
7c673cae 416 int update_placement_map();
20effc67 417 int store_bucket_info(RGWBucketInfo& info, std::map<std::string, bufferlist> *pattrs, RGWObjVersionTracker *objv_tracker, bool exclusive);
7c673cae
FG
418
419 void remove_rgw_head_obj(librados::ObjectWriteOperation& op);
20effc67 420 void cls_obj_check_prefix_exist(librados::ObjectOperation& op, const std::string& prefix, bool fail_if_exist);
7c673cae
FG
421 void cls_obj_check_mtime(librados::ObjectOperation& op, const real_time& mtime, bool high_precision_time, RGWCheckMTimeType type);
422protected:
423 CephContext *cct;
424
494da23a 425 librados::Rados rados;
7c673cae
FG
426
427 using RGWChainedCacheImpl_bucket_info_entry = RGWChainedCacheImpl<bucket_info_entry>;
428 RGWChainedCacheImpl_bucket_info_entry *binfo_cache;
429
7c673cae
FG
430 tombstone_cache_t *obj_tombstone_cache;
431
432 librados::IoCtx gc_pool_ctx; // .rgw.gc
433 librados::IoCtx lc_pool_ctx; // .rgw.lc
434 librados::IoCtx objexp_pool_ctx;
31f18b77 435 librados::IoCtx reshard_pool_ctx;
f67539c2 436 librados::IoCtx notif_pool_ctx; // .rgw.notif
7c673cae 437
11fdf7f2 438 bool pools_initialized;
7c673cae 439
11fdf7f2 440 RGWQuotaHandler *quota_handler;
7c673cae 441
11fdf7f2 442 RGWCoroutinesManagerRegistry *cr_registry;
7c673cae 443
11fdf7f2
TL
444 RGWSyncModuleInstanceRef sync_module;
445 bool writeable_zone{false};
7c673cae 446
11fdf7f2 447 RGWIndexCompletionManager *index_completion_manager{nullptr};
7c673cae 448
11fdf7f2 449 bool use_cache{false};
522d829b 450 bool use_gc{true};
20effc67 451 bool use_datacache{false};
f67539c2 452
b3b6e05e 453 int get_obj_head_ioctx(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx *ioctx);
11fdf7f2 454public:
9f95a23c 455 RGWRados(): timer(NULL),
11fdf7f2 456 gc(NULL), lc(NULL), obj_expirer(NULL), use_gc_thread(false), use_lc_thread(false), quota_threads(false),
9f95a23c 457 run_sync_thread(false), run_reshard_thread(false), meta_notifier(NULL),
11fdf7f2 458 data_notifier(NULL), meta_sync_processor_thread(NULL),
11fdf7f2
TL
459 bucket_index_max_shards(0),
460 max_bucket_id(0), cct(NULL),
11fdf7f2
TL
461 binfo_cache(NULL), obj_tombstone_cache(nullptr),
462 pools_initialized(false),
463 quota_handler(NULL),
464 cr_registry(NULL),
9f95a23c
TL
465 pctl(&ctl),
466 reshard(NULL) {}
7c673cae 467
11fdf7f2
TL
468 RGWRados& set_use_cache(bool status) {
469 use_cache = status;
470 return *this;
7c673cae
FG
471 }
472
522d829b
TL
473 RGWRados& set_use_gc(bool status) {
474 use_gc = status;
475 return *this;
476 }
477
20effc67
TL
478 RGWRados& set_use_datacache(bool status) {
479 use_datacache = status;
480 return *this;
481 }
482
483 bool get_use_datacache() {
484 return use_datacache;
485 }
486
11fdf7f2
TL
487 RGWLC *get_lc() {
488 return lc;
7c673cae
FG
489 }
490
20effc67
TL
491 RGWGC *get_gc() {
492 return gc;
493 }
494
11fdf7f2
TL
495 RGWRados& set_run_gc_thread(bool _use_gc_thread) {
496 use_gc_thread = _use_gc_thread;
497 return *this;
7c673cae
FG
498 }
499
11fdf7f2
TL
500 RGWRados& set_run_lc_thread(bool _use_lc_thread) {
501 use_lc_thread = _use_lc_thread;
502 return *this;
7c673cae
FG
503 }
504
11fdf7f2
TL
505 RGWRados& set_run_quota_threads(bool _run_quota_threads) {
506 quota_threads = _run_quota_threads;
507 return *this;
7c673cae
FG
508 }
509
11fdf7f2
TL
510 RGWRados& set_run_sync_thread(bool _run_sync_thread) {
511 run_sync_thread = _run_sync_thread;
512 return *this;
7c673cae
FG
513 }
514
11fdf7f2
TL
515 RGWRados& set_run_reshard_thread(bool _run_reshard_thread) {
516 run_reshard_thread = _run_reshard_thread;
517 return *this;
7c673cae
FG
518 }
519
11fdf7f2
TL
520 librados::IoCtx* get_lc_pool_ctx() {
521 return &lc_pool_ctx;
7c673cae 522 }
f67539c2
TL
523
524 librados::IoCtx& get_notif_pool_ctx() {
525 return notif_pool_ctx;
526 }
527
11fdf7f2
TL
528 void set_context(CephContext *_cct) {
529 cct = _cct;
7c673cae 530 }
1e59de90
TL
531 void set_store(rgw::sal::RadosStore* _driver) {
532 driver = _driver;
9f95a23c 533 }
31f18b77 534
11fdf7f2 535 RGWServices svc;
9f95a23c
TL
536 RGWCtl ctl;
537
538 RGWCtl *pctl{nullptr};
11fdf7f2
TL
539
540 /**
541 * AmazonS3 errors contain a HostId string, but is an opaque base64 blob; we
542 * try to be more transparent. This has a wrapper so we can update it when zonegroup/zone are changed.
543 */
20effc67 544 std::string host_id;
31f18b77 545
31f18b77
FG
546 RGWReshard *reshard;
547 std::shared_ptr<RGWReshardWait> reshard_wait;
548
7c673cae
FG
549 virtual ~RGWRados() = default;
550
551 tombstone_cache_t *get_tombstone_cache() {
552 return obj_tombstone_cache;
553 }
7c673cae
FG
554 const RGWSyncModuleInstanceRef& get_sync_module() {
555 return sync_module;
556 }
11fdf7f2
TL
557 RGWSyncTraceManager *get_sync_tracer() {
558 return sync_tracer;
559 }
7c673cae 560
b3b6e05e 561 int get_required_alignment(const DoutPrefixProvider *dpp, const rgw_pool& pool, uint64_t *alignment);
11fdf7f2 562 void get_max_aligned_size(uint64_t size, uint64_t alignment, uint64_t *max_size);
b3b6e05e
TL
563 int get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size, const DoutPrefixProvider *dpp, uint64_t *palignment = nullptr);
564 int get_max_chunk_size(const rgw_placement_rule& placement_rule, const rgw_obj& obj, uint64_t *max_chunk_size, const DoutPrefixProvider *dpp, uint64_t *palignment = nullptr);
7c673cae
FG
565
566 uint32_t get_max_bucket_shards() {
9f95a23c 567 return RGWSI_BucketIndex_RADOS::shards_max();
7c673cae
FG
568 }
569
181888fb 570
b3b6e05e 571 int get_raw_obj_ref(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, rgw_rados_ref *ref);
7c673cae 572
20effc67
TL
573 int list_raw_objects_init(const DoutPrefixProvider *dpp, const rgw_pool& pool, const std::string& marker, RGWListRawObjsCtx *ctx);
574 int list_raw_objects_next(const DoutPrefixProvider *dpp, const std::string& prefix_filter, int max,
575 RGWListRawObjsCtx& ctx, std::list<std::string>& oids,
181888fb 576 bool *is_truncated);
20effc67
TL
577 int list_raw_objects(const DoutPrefixProvider *dpp, const rgw_pool& pool, const std::string& prefix_filter, int max,
578 RGWListRawObjsCtx& ctx, std::list<std::string>& oids,
7c673cae 579 bool *is_truncated);
20effc67 580 std::string list_raw_objs_get_cursor(RGWListRawObjsCtx& ctx);
7c673cae 581
7c673cae
FG
582 CephContext *ctx() { return cct; }
583 /** do all necessary setup of the storage device */
1e59de90 584 int init_begin(CephContext *_cct, const DoutPrefixProvider *dpp) {
7c673cae 585 set_context(_cct);
1e59de90 586 return init_begin(dpp);
7c673cae
FG
587 }
588 /** Initialize the RADOS instance and prepare to do other ops */
b3b6e05e
TL
589 int init_svc(bool raw, const DoutPrefixProvider *dpp);
590 int init_ctl(const DoutPrefixProvider *dpp);
20effc67 591 virtual int init_rados();
1e59de90 592 int init_begin(const DoutPrefixProvider *dpp);
b3b6e05e 593 int init_complete(const DoutPrefixProvider *dpp);
7c673cae
FG
594 void finalize();
595
20effc67
TL
596 int register_to_service_map(const DoutPrefixProvider *dpp, const std::string& daemon_type, const std::map<std::string, std::string>& meta);
597 int update_service_map(const DoutPrefixProvider *dpp, std::map<std::string, std::string>&& status);
7c673cae
FG
598
599 /// list logs
20effc67
TL
600 int log_list_init(const DoutPrefixProvider *dpp, const std::string& prefix, RGWAccessHandle *handle);
601 int log_list_next(RGWAccessHandle handle, std::string *name);
7c673cae
FG
602
603 /// remove log
20effc67 604 int log_remove(const DoutPrefixProvider *dpp, const std::string& name);
7c673cae
FG
605
606 /// show log
20effc67
TL
607 int log_show_init(const DoutPrefixProvider *dpp, const std::string& name, RGWAccessHandle *handle);
608 int log_show_next(const DoutPrefixProvider *dpp, RGWAccessHandle handle, rgw_log_entry *entry);
7c673cae
FG
609
610 // log bandwidth info
20effc67
TL
611 int log_usage(const DoutPrefixProvider *dpp, std::map<rgw_user_bucket, RGWUsageBatch>& usage_info);
612 int read_usage(const DoutPrefixProvider *dpp, const rgw_user& user, const std::string& bucket_name, uint64_t start_epoch, uint64_t end_epoch,
613 uint32_t max_entries, bool *is_truncated, RGWUsageIter& read_iter, std::map<rgw_user_bucket,
11fdf7f2 614 rgw_usage_log_entry>& usage);
20effc67 615 int trim_usage(const DoutPrefixProvider *dpp, const rgw_user& user, const std::string& bucket_name, uint64_t start_epoch, uint64_t end_epoch);
b3b6e05e 616 int clear_usage(const DoutPrefixProvider *dpp);
7c673cae 617
b3b6e05e 618 int create_pool(const DoutPrefixProvider *dpp, const rgw_pool& pool);
7c673cae 619
20effc67 620 void create_bucket_id(std::string *bucket_id);
7c673cae 621
11fdf7f2
TL
622 bool get_obj_data_pool(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_pool *pool);
623 bool obj_to_raw(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj);
7c673cae 624
11fdf7f2 625 int create_bucket(const RGWUserInfo& owner, rgw_bucket& bucket,
20effc67 626 const std::string& zonegroup_id,
f67539c2 627 const rgw_placement_rule& placement_rule,
20effc67 628 const std::string& swift_ver_location,
f67539c2 629 const RGWQuotaInfo * pquota_info,
20effc67 630 std::map<std::string,bufferlist>& attrs,
f67539c2
TL
631 RGWBucketInfo& bucket_info,
632 obj_version *pobjv,
633 obj_version *pep_objv,
634 ceph::real_time creation_time,
635 rgw_bucket *master_bucket,
636 uint32_t *master_num_shards,
637 optional_yield y,
b3b6e05e 638 const DoutPrefixProvider *dpp,
f67539c2 639 bool exclusive = true);
7c673cae
FG
640
641 RGWCoroutinesManagerRegistry *get_cr_registry() { return cr_registry; }
642
7c673cae
FG
643 struct BucketShard {
644 RGWRados *store;
645 rgw_bucket bucket;
646 int shard_id;
9f95a23c 647 RGWSI_RADOS::Obj bucket_obj;
7c673cae
FG
648
649 explicit BucketShard(RGWRados *_store) : store(_store), shard_id(-1) {}
1e59de90
TL
650 int init(const rgw_bucket& _bucket, const rgw_obj& obj,
651 RGWBucketInfo* out, const DoutPrefixProvider *dpp);
b3b6e05e 652 int init(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj);
1e59de90
TL
653 int init(const DoutPrefixProvider *dpp,
654 const RGWBucketInfo& bucket_info,
655 const rgw::bucket_index_layout_generation& index, int sid);
656
657 friend std::ostream& operator<<(std::ostream& out, const BucketShard& bs) {
658 out << "BucketShard:{ bucket=" << bs.bucket <<
659 ", shard_id=" << bs.shard_id <<
660 ", bucket_ojb=" << bs.bucket_obj << "}";
661 return out;
662 }
7c673cae
FG
663 };
664
665 class Object {
666 RGWRados *store;
667 RGWBucketInfo bucket_info;
668 RGWObjectCtx& ctx;
669 rgw_obj obj;
670
671 BucketShard bs;
672
673 RGWObjState *state;
1e59de90 674 RGWObjManifest *manifest;
7c673cae
FG
675
676 bool versioning_disabled;
677
678 bool bs_initialized;
679
20effc67
TL
680 const rgw_placement_rule *pmeta_placement_rule;
681
7c673cae 682 protected:
1e59de90 683 int get_state(const DoutPrefixProvider *dpp, RGWObjState **pstate, RGWObjManifest **pmanifest, bool follow_olh, optional_yield y, bool assume_noent = false);
7c673cae
FG
684 void invalidate_state();
685
20effc67 686 int prepare_atomic_modification(const DoutPrefixProvider *dpp, librados::ObjectWriteOperation& op, bool reset_obj, const std::string *ptag,
9f95a23c 687 const char *ifmatch, const char *ifnomatch, bool removal_op, bool modify_tail, optional_yield y);
b3b6e05e 688 int complete_atomic_modification(const DoutPrefixProvider *dpp);
7c673cae
FG
689
690 public:
691 Object(RGWRados *_store, const RGWBucketInfo& _bucket_info, RGWObjectCtx& _ctx, const rgw_obj& _obj) : store(_store), bucket_info(_bucket_info),
692 ctx(_ctx), obj(_obj), bs(store),
1e59de90 693 state(NULL), manifest(nullptr), versioning_disabled(false),
20effc67
TL
694 bs_initialized(false),
695 pmeta_placement_rule(nullptr) {}
7c673cae
FG
696
697 RGWRados *get_store() { return store; }
698 rgw_obj& get_obj() { return obj; }
699 RGWObjectCtx& get_ctx() { return ctx; }
700 RGWBucketInfo& get_bucket_info() { return bucket_info; }
1e59de90
TL
701 //const std::string& get_instance() { return obj->get_instance(); }
702 //rgw::sal::Object* get_target() { return obj; }
b3b6e05e 703 int get_manifest(const DoutPrefixProvider *dpp, RGWObjManifest **pmanifest, optional_yield y);
7c673cae 704
b3b6e05e 705 int get_bucket_shard(BucketShard **pbs, const DoutPrefixProvider *dpp) {
7c673cae 706 if (!bs_initialized) {
f64942e4 707 int r =
b3b6e05e 708 bs.init(bucket_info.bucket, obj, nullptr /* no RGWBucketInfo */, dpp);
7c673cae
FG
709 if (r < 0) {
710 return r;
711 }
712 bs_initialized = true;
713 }
714 *pbs = &bs;
715 return 0;
716 }
717
718 void set_versioning_disabled(bool status) {
719 versioning_disabled = status;
720 }
721
722 bool versioning_enabled() {
723 return (!versioning_disabled && bucket_info.versioning_enabled());
724 }
725
20effc67
TL
726 void set_meta_placement_rule(const rgw_placement_rule *p) {
727 pmeta_placement_rule = p;
728 }
729
730 const rgw_placement_rule& get_meta_placement_rule() {
731 return pmeta_placement_rule ? *pmeta_placement_rule : bucket_info.placement_rule;
732 }
733
7c673cae
FG
734 struct Read {
735 RGWRados::Object *source;
736
737 struct GetObjState {
20effc67 738 std::map<rgw_pool, librados::IoCtx> io_ctxs;
11fdf7f2
TL
739 rgw_pool cur_pool;
740 librados::IoCtx *cur_ioctx{nullptr};
7c673cae
FG
741 rgw_obj obj;
742 rgw_raw_obj head_obj;
743 } state;
1e59de90 744
7c673cae
FG
745 struct ConditionParams {
746 const ceph::real_time *mod_ptr;
747 const ceph::real_time *unmod_ptr;
748 bool high_precision_time;
749 uint32_t mod_zone_id;
750 uint64_t mod_pg_ver;
751 const char *if_match;
752 const char *if_nomatch;
1e59de90
TL
753
754 ConditionParams() :
7c673cae
FG
755 mod_ptr(NULL), unmod_ptr(NULL), high_precision_time(false), mod_zone_id(0), mod_pg_ver(0),
756 if_match(NULL), if_nomatch(NULL) {}
757 } conds;
758
759 struct Params {
760 ceph::real_time *lastmod;
761 uint64_t *obj_size;
20effc67 762 std::map<std::string, bufferlist> *attrs;
eafe8130 763 rgw_obj *target_obj;
7c673cae 764
eafe8130
TL
765 Params() : lastmod(nullptr), obj_size(nullptr), attrs(nullptr),
766 target_obj(nullptr) {}
7c673cae
FG
767 } params;
768
769 explicit Read(RGWRados::Object *_source) : source(_source) {}
770
b3b6e05e 771 int prepare(optional_yield y, const DoutPrefixProvider *dpp);
7c673cae 772 static int range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end);
b3b6e05e
TL
773 int read(int64_t ofs, int64_t end, bufferlist& bl, optional_yield y, const DoutPrefixProvider *dpp);
774 int iterate(const DoutPrefixProvider *dpp, int64_t ofs, int64_t end, RGWGetDataCB *cb, optional_yield y);
775 int get_attr(const DoutPrefixProvider *dpp, const char *name, bufferlist& dest, optional_yield y);
7c673cae
FG
776 };
777
778 struct Write {
779 RGWRados::Object *target;
1e59de90 780
7c673cae
FG
781 struct MetaParams {
782 ceph::real_time *mtime;
20effc67 783 std::map<std::string, bufferlist>* rmattrs;
7c673cae
FG
784 const bufferlist *data;
785 RGWObjManifest *manifest;
20effc67
TL
786 const std::string *ptag;
787 std::list<rgw_obj_index_key> *remove_objs;
7c673cae
FG
788 ceph::real_time set_mtime;
789 rgw_user owner;
790 RGWObjCategory category;
791 int flags;
792 const char *if_match;
793 const char *if_nomatch;
11fdf7f2 794 std::optional<uint64_t> olh_epoch;
7c673cae
FG
795 ceph::real_time delete_at;
796 bool canceled;
20effc67 797 const std::string *user_data;
31f18b77 798 rgw_zone_set *zones_trace;
181888fb 799 bool modify_tail;
3efd9988 800 bool completeMultipart;
11fdf7f2 801 bool appendable;
7c673cae
FG
802
803 MetaParams() : mtime(NULL), rmattrs(NULL), data(NULL), manifest(NULL), ptag(NULL),
11fdf7f2 804 remove_objs(NULL), category(RGWObjCategory::Main), flags(0),
91327a77 805 if_match(NULL), if_nomatch(NULL), canceled(false), user_data(nullptr), zones_trace(nullptr),
11fdf7f2 806 modify_tail(false), completeMultipart(false), appendable(false) {}
7c673cae
FG
807 } meta;
808
809 explicit Write(RGWRados::Object *_target) : target(_target) {}
810
20effc67 811 int _do_write_meta(const DoutPrefixProvider *dpp,
b3b6e05e 812 uint64_t size, uint64_t accounted_size,
20effc67 813 std::map<std::string, bufferlist>& attrs,
181888fb 814 bool modify_tail, bool assume_noent,
9f95a23c 815 void *index_op, optional_yield y);
b3b6e05e 816 int write_meta(const DoutPrefixProvider *dpp, uint64_t size, uint64_t accounted_size,
20effc67 817 std::map<std::string, bufferlist>& attrs, optional_yield y);
7c673cae 818 int write_data(const char *data, uint64_t ofs, uint64_t len, bool exclusive);
11fdf7f2 819 const req_state* get_req_state() {
1e59de90 820 return nullptr; /* XXX dang Only used by LTTng, and it handles null anyway */
11fdf7f2 821 }
7c673cae
FG
822 };
823
824 struct Delete {
825 RGWRados::Object *target;
826
827 struct DeleteParams {
828 rgw_user bucket_owner;
20effc67 829 int versioning_status; // versioning flags defined in enum RGWBucketFlags
b3b6e05e 830 ACLOwner obj_owner; // needed for creation of deletion marker
7c673cae 831 uint64_t olh_epoch;
20effc67 832 std::string marker_version_id;
7c673cae 833 uint32_t bilog_flags;
20effc67 834 std::list<rgw_obj_index_key> *remove_objs;
7c673cae
FG
835 ceph::real_time expiration_time;
836 ceph::real_time unmod_since;
837 ceph::real_time mtime; /* for setting delete marker mtime */
838 bool high_precision_time;
31f18b77 839 rgw_zone_set *zones_trace;
9f95a23c
TL
840 bool abortmp;
841 uint64_t parts_accounted_size;
7c673cae 842
9f95a23c 843 DeleteParams() : versioning_status(0), olh_epoch(0), bilog_flags(0), remove_objs(NULL), high_precision_time(false), zones_trace(nullptr), abortmp(false), parts_accounted_size(0) {}
7c673cae
FG
844 } params;
845
846 struct DeleteResult {
847 bool delete_marker;
20effc67 848 std::string version_id;
7c673cae
FG
849
850 DeleteResult() : delete_marker(false) {}
851 } result;
1e59de90 852
7c673cae
FG
853 explicit Delete(RGWRados::Object *_target) : target(_target) {}
854
b3b6e05e 855 int delete_obj(optional_yield y, const DoutPrefixProvider *dpp);
7c673cae
FG
856 };
857
858 struct Stat {
859 RGWRados::Object *source;
860
861 struct Result {
862 rgw_obj obj;
1e59de90 863 std::optional<RGWObjManifest> manifest;
9f95a23c
TL
864 uint64_t size{0};
865 struct timespec mtime {};
20effc67 866 std::map<std::string, bufferlist> attrs;
7c673cae
FG
867 } result;
868
869 struct State {
870 librados::IoCtx io_ctx;
871 librados::AioCompletion *completion;
872 int ret;
873
874 State() : completion(NULL), ret(0) {}
875 } state;
876
877
878 explicit Stat(RGWRados::Object *_source) : source(_source) {}
879
b3b6e05e 880 int stat_async(const DoutPrefixProvider *dpp);
20effc67 881 int wait(const DoutPrefixProvider *dpp);
7c673cae
FG
882 int stat();
883 private:
20effc67 884 int finish(const DoutPrefixProvider *dpp);
7c673cae
FG
885 };
886 };
887
888 class Bucket {
889 RGWRados *store;
890 RGWBucketInfo bucket_info;
891 rgw_bucket& bucket;
892 int shard_id;
893
894 public:
895 Bucket(RGWRados *_store, const RGWBucketInfo& _bucket_info) : store(_store), bucket_info(_bucket_info), bucket(bucket_info.bucket),
896 shard_id(RGW_NO_SHARD) {}
897 RGWRados *get_store() { return store; }
898 rgw_bucket& get_bucket() { return bucket; }
899 RGWBucketInfo& get_bucket_info() { return bucket_info; }
900
20effc67 901 int update_bucket_id(const std::string& new_bucket_id, const DoutPrefixProvider *dpp);
31f18b77 902
7c673cae
FG
903 int get_shard_id() { return shard_id; }
904 void set_shard_id(int id) {
905 shard_id = id;
906 }
907
908 class UpdateIndex {
909 RGWRados::Bucket *target;
20effc67 910 std::string optag;
7c673cae
FG
911 rgw_obj obj;
912 uint16_t bilog_flags{0};
913 BucketShard bs;
914 bool bs_initialized{false};
915 bool blind;
916 bool prepared{false};
31f18b77
FG
917 rgw_zone_set *zones_trace{nullptr};
918
b3b6e05e 919 int init_bs(const DoutPrefixProvider *dpp) {
f64942e4 920 int r =
1e59de90 921 bs.init(target->get_bucket(), obj, &target->bucket_info, dpp);
31f18b77
FG
922 if (r < 0) {
923 return r;
924 }
925 bs_initialized = true;
926 return 0;
927 }
928
929 void invalidate_bs() {
930 bs_initialized = false;
931 }
932
1e59de90 933 int guard_reshard(const DoutPrefixProvider *dpp, const rgw_obj& obj_instance, BucketShard **pbs, std::function<int(BucketShard *)> call);
7c673cae
FG
934 public:
935
936 UpdateIndex(RGWRados::Bucket *_target, const rgw_obj& _obj) : target(_target), obj(_obj),
937 bs(target->get_store()) {
f67539c2 938 blind = (target->get_bucket_info().layout.current_index.layout.type == rgw::BucketIndexType::Indexless);
7c673cae
FG
939 }
940
b3b6e05e 941 int get_bucket_shard(BucketShard **pbs, const DoutPrefixProvider *dpp) {
7c673cae 942 if (!bs_initialized) {
b3b6e05e 943 int r = init_bs(dpp);
7c673cae
FG
944 if (r < 0) {
945 return r;
946 }
7c673cae
FG
947 }
948 *pbs = &bs;
949 return 0;
950 }
951
952 void set_bilog_flags(uint16_t flags) {
953 bilog_flags = flags;
954 }
1e59de90 955
31f18b77
FG
956 void set_zones_trace(rgw_zone_set *_zones_trace) {
957 zones_trace = _zones_trace;
958 }
7c673cae 959
20effc67 960 int prepare(const DoutPrefixProvider *dpp, RGWModifyOp, const std::string *write_tag, optional_yield y);
b3b6e05e 961 int complete(const DoutPrefixProvider *dpp, int64_t poolid, uint64_t epoch, uint64_t size,
7c673cae 962 uint64_t accounted_size, ceph::real_time& ut,
20effc67
TL
963 const std::string& etag, const std::string& content_type,
964 const std::string& storage_class,
7c673cae 965 bufferlist *acl_bl, RGWObjCategory category,
1e59de90
TL
966 std::list<rgw_obj_index_key> *remove_objs,
967 optional_yield y,
968 const std::string *user_data = nullptr,
969 bool appendable = false);
20effc67 970 int complete_del(const DoutPrefixProvider *dpp,
b3b6e05e 971 int64_t poolid, uint64_t epoch,
7c673cae 972 ceph::real_time& removed_mtime, /* mtime of removed object */
1e59de90
TL
973 std::list<rgw_obj_index_key> *remove_objs,
974 optional_yield y);
20effc67 975 int cancel(const DoutPrefixProvider *dpp,
1e59de90
TL
976 std::list<rgw_obj_index_key> *remove_objs,
977 optional_yield y);
7c673cae 978
20effc67 979 const std::string *get_optag() { return &optag; }
7c673cae
FG
980
981 bool is_prepared() { return prepared; }
1adf2230
AA
982 }; // class UpdateIndex
983
984 class List {
985 protected:
eafe8130
TL
986 // absolute maximum number of objects that
987 // list_objects_(un)ordered can return
988 static constexpr int64_t bucket_list_objects_absolute_max = 25000;
7c673cae 989
7c673cae
FG
990 RGWRados::Bucket *target;
991 rgw_obj_key next_marker;
992
20effc67 993 int list_objects_ordered(const DoutPrefixProvider *dpp,
b3b6e05e 994 int64_t max,
20effc67
TL
995 std::vector<rgw_bucket_dir_entry> *result,
996 std::map<std::string, bool> *common_prefixes,
9f95a23c
TL
997 bool *is_truncated,
998 optional_yield y);
20effc67 999 int list_objects_unordered(const DoutPrefixProvider *dpp,
b3b6e05e 1000 int64_t max,
20effc67
TL
1001 std::vector<rgw_bucket_dir_entry> *result,
1002 std::map<std::string, bool> *common_prefixes,
9f95a23c
TL
1003 bool *is_truncated,
1004 optional_yield y);
1adf2230
AA
1005
1006 public:
1007
7c673cae 1008 struct Params {
20effc67
TL
1009 std::string prefix;
1010 std::string delim;
7c673cae
FG
1011 rgw_obj_key marker;
1012 rgw_obj_key end_marker;
20effc67 1013 std::string ns;
7c673cae 1014 bool enforce_ns;
20effc67
TL
1015 RGWAccessListFilter* access_list_filter;
1016 RGWBucketListNameFilter force_check_filter;
7c673cae 1017 bool list_versions;
1adf2230
AA
1018 bool allow_unordered;
1019
1020 Params() :
1021 enforce_ns(true),
20effc67 1022 access_list_filter(nullptr),
1adf2230
AA
1023 list_versions(false),
1024 allow_unordered(false)
1025 {}
7c673cae
FG
1026 } params;
1027
7c673cae
FG
1028 explicit List(RGWRados::Bucket *_target) : target(_target) {}
1029
b3b6e05e 1030 int list_objects(const DoutPrefixProvider *dpp, int64_t max,
20effc67
TL
1031 std::vector<rgw_bucket_dir_entry> *result,
1032 std::map<std::string, bool> *common_prefixes,
9f95a23c
TL
1033 bool *is_truncated,
1034 optional_yield y) {
1adf2230 1035 if (params.allow_unordered) {
b3b6e05e 1036 return list_objects_unordered(dpp, max, result, common_prefixes,
9f95a23c 1037 is_truncated, y);
1adf2230 1038 } else {
b3b6e05e 1039 return list_objects_ordered(dpp, max, result, common_prefixes,
9f95a23c 1040 is_truncated, y);
1adf2230
AA
1041 }
1042 }
7c673cae
FG
1043 rgw_obj_key& get_next_marker() {
1044 return next_marker;
1045 }
1adf2230
AA
1046 }; // class List
1047 }; // class Bucket
7c673cae 1048
20effc67 1049 int on_last_entry_in_listing(const DoutPrefixProvider *dpp,
b3b6e05e 1050 RGWBucketInfo& bucket_info,
7c673cae
FG
1051 const std::string& obj_prefix,
1052 const std::string& obj_delim,
1053 std::function<int(const rgw_bucket_dir_entry&)> handler);
1054
1e59de90 1055 bool swift_versioning_enabled(const RGWBucketInfo& bucket_info) const;
7c673cae
FG
1056
1057 int swift_versioning_copy(RGWObjectCtx& obj_ctx, /* in/out */
1058 const rgw_user& user, /* in */
1e59de90
TL
1059 RGWBucketInfo& bucket_info, /* in */
1060 const rgw_obj& obj, /* in */
1061 const DoutPrefixProvider *dpp, /* in */
1062 optional_yield y); /* in */
9f95a23c 1063 int swift_versioning_restore(RGWObjectCtx& obj_ctx, /* in/out */
7c673cae 1064 const rgw_user& user, /* in */
1e59de90
TL
1065 RGWBucketInfo& bucket_info, /* in */
1066 rgw_obj& obj, /* in/out */
1067 bool& restored, /* out */
1068 const DoutPrefixProvider *dpp); /* in */
b3b6e05e
TL
1069 int copy_obj_to_remote_dest(const DoutPrefixProvider *dpp,
1070 RGWObjState *astate,
20effc67 1071 std::map<std::string, bufferlist>& src_attrs,
7c673cae
FG
1072 RGWRados::Object::Read& read_op,
1073 const rgw_user& user_id,
1e59de90 1074 const rgw_obj& dest_obj,
7c673cae
FG
1075 ceph::real_time *mtime);
1076
// How the caller-supplied attrs are combined with the source object's
// attributes when copying/fetching an object.
enum AttrsMod {
  // source attributes are copied unmodified; the attrs parameter is ignored
  ATTRSMOD_NONE    = 0,
  // the new object gets the attrs parameter; source attributes are not copied
  ATTRSMOD_REPLACE = 1,
  // conflicting keys on the source are overwritten by the attrs parameter
  ATTRSMOD_MERGE   = 2
};
1082
20effc67
TL
1083 D3nDataCache* d3n_data_cache{nullptr};
1084
1e59de90
TL
1085 int rewrite_obj(RGWBucketInfo& dest_bucket_info, const rgw_obj& obj, const DoutPrefixProvider *dpp, optional_yield y);
1086 int reindex_obj(const RGWBucketInfo& dest_bucket_info,
1087 const rgw_obj& obj,
1088 const DoutPrefixProvider* dpp,
1089 optional_yield y);
7c673cae 1090
b3b6e05e
TL
1091 int stat_remote_obj(const DoutPrefixProvider *dpp,
1092 RGWObjectCtx& obj_ctx,
7c673cae 1093 const rgw_user& user_id,
7c673cae 1094 req_info *info,
9f95a23c 1095 const rgw_zone_id& source_zone,
1e59de90 1096 const rgw_obj& src_obj,
9f95a23c 1097 const RGWBucketInfo *src_bucket_info,
7c673cae
FG
1098 real_time *src_mtime,
1099 uint64_t *psize,
1100 const real_time *mod_ptr,
1101 const real_time *unmod_ptr,
1102 bool high_precision_time,
1103 const char *if_match,
1104 const char *if_nomatch,
20effc67
TL
1105 std::map<std::string, bufferlist> *pattrs,
1106 std::map<std::string, std::string> *pheaders,
1107 std::string *version_id,
1108 std::string *ptag,
1109 std::string *petag);
7c673cae
FG
1110
1111 int fetch_remote_obj(RGWObjectCtx& obj_ctx,
1112 const rgw_user& user_id,
7c673cae 1113 req_info *info,
9f95a23c 1114 const rgw_zone_id& source_zone,
1e59de90
TL
1115 const rgw_obj& dest_obj,
1116 const rgw_obj& src_obj,
1117 RGWBucketInfo& dest_bucket_info,
1118 RGWBucketInfo *src_bucket_info,
11fdf7f2 1119 std::optional<rgw_placement_rule> dest_placement,
7c673cae
FG
1120 ceph::real_time *src_mtime,
1121 ceph::real_time *mtime,
1122 const ceph::real_time *mod_ptr,
1123 const ceph::real_time *unmod_ptr,
1124 bool high_precision_time,
1125 const char *if_match,
1126 const char *if_nomatch,
1127 AttrsMod attrs_mod,
1128 bool copy_if_newer,
20effc67 1129 rgw::sal::Attrs& attrs,
7c673cae 1130 RGWObjCategory category,
11fdf7f2 1131 std::optional<uint64_t> olh_epoch,
7c673cae 1132 ceph::real_time delete_at,
20effc67
TL
1133 std::string *ptag,
1134 std::string *petag,
7c673cae 1135 void (*progress_cb)(off_t, void *),
31f18b77 1136 void *progress_data,
9f95a23c
TL
1137 const DoutPrefixProvider *dpp,
1138 RGWFetchObjFilter *filter,
1e59de90
TL
1139 const rgw_zone_set_entry& source_trace_entry,
1140 rgw_zone_set *zones_trace = nullptr,
81eedcae 1141 std::optional<uint64_t>* bytes_transferred = 0);
7c673cae
FG
1142 /**
1143 * Copy an object.
1144 * dest_obj: the object to copy into
1145 * src_obj: the object to copy from
1146 * attrs: usage depends on attrs_mod parameter
1147 * attrs_mod: the modification mode of the attrs, may have the following values:
1148 * ATTRSMOD_NONE - the attributes of the source object will be
1149 * copied without modifications, attrs parameter is ignored;
1150 * ATTRSMOD_REPLACE - new object will have the attributes provided by attrs
1151 * parameter, source object attributes are not copied;
1152 * ATTRSMOD_MERGE - any conflicting meta keys on the source object's attributes
1153 * are overwritten by values contained in attrs parameter.
7c673cae
FG
1154 * Returns: 0 on success, -ERR# otherwise.
1155 */
1156 int copy_obj(RGWObjectCtx& obj_ctx,
1157 const rgw_user& user_id,
7c673cae 1158 req_info *info,
9f95a23c 1159 const rgw_zone_id& source_zone,
1e59de90
TL
1160 const rgw_obj& dest_obj,
1161 const rgw_obj& src_obj,
1162 RGWBucketInfo& dest_bucket_info,
1163 RGWBucketInfo& src_bucket_info,
11fdf7f2 1164 const rgw_placement_rule& dest_placement,
7c673cae
FG
1165 ceph::real_time *src_mtime,
1166 ceph::real_time *mtime,
1167 const ceph::real_time *mod_ptr,
1168 const ceph::real_time *unmod_ptr,
1169 bool high_precision_time,
1170 const char *if_match,
1171 const char *if_nomatch,
1172 AttrsMod attrs_mod,
1173 bool copy_if_newer,
20effc67 1174 std::map<std::string, bufferlist>& attrs,
7c673cae
FG
1175 RGWObjCategory category,
1176 uint64_t olh_epoch,
1177 ceph::real_time delete_at,
20effc67
TL
1178 std::string *version_id,
1179 std::string *ptag,
1180 std::string *petag,
7c673cae 1181 void (*progress_cb)(off_t, void *),
9f95a23c
TL
1182 void *progress_data,
1183 const DoutPrefixProvider *dpp,
1184 optional_yield y);
7c673cae
FG
1185
1186 int copy_obj_data(RGWObjectCtx& obj_ctx,
1e59de90 1187 RGWBucketInfo& dest_bucket_info,
11fdf7f2 1188 const rgw_placement_rule& dest_placement,
7c673cae 1189 RGWRados::Object::Read& read_op, off_t end,
1e59de90 1190 const rgw_obj& dest_obj,
7c673cae
FG
1191 ceph::real_time *mtime,
1192 ceph::real_time set_mtime,
20effc67 1193 std::map<std::string, bufferlist>& attrs,
7c673cae
FG
1194 uint64_t olh_epoch,
1195 ceph::real_time delete_at,
20effc67 1196 std::string *petag,
9f95a23c
TL
1197 const DoutPrefixProvider *dpp,
1198 optional_yield y);
1e59de90 1199
11fdf7f2 1200 int transition_obj(RGWObjectCtx& obj_ctx,
1e59de90
TL
1201 RGWBucketInfo& bucket_info,
1202 const rgw_obj& obj,
11fdf7f2
TL
1203 const rgw_placement_rule& placement_rule,
1204 const real_time& mtime,
9f95a23c
TL
1205 uint64_t olh_epoch,
1206 const DoutPrefixProvider *dpp,
1207 optional_yield y);
11fdf7f2 1208
b3b6e05e 1209 int check_bucket_empty(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, optional_yield y);
7c673cae
FG
1210
1211 /**
1212 * Delete a bucket.
1213 * bucket_info: info for the bucket to delete
1214 * Returns 0 on success, -ERR# otherwise.
1215 */
b3b6e05e 1216 int delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, optional_yield y, const DoutPrefixProvider *dpp, bool check_empty = true);
7c673cae 1217
20effc67 1218 void wakeup_meta_sync_shards(std::set<int>& shard_ids);
1e59de90
TL
1219
1220 void wakeup_data_sync_shards(const DoutPrefixProvider *dpp, const rgw_zone_id& source_zone, bc::flat_map<int, bc::flat_set<rgw_data_notify_entry> >& entries);
7c673cae
FG
1221
1222 RGWMetaSyncStatusManager* get_meta_sync_manager();
9f95a23c 1223 RGWDataSyncStatusManager* get_data_sync_manager(const rgw_zone_id& source_zone);
7c673cae 1224
b3b6e05e
TL
1225 int set_bucket_owner(rgw_bucket& bucket, ACLOwner& owner, const DoutPrefixProvider *dpp);
1226 int set_buckets_enabled(std::vector<rgw_bucket>& buckets, bool enabled, const DoutPrefixProvider *dpp);
1227 int bucket_suspended(const DoutPrefixProvider *dpp, rgw_bucket& bucket, bool *suspended);
7c673cae
FG
1228
1229 /** Delete an object.*/
b3b6e05e 1230 int delete_obj(const DoutPrefixProvider *dpp,
1e59de90
TL
1231 RGWObjectCtx& obj_ctx,
1232 const RGWBucketInfo& bucket_info,
1233 const rgw_obj& obj,
20effc67 1234 int versioning_status, // versioning flags defined in enum RGWBucketFlags
f67539c2
TL
1235 uint16_t bilog_flags = 0,
1236 const ceph::real_time& expiration_time = ceph::real_time(),
1237 rgw_zone_set *zones_trace = nullptr);
7c673cae 1238
b3b6e05e 1239 int delete_raw_obj(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj);
7c673cae 1240
7c673cae 1241 /** Remove an object from the bucket index */
1e59de90
TL
1242 int delete_obj_index(const rgw_obj& obj, ceph::real_time mtime,
1243 const DoutPrefixProvider *dpp, optional_yield y);
7c673cae 1244
7c673cae
FG
1245 /**
1246 * Set an attr on an object.
1247 * bucket_info: info for the bucket holding the object
1248 * obj: the object to set the attr on
1249 * name: the attr to set
1250 * bl: the contents of the attr
1251 * Returns: 0 on success, -ERR# otherwise.
1252 */
1e59de90 1253 int set_attr(const DoutPrefixProvider *dpp, RGWObjectCtx* ctx, RGWBucketInfo& bucket_info, const rgw_obj& obj, const char *name, bufferlist& bl);
7c673cae 1254
1e59de90 1255 int set_attrs(const DoutPrefixProvider *dpp, RGWObjectCtx* ctx, RGWBucketInfo& bucket_info, const rgw_obj& obj,
20effc67
TL
1256 std::map<std::string, bufferlist>& attrs,
1257 std::map<std::string, bufferlist>* rmattrs,
aee94f69
TL
1258 optional_yield y,
1259 ceph::real_time set_mtime = ceph::real_clock::zero());
7c673cae 1260
1e59de90 1261 int get_obj_state(const DoutPrefixProvider *dpp, RGWObjectCtx *rctx, RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state, RGWObjManifest** manifest,
9f95a23c 1262 bool follow_olh, optional_yield y, bool assume_noent = false);
1e59de90
TL
1263 int get_obj_state(const DoutPrefixProvider *dpp, RGWObjectCtx *rctx, RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state, RGWObjManifest** manifest, optional_yield y) {
1264 return get_obj_state(dpp, rctx, bucket_info, obj, state, manifest, true, y);
7c673cae
FG
1265 }
1266
b3b6e05e 1267 using iterate_obj_cb = int (*)(const DoutPrefixProvider*, const rgw_raw_obj&, off_t, off_t,
11fdf7f2
TL
1268 off_t, bool, RGWObjState*, void*);
1269
1e59de90 1270 int iterate_obj(const DoutPrefixProvider *dpp, RGWObjectCtx& ctx, RGWBucketInfo& bucket_info,
11fdf7f2 1271 const rgw_obj& obj, off_t ofs, off_t end,
9f95a23c
TL
1272 uint64_t max_chunk_size, iterate_obj_cb cb, void *arg,
1273 optional_yield y);
7c673cae 1274
20effc67
TL
1275 int append_atomic_test(const DoutPrefixProvider *dpp, const RGWObjState* astate, librados::ObjectOperation& op);
1276
1277 virtual int get_obj_iterate_cb(const DoutPrefixProvider *dpp,
b3b6e05e 1278 const rgw_raw_obj& read_obj, off_t obj_ofs,
11fdf7f2
TL
1279 off_t read_ofs, off_t len, bool is_head_obj,
1280 RGWObjState *astate, void *arg);
7c673cae 1281
7c673cae
FG
1282 /**
1283 * a simple object read without keeping state
1284 */
1285
20effc67 1286 int raw_obj_stat(const DoutPrefixProvider *dpp,
b3b6e05e 1287 rgw_raw_obj& obj, uint64_t *psize, ceph::real_time *pmtime, uint64_t *epoch,
20effc67 1288 std::map<std::string, bufferlist> *attrs, bufferlist *first_chunk,
9f95a23c 1289 RGWObjVersionTracker *objv_tracker, optional_yield y);
7c673cae 1290
b3b6e05e
TL
1291 int obj_operate(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectWriteOperation *op);
1292 int obj_operate(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectReadOperation *op);
7c673cae 1293
20effc67 1294 int guard_reshard(const DoutPrefixProvider *dpp,
b3b6e05e 1295 BucketShard *bs,
f64942e4 1296 const rgw_obj& obj_instance,
1e59de90 1297 RGWBucketInfo& bucket_info,
f64942e4
AA
1298 std::function<int(BucketShard *)> call);
1299 int block_while_resharding(RGWRados::BucketShard *bs,
1e59de90
TL
1300 const rgw_obj& obj_instance,
1301 RGWBucketInfo& bucket_info,
b3b6e05e
TL
1302 optional_yield y,
1303 const DoutPrefixProvider *dpp);
1304
1305 void bucket_index_guard_olh_op(const DoutPrefixProvider *dpp, RGWObjState& olh_state, librados::ObjectOperation& op);
05a536ef 1306 void olh_cancel_modification(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, const std::string& op_tag, optional_yield y);
20effc67
TL
1307 int olh_init_modification(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, std::string *op_tag);
1308 int olh_init_modification_impl(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, std::string *op_tag);
1309 int bucket_index_link_olh(const DoutPrefixProvider *dpp,
1e59de90 1310 RGWBucketInfo& bucket_info, RGWObjState& olh_state,
7c673cae 1311 const rgw_obj& obj_instance, bool delete_marker,
20effc67 1312 const std::string& op_tag, struct rgw_bucket_dir_entry_meta *meta,
7c673cae 1313 uint64_t olh_epoch,
91327a77 1314 ceph::real_time unmod_since, bool high_precision_time,
1e59de90 1315 optional_yield y,
91327a77
AA
1316 rgw_zone_set *zones_trace = nullptr,
1317 bool log_data_change = false);
1e59de90
TL
1318 int bucket_index_unlink_instance(const DoutPrefixProvider *dpp,
1319 RGWBucketInfo& bucket_info,
1320 const rgw_obj& obj_instance,
1321 const std::string& op_tag, const std::string& olh_tag,
1322 uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
1323 int bucket_index_read_olh_log(const DoutPrefixProvider *dpp,
1324 RGWBucketInfo& bucket_info, RGWObjState& state,
1325 const rgw_obj& obj_instance, uint64_t ver_marker,
20effc67 1326 std::map<uint64_t, std::vector<rgw_bucket_olh_log_entry> > *log, bool *is_truncated);
1e59de90 1327 int bucket_index_trim_olh_log(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, RGWObjState& obj_state, const rgw_obj& obj_instance, uint64_t ver);
05a536ef 1328 int bucket_index_clear_olh(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const std::string& olh_tag, const rgw_obj& obj_instance);
1e59de90 1329 int apply_olh_log(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWObjState& obj_state, RGWBucketInfo& bucket_info, const rgw_obj& obj,
20effc67 1330 bufferlist& obj_tag, std::map<uint64_t, std::vector<rgw_bucket_olh_log_entry> >& log,
31f18b77 1331 uint64_t *plast_ver, rgw_zone_set *zones_trace = nullptr);
1e59de90 1332 int update_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWObjState *state, RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_zone_set *zones_trace = nullptr);
05a536ef
TL
1333 int clear_olh(const DoutPrefixProvider *dpp,
1334 RGWObjectCtx& obj_ctx,
1335 const rgw_obj& obj,
1336 RGWBucketInfo& bucket_info,
1337 const std::string& tag,
1338 const uint64_t ver,
1339 optional_yield y);
1e59de90 1340 int set_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, bool delete_marker, rgw_bucket_dir_entry_meta *meta,
91327a77 1341 uint64_t olh_epoch, ceph::real_time unmod_since, bool high_precision_time,
9f95a23c 1342 optional_yield y, rgw_zone_set *zones_trace = nullptr, bool log_data_change = false);
b3b6e05e 1343 int repair_olh(const DoutPrefixProvider *dpp, RGWObjState* state, const RGWBucketInfo& bucket_info,
a8e16298 1344 const rgw_obj& obj);
b3b6e05e 1345 int unlink_obj_instance(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj,
9f95a23c 1346 uint64_t olh_epoch, optional_yield y, rgw_zone_set *zones_trace = nullptr);
7c673cae 1347
20effc67 1348 void check_pending_olh_entries(const DoutPrefixProvider *dpp, std::map<std::string, bufferlist>& pending_entries, std::map<std::string, bufferlist> *rm_pending_entries);
05a536ef 1349 int remove_olh_pending_entries(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, std::map<std::string, bufferlist>& pending_attrs);
1e59de90
TL
1350 int follow_olh(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, RGWObjectCtx& ctx, RGWObjState *state, const rgw_obj& olh_obj, rgw_obj *target);
1351 int get_olh(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWOLHInfo *olh);
7c673cae 1352
11fdf7f2 1353 void gen_rand_obj_instance_name(rgw_obj_key *target_key);
7c673cae
FG
1354 void gen_rand_obj_instance_name(rgw_obj *target);
1355
20effc67 1356 int update_containers_stats(std::map<std::string, RGWBucketEnt>& m, const DoutPrefixProvider *dpp);
b3b6e05e 1357 int append_async(const DoutPrefixProvider *dpp, rgw_raw_obj& obj, size_t size, bufferlist& bl);
7c673cae 1358
11fdf7f2 1359public:
1e59de90 1360 void set_atomic(void *ctx, const rgw_obj& obj) {
7c673cae 1361 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
11fdf7f2 1362 rctx->set_atomic(obj);
7c673cae 1363 }
11fdf7f2 1364 void set_prefetch_data(void *ctx, const rgw_obj& obj) {
7c673cae 1365 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
11fdf7f2 1366 rctx->set_prefetch_data(obj);
7c673cae 1367 }
20effc67
TL
1368 void set_compressed(void *ctx, const rgw_obj& obj) {
1369 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
1370 rctx->set_compressed(obj);
1371 }
1372 int decode_policy(const DoutPrefixProvider *dpp, bufferlist& bl, ACLOwner *owner);
1e59de90 1373 int get_bucket_stats(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout, int shard_id, std::string *bucket_ver, std::string *master_ver,
20effc67 1374 std::map<RGWObjCategory, RGWStorageStats>& stats, std::string *max_marker, bool* syncstopped = NULL);
1e59de90 1375 int get_bucket_stats_async(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout, int shard_id, RGWGetBucketStats_CB *cb);
3a9019d9 1376
1e59de90 1377 int put_bucket_instance_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, std::map<std::string, bufferlist> *pattrs, const DoutPrefixProvider *dpp, optional_yield y);
9f95a23c 1378 /* xxx dang obj_ctx -> svc */
1e59de90
TL
1379 int get_bucket_instance_info(const std::string& meta_key, RGWBucketInfo& info, ceph::real_time *pmtime, std::map<std::string, bufferlist> *pattrs, optional_yield y, const DoutPrefixProvider *dpp);
1380 int get_bucket_instance_info(const rgw_bucket& bucket, RGWBucketInfo& info, ceph::real_time *pmtime, std::map<std::string, bufferlist> *pattrs, optional_yield y, const DoutPrefixProvider *dpp);
3a9019d9 1381
20effc67 1382 static void make_bucket_entry_name(const std::string& tenant_name, const std::string& bucket_name, std::string& bucket_entry);
b32b8144 1383
9f95a23c 1384 int get_bucket_info(RGWServices *svc,
20effc67 1385 const std::string& tenant_name, const std::string& bucket_name,
b32b8144 1386 RGWBucketInfo& info,
b3b6e05e 1387 ceph::real_time *pmtime, optional_yield y,
20effc67 1388 const DoutPrefixProvider *dpp, std::map<std::string, bufferlist> *pattrs = NULL);
b32b8144 1389
81eedcae
TL
1390 // Returns 0 on successful refresh. Returns error code if there was
1391 // an error or the version stored on the OSD is the same as that
b32b8144
FG
1392 // presented in the BucketInfo structure.
1393 //
1394 int try_refresh_bucket_info(RGWBucketInfo& info,
1395 ceph::real_time *pmtime,
b3b6e05e 1396 const DoutPrefixProvider *dpp,
20effc67 1397 std::map<std::string, bufferlist> *pattrs = nullptr);
b32b8144 1398
7c673cae 1399 int put_linked_bucket_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, obj_version *pep_objv,
20effc67 1400 std::map<std::string, bufferlist> *pattrs, bool create_entry_point,
1e59de90 1401 const DoutPrefixProvider *dpp, optional_yield y);
7c673cae 1402
20effc67
TL
1403 int cls_obj_prepare_op(const DoutPrefixProvider *dpp, BucketShard& bs, RGWModifyOp op, std::string& tag, rgw_obj& obj, uint16_t bilog_flags, optional_yield y, rgw_zone_set *zones_trace = nullptr);
1404 int cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModifyOp op, std::string& tag, int64_t pool, uint64_t epoch,
1405 rgw_bucket_dir_entry& ent, RGWObjCategory category, std::list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
1406 int cls_obj_complete_add(BucketShard& bs, const rgw_obj& obj, std::string& tag, int64_t pool, uint64_t epoch, rgw_bucket_dir_entry& ent,
1407 RGWObjCategory category, std::list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
1408 int cls_obj_complete_del(BucketShard& bs, std::string& tag, int64_t pool, uint64_t epoch, rgw_obj& obj,
1409 ceph::real_time& removed_mtime, std::list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
1410 int cls_obj_complete_cancel(BucketShard& bs, std::string& tag, rgw_obj& obj,
1411 std::list<rgw_obj_index_key> *remove_objs,
1412 uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
b3b6e05e 1413 int cls_obj_set_bucket_tag_timeout(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, uint64_t timeout);
9f95a23c
TL
1414
1415 using ent_map_t =
1416 boost::container::flat_map<std::string, rgw_bucket_dir_entry>;
1417
b3b6e05e
TL
1418 int cls_bucket_list_ordered(const DoutPrefixProvider *dpp,
1419 RGWBucketInfo& bucket_info,
1e59de90
TL
1420 const rgw::bucket_index_layout_generation& idx_layout,
1421 const int shard_id,
9f95a23c 1422 const rgw_obj_index_key& start_after,
20effc67
TL
1423 const std::string& prefix,
1424 const std::string& delimiter,
9f95a23c
TL
1425 const uint32_t num_entries,
1426 const bool list_versions,
1427 const uint16_t exp_factor, // 0 means ignore
1428 ent_map_t& m,
1429 bool* is_truncated,
1430 bool* cls_filtered,
1adf2230 1431 rgw_obj_index_key *last_entry,
9f95a23c 1432 optional_yield y,
20effc67 1433 RGWBucketListNameFilter force_check_filter = {});
b3b6e05e
TL
1434 int cls_bucket_list_unordered(const DoutPrefixProvider *dpp,
1435 RGWBucketInfo& bucket_info,
1e59de90
TL
1436 const rgw::bucket_index_layout_generation& idx_layout,
1437 int shard_id,
9f95a23c 1438 const rgw_obj_index_key& start_after,
20effc67 1439 const std::string& prefix,
9f95a23c
TL
1440 uint32_t num_entries,
1441 bool list_versions,
20effc67 1442 std::vector<rgw_bucket_dir_entry>& ent_list,
9f95a23c
TL
1443 bool *is_truncated,
1444 rgw_obj_index_key *last_entry,
1445 optional_yield y,
20effc67
TL
1446 RGWBucketListNameFilter force_check_filter = {});
1447 int cls_bucket_head(const DoutPrefixProvider *dpp,
1448 const RGWBucketInfo& bucket_info,
1e59de90
TL
1449 const rgw::bucket_index_layout_generation& idx_layout,
1450 int shard_id, std::vector<rgw_bucket_dir_header>& headers,
20effc67 1451 std::map<int, std::string> *bucket_instance_ids = NULL);
1e59de90
TL
1452 int cls_bucket_head_async(const DoutPrefixProvider *dpp,
1453 const RGWBucketInfo& bucket_info,
1454 const rgw::bucket_index_layout_generation& idx_layout,
1455 int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio);
b3b6e05e
TL
1456 int bi_get_instance(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_dir_entry *dirent);
1457 int bi_get_olh(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_olh_entry *olh);
1458 int bi_get(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, BIIndexType index_type, rgw_cls_bi_entry *entry);
7c673cae
FG
1459 void bi_put(librados::ObjectWriteOperation& op, BucketShard& bs, rgw_cls_bi_entry& entry);
1460 int bi_put(BucketShard& bs, rgw_cls_bi_entry& entry);
b3b6e05e 1461 int bi_put(const DoutPrefixProvider *dpp, rgw_bucket& bucket, rgw_obj& obj, rgw_cls_bi_entry& entry);
20effc67
TL
1462 int bi_list(const DoutPrefixProvider *dpp,
1463 const RGWBucketInfo& bucket_info,
1464 int shard_id,
1465 const std::string& filter_obj,
1466 const std::string& marker,
1467 uint32_t max,
1468 std::list<rgw_cls_bi_entry> *entries,
1469 bool *is_truncated);
1470 int bi_list(BucketShard& bs, const std::string& filter_obj, const std::string& marker, uint32_t max, std::list<rgw_cls_bi_entry> *entries, bool *is_truncated);
1471 int bi_list(const DoutPrefixProvider *dpp, rgw_bucket& bucket, const std::string& obj_name, const std::string& marker, uint32_t max,
1472 std::list<rgw_cls_bi_entry> *entries, bool *is_truncated);
1473 int bi_remove(const DoutPrefixProvider *dpp, BucketShard& bs);
1474
1475 int cls_obj_usage_log_add(const DoutPrefixProvider *dpp, const std::string& oid, rgw_usage_log_info& info);
1476 int cls_obj_usage_log_read(const DoutPrefixProvider *dpp, const std::string& oid, const std::string& user, const std::string& bucket, uint64_t start_epoch,
1477 uint64_t end_epoch, uint32_t max_entries, std::string& read_iter,
1478 std::map<rgw_user_bucket, rgw_usage_log_entry>& usage, bool *is_truncated);
1479 int cls_obj_usage_log_trim(const DoutPrefixProvider *dpp, const std::string& oid, const std::string& user, const std::string& bucket, uint64_t start_epoch,
11fdf7f2 1480 uint64_t end_epoch);
20effc67 1481 int cls_obj_usage_log_clear(const DoutPrefixProvider *dpp, std::string& oid);
7c673cae 1482
20effc67 1483 int get_target_shard_id(const rgw::bucket_index_normal_layout& layout, const std::string& obj_key, int *shard_id);
9f95a23c 1484
20effc67
TL
1485 int lock_exclusive(const rgw_pool& pool, const std::string& oid, ceph::timespan& duration, rgw_zone_id& zone_id, std::string& owner_id);
1486 int unlock(const rgw_pool& pool, const std::string& oid, rgw_zone_id& zone_id, std::string& owner_id);
7c673cae 1487
// Garbage-collection (GC) chain helpers.

// Fills *chain from `manifest` for the object headed by `head_obj`.
// NOTE(review): which manifest parts end up in the chain is not visible
// here - confirm against the definition.
1e59de90 1488 void update_gc_chain(const DoutPrefixProvider *dpp, rgw_obj head_obj, RGWObjManifest& manifest, cls_rgw_obj_chain *chain);
// Returns a status code paired with an optional chain.
// NOTE(review): presumably the chain is handed back when it could not be
// queued for GC so the caller can delete it another way - confirm.
39ae355f 1489 std::tuple<int, std::optional<cls_rgw_obj_chain>> send_chain_to_gc(cls_rgw_obj_chain& chain, const std::string& tag);
20effc67
TL
// NOTE(review): presumably deletes the objects in `chain` directly instead of
// deferring them to GC - confirm.
1490 void delete_objs_inline(const DoutPrefixProvider *dpp, cls_rgw_obj_chain& chain, const std::string& tag);
// gc_operate / gc_aio_operate: apply a librados operation to the object
// `oid`. Overloads exist for write operations, asynchronous write operations
// (completion `c`), and read operations (output buffer `pbl`).
// NOTE(review): presumably `oid` names an object in the GC pool - confirm.
1491 int gc_operate(const DoutPrefixProvider *dpp, std::string& oid, librados::ObjectWriteOperation *op);
9f95a23c
TL
1492 int gc_aio_operate(const std::string& oid, librados::AioCompletion *c,
1493 librados::ObjectWriteOperation *op);
20effc67 1494 int gc_operate(const DoutPrefixProvider *dpp, std::string& oid, librados::ObjectReadOperation *op, bufferlist *pbl);
7c673cae 1495
// Lists GC entries into `result`; *index, `marker`, *truncated and
// `processing_queue` are all non-const and so may be updated by the call.
// NOTE(review): presumably *index/marker form a resumable cursor and
// expired_only restricts the listing to entries past their expiration -
// confirm against the definition.
20effc67 1496 int list_gc_objs(int *index, std::string& marker, uint32_t max, bool expired_only, std::list<cls_rgw_gc_obj_info>& result, bool *truncated, bool& processing_queue);
// NOTE(review): presumably runs a GC pass, limited to expired entries when
// expired_only is set - confirm.
11fdf7f2 1497 int process_gc(bool expired_only);
// NOTE(review): the meaning of the bool result is not visible here - confirm.
b3b6e05e 1498 bool process_expire_objects(const DoutPrefixProvider *dpp);
// NOTE(review): presumably postpones garbage collection of `obj` in
// `bucket_info`'s bucket; `y` is the optional yield context - confirm.
1e59de90 1499 int defer_gc(const DoutPrefixProvider *dpp, RGWObjectCtx* ctx, RGWBucketInfo& bucket_info, const rgw_obj& obj, optional_yield y);
7c673cae 1500
20effc67
TL
// Lifecycle (LC) processing.
// NOTE(review): the parameter name suggests a null `optional_bucket` means
// "process all buckets" - confirm against the definition.
1501 int process_lc(const std::unique_ptr<rgw::sal::Bucket>& optional_bucket);
// Lists lifecycle progress entries into `progress_map`; `marker` and `index`
// are non-const references and so may be updated by the call.
// NOTE(review): presumably at most max_entries are returned per call - confirm.
1502 int list_lc_progress(std::string& marker, uint32_t max_entries,
1e59de90
TL
1503 std::vector<std::unique_ptr<rgw::sal::Lifecycle::LCEntry>>& progress_map,
1504 int& index);
f6b5b4d7 1505
// Reports two per-category stats maps for the bucket through the output
// pointers `existing_stats` and `calculated_stats`.
// NOTE(review): presumably "existing" is what the index header records and
// "calculated" is recomputed from the index entries - confirm against the
// definition.
b3b6e05e 1506 int bucket_check_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info,
20effc67
TL
1507 std::map<RGWObjCategory, RGWStorageStats> *existing_stats,
1508 std::map<RGWObjCategory, RGWStorageStats> *calculated_stats);
// NOTE(review): presumably rebuilds the bucket index (header stats) for
// `bucket_info` - confirm.
b3b6e05e 1509 int bucket_rebuild_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info);
aee94f69
TL
1510
1511 // Search the bucket for encrypted multipart uploads, and increase their mtime
1512 // slightly to generate a bilog entry to trigger a resync to repair any
1513 // corrupted replicas. See https://tracker.ceph.com/issues/46062
// NOTE(review): `marker` presumably gives the listing position to resume from
// and `flusher` presumably receives progress output - confirm against the
// definition in rgw_rados.cc.
1514 int bucket_resync_encrypted_multipart(const DoutPrefixProvider* dpp,
1515 optional_yield y,
1516 rgw::sal::RadosStore* driver,
1517 RGWBucketInfo& bucket_info,
1518 const std::string& marker,
1519 RGWFormatterFlusher& flusher);
1520
// NOTE(review): presumably records the reshard state `entry` on the bucket's
// index - confirm against the definition.
b3b6e05e 1521 int bucket_set_reshard(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const cls_rgw_bucket_instance_entry& entry);
1e59de90
TL
// Takes the list of index keys to drop from the bucket's index.
// NOTE(review): whether the underlying objects are touched is not visible
// here - confirm.
1522 int remove_objs_from_index(const DoutPrefixProvider *dpp,
1523 RGWBucketInfo& bucket_info,
1524 const std::list<rgw_obj_index_key>& oid_list);
// Moves a RADOS object identified by (src_ioctx, src_oid, src_locator) to
// (dst_ioctx, dst_oid, dst_locator).
// NOTE(review): presumably implemented as copy-then-remove; atomicity is not
// visible here - confirm against the definition.
20effc67 1525 int move_rados_obj(const DoutPrefixProvider *dpp,
b3b6e05e 1526 librados::IoCtx& src_ioctx,
20effc67 1527 const std::string& src_oid, const std::string& src_locator,
7c673cae 1528 librados::IoCtx& dst_ioctx,
20effc67 1529 const std::string& dst_oid, const std::string& dst_locator);
// Locator repair for the head object named by `key`.
// NOTE(review): copy_obj / remove_bad presumably select whether the object is
// copied to the correct locator and whether the badly-located one is removed -
// confirm.
b3b6e05e 1530 int fix_head_obj_locator(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, bool copy_obj, bool remove_bad, rgw_obj_key& key);
1e59de90
TL
// Locator repair for tail objects of `key`; *need_fix is an output.
// NOTE(review): presumably `fix == false` only reports through *need_fix
// without changing anything - confirm.
1531 int fix_tail_obj_locator(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info,
1532 rgw_obj_key& key, bool fix, bool *need_fix, optional_yield y);
7c673cae 1533
// Quota check for adding `obj_size` bytes to `bucket` owned by
// `bucket_owner`; check_size_only defaults to false.
// NOTE(review): presumably check_size_only skips the object-count check -
// confirm against the definition.
20effc67 1534 int check_quota(const DoutPrefixProvider *dpp, const rgw_user& bucket_owner, rgw_bucket& bucket,
1e59de90 1535 RGWQuota& quota, uint64_t obj_size,
f67539c2 1536 optional_yield y, bool check_size_only = false);
7c673cae 1537
// NOTE(review): presumably decides from num_objs whether the bucket needs
// more index shards and schedules a reshard if so - confirm.
224ce89b 1538 int check_bucket_shards(const RGWBucketInfo& bucket_info, const rgw_bucket& bucket,
b3b6e05e 1539 uint64_t num_objs, const DoutPrefixProvider *dpp);
31f18b77 1540
// NOTE(review): presumably queues the bucket for resharding to
// new_num_shards shards - confirm.
b3b6e05e 1541 int add_bucket_to_reshard(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, uint32_t new_num_shards);
31f18b77 1542
// NOTE(review): presumably the id of this RADOS client instance - confirm.
7c673cae 1543 uint64_t instance_id();
3efd9988 1544
7c673cae
FG
// Returns a raw pointer to a librados::Rados handle.
// NOTE(review): ownership presumably stays with this object - confirm.
1545 librados::Rados* get_rados_handle();
1546
// Asynchronous deletes. Both take `handles`, a caller-supplied list of
// AioCompletion pointers, by non-const reference.
// NOTE(review): presumably each call appends its completion to `handles` and
// the caller must wait on and release them - confirm against the definitions.
20effc67 1547 int delete_raw_obj_aio(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, std::list<librados::AioCompletion *>& handles);
// NOTE(review): keep_index_consistent presumably selects whether the bucket
// index is updated alongside the object removal - confirm.
b3b6e05e 1548 int delete_obj_aio(const DoutPrefixProvider *dpp, const rgw_obj& obj, RGWBucketInfo& info, RGWObjState *astate,
20effc67 1549 std::list<librados::AioCompletion *>& handles, bool keep_index_consistent,
9f95a23c 1550 optional_yield y);
11fdf7f2 1551
11fdf7f2 1552 private:
7c673cae
FG
1553 /**
1554 * Check the actual on-disk state of the object specified
1555 * by list_state, and fill in the time and size of object.
1556 * Then append any changes to suggested_updates for
1557 * the rgw class' dir_suggest_changes function.
1558 *
1559 * Note that this can maul list_state; don't use it afterwards. Also
1560 * it expects object to already be filled in from list_state; it only
1561 * sets the size and mtime.
1562 *
1563 * Returns 0 on success, -ENOENT if the object doesn't exist on disk,
1564 * and -errno on other failures. (-ENOENT is not a failure, and it
1565 * will encode that info as a suggested update.)
 *
 * io_ctx is taken by value, unlike the other arguments.
 * NOTE(review): dpp is presumably used only for log output and y is
 * presumably the yield context for the on-disk lookup - confirm against
 * the definition.
1566 */
20effc67 1567 int check_disk_state(const DoutPrefixProvider *dpp,
b3b6e05e 1568 librados::IoCtx io_ctx,
1e59de90 1569 RGWBucketInfo& bucket_info,
7c673cae
FG
1570 rgw_bucket_dir_entry& list_state,
1571 rgw_bucket_dir_entry& object,
9f95a23c
TL
1572 bufferlist& suggested_updates,
1573 optional_yield y);
7c673cae
FG
1574
1575 /**
1576 * Init pool iteration
 * dpp: prefix provider (NOTE(review): presumably for log output only - confirm)
31f18b77 1577 * pool: pool to use for the ctx initialization
7c673cae
FG
1578 * ctx: context object to use for the iteration
1579 * Returns: 0 on success, -ERR# otherwise.
 * See the overload taking a cursor to start from a given position.
1580 */
b3b6e05e 1581 int pool_iterate_begin(const DoutPrefixProvider *dpp, const rgw_pool& pool, RGWPoolIterCtx& ctx);
31f18b77 1582
181888fb
FG
1583 /**
1584 * Init pool iteration
 * dpp: prefix provider (NOTE(review): presumably for log output only - confirm)
1585 * pool: pool to use
1586 * cursor: position to start iteration
 *         (NOTE(review): presumably a string previously returned by
 *         pool_iterate_get_cursor() - confirm)
1587 * ctx: context object to use for the iteration
1588 * Returns: 0 on success, -ERR# otherwise.
1589 */
20effc67 1590 int pool_iterate_begin(const DoutPrefixProvider *dpp, const rgw_pool& pool, const std::string& cursor, RGWPoolIterCtx& ctx);
181888fb
FG
1591
1592 /**
1593 * Get pool iteration position
1594 * ctx: context object to use for the iteration
20effc67 1595 * Returns: std::string representation of position
 *          (NOTE(review): presumably accepted as the cursor argument of
 *          pool_iterate_begin() - confirm)
181888fb 1596 */
20effc67 1597 std::string pool_iterate_get_cursor(RGWPoolIterCtx& ctx);
181888fb 1598
7c673cae
FG
1599 /**
1600 * Iterate over pool return object names, use optional filter
 * dpp: prefix provider (NOTE(review): presumably for log output only - confirm)
1601 * ctx: iteration context, initialized with pool_iterate_begin()
1602 * num: max number of objects to return
1603 * objs: a vector that the results will append into
1604 * is_truncated: if not NULL, will hold true iff more objects remain, i.e.
 *               iteration is NOT yet complete
 *               (NOTE(review): the earlier wording said "iff iteration is
 *               complete", which contradicts the parameter name; confirm
 *               against RGWRados::pool_iterate in rgw_rados.cc)
1605 * filter: if not NULL, will be used to filter returned objects
1606 * Returns: 0 on success, -ERR# otherwise.
1607 */
20effc67
TL
1608 int pool_iterate(const DoutPrefixProvider *dpp, RGWPoolIterCtx& ctx, uint32_t num,
1609 std::vector<rgw_bucket_dir_entry>& objs,
7c673cae
FG
1610 bool *is_truncated, RGWAccessListFilter *filter);
1611
// NOTE(review): presumably hands out the next value of a per-instance bucket
// id counter; whether it is safe to call concurrently is not visible here -
// confirm against the definition.
1612 uint64_t next_bucket_id();
11fdf7f2 1613
9f95a23c
TL
1614 /**
1615 * This is broken out to facilitate unit testing.
 *
 * Static: depends only on its two arguments, not on instance state.
 * NOTE(review): presumably computes how many entries to request from each
 * of num_shards shards to satisfy an ordered listing of num_entries -
 * confirm against the definition.
1616 */
1617 static uint32_t calc_ordered_bucket_list_per_shard(uint32_t num_entries,
1618 uint32_t num_shards);
11fdf7f2
TL
1619};
1620
20effc67
TL
1621
1622struct get_obj_data {
1623 RGWRados* rgwrados;
1624 RGWGetDataCB* client_cb = nullptr;
1625 rgw::Aio* aio;
1626 uint64_t offset; // next offset to write to client
1627 rgw::AioResultList completed; // completed read results, sorted by offset
1628 optional_yield yield;
1629
1630 get_obj_data(RGWRados* rgwrados, RGWGetDataCB* cb, rgw::Aio* aio,
1631 uint64_t offset, optional_yield yield)
1632 : rgwrados(rgwrados), client_cb(cb), aio(aio), offset(offset), yield(yield) {}
1633 ~get_obj_data() {
1634 if (rgwrados->get_use_datacache()) {
1635 const std::lock_guard l(d3n_get_data.d3n_lock);
1636 }
1637 }
1638
1639 D3nGetObjData d3n_get_data;
1640 std::atomic_bool d3n_bypass_cache_write{false};
1641
1642 int flush(rgw::AioResultList&& results);
1643
1644 void cancel() {
1645 // wait for all completions to drain and ignore the results
1646 aio->drain();
1647 }
1648
1649 int drain() {
1650 auto c = aio->wait();
1651 while (!c.empty()) {
1652 int r = flush(std::move(c));
1653 if (r < 0) {
1654 cancel();
1655 return r;
1656 }
1657 c = aio->wait();
1658 }
1659 return flush(std::move(c));
1660 }
1661};