1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
10 #include <boost/utility/string_ref.hpp>
11 #include <boost/format.hpp>
13 #include "common/errno.h"
14 #include "common/ceph_json.h"
15 #include "rgw_rados.h"
17 #include "rgw_acl_s3.h"
19 #include "include/types.h"
20 #include "rgw_bucket.h"
22 #include "rgw_string.h"
23 #include "rgw_multi.h"
25 #include "include/rados/librados.hpp"
26 // until everything is moved from rgw_common
27 #include "rgw_common.h"
29 #include "cls/user/cls_user_types.h"
31 #define dout_context g_ceph_context
32 #define dout_subsys ceph_subsys_rgw
34 #define BUCKET_TAG_TIMEOUT 30
38 static RGWMetadataHandler
*bucket_meta_handler
= NULL
;
39 static RGWMetadataHandler
*bucket_instance_meta_handler
= NULL
;
41 // define as static when RGWBucket implementation compete
42 void rgw_get_buckets_obj(const rgw_user
& user_id
, string
& buckets_obj_id
)
44 buckets_obj_id
= user_id
.to_str();
45 buckets_obj_id
+= RGW_BUCKETS_OBJ_SUFFIX
;
/*
 * Note that this is not a reversal of parse_bucket(). That one deals
 * with the syntax we need in metadata and such. This one deals with
 * the representation in RADOS pools. We chose '/' because it's not
 * acceptable in bucket names and thus qualified buckets cannot conflict
 * with the legacy or S3 buckets.
 *
 * Returns:
 *   - an empty string when bucket_name is empty,
 *   - bucket_name alone when tenant_name is empty,
 *   - "tenant_name/bucket_name" otherwise.
 */
std::string rgw_make_bucket_entry_name(const std::string& tenant_name,
                                       const std::string& bucket_name) {
  if (bucket_name.empty()) {
    return std::string();
  }
  if (tenant_name.empty()) {
    return bucket_name;
  }

  std::string qualified(tenant_name);
  qualified += "/";
  qualified += bucket_name;
  return qualified;
}
/*
 * Tenants are separated from buckets in URLs by a colon in S3.
 * This function is not to be used on Swift URLs, not even for COPY arguments.
 *
 * bucket:      bucket component as it appears in the URL, "[tenant:]name".
 * auth_tenant: tenant to fall back to when the URL carries no tenant part.
 * tenant_name: (out) the parsed or defaulted tenant.
 * bucket_name: (out) the bucket name without the tenant prefix.
 */
void rgw_parse_url_bucket(const std::string &bucket, const std::string& auth_tenant,
                          std::string &tenant_name, std::string &bucket_name) {
  // Keep the result in size_type and test against npos; squeezing it into an
  // int would narrow the value and rely on npos converting to a negative one.
  const std::string::size_type pos = bucket.find(':');
  if (pos != std::string::npos) {
    /*
     * N.B.: We allow ":bucket" syntax with explicit empty tenant in order
     * to refer to the legacy tenant, in case users in new named tenants
     * want to access old global buckets.
     */
    tenant_name = bucket.substr(0, pos);
    bucket_name = bucket.substr(pos + 1);
  } else {
    tenant_name = auth_tenant;
    bucket_name = bucket;
  }
}
93 * Get all the buckets owned by a user and fill up an RGWUserBuckets with them.
94 * Returns: 0 on success, -ERR# on failure.
96 int rgw_read_user_buckets(RGWRados
* store
,
97 const rgw_user
& user_id
,
98 RGWUserBuckets
& buckets
,
100 const string
& end_marker
,
104 uint64_t default_amount
)
108 string buckets_obj_id
;
109 rgw_get_buckets_obj(user_id
, buckets_obj_id
);
110 rgw_raw_obj
obj(store
->get_zone_params().user_uid_pool
, buckets_obj_id
);
111 list
<cls_user_bucket_entry
> entries
;
113 bool truncated
= false;
119 max
= default_amount
;
123 ret
= store
->cls_user_list_buckets(obj
, m
, end_marker
, max
- total
, entries
, &m
, &truncated
);
130 for (const auto& entry
: entries
) {
131 buckets
.add(RGWBucketEnt(user_id
, entry
));
135 } while (truncated
&& total
< max
);
137 if (is_truncated
!= nullptr) {
138 *is_truncated
= truncated
;
142 map
<string
, RGWBucketEnt
>& m
= buckets
.get_buckets();
143 ret
= store
->update_containers_stats(m
);
144 if (ret
< 0 && ret
!= -ENOENT
) {
145 ldout(store
->ctx(), 0) << "ERROR: could not get stats for buckets" << dendl
;
152 int rgw_bucket_sync_user_stats(RGWRados
*store
, const rgw_user
& user_id
, const RGWBucketInfo
& bucket_info
)
154 string buckets_obj_id
;
155 rgw_get_buckets_obj(user_id
, buckets_obj_id
);
156 rgw_raw_obj
obj(store
->get_zone_params().user_uid_pool
, buckets_obj_id
);
158 return store
->cls_user_sync_bucket_stats(obj
, bucket_info
);
161 int rgw_bucket_sync_user_stats(RGWRados
*store
, const string
& tenant_name
, const string
& bucket_name
)
163 RGWBucketInfo bucket_info
;
164 RGWObjectCtx
obj_ctx(store
);
165 int ret
= store
->get_bucket_info(obj_ctx
, tenant_name
, bucket_name
, bucket_info
, NULL
);
167 ldout(store
->ctx(), 0) << "ERROR: could not fetch bucket info: ret=" << ret
<< dendl
;
171 ret
= rgw_bucket_sync_user_stats(store
, bucket_info
.owner
, bucket_info
);
173 ldout(store
->ctx(), 0) << "ERROR: could not sync user stats for bucket " << bucket_name
<< ": ret=" << ret
<< dendl
;
180 int rgw_link_bucket(RGWRados
*store
, const rgw_user
& user_id
, rgw_bucket
& bucket
, real_time creation_time
, bool update_entrypoint
)
183 string
& tenant_name
= bucket
.tenant
;
184 string
& bucket_name
= bucket
.name
;
186 cls_user_bucket_entry new_bucket
;
188 RGWBucketEntryPoint ep
;
189 RGWObjVersionTracker ot
;
191 bucket
.convert(&new_bucket
.bucket
);
193 if (real_clock::is_zero(creation_time
))
194 new_bucket
.creation_time
= real_clock::now();
196 new_bucket
.creation_time
= creation_time
;
198 map
<string
, bufferlist
> attrs
;
199 RGWObjectCtx
obj_ctx(store
);
201 if (update_entrypoint
) {
202 ret
= store
->get_bucket_entrypoint_info(obj_ctx
, tenant_name
, bucket_name
, ep
, &ot
, NULL
, &attrs
);
203 if (ret
< 0 && ret
!= -ENOENT
) {
204 ldout(store
->ctx(), 0) << "ERROR: store->get_bucket_entrypoint_info() returned: "
205 << cpp_strerror(-ret
) << dendl
;
209 string buckets_obj_id
;
210 rgw_get_buckets_obj(user_id
, buckets_obj_id
);
212 rgw_raw_obj
obj(store
->get_zone_params().user_uid_pool
, buckets_obj_id
);
213 ret
= store
->cls_user_add_bucket(obj
, new_bucket
);
215 ldout(store
->ctx(), 0) << "ERROR: error adding bucket to directory: "
216 << cpp_strerror(-ret
) << dendl
;
220 if (!update_entrypoint
)
226 ret
= store
->put_bucket_entrypoint_info(tenant_name
, bucket_name
, ep
, false, ot
, real_time(), &attrs
);
232 int r
= rgw_unlink_bucket(store
, user_id
, bucket
.tenant
, bucket
.name
);
234 ldout(store
->ctx(), 0) << "ERROR: failed unlinking bucket on error cleanup: "
235 << cpp_strerror(-r
) << dendl
;
240 int rgw_unlink_bucket(RGWRados
*store
, const rgw_user
& user_id
, const string
& tenant_name
, const string
& bucket_name
, bool update_entrypoint
)
244 string buckets_obj_id
;
245 rgw_get_buckets_obj(user_id
, buckets_obj_id
);
247 cls_user_bucket bucket
;
248 bucket
.name
= bucket_name
;
249 rgw_raw_obj
obj(store
->get_zone_params().user_uid_pool
, buckets_obj_id
);
250 ret
= store
->cls_user_remove_bucket(obj
, bucket
);
252 ldout(store
->ctx(), 0) << "ERROR: error removing bucket from directory: "
253 << cpp_strerror(-ret
)<< dendl
;
256 if (!update_entrypoint
)
259 RGWBucketEntryPoint ep
;
260 RGWObjVersionTracker ot
;
261 map
<string
, bufferlist
> attrs
;
262 RGWObjectCtx
obj_ctx(store
);
263 ret
= store
->get_bucket_entrypoint_info(obj_ctx
, tenant_name
, bucket_name
, ep
, &ot
, NULL
, &attrs
);
272 if (ep
.owner
!= user_id
) {
273 ldout(store
->ctx(), 0) << "bucket entry point user mismatch, can't unlink bucket: " << ep
.owner
<< " != " << user_id
<< dendl
;
278 return store
->put_bucket_entrypoint_info(tenant_name
, bucket_name
, ep
, false, ot
, real_time(), &attrs
);
281 int rgw_bucket_store_info(RGWRados
*store
, const string
& bucket_name
, bufferlist
& bl
, bool exclusive
,
282 map
<string
, bufferlist
> *pattrs
, RGWObjVersionTracker
*objv_tracker
,
284 return store
->meta_mgr
->put_entry(bucket_meta_handler
, bucket_name
, bl
, exclusive
, objv_tracker
, mtime
, pattrs
);
287 int rgw_bucket_instance_store_info(RGWRados
*store
, string
& entry
, bufferlist
& bl
, bool exclusive
,
288 map
<string
, bufferlist
> *pattrs
, RGWObjVersionTracker
*objv_tracker
,
290 return store
->meta_mgr
->put_entry(bucket_instance_meta_handler
, entry
, bl
, exclusive
, objv_tracker
, mtime
, pattrs
);
293 int rgw_bucket_instance_remove_entry(RGWRados
*store
, string
& entry
, RGWObjVersionTracker
*objv_tracker
) {
294 return store
->meta_mgr
->remove_entry(bucket_instance_meta_handler
, entry
, objv_tracker
);
// 'tenant/' is used in bucket instance keys for sync to avoid parsing ambiguity
// with the existing instance[:shard] format. once we parse the shard, the / is
// replaced with a : to match the [tenant:]instance format
//
// Rewrites `key` in place; a key without any '/' is left untouched.
void rgw_bucket_instance_key_to_oid(std::string& key)
{
  const std::string::size_type slash = key.find('/');
  if (slash == std::string::npos) {
    return;  // no tenant prefix present
  }
  // replace tenant/ with tenant:
  key[slash] = ':';
}
// convert bucket instance oids back to the tenant/ format for metadata keys.
// it's safe to parse 'tenant:' only for oids, because they won't contain the
// optional :shard at the end
//
// Rewrites `oid` in place; only the first ':' is ever changed, and only when
// a second ':' follows it.
void rgw_bucket_instance_oid_to_key(std::string& oid)
{
  // find first : (could be tenant:bucket or bucket:instance)
  const std::string::size_type first_colon = oid.find(':');
  if (first_colon == std::string::npos) {
    return;
  }

  // if we find another :, the first one was for tenant
  const bool has_tenant =
      oid.find(':', first_colon + 1) != std::string::npos;
  if (has_tenant) {
    oid[first_colon] = '/';
  }
}
324 int rgw_bucket_parse_bucket_instance(const string
& bucket_instance
, string
*target_bucket_instance
, int *shard_id
)
326 ssize_t pos
= bucket_instance
.rfind(':');
331 string first
= bucket_instance
.substr(0, pos
);
332 string second
= bucket_instance
.substr(pos
+ 1);
334 if (first
.find(':') == string::npos
) {
336 *target_bucket_instance
= bucket_instance
;
340 *target_bucket_instance
= first
;
342 *shard_id
= strict_strtol(second
.c_str(), 10, &err
);
350 // parse key in format: [tenant/]name:instance[:shard_id]
351 int rgw_bucket_parse_bucket_key(CephContext
*cct
, const string
& key
,
352 rgw_bucket
*bucket
, int *shard_id
)
354 boost::string_ref name
{key
};
355 boost::string_ref instance
;
358 auto pos
= name
.find('/');
359 if (pos
!= boost::string_ref::npos
) {
360 auto tenant
= name
.substr(0, pos
);
361 bucket
->tenant
.assign(tenant
.begin(), tenant
.end());
362 name
= name
.substr(pos
+ 1);
365 // split name:instance
366 pos
= name
.find(':');
367 if (pos
!= boost::string_ref::npos
) {
368 instance
= name
.substr(pos
+ 1);
369 name
= name
.substr(0, pos
);
371 bucket
->name
.assign(name
.begin(), name
.end());
373 // split instance:shard
374 pos
= instance
.find(':');
375 if (pos
== boost::string_ref::npos
) {
376 bucket
->bucket_id
.assign(instance
.begin(), instance
.end());
382 auto shard
= instance
.substr(pos
+ 1);
384 auto id
= strict_strtol(shard
.data(), 10, &err
);
386 ldout(cct
, 0) << "ERROR: failed to parse bucket shard '"
387 << instance
.data() << "': " << err
<< dendl
;
392 instance
= instance
.substr(0, pos
);
393 bucket
->bucket_id
.assign(instance
.begin(), instance
.end());
397 int rgw_bucket_set_attrs(RGWRados
*store
, RGWBucketInfo
& bucket_info
,
398 map
<string
, bufferlist
>& attrs
,
399 RGWObjVersionTracker
*objv_tracker
)
401 rgw_bucket
& bucket
= bucket_info
.bucket
;
403 if (!bucket_info
.has_instance_obj
) {
404 /* an old bucket object, need to convert it */
405 RGWObjectCtx
obj_ctx(store
);
406 int ret
= store
->convert_old_bucket_info(obj_ctx
, bucket
.tenant
, bucket
.name
);
408 ldout(store
->ctx(), 0) << "ERROR: failed converting old bucket info: " << ret
<< dendl
;
413 /* we want the bucket instance name without the oid prefix cruft */
414 string key
= bucket
.get_key();
417 ::encode(bucket_info
, bl
);
419 return rgw_bucket_instance_store_info(store
, key
, bl
, false, &attrs
, objv_tracker
, real_time());
422 static void dump_mulipart_index_results(list
<rgw_obj_index_key
>& objs_to_unlink
,
425 // make sure that an appropiately titled header has been opened previously
426 auto oiter
= objs_to_unlink
.begin();
428 f
->open_array_section("invalid_multipart_entries");
430 for ( ; oiter
!= objs_to_unlink
.end(); ++oiter
) {
431 f
->dump_string("object", oiter
->name
);
437 void check_bad_user_bucket_mapping(RGWRados
*store
, const rgw_user
& user_id
,
440 RGWUserBuckets user_buckets
;
441 bool is_truncated
= false;
444 CephContext
*cct
= store
->ctx();
446 size_t max_entries
= cct
->_conf
->rgw_list_buckets_max_chunk
;
449 int ret
= rgw_read_user_buckets(store
, user_id
, user_buckets
, marker
,
450 string(), max_entries
, false,
453 ldout(store
->ctx(), 0) << "failed to read user buckets: "
454 << cpp_strerror(-ret
) << dendl
;
458 map
<string
, RGWBucketEnt
>& buckets
= user_buckets
.get_buckets();
459 for (map
<string
, RGWBucketEnt
>::iterator i
= buckets
.begin();
464 RGWBucketEnt
& bucket_ent
= i
->second
;
465 rgw_bucket
& bucket
= bucket_ent
.bucket
;
467 RGWBucketInfo bucket_info
;
469 RGWObjectCtx
obj_ctx(store
);
470 int r
= store
->get_bucket_info(obj_ctx
, user_id
.tenant
, bucket
.name
, bucket_info
, &mtime
);
472 ldout(store
->ctx(), 0) << "could not get bucket info for bucket=" << bucket
<< dendl
;
476 rgw_bucket
& actual_bucket
= bucket_info
.bucket
;
478 if (actual_bucket
.name
.compare(bucket
.name
) != 0 ||
479 actual_bucket
.tenant
.compare(bucket
.tenant
) != 0 ||
480 actual_bucket
.marker
.compare(bucket
.marker
) != 0 ||
481 actual_bucket
.bucket_id
.compare(bucket
.bucket_id
) != 0) {
482 cout
<< "bucket info mismatch: expected " << actual_bucket
<< " got " << bucket
<< std::endl
;
484 cout
<< "fixing" << std::endl
;
485 r
= rgw_link_bucket(store
, user_id
, actual_bucket
, bucket_info
.creation_time
);
487 cerr
<< "failed to fix bucket: " << cpp_strerror(-r
) << std::endl
;
492 } while (is_truncated
);
495 static bool bucket_object_check_filter(const string
& oid
)
499 return rgw_obj_key::oid_to_key_in_ns(oid
, &key
, ns
);
502 int rgw_remove_object(RGWRados
*store
, RGWBucketInfo
& bucket_info
, rgw_bucket
& bucket
, rgw_obj_key
& key
)
504 RGWObjectCtx
rctx(store
);
506 if (key
.instance
.empty()) {
507 key
.instance
= "null";
510 rgw_obj
obj(bucket
, key
);
512 return store
->delete_obj(rctx
, bucket_info
, obj
, bucket_info
.versioning_status());
515 int rgw_remove_bucket(RGWRados
*store
, rgw_bucket
& bucket
, bool delete_children
)
518 map
<RGWObjCategory
, RGWStorageStats
> stats
;
519 std::vector
<rgw_bucket_dir_entry
> objs
;
520 map
<string
, bool> common_prefixes
;
522 RGWObjectCtx
obj_ctx(store
);
524 string bucket_ver
, master_ver
;
526 ret
= store
->get_bucket_info(obj_ctx
, bucket
.tenant
, bucket
.name
, info
, NULL
);
530 ret
= store
->get_bucket_stats(info
, RGW_NO_SHARD
, &bucket_ver
, &master_ver
, stats
, NULL
);
534 RGWRados::Bucket
target(store
, info
);
535 RGWRados::Bucket::List
list_op(&target
);
536 CephContext
*cct
= store
->ctx();
539 list_op
.params
.list_versions
= true;
544 ret
= list_op
.list_objects(max
, &objs
, &common_prefixes
, NULL
);
548 if (!objs
.empty() && !delete_children
) {
549 lderr(store
->ctx()) << "ERROR: could not remove non-empty bucket " << bucket
.name
<< dendl
;
553 for (const auto& obj
: objs
) {
554 rgw_obj_key
key(obj
.key
);
555 ret
= rgw_remove_object(store
, info
, bucket
, key
);
560 } while (!objs
.empty());
562 string prefix
, delimiter
;
564 ret
= abort_bucket_multiparts(store
, cct
, info
, prefix
, delimiter
);
569 ret
= rgw_bucket_sync_user_stats(store
, bucket
.tenant
, info
);
571 dout(1) << "WARNING: failed sync user stats before bucket delete. ret=" << ret
<< dendl
;
574 RGWObjVersionTracker objv_tracker
;
576 ret
= store
->delete_bucket(info
, objv_tracker
);
578 lderr(store
->ctx()) << "ERROR: could not remove bucket " << bucket
.name
<< dendl
;
582 ret
= rgw_unlink_bucket(store
, info
.owner
, bucket
.tenant
, bucket
.name
, false);
584 lderr(store
->ctx()) << "ERROR: unable to remove user bucket information" << dendl
;
590 static int aio_wait(librados::AioCompletion
*handle
)
592 librados::AioCompletion
*c
= (librados::AioCompletion
*)handle
;
594 int ret
= c
->get_return_value();
599 static int drain_handles(list
<librados::AioCompletion
*>& pending
)
602 while (!pending
.empty()) {
603 librados::AioCompletion
*handle
= pending
.front();
605 int r
= aio_wait(handle
);
613 int rgw_remove_bucket_bypass_gc(RGWRados
*store
, rgw_bucket
& bucket
,
614 int concurrent_max
, bool keep_index_consistent
)
617 map
<RGWObjCategory
, RGWStorageStats
> stats
;
618 std::vector
<rgw_bucket_dir_entry
> objs
;
619 map
<string
, bool> common_prefixes
;
621 RGWObjectCtx
obj_ctx(store
);
622 CephContext
*cct
= store
->ctx();
624 string bucket_ver
, master_ver
;
626 ret
= store
->get_bucket_info(obj_ctx
, bucket
.tenant
, bucket
.name
, info
, NULL
);
630 ret
= store
->get_bucket_stats(info
, RGW_NO_SHARD
, &bucket_ver
, &master_ver
, stats
, NULL
);
634 string prefix
, delimiter
;
636 ret
= abort_bucket_multiparts(store
, cct
, info
, prefix
, delimiter
);
641 RGWRados::Bucket
target(store
, info
);
642 RGWRados::Bucket::List
list_op(&target
);
644 list_op
.params
.list_versions
= true;
646 std::list
<librados::AioCompletion
*> handles
;
649 int max_aio
= concurrent_max
;
650 ret
= list_op
.list_objects(max
, &objs
, &common_prefixes
, NULL
);
654 while (!objs
.empty()) {
655 std::vector
<rgw_bucket_dir_entry
>::iterator it
= objs
.begin();
656 for (; it
!= objs
.end(); ++it
) {
657 RGWObjState
*astate
= NULL
;
658 rgw_obj
obj(bucket
, (*it
).key
);
660 ret
= store
->get_obj_state(&obj_ctx
, info
, obj
, &astate
, false);
661 if (ret
== -ENOENT
) {
662 dout(1) << "WARNING: cannot find obj state for obj " << obj
.get_oid() << dendl
;
666 lderr(store
->ctx()) << "ERROR: get obj state returned with error " << ret
<< dendl
;
670 if (astate
->has_manifest
) {
671 RGWObjManifest
& manifest
= astate
->manifest
;
672 RGWObjManifest::obj_iterator miter
= manifest
.obj_begin();
673 rgw_obj head_obj
= manifest
.get_obj();
674 rgw_raw_obj raw_head_obj
;
675 store
->obj_to_raw(info
.placement_rule
, head_obj
, &raw_head_obj
);
678 for (; miter
!= manifest
.obj_end() && max_aio
--; ++miter
) {
680 ret
= drain_handles(handles
);
682 lderr(store
->ctx()) << "ERROR: could not drain handles as aio completion returned with " << ret
<< dendl
;
685 max_aio
= concurrent_max
;
688 rgw_raw_obj last_obj
= miter
.get_location().get_raw_obj(store
);
689 if (last_obj
== raw_head_obj
) {
690 // have the head obj deleted at the end
694 ret
= store
->delete_raw_obj_aio(last_obj
, handles
);
696 lderr(store
->ctx()) << "ERROR: delete obj aio failed with " << ret
<< dendl
;
699 } // for all shadow objs
701 ret
= store
->delete_obj_aio(head_obj
, info
, astate
, handles
, keep_index_consistent
);
703 lderr(store
->ctx()) << "ERROR: delete obj aio failed with " << ret
<< dendl
;
709 ret
= drain_handles(handles
);
711 lderr(store
->ctx()) << "ERROR: could not drain handles as aio completion returned with " << ret
<< dendl
;
714 max_aio
= concurrent_max
;
716 } // for all RGW objects
719 ret
= list_op
.list_objects(max
, &objs
, &common_prefixes
, NULL
);
724 ret
= drain_handles(handles
);
726 lderr(store
->ctx()) << "ERROR: could not drain handles as aio completion returned with " << ret
<< dendl
;
730 ret
= rgw_bucket_sync_user_stats(store
, bucket
.tenant
, info
);
732 dout(1) << "WARNING: failed sync user stats before bucket delete. ret=" << ret
<< dendl
;
735 RGWObjVersionTracker objv_tracker
;
737 ret
= rgw_bucket_delete_bucket_obj(store
, bucket
.tenant
, bucket
.name
, objv_tracker
);
739 lderr(store
->ctx()) << "ERROR: could not remove bucket " << bucket
.name
<< "with ret as " << ret
<< dendl
;
743 if (!store
->is_syncing_bucket_meta(bucket
)) {
744 RGWObjVersionTracker objv_tracker
;
745 string entry
= bucket
.get_key();
746 ret
= rgw_bucket_instance_remove_entry(store
, entry
, &objv_tracker
);
748 lderr(store
->ctx()) << "ERROR: could not remove bucket instance entry" << bucket
.name
<< "with ret as " << ret
<< dendl
;
753 ret
= rgw_unlink_bucket(store
, info
.owner
, bucket
.tenant
, bucket
.name
, false);
755 lderr(store
->ctx()) << "ERROR: unable to remove user bucket information" << dendl
;
761 int rgw_bucket_delete_bucket_obj(RGWRados
*store
,
762 const string
& tenant_name
,
763 const string
& bucket_name
,
764 RGWObjVersionTracker
& objv_tracker
)
768 rgw_make_bucket_entry_name(tenant_name
, bucket_name
, key
);
769 return store
->meta_mgr
->remove_entry(bucket_meta_handler
, key
, &objv_tracker
);
// Stores `msg` in `*sink` when a sink was supplied and the message is
// non-empty; otherwise leaves the sink as it was.
static void set_err_msg(std::string *sink, std::string msg)
{
  if (sink == nullptr || msg.empty()) {
    return;
  }
  *sink = msg;
}
778 int RGWBucket::init(RGWRados
*storage
, RGWBucketAdminOpState
& op_state
)
785 rgw_user user_id
= op_state
.get_user_id();
786 tenant
= user_id
.tenant
;
787 bucket_name
= op_state
.get_bucket_name();
788 RGWUserBuckets user_buckets
;
789 RGWObjectCtx
obj_ctx(store
);
791 if (bucket_name
.empty() && user_id
.empty())
794 if (!bucket_name
.empty()) {
795 int r
= store
->get_bucket_info(obj_ctx
, tenant
, bucket_name
, bucket_info
, NULL
);
797 ldout(store
->ctx(), 0) << "could not get bucket info for bucket=" << bucket_name
<< dendl
;
801 op_state
.set_bucket(bucket_info
.bucket
);
804 if (!user_id
.empty()) {
805 int r
= rgw_get_user_info_by_uid(store
, user_id
, user_info
);
809 op_state
.display_name
= user_info
.display_name
;
816 int RGWBucket::link(RGWBucketAdminOpState
& op_state
, std::string
*err_msg
)
818 if (!op_state
.is_user_op()) {
819 set_err_msg(err_msg
, "empty user id");
823 string bucket_id
= op_state
.get_bucket_id();
824 if (bucket_id
.empty()) {
825 set_err_msg(err_msg
, "empty bucket instance id");
829 std::string display_name
= op_state
.get_user_display_name();
830 rgw_bucket bucket
= op_state
.get_bucket();
832 const rgw_pool
& root_pool
= store
->get_zone_params().domain_root
;
833 rgw_raw_obj
obj(root_pool
, bucket
.name
);
834 RGWObjVersionTracker objv_tracker
;
836 map
<string
, bufferlist
> attrs
;
837 RGWBucketInfo bucket_info
;
839 string key
= bucket
.name
+ ":" + bucket_id
;
840 RGWObjectCtx
obj_ctx(store
);
841 int r
= store
->get_bucket_instance_info(obj_ctx
, key
, bucket_info
, NULL
, &attrs
);
846 rgw_user user_id
= op_state
.get_user_id();
848 map
<string
, bufferlist
>::iterator aiter
= attrs
.find(RGW_ATTR_ACL
);
849 if (aiter
!= attrs
.end()) {
850 bufferlist aclbl
= aiter
->second
;
851 RGWAccessControlPolicy policy
;
854 bufferlist::iterator iter
= aclbl
.begin();
855 ::decode(policy
, iter
);
856 owner
= policy
.get_owner();
857 } catch (buffer::error
& err
) {
858 set_err_msg(err_msg
, "couldn't decode policy");
862 r
= rgw_unlink_bucket(store
, owner
.get_id(), bucket
.tenant
, bucket
.name
, false);
864 set_err_msg(err_msg
, "could not unlink policy from user " + owner
.get_id().to_str());
868 // now update the user for the bucket...
869 if (display_name
.empty()) {
870 ldout(store
->ctx(), 0) << "WARNING: user " << user_info
.user_id
<< " has no display name set" << dendl
;
872 policy
.create_default(user_info
.user_id
, display_name
);
874 owner
= policy
.get_owner();
875 r
= store
->set_bucket_owner(bucket_info
.bucket
, owner
);
877 set_err_msg(err_msg
, "failed to set bucket owner: " + cpp_strerror(-r
));
881 // ...and encode the acl
883 policy
.encode(aclbl
);
885 r
= store
->system_obj_set_attr(NULL
, obj
, RGW_ATTR_ACL
, aclbl
, &objv_tracker
);
890 RGWAccessControlPolicy policy_instance
;
891 policy_instance
.create_default(user_info
.user_id
, display_name
);
893 policy_instance
.encode(aclbl
);
895 string oid_bucket_instance
= RGW_BUCKET_INSTANCE_MD_PREFIX
+ key
;
896 rgw_raw_obj
obj_bucket_instance(root_pool
, oid_bucket_instance
);
897 r
= store
->system_obj_set_attr(NULL
, obj_bucket_instance
, RGW_ATTR_ACL
, aclbl
, &objv_tracker
);
902 r
= rgw_link_bucket(store
, user_info
.user_id
, bucket_info
.bucket
, real_time());
911 int RGWBucket::unlink(RGWBucketAdminOpState
& op_state
, std::string
*err_msg
)
913 rgw_bucket bucket
= op_state
.get_bucket();
915 if (!op_state
.is_user_op()) {
916 set_err_msg(err_msg
, "could not fetch user or user bucket info");
920 int r
= rgw_unlink_bucket(store
, user_info
.user_id
, bucket
.tenant
, bucket
.name
);
922 set_err_msg(err_msg
, "error unlinking bucket" + cpp_strerror(-r
));
928 int RGWBucket::remove(RGWBucketAdminOpState
& op_state
, bool bypass_gc
,
929 bool keep_index_consistent
, std::string
*err_msg
)
931 bool delete_children
= op_state
.will_delete_children();
932 rgw_bucket bucket
= op_state
.get_bucket();
936 if (delete_children
) {
937 ret
= rgw_remove_bucket_bypass_gc(store
, bucket
, op_state
.get_max_aio(), keep_index_consistent
);
939 set_err_msg(err_msg
, "purge objects should be set for gc to be bypassed");
943 ret
= rgw_remove_bucket(store
, bucket
, delete_children
);
947 set_err_msg(err_msg
, "unable to remove bucket" + cpp_strerror(-ret
));
954 int RGWBucket::remove_object(RGWBucketAdminOpState
& op_state
, std::string
*err_msg
)
956 rgw_bucket bucket
= op_state
.get_bucket();
957 std::string object_name
= op_state
.get_object_name();
959 rgw_obj_key
key(object_name
);
961 int ret
= rgw_remove_object(store
, bucket_info
, bucket
, key
);
963 set_err_msg(err_msg
, "unable to remove object" + cpp_strerror(-ret
));
970 static void dump_bucket_index(map
<string
, rgw_bucket_dir_entry
> result
, Formatter
*f
)
972 map
<string
, rgw_bucket_dir_entry
>::iterator iter
;
973 for (iter
= result
.begin(); iter
!= result
.end(); ++iter
) {
974 f
->dump_string("object", iter
->first
);
978 static void dump_bucket_usage(map
<RGWObjCategory
, RGWStorageStats
>& stats
, Formatter
*formatter
)
980 map
<RGWObjCategory
, RGWStorageStats
>::iterator iter
;
982 formatter
->open_object_section("usage");
983 for (iter
= stats
.begin(); iter
!= stats
.end(); ++iter
) {
984 RGWStorageStats
& s
= iter
->second
;
985 const char *cat_name
= rgw_obj_category_name(iter
->first
);
986 formatter
->open_object_section(cat_name
);
988 formatter
->close_section();
990 formatter
->close_section();
993 static void dump_index_check(map
<RGWObjCategory
, RGWStorageStats
> existing_stats
,
994 map
<RGWObjCategory
, RGWStorageStats
> calculated_stats
,
995 Formatter
*formatter
)
997 formatter
->open_object_section("check_result");
998 formatter
->open_object_section("existing_header");
999 dump_bucket_usage(existing_stats
, formatter
);
1000 formatter
->close_section();
1001 formatter
->open_object_section("calculated_header");
1002 dump_bucket_usage(calculated_stats
, formatter
);
1003 formatter
->close_section();
1004 formatter
->close_section();
1007 int RGWBucket::check_bad_index_multipart(RGWBucketAdminOpState
& op_state
,
1008 list
<rgw_obj_index_key
>& objs_to_unlink
, std::string
*err_msg
)
1010 bool fix_index
= op_state
.will_fix_index();
1011 rgw_bucket bucket
= op_state
.get_bucket();
1015 map
<string
, bool> common_prefixes
;
1018 map
<string
, bool> meta_objs
;
1019 map
<rgw_obj_index_key
, string
> all_objs
;
1021 RGWBucketInfo bucket_info
;
1022 RGWObjectCtx
obj_ctx(store
);
1023 int r
= store
->get_bucket_instance_info(obj_ctx
, bucket
, bucket_info
, nullptr, nullptr);
1025 ldout(store
->ctx(), 0) << "ERROR: " << __func__
<< "(): get_bucket_instance_info(bucket=" << bucket
<< ") returned r=" << r
<< dendl
;
1029 RGWRados::Bucket
target(store
, bucket_info
);
1030 RGWRados::Bucket::List
list_op(&target
);
1032 list_op
.params
.list_versions
= true;
1033 list_op
.params
.ns
= RGW_OBJ_NS_MULTIPART
;
1036 vector
<rgw_bucket_dir_entry
> result
;
1037 int r
= list_op
.list_objects(max
, &result
, &common_prefixes
, &is_truncated
);
1039 set_err_msg(err_msg
, "failed to list objects in bucket=" + bucket
.name
+
1040 " err=" + cpp_strerror(-r
));
1045 vector
<rgw_bucket_dir_entry
>::iterator iter
;
1046 for (iter
= result
.begin(); iter
!= result
.end(); ++iter
) {
1047 rgw_obj_index_key key
= iter
->key
;
1048 rgw_obj
obj(bucket
, key
);
1049 string oid
= obj
.get_oid();
1051 int pos
= oid
.find_last_of('.');
1053 /* obj has no suffix */
1054 all_objs
[key
] = oid
;
1056 /* obj has suffix */
1057 string name
= oid
.substr(0, pos
);
1058 string suffix
= oid
.substr(pos
+ 1);
1060 if (suffix
.compare("meta") == 0) {
1061 meta_objs
[name
] = true;
1063 all_objs
[key
] = name
;
1068 } while (is_truncated
);
1070 for (auto aiter
= all_objs
.begin(); aiter
!= all_objs
.end(); ++aiter
) {
1071 string
& name
= aiter
->second
;
1073 if (meta_objs
.find(name
) == meta_objs
.end()) {
1074 objs_to_unlink
.push_back(aiter
->first
);
1078 if (objs_to_unlink
.empty())
1082 int r
= store
->remove_objs_from_index(bucket_info
, objs_to_unlink
);
1084 set_err_msg(err_msg
, "ERROR: remove_obj_from_index() returned error: " +
1094 int RGWBucket::check_object_index(RGWBucketAdminOpState
& op_state
,
1095 RGWFormatterFlusher
& flusher
,
1096 std::string
*err_msg
)
1099 bool fix_index
= op_state
.will_fix_index();
1101 rgw_bucket bucket
= op_state
.get_bucket();
1104 set_err_msg(err_msg
, "check-objects flag requires fix index enabled");
1108 store
->cls_obj_set_bucket_tag_timeout(bucket_info
, BUCKET_TAG_TIMEOUT
);
1111 rgw_obj_index_key marker
;
1112 bool is_truncated
= true;
1114 Formatter
*formatter
= flusher
.get_formatter();
1115 formatter
->open_object_section("objects");
1116 while (is_truncated
) {
1117 map
<string
, rgw_bucket_dir_entry
> result
;
1119 int r
= store
->cls_bucket_list(bucket_info
, RGW_NO_SHARD
, marker
, prefix
, 1000, true,
1120 result
, &is_truncated
, &marker
,
1121 bucket_object_check_filter
);
1124 } else if (r
< 0 && r
!= -ENOENT
) {
1125 set_err_msg(err_msg
, "ERROR: failed operation r=" + cpp_strerror(-r
));
1129 dump_bucket_index(result
, formatter
);
1134 formatter
->close_section();
1136 store
->cls_obj_set_bucket_tag_timeout(bucket_info
, 0);
1142 int RGWBucket::check_index(RGWBucketAdminOpState
& op_state
,
1143 map
<RGWObjCategory
, RGWStorageStats
>& existing_stats
,
1144 map
<RGWObjCategory
, RGWStorageStats
>& calculated_stats
,
1145 std::string
*err_msg
)
1147 rgw_bucket bucket
= op_state
.get_bucket();
1148 bool fix_index
= op_state
.will_fix_index();
1150 int r
= store
->bucket_check_index(bucket_info
, &existing_stats
, &calculated_stats
);
1152 set_err_msg(err_msg
, "failed to check index error=" + cpp_strerror(-r
));
1157 r
= store
->bucket_rebuild_index(bucket_info
);
1159 set_err_msg(err_msg
, "failed to rebuild index err=" + cpp_strerror(-r
));
1168 int RGWBucket::policy_bl_to_stream(bufferlist
& bl
, ostream
& o
)
1170 RGWAccessControlPolicy_S3
policy(g_ceph_context
);
1171 bufferlist::iterator iter
= bl
.begin();
1173 policy
.decode(iter
);
1174 } catch (buffer::error
& err
) {
1175 dout(0) << "ERROR: caught buffer::error, could not decode policy" << dendl
;
1182 static int policy_decode(RGWRados
*store
, bufferlist
& bl
, RGWAccessControlPolicy
& policy
)
1184 bufferlist::iterator iter
= bl
.begin();
1186 policy
.decode(iter
);
1187 } catch (buffer::error
& err
) {
1188 ldout(store
->ctx(), 0) << "ERROR: caught buffer::error, could not decode policy" << dendl
;
1194 int RGWBucket::get_policy(RGWBucketAdminOpState
& op_state
, RGWAccessControlPolicy
& policy
)
1196 std::string object_name
= op_state
.get_object_name();
1197 rgw_bucket bucket
= op_state
.get_bucket();
1198 RGWObjectCtx
obj_ctx(store
);
1200 RGWBucketInfo bucket_info
;
1201 map
<string
, bufferlist
> attrs
;
1202 int ret
= store
->get_bucket_info(obj_ctx
, bucket
.tenant
, bucket
.name
, bucket_info
, NULL
, &attrs
);
1207 if (!object_name
.empty()) {
1209 rgw_obj
obj(bucket
, object_name
);
1211 RGWRados::Object
op_target(store
, bucket_info
, obj_ctx
, obj
);
1212 RGWRados::Object::Read
rop(&op_target
);
1214 int ret
= rop
.get_attr(RGW_ATTR_ACL
, bl
);
1218 return policy_decode(store
, bl
, policy
);
1221 map
<string
, bufferlist
>::iterator aiter
= attrs
.find(RGW_ATTR_ACL
);
1222 if (aiter
== attrs
.end()) {
1226 return policy_decode(store
, aiter
->second
, policy
);
1230 int RGWBucketAdminOp::get_policy(RGWRados
*store
, RGWBucketAdminOpState
& op_state
,
1231 RGWAccessControlPolicy
& policy
)
1235 int ret
= bucket
.init(store
, op_state
);
1239 ret
= bucket
.get_policy(op_state
, policy
);
1246 /* Wrappers to facilitate RESTful interface */
1249 int RGWBucketAdminOp::get_policy(RGWRados
*store
, RGWBucketAdminOpState
& op_state
,
1250 RGWFormatterFlusher
& flusher
)
1252 RGWAccessControlPolicy
policy(store
->ctx());
1254 int ret
= get_policy(store
, op_state
, policy
);
1258 Formatter
*formatter
= flusher
.get_formatter();
1262 formatter
->open_object_section("policy");
1263 policy
.dump(formatter
);
1264 formatter
->close_section();
1271 int RGWBucketAdminOp::dump_s3_policy(RGWRados
*store
, RGWBucketAdminOpState
& op_state
,
1274 RGWAccessControlPolicy_S3
policy(store
->ctx());
1276 int ret
= get_policy(store
, op_state
, policy
);
// RGWBucketAdminOp::unlink: initialise the bucket helper from store and
// op_state and return the result of its unlink() call.
1285 int RGWBucketAdminOp::unlink(RGWRados
*store
, RGWBucketAdminOpState
& op_state
)
1289 int ret
= bucket
.init(store
, op_state
);
1293 return bucket
.unlink(op_state
);
// RGWBucketAdminOp::link: initialise the bucket helper from store and op_state
// and return the result of its link() call. The err pointer is handed through
// to link() unchanged.
1296 int RGWBucketAdminOp::link(RGWRados
*store
, RGWBucketAdminOpState
& op_state
, string
*err
)
1300 int ret
= bucket
.init(store
, op_state
);
1304 return bucket
.link(op_state
, err
);
// RGWBucketAdminOp::check_index: run the bucket index checks for op_state and
// report them on the formatter owned by flusher.
// In order, the bucket helper is asked for check_bad_index_multipart(),
// check_object_index() and check_index(). The multipart objects to unlink are
// written with dump_mulipart_index_results() and the existing versus
// calculated stats with dump_index_check().
1308 int RGWBucketAdminOp::check_index(RGWRados
*store
, RGWBucketAdminOpState
& op_state
,
1309 RGWFormatterFlusher
& flusher
)
1312 map
<string
, rgw_bucket_dir_entry
> result
;
1313 map
<RGWObjCategory
, RGWStorageStats
> existing_stats
;
1314 map
<RGWObjCategory
, RGWStorageStats
> calculated_stats
;
1315 list
<rgw_obj_index_key
> objs_to_unlink
;
1319 ret
= bucket
.init(store
, op_state
);
1323 Formatter
*formatter
= flusher
.get_formatter();
// Step 1: multipart leftovers in the index.
1326 ret
= bucket
.check_bad_index_multipart(op_state
, objs_to_unlink
);
1330 dump_mulipart_index_results(objs_to_unlink
, formatter
);
// Step 2: per-object index check, which reports through flusher itself.
1333 ret
= bucket
.check_object_index(op_state
, flusher
);
// Step 3: compare stored stats against recalculated stats.
1337 ret
= bucket
.check_index(op_state
, existing_stats
, calculated_stats
);
1341 dump_index_check(existing_stats
, calculated_stats
, formatter
);
// RGWBucketAdminOp::remove_bucket: initialise the bucket helper from store and
// op_state and return the result of its remove() call. bypass_gc and
// keep_index_consistent are forwarded to remove() unchanged.
1347 int RGWBucketAdminOp::remove_bucket(RGWRados
*store
, RGWBucketAdminOpState
& op_state
,
1348 bool bypass_gc
, bool keep_index_consistent
)
1352 int ret
= bucket
.init(store
, op_state
);
1356 return bucket
.remove(op_state
, bypass_gc
, keep_index_consistent
);
// RGWBucketAdminOp::remove_object: initialise the bucket helper from store and
// op_state and return the result of its remove_object() call.
1359 int RGWBucketAdminOp::remove_object(RGWRados
*store
, RGWBucketAdminOpState
& op_state
)
1363 int ret
= bucket
.init(store
, op_state
);
1367 return bucket
.remove_object(op_state
);
// bucket_stats: file-local helper that looks up one bucket by tenant_name and
// bucket_name and writes a stats object section for it to formatter.
// Bucket info comes from store->get_bucket_info() and the per-category stats,
// versions and max marker from store->get_bucket_stats() with RGW_NO_SHARD.
1370 static int bucket_stats(RGWRados
*store
, const std::string
& tenant_name
, std::string
& bucket_name
, Formatter
*formatter
)
1372 RGWBucketInfo bucket_info
;
1373 map
<RGWObjCategory
, RGWStorageStats
> stats
;
1376 RGWObjectCtx
obj_ctx(store
);
1377 int r
= store
->get_bucket_info(obj_ctx
, tenant_name
, bucket_name
, bucket_info
, &mtime
);
1381 rgw_bucket
& bucket
= bucket_info
.bucket
;
1383 string bucket_ver
, master_ver
;
1385 int ret
= store
->get_bucket_stats(bucket_info
, RGW_NO_SHARD
, &bucket_ver
, &master_ver
, stats
, &max_marker
);
// A stats failure is reported on cerr rather than through the formatter.
1387 cerr
<< "error getting bucket stats ret=" << ret
<< std::endl
;
// Everything below is emitted inside a single section named stats.
1393 formatter
->open_object_section("stats");
1394 formatter
->dump_string("bucket", bucket
.name
);
1395 formatter
->dump_string("zonegroup", bucket_info
.zonegroup
);
1396 formatter
->dump_string("placement_rule", bucket_info
.placement_rule
);
1397 ::encode_json("explicit_placement", bucket
.explicit_placement
, formatter
);
1398 formatter
->dump_string("id", bucket
.bucket_id
);
1399 formatter
->dump_string("marker", bucket
.marker
);
1400 formatter
->dump_stream("index_type") << bucket_info
.index_type
;
1401 ::encode_json("owner", bucket_info
.owner
, formatter
);
1402 formatter
->dump_string("ver", bucket_ver
);
1403 formatter
->dump_string("master_ver", master_ver
);
1404 formatter
->dump_stream("mtime") << ut
;
1405 formatter
->dump_string("max_marker", max_marker
);
1406 dump_bucket_usage(stats
, formatter
);
1407 encode_json("bucket_quota", bucket_info
.quota
, formatter
);
1408 formatter
->close_section();
// RGWBucketAdminOp::limit_check: for every user in user_ids, walk the buckets
// of that user in chunks of rgw_list_buckets_max_chunk and report how full
// each bucket index is relative to rgw_safe_max_objects_per_shard.
// For each bucket the object count is summed over all stats categories and
// divided by info.num_shards (or used as is when num_shards is zero). The
// result is classified with an OVER or WARN fill_status string and dumped as a
// bucket section when warn is set or warnings_only is false.
// Output layout: an array named users, one user object per id, each holding an
// array named buckets. The formatter is flushed to cout after each user and
// once more at the end.
1413 int RGWBucketAdminOp::limit_check(RGWRados
*store
,
1414 RGWBucketAdminOpState
& op_state
,
1415 const std::list
<std::string
>& user_ids
,
1416 RGWFormatterFlusher
& flusher
,
1420 const size_t max_entries
=
1421 store
->ctx()->_conf
->rgw_list_buckets_max_chunk
;
1423 const size_t safe_max_objs_per_shard
=
1424 store
->ctx()->_conf
->rgw_safe_max_objects_per_shard
;
1426 uint16_t shard_warn_pct
=
1427 store
->ctx()->_conf
->rgw_shard_warning_threshold
;
// A configured warning threshold above 100 percent is replaced by 90.
1428 if (shard_warn_pct
> 100)
1429 shard_warn_pct
= 90;
1431 Formatter
*formatter
= flusher
.get_formatter();
1434 formatter
->open_array_section("users");
1436 for (const auto& user_id
: user_ids
) {
1437 formatter
->open_object_section("user");
1438 formatter
->dump_string("user_id", user_id
);
1440 formatter
->open_array_section("buckets");
1442 RGWUserBuckets buckets
;
1446 ret
= rgw_read_user_buckets(store
, user_id
, buckets
,
1447 marker
, string(), max_entries
, false,
1452 map
<string
, RGWBucketEnt
>& m_buckets
= buckets
.get_buckets();
1454 for (const auto& iter
: m_buckets
) {
1455 auto& bucket
= iter
.second
.bucket
;
1456 uint32_t num_shards
= 1;
1457 uint64_t num_objects
= 0;
1459 /* need info for num_shards */
1461 RGWObjectCtx
obj_ctx(store
);
1463 marker
= bucket
.name
; /* Casey's location for marker update,
1464 * as we may now not reach the end of
1467 ret
= store
->get_bucket_info(obj_ctx
, bucket
.tenant
, bucket
.name
,
1472 /* need stats for num_entries */
1473 string bucket_ver
, master_ver
;
1474 std::map
<RGWObjCategory
, RGWStorageStats
> stats
;
1475 ret
= store
->get_bucket_stats(info
, RGW_NO_SHARD
, &bucket_ver
,
1476 &master_ver
, stats
, nullptr);
// Total object count is the sum over every stats category.
1481 for (const auto& s
: stats
) {
1482 num_objects
+= s
.second
.num_objects
;
1485 num_shards
= info
.num_shards
;
// A zero shard count is treated as a single shard to avoid dividing by zero.
1486 uint64_t objs_per_shard
=
1487 (num_shards
) ? num_objects
/num_shards
: num_objects
;
// NOTE(review): objs_per_shard (uint64_t) and safe_max_objs_per_shard (size_t)
// are both integers, so the two quotients below are truncated before being
// multiplied by 100. In the OVER branch the quotient is always 0, and in the
// WARN branch it is 0 unless both values are equal, so the percentages lose
// all precision. Confirm, and consider doing the division in floating point.
1491 if (objs_per_shard
> safe_max_objs_per_shard
) {
1493 100 - (safe_max_objs_per_shard
/objs_per_shard
* 100);
1494 ss
<< boost::format("OVER %4f%%") % over
;
1498 objs_per_shard
/ safe_max_objs_per_shard
* 100;
1499 if (fill_pct
>= shard_warn_pct
) {
1500 ss
<< boost::format("WARN %4f%%") % fill_pct
;
// Buckets without a warning are still listed unless warnings_only is set.
1507 if (warn
|| (! warnings_only
)) {
1508 formatter
->open_object_section("bucket");
1509 formatter
->dump_string("bucket", bucket
.name
);
1510 formatter
->dump_string("tenant", bucket
.tenant
);
1511 formatter
->dump_int("num_objects", num_objects
);
1512 formatter
->dump_int("num_shards", num_shards
);
1513 formatter
->dump_int("objects_per_shard", objs_per_shard
);
1514 formatter
->dump_string("fill_status", ss
.str());
1515 formatter
->close_section();
// A chunk shorter than max_entries means this user has no more buckets.
1520 done
= (m_buckets
.size() < max_entries
);
1521 } while (!done
); /* foreach: bucket */
1523 formatter
->close_section();
1524 formatter
->close_section();
1525 formatter
->flush(cout
);
1527 } /* foreach: user_id */
1529 formatter
->close_section();
1530 formatter
->flush(cout
);
1533 } /* RGWBucketAdminOp::limit_check */
// RGWBucketAdminOp::info: list buckets, or dump stats for them, on the
// formatter owned by flusher. Three cases are visible:
//  - op_state.is_user_op(): page through the buckets of op_state.get_user_id()
//    with rgw_read_user_buckets() and emit one entry per bucket inside an
//    array named buckets;
//  - otherwise, a non-empty bucket name: emit bucket_stats() for that bucket;
//  - otherwise: iterate every bucket known to the store through
//    list_buckets_init() and list_buckets_next().
// In the listing cases each entry is produced either by bucket_stats() or by
// dump_string() of the bucket name; show_stats is read from
// op_state.will_fetch_stats(), presumably to select between the two.
1535 int RGWBucketAdminOp::info(RGWRados
*store
, RGWBucketAdminOpState
& op_state
,
1536 RGWFormatterFlusher
& flusher
)
1541 string bucket_name
= op_state
.get_bucket_name();
1543 if (!bucket_name
.empty()) {
1544 ret
= bucket
.init(store
, op_state
);
1549 Formatter
*formatter
= flusher
.get_formatter();
1552 CephContext
*cct
= store
->ctx();
1554 const size_t max_entries
= cct
->_conf
->rgw_list_buckets_max_chunk
;
1556 bool show_stats
= op_state
.will_fetch_stats();
1557 rgw_user user_id
= op_state
.get_user_id();
// Case 1: buckets owned by one user, read in chunks of max_entries.
1558 if (op_state
.is_user_op()) {
1559 formatter
->open_array_section("buckets");
1561 RGWUserBuckets buckets
;
1563 bool is_truncated
= false;
1566 ret
= rgw_read_user_buckets(store
, op_state
.get_user_id(), buckets
,
1567 marker
, string(), max_entries
, false,
1572 map
<string
, RGWBucketEnt
>& m
= buckets
.get_buckets();
1573 map
<string
, RGWBucketEnt
>::iterator iter
;
1575 for (iter
= m
.begin(); iter
!= m
.end(); ++iter
) {
1576 std::string obj_name
= iter
->first
;
1578 bucket_stats(store
, user_id
.tenant
, obj_name
, formatter
);
1580 formatter
->dump_string("bucket", obj_name
);
1586 } while (is_truncated
);
1588 formatter
->close_section();
// Case 2: a single named bucket.
1589 } else if (!bucket_name
.empty()) {
1590 bucket_stats(store
, user_id
.tenant
, bucket_name
, formatter
);
// Case 3: every bucket the store can enumerate.
1592 RGWAccessHandle handle
;
1594 formatter
->open_array_section("buckets");
1595 if (store
->list_buckets_init(&handle
) >= 0) {
1596 rgw_bucket_dir_entry obj
;
1597 while (store
->list_buckets_next(obj
, &handle
) >= 0) {
1599 bucket_stats(store
, user_id
.tenant
, obj
.key
.name
, formatter
);
1601 formatter
->dump_string("bucket", obj
.key
.name
);
1605 formatter
->close_section();
// rgw_data_change::dump: write this change to f as three JSON fields,
// entity_type, key and timestamp. The entity_type value is chosen by a switch
// on the entity_type member and the timestamp is converted to utime_t first.
1614 void rgw_data_change::dump(Formatter
*f
) const
1617 switch (entity_type
) {
1618 case ENTITY_TYPE_BUCKET
:
1624 encode_json("entity_type", type
, f
);
1625 encode_json("key", key
, f
);
1626 utime_t
ut(timestamp
);
1627 encode_json("timestamp", ut
, f
);
// rgw_data_change::decode_json: inverse of dump(). Reads entity_type, key and
// timestamp from obj. The entity type string bucket maps to
// ENTITY_TYPE_BUCKET; the other visible assignment is ENTITY_TYPE_UNKNOWN.
1630 void rgw_data_change::decode_json(JSONObj
*obj
) {
1632 JSONDecoder::decode_json("entity_type", s
, obj
);
1633 if (s
== "bucket") {
1634 entity_type
= ENTITY_TYPE_BUCKET
;
1636 entity_type
= ENTITY_TYPE_UNKNOWN
;
1638 JSONDecoder::decode_json("key", key
, obj
);
// The timestamp is decoded into ut and then converted with to_real_time().
1640 JSONDecoder::decode_json("timestamp", ut
, obj
);
1641 timestamp
= ut
.to_real_time();
// rgw_data_change_log_entry::dump: write log_id, log_timestamp (converted to
// utime_t) and the nested entry to f as JSON fields.
1644 void rgw_data_change_log_entry::dump(Formatter
*f
) const
1646 encode_json("log_id", log_id
, f
);
1647 utime_t
ut(log_timestamp
);
1648 encode_json("log_timestamp", ut
, f
);
1649 encode_json("entry", entry
, f
);
// rgw_data_change_log_entry::decode_json: inverse of dump(). Reads log_id,
// log_timestamp and entry from obj; the timestamp is decoded into ut and then
// converted with to_real_time().
1652 void rgw_data_change_log_entry::decode_json(JSONObj
*obj
) {
1653 JSONDecoder::decode_json("log_id", log_id
, obj
);
1655 JSONDecoder::decode_json("log_timestamp", ut
, obj
);
1656 log_timestamp
= ut
.to_real_time();
1657 JSONDecoder::decode_json("entry", entry
, obj
);
// RGWDataChangesLog::choose_oid: map a bucket shard to a log shard index.
// The index is ceph_str_hash_linux() of the bucket name plus the bucket shard
// id, taken modulo num_shards. A shard id that is not positive adds nothing.
1660 int RGWDataChangesLog::choose_oid(const rgw_bucket_shard
& bs
) {
1661 const string
& name
= bs
.bucket
.name
;
1662 int shard_shift
= (bs
.shard_id
> 0 ? bs
.shard_id
: 0);
1663 uint32_t r
= (ceph_str_hash_linux(name
.c_str(), name
.size()) + shard_shift
) % num_shards
;
// RGWDataChangesLog::renew_entries: re-send a data log entry for every bucket
// shard registered in cur_cycle.
// The registered shards are swapped out of cur_cycle, one cls_log_entry is
// prepared per shard and grouped by the log shard index from choose_oid().
// Each group is written with one store->time_log_add() call, after which the
// expiration of every bucket shard in the group is moved to the time taken
// just before that call plus rgw_data_log_window via update_renewed().
// Nothing is done unless store->need_to_log_data() is true.
1668 int RGWDataChangesLog::renew_entries()
1670 if (!store
->need_to_log_data())
1673 /* we can't keep the bucket name as part of the cls_log_entry, and we need
1674 * it later, so we keep two lists under the map */
1675 map
<int, pair
<list
<rgw_bucket_shard
>, list
<cls_log_entry
> > > m
;
// Take ownership of the pending set so new registrations start a fresh cycle.
1678 map
<rgw_bucket_shard
, bool> entries
;
1679 entries
.swap(cur_cycle
);
1682 map
<rgw_bucket_shard
, bool>::iterator iter
;
// All entries prepared in this pass share the same timestamp ut.
1684 real_time ut
= real_clock::now();
1685 for (iter
= entries
.begin(); iter
!= entries
.end(); ++iter
) {
1686 const rgw_bucket_shard
& bs
= iter
->first
;
1688 int index
= choose_oid(bs
);
1690 cls_log_entry entry
;
1692 rgw_data_change change
;
1694 change
.entity_type
= ENTITY_TYPE_BUCKET
;
1695 change
.key
= bs
.get_key();
1696 change
.timestamp
= ut
;
1697 ::encode(change
, bl
);
1699 store
->time_log_prepare_entry(entry
, ut
, section
, change
.key
, bl
);
1701 m
[index
].first
.push_back(bs
);
1702 m
[index
].second
.emplace_back(std::move(entry
));
// Second pass: one time_log_add() per log shard that has entries.
1705 map
<int, pair
<list
<rgw_bucket_shard
>, list
<cls_log_entry
> > >::iterator miter
;
1706 for (miter
= m
.begin(); miter
!= m
.end(); ++miter
) {
1707 list
<cls_log_entry
>& entries
= miter
->second
.second
;
1709 real_time now
= real_clock::now();
1711 int ret
= store
->time_log_add(oids
[miter
->first
], entries
, NULL
);
1713 /* we don't really need to have a special handling for failed cases here,
1714 * as this is just an optimization. */
1715 lderr(cct
) << "ERROR: store->time_log_add() returned " << ret
<< dendl
;
// The new expiration is measured from now, taken before the write was issued.
1719 real_time expiration
= now
;
1720 expiration
+= make_timespan(cct
->_conf
->rgw_data_log_window
);
1722 list
<rgw_bucket_shard
>& buckets
= miter
->second
.first
;
1723 list
<rgw_bucket_shard
>::iterator liter
;
1724 for (liter
= buckets
.begin(); liter
!= buckets
.end(); ++liter
) {
1725 update_renewed(*liter
, expiration
);
// RGWDataChangesLog::_get_change: return in status the ChangeStatus tracked
// for bs. When changes has no entry for bs a new ChangeStatus is allocated and
// added. The caller must already hold lock; this is asserted.
1732 void RGWDataChangesLog::_get_change(const rgw_bucket_shard
& bs
, ChangeStatusPtr
& status
)
1734 assert(lock
.is_locked());
1735 if (!changes
.find(bs
, status
)) {
1736 status
= ChangeStatusPtr(new ChangeStatus
);
1737 changes
.add(bs
, status
);
// RGWDataChangesLog::register_renew: with lock held, mark bs in cur_cycle so
// that renew_entries() picks it up when it next swaps cur_cycle out.
1741 void RGWDataChangesLog::register_renew(rgw_bucket_shard
& bs
)
1743 Mutex::Locker
l(lock
);
1744 cur_cycle
[bs
] = true;
// RGWDataChangesLog::update_renewed: with lock held, fetch (or create) the
// ChangeStatus for bs and set its cur_expiration to expiration. The update is
// logged at debug level 20.
1747 void RGWDataChangesLog::update_renewed(rgw_bucket_shard
& bs
, real_time
& expiration
)
1749 Mutex::Locker
l(lock
);
1750 ChangeStatusPtr status
;
1751 _get_change(bs
, status
);
1753 ldout(cct
, 20) << "RGWDataChangesLog::update_renewd() bucket_name=" << bs
.bucket
.name
<< " shard_id=" << bs
.shard_id
<< " expiration=" << expiration
<< dendl
;
1754 status
->cur_expiration
= expiration
;
// RGWDataChangesLog::get_log_shard_id: build an rgw_bucket_shard from bucket
// and shard_id and return the log shard index chosen for it by choose_oid().
1757 int RGWDataChangesLog::get_log_shard_id(rgw_bucket
& bucket
, int shard_id
) {
1758 rgw_bucket_shard
bs(bucket
, shard_id
);
1760 return choose_oid(bs
);
// RGWDataChangesLog::add_entry: record that (bucket, shard_id) changed.
// Nothing is done unless store->need_to_log_data() is true. Otherwise the
// shard is marked modified and its ChangeStatus is consulted under
// status->lock:
//  - if now is still before cur_expiration the entry was sent recently and the
//    function unlocks without sending;
//  - if status->pending is set, a reference is taken on the existing cond, the
//    lock is dropped and the caller waits on that cond (presumably for the
//    send already in flight; confirm against the elided lines);
//  - otherwise a new RefCountedCond is installed, pending is set, and a
//    rgw_data_change is encoded and written with store->time_log_add(). The
//    write is repeated while it succeeds but the clock has already passed
//    expiration.
// After sending, pending is cleared, cur_expiration becomes cur_sent plus
// rgw_data_log_window, and status->cond is reset to NULL.
1763 int RGWDataChangesLog::add_entry(rgw_bucket
& bucket
, int shard_id
) {
1764 if (!store
->need_to_log_data())
1767 rgw_bucket_shard
bs(bucket
, shard_id
);
1769 int index
= choose_oid(bs
);
1770 mark_modified(index
, bs
);
1774 ChangeStatusPtr status
;
1775 _get_change(bs
, status
);
1779 real_time now
= real_clock::now();
1781 status
->lock
->Lock();
1783 ldout(cct
, 20) << "RGWDataChangesLog::add_entry() bucket.name=" << bucket
.name
<< " shard_id=" << shard_id
<< " now=" << now
<< " cur_expiration=" << status
->cur_expiration
<< dendl
;
1785 if (now
< status
->cur_expiration
) {
1786 /* no need to send, recently completed */
1787 status
->lock
->Unlock();
1793 RefCountedCond
*cond
;
// Pending path: hold a reference on the shared cond before dropping the lock.
1795 if (status
->pending
) {
1796 cond
= status
->cond
;
1800 status
->cond
->get();
1801 status
->lock
->Unlock();
1803 int ret
= cond
->wait();
// Sender path: publish a fresh cond and mark the status as pending.
1811 status
->cond
= new RefCountedCond
;
1812 status
->pending
= true;
1814 string
& oid
= oids
[index
];
1815 real_time expiration
;
1820 status
->cur_sent
= now
;
1823 expiration
+= ceph::make_timespan(cct
->_conf
->rgw_data_log_window
);
// The status lock is released while the log write is in progress.
1825 status
->lock
->Unlock();
1828 rgw_data_change change
;
1829 change
.entity_type
= ENTITY_TYPE_BUCKET
;
1830 change
.key
= bs
.get_key();
1831 change
.timestamp
= now
;
1832 ::encode(change
, bl
);
1835 ldout(cct
, 20) << "RGWDataChangesLog::add_entry() sending update with now=" << now
<< " cur_expiration=" << expiration
<< dendl
;
1837 ret
= store
->time_log_add(oid
, now
, section
, change
.key
, bl
);
1839 now
= real_clock::now();
1841 status
->lock
->Lock();
1843 } while (!ret
&& real_clock::now() > expiration
);
1845 cond
= status
->cond
;
1847 status
->pending
= false;
1848 status
->cur_expiration
= status
->cur_sent
; /* time of when operation started, not completed */
1849 status
->cur_expiration
+= make_timespan(cct
->_conf
->rgw_data_log_window
);
1850 status
->cond
= NULL
;
1851 status
->lock
->Unlock();
// RGWDataChangesLog::list_entries (single shard): read up to max_entries raw
// log entries for one log shard between start_time and end_time with
// store->time_log_list(), decode each payload into a
// rgw_data_change_log_entry and append it to entries.
// A shard index at or above num_shards is rejected before oids[] is indexed.
// A payload that fails to decode is reported through lderr.
1859 int RGWDataChangesLog::list_entries(int shard
, const real_time
& start_time
, const real_time
& end_time
, int max_entries
,
1860 list
<rgw_data_change_log_entry
>& entries
,
1861 const string
& marker
,
1864 if (shard
>= num_shards
)
1867 list
<cls_log_entry
> log_entries
;
1869 int ret
= store
->time_log_list(oids
[shard
], start_time
, end_time
,
1870 max_entries
, log_entries
, marker
,
1871 out_marker
, truncated
);
1875 list
<cls_log_entry
>::iterator iter
;
1876 for (iter
= log_entries
.begin(); iter
!= log_entries
.end(); ++iter
) {
1877 rgw_data_change_log_entry log_entry
;
// The log id and timestamp come from the cls entry, the rest from its payload.
1878 log_entry
.log_id
= iter
->id
;
1879 real_time rt
= iter
->timestamp
.to_real_time();
1880 log_entry
.log_timestamp
= rt
;
1881 bufferlist::iterator liter
= iter
->data
.begin();
1883 ::decode(log_entry
.entry
, liter
);
1884 } catch (buffer::error
& err
) {
1885 lderr(cct
) << "ERROR: failed to decode data changes log entry" << dendl
;
1888 entries
.push_back(log_entry
);
// RGWDataChangesLog::list_entries (all shards): starting at marker.shard, call
// the single-shard overload for successive shards until entries holds
// max_entries items or the shards run out. Each call asks only for the
// remaining number of entries, and moving on to the next shard clears
// marker.marker. A shard that returns -ENOENT gets special handling.
// *ptruncated is set to whether marker.shard is still below num_shards.
1894 int RGWDataChangesLog::list_entries(const real_time
& start_time
, const real_time
& end_time
, int max_entries
,
1895 list
<rgw_data_change_log_entry
>& entries
, LogMarker
& marker
, bool *ptruncated
) {
1899 for (; marker
.shard
< num_shards
&& (int)entries
.size() < max_entries
;
1900 marker
.shard
++, marker
.marker
.clear()) {
1901 int ret
= list_entries(marker
.shard
, start_time
, end_time
, max_entries
- entries
.size(), entries
,
1902 marker
.marker
, NULL
, &truncated
);
1903 if (ret
== -ENOENT
) {
1915 *ptruncated
= (marker
.shard
< num_shards
);
// RGWDataChangesLog::get_info: read the log header of one shard with
// store->time_log_info() and copy max_marker and max_time into info as marker
// and last_update.
// A shard_id at or above num_shards is rejected before oids[] is indexed.
// -ENOENT from time_log_info() is excluded from the error check.
1920 int RGWDataChangesLog::get_info(int shard_id
, RGWDataChangesLogInfo
*info
)
1922 if (shard_id
>= num_shards
)
1925 string oid
= oids
[shard_id
];
1927 cls_log_header header
;
1929 int ret
= store
->time_log_info(oid
, &header
);
1930 if ((ret
< 0) && (ret
!= -ENOENT
))
1933 info
->marker
= header
.max_marker
;
1934 info
->last_update
= header
.max_time
.to_real_time();
1939 int RGWDataChangesLog::trim_entries(int shard_id
, const real_time
& start_time
, const real_time
& end_time
,
1940 const string
& start_marker
, const string
& end_marker
)
1944 if (shard_id
> num_shards
)
1947 ret
= store
->time_log_trim(oids
[shard_id
], start_time
, end_time
, start_marker
, end_marker
);
1949 if (ret
== -ENOENT
|| ret
== -ENODATA
)
// RGWDataChangesLog::trim_entries (all shards): call store->time_log_trim()
// with the same time and marker bounds on every log shard from 0 up to
// num_shards - 1. The -ENOENT and -ENODATA results are singled out for
// separate handling.
1955 int RGWDataChangesLog::trim_entries(const real_time
& start_time
, const real_time
& end_time
,
1956 const string
& start_marker
, const string
& end_marker
)
1958 for (int shard
= 0; shard
< num_shards
; shard
++) {
1959 int ret
= store
->time_log_trim(oids
[shard
], start_time
, end_time
, start_marker
, end_marker
);
1960 if (ret
== -ENOENT
|| ret
== -ENODATA
) {
// RGWDataChangesLog::going_down: shutdown predicate. It is polled by
// ChangesRenewThread::entry() to decide when the renew loop should stop.
1970 bool RGWDataChangesLog::going_down()
// Destructor: stop the renew thread, wait for it to finish with join(), and
// only then delete it.
1975 RGWDataChangesLog::~RGWDataChangesLog() {
1977 renew_thread
->stop();
1978 renew_thread
->join();
1979 delete renew_thread
;
// ChangesRenewThread::entry: thread body. Repeatedly calls
// log->renew_entries(), logs an error when it reports one, and then waits on
// cond for three quarters of rgw_data_log_window seconds. The loop ends once
// log->going_down() is true, which is checked both before the wait and in the
// loop condition.
1983 void *RGWDataChangesLog::ChangesRenewThread::entry() {
1985 dout(2) << "RGWDataChangesLog::ChangesRenewThread: start" << dendl
;
1986 int r
= log
->renew_entries();
1988 dout(0) << "ERROR: RGWDataChangesLog::renew_entries returned error r=" << r
<< dendl
;
1991 if (log
->going_down())
// Renew at 3/4 of the window; presumably so that entries are refreshed
// before they expire.
1994 int interval
= cct
->_conf
->rgw_data_log_window
* 3 / 4;
1996 cond
.WaitInterval(lock
, utime_t(interval
, 0));
1998 } while (!log
->going_down());
// ChangesRenewThread::stop: runs with the thread lock held through a scoped
// Mutex::Locker.
2003 void RGWDataChangesLog::ChangesRenewThread::stop()
2005 Mutex::Locker
l(lock
);
// RGWDataChangesLog::mark_modified: remember that the key of bs was modified
// under log shard shard_id.
// The lookup is first done under the read side of modified_lock; when the key
// is already present the read lock is released without taking the write lock.
// Otherwise the read lock is dropped and the key is inserted under a write
// lock.
2009 void RGWDataChangesLog::mark_modified(int shard_id
, const rgw_bucket_shard
& bs
)
2011 auto key
= bs
.get_key();
2012 modified_lock
.get_read();
2013 map
<int, set
<string
> >::iterator iter
= modified_shards
.find(shard_id
);
2014 if (iter
!= modified_shards
.end()) {
2015 set
<string
>& keys
= iter
->second
;
2016 if (keys
.find(key
) != keys
.end()) {
2017 modified_lock
.unlock();
2021 modified_lock
.unlock();
// The read lock was released above, so the insert below is done under a
// separately acquired write lock.
2023 RWLock::WLocker
wl(modified_lock
);
2024 modified_shards
[shard_id
].insert(key
);
// RGWDataChangesLog::read_clear_modified: under the write lock, hand the
// accumulated modified_shards map to the caller by swapping it with modified,
// then clear modified_shards (which after the swap holds whatever the caller
// passed in).
2027 void RGWDataChangesLog::read_clear_modified(map
<int, set
<string
> > &modified
)
2029 RWLock::WLocker
wl(modified_lock
);
2030 modified
.swap(modified_shards
);
2031 modified_shards
.clear();
// RGWBucketCompleteInfo::dump: write info and attrs to f as the JSON fields
// bucket_info and attrs.
2034 void RGWBucketCompleteInfo::dump(Formatter
*f
) const {
2035 encode_json("bucket_info", info
, f
);
2036 encode_json("attrs", attrs
, f
);
// RGWBucketCompleteInfo::decode_json: inverse of dump(). Reads the JSON fields
// bucket_info and attrs from obj into info and attrs.
2039 void RGWBucketCompleteInfo::decode_json(JSONObj
*obj
) {
2040 JSONDecoder::decode_json("bucket_info", info
, obj
);
2041 JSONDecoder::decode_json("attrs", attrs
, obj
);
// RGWBucketMetadataHandler: metadata handler for the section returned by
// get_type(), which is bucket. It operates on bucket entrypoint objects.
//  - get: read the entrypoint named by entry and wrap it in a newly allocated
//    RGWBucketEntryMetadataObject.
//  - put: decode an entrypoint from JSON, compare it with any stored one using
//    check_versions(), store it, then link or unlink the bucket for be.owner.
//  - remove: read the entrypoint, unlink the bucket from its owner, then
//    delete the bucket entrypoint object.
//  - get_pool_and_oid: the pool is the domain_root of the zone params.
//  - list_keys_init / list_keys_next / list_keys_complete: enumerate the raw
//    objects in domain_root through a heap allocated list_keys_info handle and
//    filter the result.
2044 class RGWBucketMetadataHandler
: public RGWMetadataHandler
{
2047 string
get_type() override
{ return "bucket"; }
// get: entry is split into tenant and bucket name before the lookup.
2049 int get(RGWRados
*store
, string
& entry
, RGWMetadataObject
**obj
) override
{
2050 RGWObjVersionTracker ot
;
2051 RGWBucketEntryPoint be
;
2054 map
<string
, bufferlist
> attrs
;
2055 RGWObjectCtx
obj_ctx(store
);
2057 string tenant_name
, bucket_name
;
2058 parse_bucket(entry
, &tenant_name
, &bucket_name
);
2059 int ret
= store
->get_bucket_entrypoint_info(obj_ctx
, tenant_name
, bucket_name
, be
, &ot
, &mtime
, &attrs
);
2063 RGWBucketEntryMetadataObject
*mdo
= new RGWBucketEntryMetadataObject(be
, ot
.read_version
, mtime
);
// put: returns STATUS_NO_APPLY when an existing entrypoint fails the version
// check.
2070 int put(RGWRados
*store
, string
& entry
, RGWObjVersionTracker
& objv_tracker
,
2071 real_time mtime
, JSONObj
*obj
, sync_type_t sync_type
) override
{
2072 RGWBucketEntryPoint be
, old_be
;
2074 decode_json_obj(be
, obj
);
2075 } catch (JSONDecoder::err
& e
) {
2079 real_time orig_mtime
;
2080 map
<string
, bufferlist
> attrs
;
2082 RGWObjVersionTracker old_ot
;
2083 RGWObjectCtx
obj_ctx(store
);
2085 string tenant_name
, bucket_name
;
2086 parse_bucket(entry
, &tenant_name
, &bucket_name
);
2087 int ret
= store
->get_bucket_entrypoint_info(obj_ctx
, tenant_name
, bucket_name
, old_be
, &old_ot
, &orig_mtime
, &attrs
);
2088 if (ret
< 0 && ret
!= -ENOENT
)
2091 // are we actually going to perform this put, or is it too old?
2092 if (ret
!= -ENOENT
&&
2093 !check_versions(old_ot
.read_version
, orig_mtime
,
2094 objv_tracker
.write_version
, mtime
, sync_type
)) {
2095 return STATUS_NO_APPLY
;
2098 objv_tracker
.read_version
= old_ot
.read_version
; /* maintain the obj version we just read */
2100 ret
= store
->put_bucket_entrypoint_info(tenant_name
, bucket_name
, be
, false, objv_tracker
, mtime
, &attrs
);
2106 ret
= rgw_link_bucket(store
, be
.owner
, be
.bucket
, be
.creation_time
, false);
2108 ret
= rgw_unlink_bucket(store
, be
.owner
, be
.bucket
.tenant
, be
.bucket
.name
, false);
2114 struct list_keys_info
{
2116 RGWListRawObjsCtx ctx
;
2119 int remove(RGWRados
*store
, string
& entry
, RGWObjVersionTracker
& objv_tracker
) override
{
2120 RGWBucketEntryPoint be
;
2121 RGWObjectCtx
obj_ctx(store
);
2123 string tenant_name
, bucket_name
;
2124 parse_bucket(entry
, &tenant_name
, &bucket_name
);
2125 int ret
= store
->get_bucket_entrypoint_info(obj_ctx
, tenant_name
, bucket_name
, be
, &objv_tracker
, NULL
, NULL
);
2130 * We're unlinking the bucket but we don't want to update the entrypoint here - we're removing
2131 * it immediately and don't want to invalidate our cached objv_version or the bucket obj removal
2132 * will incorrectly fail.
2134 ret
= rgw_unlink_bucket(store
, be
.owner
, tenant_name
, bucket_name
, false);
2136 lderr(store
->ctx()) << "could not unlink bucket=" << entry
<< " owner=" << be
.owner
<< dendl
;
2139 ret
= rgw_bucket_delete_bucket_obj(store
, tenant_name
, bucket_name
, objv_tracker
);
2141 lderr(store
->ctx()) << "could not delete bucket=" << entry
<< dendl
;
2147 void get_pool_and_oid(RGWRados
*store
, const string
& key
, rgw_pool
& pool
, string
& oid
) override
{
2149 pool
= store
->get_zone_params().domain_root
;
2152 int list_keys_init(RGWRados
*store
, void **phandle
) override
2154 list_keys_info
*info
= new list_keys_info
;
2156 info
->store
= store
;
2158 *phandle
= (void *)info
;
2163 int list_keys_next(void *handle
, int max
, list
<string
>& keys
, bool *truncated
) override
{
2164 list_keys_info
*info
= static_cast<list_keys_info
*>(handle
);
2170 RGWRados
*store
= info
->store
;
2172 list
<string
> unfiltered_keys
;
2174 int ret
= store
->list_raw_objects(store
->get_zone_params().domain_root
, no_filter
,
2175 max
, info
->ctx
, unfiltered_keys
, truncated
);
2176 if (ret
< 0 && ret
!= -ENOENT
)
2178 if (ret
== -ENOENT
) {
2184 // now filter out the system entries
2185 list
<string
>::iterator iter
;
2186 for (iter
= unfiltered_keys
.begin(); iter
!= unfiltered_keys
.end(); ++iter
) {
2197 void list_keys_complete(void *handle
) override
{
2198 list_keys_info
*info
= static_cast<list_keys_info
*>(handle
);
// RGWBucketInstanceMetadataHandler: metadata handler for the section returned
// by get_type(), which is bucket.instance. It operates on bucket instance
// info objects.
//  - get: read the instance info and attrs for oid and wrap them in a newly
//    allocated RGWBucketInstanceMetadataObject.
//  - put: decode an RGWBucketCompleteInfo from JSON. For a bucket that does
//    not exist yet, or whose bucket_id differs from the stored one, the
//    name, bucket_id and tenant are taken from the key and the index type
//    from the placement rule. For an existing bucket the stored placement is
//    kept. After the version check the info is stored, the bucket index is
//    initialised and STATUS_APPLIED is returned.
//  - remove: read the instance info (ENOENT is tolerated) and remove the
//    entry with rgw_bucket_instance_remove_entry().
//  - get_pool_and_oid: oid is RGW_BUCKET_INSTANCE_MD_PREFIX plus key, passed
//    through rgw_bucket_instance_key_to_oid(); pool is domain_root.
//  - list_keys_*: enumerate raw objects in domain_root and keep those that
//    start with RGW_BUCKET_INSTANCE_MD_PREFIX, with the prefix stripped and
//    the remainder converted back to key form.
//  - get_hash_key: hash key is the literal prefix bucket: followed by the
//    part of key before its first colon.
2203 class RGWBucketInstanceMetadataHandler
: public RGWMetadataHandler
{
2206 string
get_type() override
{ return "bucket.instance"; }
2208 int get(RGWRados
*store
, string
& oid
, RGWMetadataObject
**obj
) override
{
2209 RGWBucketCompleteInfo bci
;
2212 RGWObjectCtx
obj_ctx(store
);
2214 int ret
= store
->get_bucket_instance_info(obj_ctx
, oid
, bci
.info
, &mtime
, &bci
.attrs
);
2218 RGWBucketInstanceMetadataObject
*mdo
= new RGWBucketInstanceMetadataObject(bci
, bci
.info
.objv_tracker
.read_version
, mtime
);
2225 int put(RGWRados
*store
, string
& entry
, RGWObjVersionTracker
& objv_tracker
,
2226 real_time mtime
, JSONObj
*obj
, sync_type_t sync_type
) override
{
2227 RGWBucketCompleteInfo bci
, old_bci
;
2229 decode_json_obj(bci
, obj
);
2230 } catch (JSONDecoder::err
& e
) {
2234 real_time orig_mtime
;
2235 RGWObjectCtx
obj_ctx(store
);
2237 int ret
= store
->get_bucket_instance_info(obj_ctx
, entry
, old_bci
.info
,
2238 &orig_mtime
, &old_bci
.attrs
);
// Any result other than ENOENT counts as the instance existing, so a negative
// result with exists set is a real read error.
2239 bool exists
= (ret
!= -ENOENT
);
2240 if (ret
< 0 && exists
)
2243 if (!exists
|| old_bci
.info
.bucket
.bucket_id
!= bci
.info
.bucket
.bucket_id
) {
2244 /* a new bucket, we need to select a new bucket placement for it */
2246 rgw_bucket_instance_oid_to_key(key
);
2249 string bucket_instance
;
2250 parse_bucket(key
, &tenant_name
, &bucket_name
, &bucket_instance
);
2252 RGWZonePlacementInfo rule_info
;
2253 bci
.info
.bucket
.name
= bucket_name
;
2254 bci
.info
.bucket
.bucket_id
= bucket_instance
;
2255 bci
.info
.bucket
.tenant
= tenant_name
;
2256 ret
= store
->select_bucket_location_by_rule(bci
.info
.placement_rule
, &rule_info
);
2258 ldout(store
->ctx(), 0) << "ERROR: select_bucket_placement() returned " << ret
<< dendl
;
2261 bci
.info
.index_type
= rule_info
.index_type
;
2263 /* existing bucket, keep its placement */
2264 bci
.info
.bucket
.explicit_placement
= old_bci
.info
.bucket
.explicit_placement
;
2265 bci
.info
.placement_rule
= old_bci
.info
.placement_rule
;
2268 // are we actually going to perform this put, or is it too old?
2270 !check_versions(old_bci
.info
.objv_tracker
.read_version
, orig_mtime
,
2271 objv_tracker
.write_version
, mtime
, sync_type
)) {
2272 objv_tracker
.read_version
= old_bci
.info
.objv_tracker
.read_version
;
2273 return STATUS_NO_APPLY
;
2276 /* record the read version (if any), store the new version */
2277 bci
.info
.objv_tracker
.read_version
= old_bci
.info
.objv_tracker
.read_version
;
2278 bci
.info
.objv_tracker
.write_version
= objv_tracker
.write_version
;
2280 ret
= store
->put_bucket_instance_info(bci
.info
, false, mtime
, &bci
.attrs
);
// Hand the tracker stored in the info back to the caller.
2284 objv_tracker
= bci
.info
.objv_tracker
;
2286 ret
= store
->init_bucket_index(bci
.info
, bci
.info
.num_shards
);
2290 return STATUS_APPLIED
;
2293 struct list_keys_info
{
2295 RGWListRawObjsCtx ctx
;
2298 int remove(RGWRados
*store
, string
& entry
, RGWObjVersionTracker
& objv_tracker
) override
{
2300 RGWObjectCtx
obj_ctx(store
);
2302 int ret
= store
->get_bucket_instance_info(obj_ctx
, entry
, info
, NULL
, NULL
);
2303 if (ret
< 0 && ret
!= -ENOENT
)
2306 return rgw_bucket_instance_remove_entry(store
, entry
, &info
.objv_tracker
);
2309 void get_pool_and_oid(RGWRados
*store
, const string
& key
, rgw_pool
& pool
, string
& oid
) override
{
2310 oid
= RGW_BUCKET_INSTANCE_MD_PREFIX
+ key
;
2311 rgw_bucket_instance_key_to_oid(oid
);
2312 pool
= store
->get_zone_params().domain_root
;
2315 int list_keys_init(RGWRados
*store
, void **phandle
) override
2317 list_keys_info
*info
= new list_keys_info
;
2319 info
->store
= store
;
2321 *phandle
= (void *)info
;
2326 int list_keys_next(void *handle
, int max
, list
<string
>& keys
, bool *truncated
) override
{
2327 list_keys_info
*info
= static_cast<list_keys_info
*>(handle
);
2333 RGWRados
*store
= info
->store
;
2335 list
<string
> unfiltered_keys
;
2337 int ret
= store
->list_raw_objects(store
->get_zone_params().domain_root
, no_filter
,
2338 max
, info
->ctx
, unfiltered_keys
, truncated
);
2339 if (ret
< 0 && ret
!= -ENOENT
)
2341 if (ret
== -ENOENT
) {
// sizeof counts the terminating NUL of the prefix literal, hence the minus 1.
2347 constexpr int prefix_size
= sizeof(RGW_BUCKET_INSTANCE_MD_PREFIX
) - 1;
2348 // now filter in the relevant entries
2349 list
<string
>::iterator iter
;
2350 for (iter
= unfiltered_keys
.begin(); iter
!= unfiltered_keys
.end(); ++iter
) {
2353 if (k
.compare(0, prefix_size
, RGW_BUCKET_INSTANCE_MD_PREFIX
) == 0) {
2354 auto oid
= k
.substr(prefix_size
);
2355 rgw_bucket_instance_oid_to_key(oid
);
2356 keys
.emplace_back(std::move(oid
));
2363 void list_keys_complete(void *handle
) override
{
2364 list_keys_info
*info
= static_cast<list_keys_info
*>(handle
);
2369 * hash entry for mdlog placement. Use the same hash key we'd have for the bucket entry
2370 * point, so that the log entries end up at the same log shard, so that we process them
2373 void get_hash_key(const string
& section
, const string
& key
, string
& hash_key
) override
{
2375 int pos
= key
.find(':');
2379 k
= key
.substr(0, pos
);
2380 hash_key
= "bucket:" + k
;
// rgw_bucket_init: allocate the two file-level metadata handlers, one for
// bucket entrypoints and one for bucket instances, and register each with the
// metadata manager mm. The handlers are stored in the static pointers
// bucket_meta_handler and bucket_instance_meta_handler.
2384 void rgw_bucket_init(RGWMetadataManager
*mm
)
2386 bucket_meta_handler
= new RGWBucketMetadataHandler
;
2387 mm
->register_handler(bucket_meta_handler
);
2388 bucket_instance_meta_handler
= new RGWBucketInstanceMetadataHandler
;
2389 mm
->register_handler(bucket_instance_meta_handler
);