return 0;
}
+int RGWRados::BucketShard::init(const RGWBucketInfo& bucket_info,
+ const rgw_obj& obj) // per-object overload: opens the bucket index shard for obj; returns 0 or a negative error
+{
+ bucket = bucket_info.bucket;
+ // index_ctx, bucket_obj and shard_id are handed to the call below, presumably as outputs it populates
+ int ret = store->open_bucket_index_shard(bucket_info, index_ctx,
+ obj.get_hash_object(), &bucket_obj, // NOTE(review): the hash object presumably selects the shard - confirm
+ &shard_id);
+ if (ret < 0) {
+ ldout(store->ctx(), 0) << "ERROR: open_bucket_index_shard() returned ret=" << ret << dendl;
+ return ret; // propagate the failure code unchanged
+ }
+ ldout(store->ctx(), 20) << " bucket index object: " << bucket_obj << dendl;
+
+ return 0;
+}
+
int RGWRados::BucketShard::init(const RGWBucketInfo& bucket_info, int sid)
{
bucket = bucket_info.bucket;
src_attrs.erase(RGW_ATTR_COMPRESSION);
src_attrs.erase(RGW_ATTR_MANIFEST); // not interested in original object layout
+
+ // filter out olh attributes
+ auto iter = src_attrs.lower_bound(RGW_ATTR_OLH_PREFIX);
+ while (iter != src_attrs.end()) {
+ if (!boost::algorithm::starts_with(iter->first, RGW_ATTR_OLH_PREFIX)) {
+ break;
+ }
+ iter = src_attrs.erase(iter);
+ }
}
if (plugin && src_attrs.find(RGW_ATTR_CRYPT_MODE) == src_attrs.end()) {
return 0;
}
+// Repair for http://tracker.ceph.com/issues/37792: a multisite sync bug resulted in the OLH head
+// attributes being overwritten by the attributes from another zone, causing link_olh() to fail
+// endlessly due to olh_tag mismatch. This reads the olh entry from the bucket index and, if its tag
+// differs from state->olh_tag, rewrites the OLH_ID_TAG and OLH_INFO xattrs. Returns 0 or a negative error.
+int RGWRados::repair_olh(RGWObjState* state, const RGWBucketInfo& bucket_info,
+ const rgw_obj& obj)
+{
+ // fetch the current olh entry from the bucket index
+ rgw_bucket_olh_entry olh;
+ int r = bi_get_olh(bucket_info, obj, &olh);
+ if (r < 0) {
+ ldout(cct, 0) << "repair_olh failed to read olh entry for " << obj << dendl;
+ return r;
+ }
+ if (olh.tag == state->olh_tag.to_str()) { // mismatch already resolved? then there is nothing to rewrite
+ return 0;
+ }
+
+ ldout(cct, 4) << "repair_olh setting olh_tag=" << olh.tag
+ << " key=" << olh.key << " delete_marker=" << olh.delete_marker << dendl;
+
+ // rewrite OLH_ID_TAG and OLH_INFO from current olh; everything is queued on a single write op
+ ObjectWriteOperation op;
+ // assert this is the same olh tag we think we're fixing
+ bucket_index_guard_olh_op(*state, op);
+ // preserve existing mtime (taken from state->mtime) instead of letting the write set a new one
+ struct timespec mtime_ts = ceph::real_clock::to_timespec(state->mtime);
+ op.mtime2(&mtime_ts);
+ { // OLH_ID_TAG: the raw bytes of the tag recorded in the bucket index
+ bufferlist bl;
+ bl.append(olh.tag.c_str(), olh.tag.size());
+ op.setxattr(RGW_ATTR_OLH_ID_TAG, bl);
+ }
+ { // OLH_INFO: encoded RGWOLHInfo rebuilt from the index entry's key and delete_marker
+ RGWOLHInfo info;
+ info.target = rgw_obj(bucket_info.bucket, olh.key);
+ info.removed = olh.delete_marker;
+ bufferlist bl;
+ encode(info, bl);
+ op.setxattr(RGW_ATTR_OLH_INFO, bl);
+ }
+ rgw_rados_ref ref;
+ r = get_obj_head_ref(bucket_info, obj, &ref); // resolve the ioctx/oid the queued op is applied to
+ if (r < 0) {
+ return r;
+ }
+ r = ref.ioctx.operate(ref.oid, &op); // submit guard, mtime and both setxattrs together
+ if (r < 0) {
+ ldout(cct, 0) << "repair_olh failed to write olh attributes with "
+ << cpp_strerror(r) << dendl;
+ return r;
+ }
+ return 0;
+}
+
int RGWRados::bucket_index_trim_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance, uint64_t ver)
{
rgw_rados_ref ref;
op.cmpxattr(RGW_ATTR_OLH_ID_TAG, CEPH_OSD_CMPXATTR_OP_EQ, olh_tag);
op.cmpxattr(RGW_ATTR_OLH_VER, CEPH_OSD_CMPXATTR_OP_GT, last_ver);
+ bufferlist ver_bl;
+ string last_ver_s = to_string(last_ver);
+ ver_bl.append(last_ver_s.c_str(), last_ver_s.size());
+ op.setxattr(RGW_ATTR_OLH_VER, ver_bl);
+
struct timespec mtime_ts = real_clock::to_timespec(state.mtime);
op.mtime2(&mtime_ts);
ObjectWriteOperation rm_op;
rm_op.cmpxattr(RGW_ATTR_OLH_ID_TAG, CEPH_OSD_CMPXATTR_OP_EQ, olh_tag);
- rm_op.cmpxattr(RGW_ATTR_OLH_VER, CEPH_OSD_CMPXATTR_OP_GT, last_ver);
+ rm_op.cmpxattr(RGW_ATTR_OLH_VER, CEPH_OSD_CMPXATTR_OP_EQ, last_ver);
cls_obj_check_prefix_exist(rm_op, RGW_ATTR_OLH_PENDING_PREFIX, true); /* fail if found one of these, pending modification */
rm_op.remove();
if (ret < 0) {
ldout(cct, 20) << "bucket_index_link_olh() target_obj=" << target_obj << " delete_marker=" << (int)delete_marker << " returned " << ret << dendl;
if (ret == -ECANCELED) {
+ // the bucket index rejected the link_olh() due to olh tag mismatch;
+ // attempt to reconstruct olh head attributes based on the bucket index
+ int r2 = repair_olh(state, bucket_info, olh_obj);
+ if (r2 < 0 && r2 != -ECANCELED) {
+ return r2;
+ }
continue;
}
return ret;
int RGWRados::get_bucket_stats(RGWBucketInfo& bucket_info, int shard_id, string *bucket_ver, string *master_ver,
map<RGWObjCategory, RGWStorageStats>& stats, string *max_marker, bool *syncstopped)
{
- map<string, rgw_bucket_dir_header> headers;
+ vector<rgw_bucket_dir_header> headers;
map<int, string> bucket_instance_ids;
int r = cls_bucket_head(bucket_info, shard_id, headers, &bucket_instance_ids);
if (r < 0) {
assert(headers.size() == bucket_instance_ids.size());
- map<string, rgw_bucket_dir_header>::iterator iter = headers.begin();
+ auto iter = headers.begin();
map<int, string>::iterator viter = bucket_instance_ids.begin();
BucketIndexShardsManager ver_mgr;
BucketIndexShardsManager master_ver_mgr;
BucketIndexShardsManager marker_mgr;
char buf[64];
for(; iter != headers.end(); ++iter, ++viter) {
- accumulate_raw_stats(iter->second, stats);
- snprintf(buf, sizeof(buf), "%lu", (unsigned long)iter->second.ver);
+ accumulate_raw_stats(*iter, stats);
+ snprintf(buf, sizeof(buf), "%lu", (unsigned long)iter->ver);
ver_mgr.add(viter->first, string(buf));
- snprintf(buf, sizeof(buf), "%lu", (unsigned long)iter->second.master_ver);
+ snprintf(buf, sizeof(buf), "%lu", (unsigned long)iter->master_ver);
master_ver_mgr.add(viter->first, string(buf));
if (shard_id >= 0) {
- *max_marker = iter->second.max_marker;
+ *max_marker = iter->max_marker;
} else {
- marker_mgr.add(viter->first, iter->second.max_marker);
+ marker_mgr.add(viter->first, iter->max_marker);
}
if (syncstopped != NULL)
- *syncstopped = iter->second.syncstopped;
+ *syncstopped = iter->syncstopped;
}
ver_mgr.to_string(bucket_ver);
master_ver_mgr.to_string(master_ver);
int RGWRados::get_bi_log_status(RGWBucketInfo& bucket_info, int shard_id,
map<int, string>& markers)
{
- map<string, rgw_bucket_dir_header> headers;
+ vector<rgw_bucket_dir_header> headers;
map<int, string> bucket_instance_ids;
int r = cls_bucket_head(bucket_info, shard_id, headers, &bucket_instance_ids);
if (r < 0)
assert(headers.size() == bucket_instance_ids.size());
- map<string, rgw_bucket_dir_header>::iterator iter = headers.begin();
+ auto iter = headers.begin();
map<int, string>::iterator viter = bucket_instance_ids.begin();
for(; iter != headers.end(); ++iter, ++viter) {
if (shard_id >= 0) {
- markers[shard_id] = iter->second.max_marker;
+ markers[shard_id] = iter->max_marker;
} else {
- markers[viter->first] = iter->second.max_marker;
+ markers[viter->first] = iter->max_marker;
}
}
return 0;
ent.size = 0;
ent.size_rounded = 0;
- map<string, rgw_bucket_dir_header> headers;
+ vector<rgw_bucket_dir_header> headers;
RGWBucketInfo bucket_info;
int ret = get_bucket_instance_info(obj_ctx, bucket, bucket_info, NULL, NULL);
if (r < 0)
return r;
- map<string, rgw_bucket_dir_header>::iterator hiter = headers.begin();
+ auto hiter = headers.begin();
for (; hiter != headers.end(); ++hiter) {
RGWObjCategory category = main_category;
- map<uint8_t, struct rgw_bucket_category_stats>::iterator iter = (hiter->second.stats).find((uint8_t)category);
- if (iter != hiter->second.stats.end()) {
+ map<uint8_t, struct rgw_bucket_category_stats>::iterator iter = (hiter->stats).find((uint8_t)category);
+ if (iter != hiter->stats.end()) {
struct rgw_bucket_category_stats& stats = iter->second;
ent.count += stats.num_entries;
ent.size += stats.total_size;
return CLSRGWIssueBucketBILogStop(index_ctx, bucket_objs, cct->_conf->rgw_bucket_index_max_aio)();
}
-int RGWRados::bi_get_instance(const RGWBucketInfo& bucket_info, rgw_obj& obj, rgw_bucket_dir_entry *dirent)
+int RGWRados::bi_get_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj,
+ rgw_bucket_dir_entry *dirent) // decodes the InstanceIdx bucket index entry for obj into *dirent; returns 0 or a negative error
{
- rgw_rados_ref ref;
- int r = get_obj_head_ref(bucket_info, obj, &ref);
+ rgw_cls_bi_entry bi_entry;
+ int r = bi_get(bucket_info, obj, InstanceIdx, &bi_entry); // raw entry; its data is decoded below
+ if (r < 0 && r != -ENOENT) { // -ENOENT is still returned below, just without an error log
+ ldout(cct, 0) << "ERROR: bi_get() returned r=" << r << dendl;
+ }
if (r < 0) {
return r;
}
+ bufferlist::iterator iter = bi_entry.data.begin();
+ try {
+ ::decode(*dirent, iter);
+ } catch (buffer::error& err) { // a decode failure is reported to the caller as -EIO
+ ldout(cct, 0) << "ERROR: failed to decode bi_entry()" << dendl;
+ return -EIO;
+ }
+
+ return 0;
+}
+int RGWRados::bi_get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj,
+ rgw_bucket_olh_entry *olh)
+{
rgw_cls_bi_entry bi_entry;
- r = bi_get(obj.bucket, obj, InstanceIdx, &bi_entry);
+ int r = bi_get(bucket_info, obj, OLHIdx, &bi_entry);
if (r < 0 && r != -ENOENT) {
ldout(cct, 0) << "ERROR: bi_get() returned r=" << r << dendl;
}
if (r < 0) {
return r;
}
- bufferlist::iterator iter = bi_entry.data.begin();
+ auto iter = bi_entry.data.begin();
try {
- ::decode(*dirent, iter);
+ decode(*olh, iter);
} catch (buffer::error& err) {
ldout(cct, 0) << "ERROR: failed to decode bi_entry()" << dendl;
return -EIO;
return 0;
}
-int RGWRados::bi_get(rgw_bucket& bucket, rgw_obj& obj, BIIndexType index_type, rgw_cls_bi_entry *entry)
+int RGWRados::bi_get(const RGWBucketInfo& bucket_info, const rgw_obj& obj,
+ BIIndexType index_type, rgw_cls_bi_entry *entry)
{
BucketShard bs(this);
- int ret = bs.init(bucket, obj, nullptr /* no RGWBucketInfo */);
+ int ret = bs.init(bucket_info, obj);
if (ret < 0) {
ldout(cct, 5) << "bs.init() returned ret=" << ret << dendl;
return ret;
cls_rgw_obj_key key(obj.key.get_index_key_name(), obj.key.instance);
- ret = cls_rgw_bi_get(bs.index_ctx, bs.bucket_obj, index_type, key, entry);
- if (ret < 0)
- return ret;
-
- return 0;
+ return cls_rgw_bi_get(bs.index_ctx, bs.bucket_obj, index_type, key, entry);
}
void RGWRados::bi_put(ObjectWriteOperation& op, BucketShard& bs, rgw_cls_bi_entry& entry)
return 0;
}
-int RGWRados::cls_bucket_head(const RGWBucketInfo& bucket_info, int shard_id, map<string, struct rgw_bucket_dir_header>& headers, map<int, string> *bucket_instance_ids)
+int RGWRados::cls_bucket_head(const RGWBucketInfo& bucket_info, int shard_id, vector<rgw_bucket_dir_header>& headers, map<int, string> *bucket_instance_ids)
{
librados::IoCtx index_ctx;
map<int, string> oids;
map<int, struct rgw_cls_list_ret>::iterator iter = list_results.begin();
for(; iter != list_results.end(); ++iter) {
- headers[oids[iter->first]] = iter->second.dir.header;
+ headers.push_back(std::move(iter->second.dir.header));
}
return 0;
}
int RGWRados::cls_user_sync_bucket_stats(rgw_raw_obj& user_obj, const RGWBucketInfo& bucket_info)
{
- map<string, struct rgw_bucket_dir_header> headers;
+ vector<rgw_bucket_dir_header> headers;
int r = cls_bucket_head(bucket_info, RGW_NO_SHARD, headers);
if (r < 0) {
ldout(cct, 20) << "cls_bucket_header() returned " << r << dendl;
bucket_info.bucket.convert(&entry.bucket);
for (const auto& hiter : headers) {
- for (const auto& iter : hiter.second.stats) {
+ for (const auto& iter : hiter.stats) {
const struct rgw_bucket_category_stats& header_stats = iter.second;
entry.size += header_stats.total_size;
entry.size_rounded += header_stats.total_size_rounded;
int RGWRados::cls_user_get_bucket_stats(const rgw_bucket& bucket, cls_user_bucket_entry& entry)
{
- map<string, struct rgw_bucket_dir_header> headers;
+ vector<rgw_bucket_dir_header> headers;
RGWBucketInfo bucket_info;
RGWObjectCtx obj_ctx(this);
int ret = get_bucket_instance_info(obj_ctx, bucket, bucket_info, NULL, NULL);
bucket.convert(&entry.bucket);
for (const auto& hiter : headers) {
- for (const auto& iter : hiter.second.stats) {
+ for (const auto& iter : hiter.stats) {
const struct rgw_bucket_category_stats& header_stats = iter.second;
entry.size += header_stats.total_size;
entry.size_rounded += header_stats.total_size_rounded;