]> git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/rgw_zone.cc
51e2fd3e0e0b89e38874940dca180cddcc405d3d
[ceph.git] / ceph / src / rgw / rgw_zone.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab ft=cpp
3
4 #include "common/errno.h"
5
6 #include "rgw_zone.h"
7 #include "rgw_realm_watcher.h"
8 #include "rgw_meta_sync_status.h"
9 #include "rgw_sync.h"
10
11 #include "services/svc_zone.h"
12 #include "services/svc_sys_obj.h"
13
14 #define dout_subsys ceph_subsys_rgw
15
// Default object names, oid prefixes and pool names used by the zone,
// zonegroup, realm and period code in this file.
namespace rgw_zone_defaults {

// oid prefixes for per-object info
std::string zone_info_oid_prefix{"zone_info."};
std::string region_info_oid_prefix{"region_info."};
std::string zone_group_info_oid_prefix{"zonegroup_info."};
std::string realm_info_oid_prefix{"realms."};
std::string period_info_oid_prefix{"periods."};

// oid prefixes for name -> id lookups
std::string zone_names_oid_prefix{"zone_names."};
std::string realm_names_oid_prefix{"realms_names."};
std::string zonegroup_names_oid_prefix{"zonegroups_names."};

// oids recording which object is the default one
std::string default_region_info_oid{"default.region"};
std::string default_zone_group_info_oid{"default.zonegroup"};
std::string default_realm_info_oid{"default.realm"};

// miscellaneous oids
std::string period_latest_epoch_info_oid{".latest_epoch"};
std::string region_map_oid{"region_map"};
std::string avail_pools{".pools.avail"};

// names given to the implicitly created zonegroup and zone
std::string default_zonegroup_name{"default"};
std::string default_zone_name{"default"};

// root pools used when the corresponding config option is empty
std::string RGW_DEFAULT_ZONE_ROOT_POOL{"rgw.root"};
std::string RGW_DEFAULT_ZONEGROUP_ROOT_POOL{"rgw.root"};
std::string RGW_DEFAULT_REALM_ROOT_POOL{"rgw.root"};
std::string RGW_DEFAULT_PERIOD_ROOT_POOL{"rgw.root"};

// pool name suffixes
std::string default_bucket_index_pool_suffix{"rgw.buckets.index"};
std::string default_storage_extra_pool_suffix{"rgw.buckets.non-ec"};
std::string default_storage_pool_suffix{"rgw.buckets.data"};

}
43
44 using namespace rgw_zone_defaults;
45
46 #define FIRST_EPOCH 1
47
48 void RGWDefaultZoneGroupInfo::dump(Formatter *f) const {
49 encode_json("default_zonegroup", default_zonegroup, f);
50 }
51
52 void RGWDefaultZoneGroupInfo::decode_json(JSONObj *obj) {
53
54 JSONDecoder::decode_json("default_zonegroup", default_zonegroup, obj);
55 /* backward compatability with region */
56 if (default_zonegroup.empty()) {
57 JSONDecoder::decode_json("default_region", default_zonegroup, obj);
58 }
59 }
60
61 rgw_pool RGWZoneGroup::get_pool(CephContext *cct_) const
62 {
63 if (cct_->_conf->rgw_zonegroup_root_pool.empty()) {
64 return rgw_pool(RGW_DEFAULT_ZONEGROUP_ROOT_POOL);
65 }
66
67 return rgw_pool(cct_->_conf->rgw_zonegroup_root_pool);
68 }
69
70 int RGWZoneGroup::create_default(optional_yield y, bool old_format)
71 {
72 name = default_zonegroup_name;
73 api_name = default_zonegroup_name;
74 is_master = true;
75
76 RGWZoneGroupPlacementTarget placement_target;
77 placement_target.name = "default-placement";
78 placement_targets[placement_target.name] = placement_target;
79 default_placement.name = "default-placement";
80
81 RGWZoneParams zone_params(default_zone_name);
82
83 int r = zone_params.init(cct, sysobj_svc, y, false);
84 if (r < 0) {
85 ldout(cct, 0) << "create_default: error initializing zone params: " << cpp_strerror(-r) << dendl;
86 return r;
87 }
88
89 r = zone_params.create_default(y);
90 if (r < 0 && r != -EEXIST) {
91 ldout(cct, 0) << "create_default: error in create_default zone params: " << cpp_strerror(-r) << dendl;
92 return r;
93 } else if (r == -EEXIST) {
94 ldout(cct, 10) << "zone_params::create_default() returned -EEXIST, we raced with another default zone_params creation" << dendl;
95 zone_params.clear_id();
96 r = zone_params.init(cct, sysobj_svc, y);
97 if (r < 0) {
98 ldout(cct, 0) << "create_default: error in init existing zone params: " << cpp_strerror(-r) << dendl;
99 return r;
100 }
101 ldout(cct, 20) << "zone_params::create_default() " << zone_params.get_name() << " id " << zone_params.get_id()
102 << dendl;
103 }
104
105 RGWZone& default_zone = zones[zone_params.get_id()];
106 default_zone.name = zone_params.get_name();
107 default_zone.id = zone_params.get_id();
108 master_zone = default_zone.id;
109
110 r = create(y);
111 if (r < 0 && r != -EEXIST) {
112 ldout(cct, 0) << "error storing zone group info: " << cpp_strerror(-r) << dendl;
113 return r;
114 }
115
116 if (r == -EEXIST) {
117 ldout(cct, 10) << "create_default() returned -EEXIST, we raced with another zonegroup creation" << dendl;
118 id.clear();
119 r = init(cct, sysobj_svc, y);
120 if (r < 0) {
121 return r;
122 }
123 }
124
125 if (old_format) {
126 name = id;
127 }
128
129 post_process_params(y);
130
131 return 0;
132 }
133
134 const string RGWZoneGroup::get_default_oid(bool old_region_format) const
135 {
136 if (old_region_format) {
137 if (cct->_conf->rgw_default_region_info_oid.empty()) {
138 return default_region_info_oid;
139 }
140 return cct->_conf->rgw_default_region_info_oid;
141 }
142
143 string default_oid = cct->_conf->rgw_default_zonegroup_info_oid;
144
145 if (cct->_conf->rgw_default_zonegroup_info_oid.empty()) {
146 default_oid = default_zone_group_info_oid;
147 }
148
149 default_oid += "." + realm_id;
150
151 return default_oid;
152 }
153
154 const string& RGWZoneGroup::get_info_oid_prefix(bool old_region_format) const
155 {
156 if (old_region_format) {
157 return region_info_oid_prefix;
158 }
159 return zone_group_info_oid_prefix;
160 }
161
162 const string& RGWZoneGroup::get_names_oid_prefix() const
163 {
164 return zonegroup_names_oid_prefix;
165 }
166
167 const string& RGWZoneGroup::get_predefined_name(CephContext *cct) const {
168 return cct->_conf->rgw_zonegroup;
169 }
170
171 int RGWZoneGroup::equals(const string& other_zonegroup) const
172 {
173 if (is_master && other_zonegroup.empty())
174 return true;
175
176 return (id == other_zonegroup);
177 }
178
179 int RGWZoneGroup::add_zone(const RGWZoneParams& zone_params, bool *is_master, bool *read_only,
180 const list<string>& endpoints, const string *ptier_type,
181 bool *psync_from_all, list<string>& sync_from, list<string>& sync_from_rm,
182 string *predirect_zone, std::optional<int> bucket_index_max_shards,
183 RGWSyncModulesManager *sync_mgr,
184 optional_yield y)
185 {
186 auto& zone_id = zone_params.get_id();
187 auto& zone_name = zone_params.get_name();
188
189 // check for duplicate zone name on insert
190 if (!zones.count(zone_id)) {
191 for (const auto& zone : zones) {
192 if (zone.second.name == zone_name) {
193 ldout(cct, 0) << "ERROR: found existing zone name " << zone_name
194 << " (" << zone.first << ") in zonegroup " << get_name() << dendl;
195 return -EEXIST;
196 }
197 }
198 }
199
200 if (is_master) {
201 if (*is_master) {
202 if (!master_zone.empty() && master_zone != zone_id) {
203 ldout(cct, 0) << "NOTICE: overriding master zone: " << master_zone << dendl;
204 }
205 master_zone = zone_id;
206 } else if (master_zone == zone_id) {
207 master_zone.clear();
208 }
209 }
210
211 RGWZone& zone = zones[zone_id];
212 zone.name = zone_name;
213 zone.id = zone_id;
214 if (!endpoints.empty()) {
215 zone.endpoints = endpoints;
216 }
217 if (read_only) {
218 zone.read_only = *read_only;
219 }
220 if (ptier_type) {
221 zone.tier_type = *ptier_type;
222 if (!sync_mgr->get_module(*ptier_type, nullptr)) {
223 ldout(cct, 0) << "ERROR: could not found sync module: " << *ptier_type
224 << ", valid sync modules: "
225 << sync_mgr->get_registered_module_names()
226 << dendl;
227 return -ENOENT;
228 }
229 }
230
231 if (psync_from_all) {
232 zone.sync_from_all = *psync_from_all;
233 }
234
235 if (predirect_zone) {
236 zone.redirect_zone = *predirect_zone;
237 }
238
239 if (bucket_index_max_shards) {
240 zone.bucket_index_max_shards = *bucket_index_max_shards;
241 }
242
243 for (auto add : sync_from) {
244 zone.sync_from.insert(add);
245 }
246
247 for (auto rm : sync_from_rm) {
248 zone.sync_from.erase(rm);
249 }
250
251 post_process_params(y);
252
253 return update(y);
254 }
255
256
257 int RGWZoneGroup::rename_zone(const RGWZoneParams& zone_params,
258 optional_yield y)
259 {
260 RGWZone& zone = zones[zone_params.get_id()];
261 zone.name = zone_params.get_name();
262
263 return update(y);
264 }
265
266 void RGWZoneGroup::post_process_params(optional_yield y)
267 {
268 bool log_data = zones.size() > 1;
269
270 if (master_zone.empty()) {
271 auto iter = zones.begin();
272 if (iter != zones.end()) {
273 master_zone = iter->first;
274 }
275 }
276
277 for (auto& item : zones) {
278 RGWZone& zone = item.second;
279 zone.log_data = log_data;
280
281 RGWZoneParams zone_params(zone.id, zone.name);
282 int ret = zone_params.init(cct, sysobj_svc, y);
283 if (ret < 0) {
284 ldout(cct, 0) << "WARNING: could not read zone params for zone id=" << zone.id << " name=" << zone.name << dendl;
285 continue;
286 }
287
288 for (auto& pitem : zone_params.placement_pools) {
289 const string& placement_name = pitem.first;
290 if (placement_targets.find(placement_name) == placement_targets.end()) {
291 RGWZoneGroupPlacementTarget placement_target;
292 placement_target.name = placement_name;
293 placement_targets[placement_name] = placement_target;
294 }
295 }
296 }
297
298 if (default_placement.empty() && !placement_targets.empty()) {
299 default_placement.init(placement_targets.begin()->first, RGW_STORAGE_CLASS_STANDARD);
300 }
301 }
302
303 int RGWZoneGroup::remove_zone(const std::string& zone_id, optional_yield y)
304 {
305 auto iter = zones.find(zone_id);
306 if (iter == zones.end()) {
307 ldout(cct, 0) << "zone id " << zone_id << " is not a part of zonegroup "
308 << name << dendl;
309 return -ENOENT;
310 }
311
312 zones.erase(iter);
313
314 post_process_params(y);
315
316 return update(y);
317 }
318
319 int RGWZoneGroup::read_default_id(string& default_id, optional_yield y,
320 bool old_format)
321 {
322 if (realm_id.empty()) {
323 /* try using default realm */
324 RGWRealm realm;
325 int ret = realm.init(cct, sysobj_svc, y);
326 // no default realm exist
327 if (ret < 0) {
328 return read_id(default_zonegroup_name, default_id, y);
329 }
330 realm_id = realm.get_id();
331 }
332
333 return RGWSystemMetaObj::read_default_id(default_id, y, old_format);
334 }
335
336 int RGWZoneGroup::set_as_default(optional_yield y, bool exclusive)
337 {
338 if (realm_id.empty()) {
339 /* try using default realm */
340 RGWRealm realm;
341 int ret = realm.init(cct, sysobj_svc, y);
342 if (ret < 0) {
343 ldout(cct, 10) << "could not read realm id: " << cpp_strerror(-ret) << dendl;
344 return -EINVAL;
345 }
346 realm_id = realm.get_id();
347 }
348
349 return RGWSystemMetaObj::set_as_default(y, exclusive);
350 }
351
352 void RGWSystemMetaObj::reinit_instance(CephContext *_cct, RGWSI_SysObj *_sysobj_svc)
353 {
354 cct = _cct;
355 sysobj_svc = _sysobj_svc;
356 zone_svc = _sysobj_svc->get_zone_svc();
357 }
358
359 int RGWSystemMetaObj::init(CephContext *_cct, RGWSI_SysObj *_sysobj_svc,
360 optional_yield y,
361 bool setup_obj, bool old_format)
362 {
363 reinit_instance(_cct, _sysobj_svc);
364
365 if (!setup_obj)
366 return 0;
367
368 if (old_format && id.empty()) {
369 id = name;
370 }
371
372 if (id.empty()) {
373 int r;
374 if (name.empty()) {
375 name = get_predefined_name(cct);
376 }
377 if (name.empty()) {
378 r = use_default(y, old_format);
379 if (r < 0) {
380 return r;
381 }
382 } else if (!old_format) {
383 r = read_id(name, id, y);
384 if (r < 0) {
385 if (r != -ENOENT) {
386 ldout(cct, 0) << "error in read_id for object name: " << name << " : " << cpp_strerror(-r) << dendl;
387 }
388 return r;
389 }
390 }
391 }
392
393 return read_info(id, y, old_format);
394 }
395
396 int RGWSystemMetaObj::read_default(RGWDefaultSystemMetaObjInfo& default_info,
397 const string& oid, optional_yield y)
398 {
399 using ceph::decode;
400 auto pool = get_pool(cct);
401 bufferlist bl;
402
403 auto obj_ctx = sysobj_svc->init_obj_ctx();
404 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
405 int ret = sysobj.rop().read(&bl, y);
406 if (ret < 0)
407 return ret;
408
409 try {
410 auto iter = bl.cbegin();
411 decode(default_info, iter);
412 } catch (buffer::error& err) {
413 ldout(cct, 0) << "error decoding data from " << pool << ":" << oid << dendl;
414 return -EIO;
415 }
416
417 return 0;
418 }
419
420 int RGWSystemMetaObj::read_default_id(string& default_id, optional_yield y,
421 bool old_format)
422 {
423 RGWDefaultSystemMetaObjInfo default_info;
424
425 int ret = read_default(default_info, get_default_oid(old_format), y);
426 if (ret < 0) {
427 return ret;
428 }
429
430 default_id = default_info.default_id;
431
432 return 0;
433 }
434
435 int RGWSystemMetaObj::use_default(optional_yield y, bool old_format)
436 {
437 return read_default_id(id, y, old_format);
438 }
439
440 int RGWSystemMetaObj::set_as_default(optional_yield y, bool exclusive)
441 {
442 using ceph::encode;
443 string oid = get_default_oid();
444
445 rgw_pool pool(get_pool(cct));
446 bufferlist bl;
447
448 RGWDefaultSystemMetaObjInfo default_info;
449 default_info.default_id = id;
450
451 encode(default_info, bl);
452
453 auto obj_ctx = sysobj_svc->init_obj_ctx();
454 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
455 int ret = sysobj.wop()
456 .set_exclusive(exclusive)
457 .write(bl, y);
458 if (ret < 0)
459 return ret;
460
461 return 0;
462 }
463
464 int RGWSystemMetaObj::read_id(const string& obj_name, string& object_id,
465 optional_yield y)
466 {
467 using ceph::decode;
468 rgw_pool pool(get_pool(cct));
469 bufferlist bl;
470
471 string oid = get_names_oid_prefix() + obj_name;
472
473 auto obj_ctx = sysobj_svc->init_obj_ctx();
474 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
475 int ret = sysobj.rop().read(&bl, y);
476 if (ret < 0) {
477 return ret;
478 }
479
480 RGWNameToId nameToId;
481 try {
482 auto iter = bl.cbegin();
483 decode(nameToId, iter);
484 } catch (buffer::error& err) {
485 ldout(cct, 0) << "ERROR: failed to decode obj from " << pool << ":" << oid << dendl;
486 return -EIO;
487 }
488 object_id = nameToId.obj_id;
489 return 0;
490 }
491
492 int RGWSystemMetaObj::delete_obj(optional_yield y, bool old_format)
493 {
494 rgw_pool pool(get_pool(cct));
495
496 auto obj_ctx = sysobj_svc->init_obj_ctx();
497
498 /* check to see if obj is the default */
499 RGWDefaultSystemMetaObjInfo default_info;
500 int ret = read_default(default_info, get_default_oid(old_format), y);
501 if (ret < 0 && ret != -ENOENT)
502 return ret;
503 if (default_info.default_id == id || (old_format && default_info.default_id == name)) {
504 string oid = get_default_oid(old_format);
505 rgw_raw_obj default_named_obj(pool, oid);
506 auto sysobj = sysobj_svc->get_obj(obj_ctx, default_named_obj);
507 ret = sysobj.wop().remove(y);
508 if (ret < 0) {
509 ldout(cct, 0) << "Error delete default obj name " << name << ": " << cpp_strerror(-ret) << dendl;
510 return ret;
511 }
512 }
513 if (!old_format) {
514 string oid = get_names_oid_prefix() + name;
515 rgw_raw_obj object_name(pool, oid);
516 auto sysobj = sysobj_svc->get_obj(obj_ctx, object_name);
517 ret = sysobj.wop().remove(y);
518 if (ret < 0) {
519 ldout(cct, 0) << "Error delete obj name " << name << ": " << cpp_strerror(-ret) << dendl;
520 return ret;
521 }
522 }
523
524 string oid = get_info_oid_prefix(old_format);
525 if (old_format) {
526 oid += name;
527 } else {
528 oid += id;
529 }
530
531 rgw_raw_obj object_id(pool, oid);
532 auto sysobj = sysobj_svc->get_obj(obj_ctx, object_id);
533 ret = sysobj.wop().remove(y);
534 if (ret < 0) {
535 ldout(cct, 0) << "Error delete object id " << id << ": " << cpp_strerror(-ret) << dendl;
536 }
537
538 return ret;
539 }
540
541 int RGWSystemMetaObj::store_name(bool exclusive, optional_yield y)
542 {
543 rgw_pool pool(get_pool(cct));
544 string oid = get_names_oid_prefix() + name;
545
546 RGWNameToId nameToId;
547 nameToId.obj_id = id;
548
549 bufferlist bl;
550 using ceph::encode;
551 encode(nameToId, bl);
552 auto obj_ctx = sysobj_svc->init_obj_ctx();
553 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
554 return sysobj.wop()
555 .set_exclusive(exclusive)
556 .write(bl, y);
557 }
558
559 int RGWSystemMetaObj::rename(const string& new_name, optional_yield y)
560 {
561 string new_id;
562 int ret = read_id(new_name, new_id, y);
563 if (!ret) {
564 return -EEXIST;
565 }
566 if (ret < 0 && ret != -ENOENT) {
567 ldout(cct, 0) << "Error read_id " << new_name << ": " << cpp_strerror(-ret) << dendl;
568 return ret;
569 }
570 string old_name = name;
571 name = new_name;
572 ret = update(y);
573 if (ret < 0) {
574 ldout(cct, 0) << "Error storing new obj info " << new_name << ": " << cpp_strerror(-ret) << dendl;
575 return ret;
576 }
577 ret = store_name(true, y);
578 if (ret < 0) {
579 ldout(cct, 0) << "Error storing new name " << new_name << ": " << cpp_strerror(-ret) << dendl;
580 return ret;
581 }
582 /* delete old name */
583 rgw_pool pool(get_pool(cct));
584 string oid = get_names_oid_prefix() + old_name;
585 rgw_raw_obj old_name_obj(pool, oid);
586 auto obj_ctx = sysobj_svc->init_obj_ctx();
587 auto sysobj = sysobj_svc->get_obj(obj_ctx, old_name_obj);
588 ret = sysobj.wop().remove(y);
589 if (ret < 0) {
590 ldout(cct, 0) << "Error delete old obj name " << old_name << ": " << cpp_strerror(-ret) << dendl;
591 return ret;
592 }
593
594 return ret;
595 }
596
597 int RGWSystemMetaObj::read_info(const string& obj_id, optional_yield y,
598 bool old_format)
599 {
600 rgw_pool pool(get_pool(cct));
601
602 bufferlist bl;
603
604 string oid = get_info_oid_prefix(old_format) + obj_id;
605
606 auto obj_ctx = sysobj_svc->init_obj_ctx();
607 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
608 int ret = sysobj.rop().read(&bl, y);
609 if (ret < 0) {
610 ldout(cct, 0) << "failed reading obj info from " << pool << ":" << oid << ": " << cpp_strerror(-ret) << dendl;
611 return ret;
612 }
613 using ceph::decode;
614
615 try {
616 auto iter = bl.cbegin();
617 decode(*this, iter);
618 } catch (buffer::error& err) {
619 ldout(cct, 0) << "ERROR: failed to decode obj from " << pool << ":" << oid << dendl;
620 return -EIO;
621 }
622
623 return 0;
624 }
625
626 int RGWSystemMetaObj::read(optional_yield y)
627 {
628 int ret = read_id(name, id, y);
629 if (ret < 0) {
630 return ret;
631 }
632
633 return read_info(id, y);
634 }
635
636 int RGWSystemMetaObj::create(optional_yield y, bool exclusive)
637 {
638 int ret;
639
640 /* check to see the name is not used */
641 ret = read_id(name, id, y);
642 if (exclusive && ret == 0) {
643 ldout(cct, 10) << "ERROR: name " << name << " already in use for obj id " << id << dendl;
644 return -EEXIST;
645 } else if ( ret < 0 && ret != -ENOENT) {
646 ldout(cct, 0) << "failed reading obj id " << id << ": " << cpp_strerror(-ret) << dendl;
647 return ret;
648 }
649
650 if (id.empty()) {
651 /* create unique id */
652 uuid_d new_uuid;
653 char uuid_str[37];
654 new_uuid.generate_random();
655 new_uuid.print(uuid_str);
656 id = uuid_str;
657 }
658
659 ret = store_info(exclusive, y);
660 if (ret < 0) {
661 ldout(cct, 0) << "ERROR: storing info for " << id << ": " << cpp_strerror(-ret) << dendl;
662 return ret;
663 }
664
665 return store_name(exclusive, y);
666 }
667
668 int RGWSystemMetaObj::store_info(bool exclusive, optional_yield y)
669 {
670 rgw_pool pool(get_pool(cct));
671
672 string oid = get_info_oid_prefix() + id;
673
674 bufferlist bl;
675 using ceph::encode;
676 encode(*this, bl);
677 auto obj_ctx = sysobj_svc->init_obj_ctx();
678 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
679 return sysobj.wop()
680 .set_exclusive(exclusive)
681 .write(bl, y);
682 }
683
684 int RGWSystemMetaObj::write(bool exclusive, optional_yield y)
685 {
686 int ret = store_info(exclusive, y);
687 if (ret < 0) {
688 ldout(cct, 20) << __func__ << "(): store_info() returned ret=" << ret << dendl;
689 return ret;
690 }
691 ret = store_name(exclusive, y);
692 if (ret < 0) {
693 ldout(cct, 20) << __func__ << "(): store_name() returned ret=" << ret << dendl;
694 return ret;
695 }
696 return 0;
697 }
698
699
700 const string& RGWRealm::get_predefined_name(CephContext *cct) const {
701 return cct->_conf->rgw_realm;
702 }
703
704 int RGWRealm::create(optional_yield y, bool exclusive)
705 {
706 int ret = RGWSystemMetaObj::create(y, exclusive);
707 if (ret < 0) {
708 ldout(cct, 0) << "ERROR creating new realm object " << name << ": " << cpp_strerror(-ret) << dendl;
709 return ret;
710 }
711 // create the control object for watch/notify
712 ret = create_control(exclusive, y);
713 if (ret < 0) {
714 ldout(cct, 0) << "ERROR creating control for new realm " << name << ": " << cpp_strerror(-ret) << dendl;
715 return ret;
716 }
717 RGWPeriod period;
718 if (current_period.empty()) {
719 /* create new period for the realm */
720 ret = period.init(cct, sysobj_svc, id, y, name, false);
721 if (ret < 0 ) {
722 return ret;
723 }
724 ret = period.create(y, true);
725 if (ret < 0) {
726 ldout(cct, 0) << "ERROR: creating new period for realm " << name << ": " << cpp_strerror(-ret) << dendl;
727 return ret;
728 }
729 } else {
730 period = RGWPeriod(current_period, 0);
731 int ret = period.init(cct, sysobj_svc, id, y, name);
732 if (ret < 0) {
733 ldout(cct, 0) << "ERROR: failed to init period " << current_period << dendl;
734 return ret;
735 }
736 }
737 ret = set_current_period(period, y);
738 if (ret < 0) {
739 ldout(cct, 0) << "ERROR: failed set current period " << current_period << dendl;
740 return ret;
741 }
742 // try to set as default. may race with another create, so pass exclusive=true
743 // so we don't override an existing default
744 ret = set_as_default(y, true);
745 if (ret < 0 && ret != -EEXIST) {
746 ldout(cct, 0) << "WARNING: failed to set realm as default realm, ret=" << ret << dendl;
747 }
748
749 return 0;
750 }
751
752 int RGWRealm::delete_obj(optional_yield y)
753 {
754 int ret = RGWSystemMetaObj::delete_obj(y);
755 if (ret < 0) {
756 return ret;
757 }
758 return delete_control(y);
759 }
760
761 int RGWRealm::create_control(bool exclusive, optional_yield y)
762 {
763 auto pool = rgw_pool{get_pool(cct)};
764 auto oid = get_control_oid();
765 bufferlist bl;
766 auto obj_ctx = sysobj_svc->init_obj_ctx();
767 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
768 return sysobj.wop()
769 .set_exclusive(exclusive)
770 .write(bl, y);
771 }
772
773 int RGWRealm::delete_control(optional_yield y)
774 {
775 auto pool = rgw_pool{get_pool(cct)};
776 auto obj = rgw_raw_obj{pool, get_control_oid()};
777 auto obj_ctx = sysobj_svc->init_obj_ctx();
778 auto sysobj = sysobj_svc->get_obj(obj_ctx, obj);
779 return sysobj.wop().remove(y);
780 }
781
782 rgw_pool RGWRealm::get_pool(CephContext *cct) const
783 {
784 if (cct->_conf->rgw_realm_root_pool.empty()) {
785 return rgw_pool(RGW_DEFAULT_REALM_ROOT_POOL);
786 }
787 return rgw_pool(cct->_conf->rgw_realm_root_pool);
788 }
789
790 const string RGWRealm::get_default_oid(bool old_format) const
791 {
792 if (cct->_conf->rgw_default_realm_info_oid.empty()) {
793 return default_realm_info_oid;
794 }
795 return cct->_conf->rgw_default_realm_info_oid;
796 }
797
798 const string& RGWRealm::get_names_oid_prefix() const
799 {
800 return realm_names_oid_prefix;
801 }
802
803 const string& RGWRealm::get_info_oid_prefix(bool old_format) const
804 {
805 return realm_info_oid_prefix;
806 }
807
808 int RGWRealm::set_current_period(RGWPeriod& period, optional_yield y)
809 {
810 // update realm epoch to match the period's
811 if (epoch > period.get_realm_epoch()) {
812 ldout(cct, 0) << "ERROR: set_current_period with old realm epoch "
813 << period.get_realm_epoch() << ", current epoch=" << epoch << dendl;
814 return -EINVAL;
815 }
816 if (epoch == period.get_realm_epoch() && current_period != period.get_id()) {
817 ldout(cct, 0) << "ERROR: set_current_period with same realm epoch "
818 << period.get_realm_epoch() << ", but different period id "
819 << period.get_id() << " != " << current_period << dendl;
820 return -EINVAL;
821 }
822
823 epoch = period.get_realm_epoch();
824 current_period = period.get_id();
825
826 int ret = update(y);
827 if (ret < 0) {
828 ldout(cct, 0) << "ERROR: period update: " << cpp_strerror(-ret) << dendl;
829 return ret;
830 }
831
832 ret = period.reflect(y);
833 if (ret < 0) {
834 ldout(cct, 0) << "ERROR: period.reflect(): " << cpp_strerror(-ret) << dendl;
835 return ret;
836 }
837
838 return 0;
839 }
840
841 string RGWRealm::get_control_oid() const
842 {
843 return get_info_oid_prefix() + id + ".control";
844 }
845
846 int RGWRealm::notify_zone(bufferlist& bl, optional_yield y)
847 {
848 rgw_pool pool{get_pool(cct)};
849 auto obj_ctx = sysobj_svc->init_obj_ctx();
850 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, get_control_oid()});
851 int ret = sysobj.wn().notify(bl, 0, nullptr, y);
852 if (ret < 0) {
853 return ret;
854 }
855 return 0;
856 }
857
858 int RGWRealm::notify_new_period(const RGWPeriod& period, optional_yield y)
859 {
860 bufferlist bl;
861 using ceph::encode;
862 // push the period to dependent zonegroups/zones
863 encode(RGWRealmNotify::ZonesNeedPeriod, bl);
864 encode(period, bl);
865 // reload the gateway with the new period
866 encode(RGWRealmNotify::Reload, bl);
867
868 return notify_zone(bl, y);
869 }
870
871 std::string RGWPeriodConfig::get_oid(const std::string& realm_id)
872 {
873 if (realm_id.empty()) {
874 return "period_config.default";
875 }
876 return "period_config." + realm_id;
877 }
878
879 rgw_pool RGWPeriodConfig::get_pool(CephContext *cct)
880 {
881 const auto& pool_name = cct->_conf->rgw_period_root_pool;
882 if (pool_name.empty()) {
883 return {RGW_DEFAULT_PERIOD_ROOT_POOL};
884 }
885 return {pool_name};
886 }
887
888 int RGWPeriodConfig::read(RGWSI_SysObj *sysobj_svc, const std::string& realm_id,
889 optional_yield y)
890 {
891 const auto& pool = get_pool(sysobj_svc->ctx());
892 const auto& oid = get_oid(realm_id);
893 bufferlist bl;
894
895 auto obj_ctx = sysobj_svc->init_obj_ctx();
896 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
897 int ret = sysobj.rop().read(&bl, y);
898 if (ret < 0) {
899 return ret;
900 }
901 using ceph::decode;
902 try {
903 auto iter = bl.cbegin();
904 decode(*this, iter);
905 } catch (buffer::error& err) {
906 return -EIO;
907 }
908 return 0;
909 }
910
911 int RGWPeriodConfig::write(RGWSI_SysObj *sysobj_svc,
912 const std::string& realm_id, optional_yield y)
913 {
914 const auto& pool = get_pool(sysobj_svc->ctx());
915 const auto& oid = get_oid(realm_id);
916 bufferlist bl;
917 using ceph::encode;
918 encode(*this, bl);
919 auto obj_ctx = sysobj_svc->init_obj_ctx();
920 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
921 return sysobj.wop()
922 .set_exclusive(false)
923 .write(bl, y);
924 }
925
926 int RGWPeriod::init(CephContext *_cct, RGWSI_SysObj *_sysobj_svc,
927 const string& period_realm_id, optional_yield y,
928 const string& period_realm_name, bool setup_obj)
929 {
930 cct = _cct;
931 sysobj_svc = _sysobj_svc;
932
933 realm_id = period_realm_id;
934 realm_name = period_realm_name;
935
936 if (!setup_obj)
937 return 0;
938
939 return init(_cct, _sysobj_svc, y, setup_obj);
940 }
941
942
943 int RGWPeriod::init(CephContext *_cct, RGWSI_SysObj *_sysobj_svc,
944 optional_yield y, bool setup_obj)
945 {
946 cct = _cct;
947 sysobj_svc = _sysobj_svc;
948
949 if (!setup_obj)
950 return 0;
951
952 if (id.empty()) {
953 RGWRealm realm(realm_id, realm_name);
954 int ret = realm.init(cct, sysobj_svc, y);
955 if (ret < 0) {
956 ldout(cct, 0) << "RGWPeriod::init failed to init realm " << realm_name << " id " << realm_id << " : " <<
957 cpp_strerror(-ret) << dendl;
958 return ret;
959 }
960 id = realm.get_current_period();
961 realm_id = realm.get_id();
962 }
963
964 if (!epoch) {
965 int ret = use_latest_epoch(y);
966 if (ret < 0) {
967 ldout(cct, 0) << "failed to use_latest_epoch period id " << id << " realm " << realm_name << " id " << realm_id
968 << " : " << cpp_strerror(-ret) << dendl;
969 return ret;
970 }
971 }
972
973 return read_info(y);
974 }
975
976
977 int RGWPeriod::get_zonegroup(RGWZoneGroup& zonegroup,
978 const string& zonegroup_id) const
979 {
980 map<string, RGWZoneGroup>::const_iterator iter;
981 if (!zonegroup_id.empty()) {
982 iter = period_map.zonegroups.find(zonegroup_id);
983 } else {
984 iter = period_map.zonegroups.find("default");
985 }
986 if (iter != period_map.zonegroups.end()) {
987 zonegroup = iter->second;
988 return 0;
989 }
990
991 return -ENOENT;
992 }
993
994 const string& RGWPeriod::get_latest_epoch_oid() const
995 {
996 if (cct->_conf->rgw_period_latest_epoch_info_oid.empty()) {
997 return period_latest_epoch_info_oid;
998 }
999 return cct->_conf->rgw_period_latest_epoch_info_oid;
1000 }
1001
1002 const string& RGWPeriod::get_info_oid_prefix() const
1003 {
1004 return period_info_oid_prefix;
1005 }
1006
1007 const string RGWPeriod::get_period_oid_prefix() const
1008 {
1009 return get_info_oid_prefix() + id;
1010 }
1011
1012 const string RGWPeriod::get_period_oid() const
1013 {
1014 std::ostringstream oss;
1015 oss << get_period_oid_prefix();
1016 // skip the epoch for the staging period
1017 if (id != get_staging_id(realm_id))
1018 oss << "." << epoch;
1019 return oss.str();
1020 }
1021
1022 int RGWPeriod::read_latest_epoch(RGWPeriodLatestEpochInfo& info,
1023 optional_yield y,
1024 RGWObjVersionTracker *objv)
1025 {
1026 string oid = get_period_oid_prefix() + get_latest_epoch_oid();
1027
1028 rgw_pool pool(get_pool(cct));
1029 bufferlist bl;
1030 auto obj_ctx = sysobj_svc->init_obj_ctx();
1031 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
1032 int ret = sysobj.rop().read(&bl, y);
1033 if (ret < 0) {
1034 ldout(cct, 1) << "error read_lastest_epoch " << pool << ":" << oid << dendl;
1035 return ret;
1036 }
1037 try {
1038 auto iter = bl.cbegin();
1039 using ceph::decode;
1040 decode(info, iter);
1041 } catch (buffer::error& err) {
1042 ldout(cct, 0) << "error decoding data from " << pool << ":" << oid << dendl;
1043 return -EIO;
1044 }
1045
1046 return 0;
1047 }
1048
1049 int RGWPeriod::get_latest_epoch(epoch_t& latest_epoch, optional_yield y)
1050 {
1051 RGWPeriodLatestEpochInfo info;
1052
1053 int ret = read_latest_epoch(info, y);
1054 if (ret < 0) {
1055 return ret;
1056 }
1057
1058 latest_epoch = info.epoch;
1059
1060 return 0;
1061 }
1062
1063 int RGWPeriod::use_latest_epoch(optional_yield y)
1064 {
1065 RGWPeriodLatestEpochInfo info;
1066 int ret = read_latest_epoch(info, y);
1067 if (ret < 0) {
1068 return ret;
1069 }
1070
1071 epoch = info.epoch;
1072
1073 return 0;
1074 }
1075
1076 int RGWPeriod::set_latest_epoch(optional_yield y,
1077 epoch_t epoch, bool exclusive,
1078 RGWObjVersionTracker *objv)
1079 {
1080 string oid = get_period_oid_prefix() + get_latest_epoch_oid();
1081
1082 rgw_pool pool(get_pool(cct));
1083 bufferlist bl;
1084
1085 RGWPeriodLatestEpochInfo info;
1086 info.epoch = epoch;
1087
1088 using ceph::encode;
1089 encode(info, bl);
1090
1091 auto obj_ctx = sysobj_svc->init_obj_ctx();
1092 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
1093 return sysobj.wop()
1094 .set_exclusive(exclusive)
1095 .write(bl, y);
1096 }
1097
1098 int RGWPeriod::update_latest_epoch(epoch_t epoch, optional_yield y)
1099 {
1100 static constexpr int MAX_RETRIES = 20;
1101
1102 for (int i = 0; i < MAX_RETRIES; i++) {
1103 RGWPeriodLatestEpochInfo info;
1104 RGWObjVersionTracker objv;
1105 bool exclusive = false;
1106
1107 // read existing epoch
1108 int r = read_latest_epoch(info, y, &objv);
1109 if (r == -ENOENT) {
1110 // use an exclusive create to set the epoch atomically
1111 exclusive = true;
1112 ldout(cct, 20) << "creating initial latest_epoch=" << epoch
1113 << " for period=" << id << dendl;
1114 } else if (r < 0) {
1115 ldout(cct, 0) << "ERROR: failed to read latest_epoch" << dendl;
1116 return r;
1117 } else if (epoch <= info.epoch) {
1118 r = -EEXIST; // fail with EEXIST if epoch is not newer
1119 ldout(cct, 10) << "found existing latest_epoch " << info.epoch
1120 << " >= given epoch " << epoch << ", returning r=" << r << dendl;
1121 return r;
1122 } else {
1123 ldout(cct, 20) << "updating latest_epoch from " << info.epoch
1124 << " -> " << epoch << " on period=" << id << dendl;
1125 }
1126
1127 r = set_latest_epoch(y, epoch, exclusive, &objv);
1128 if (r == -EEXIST) {
1129 continue; // exclusive create raced with another update, retry
1130 } else if (r == -ECANCELED) {
1131 continue; // write raced with a conflicting version, retry
1132 }
1133 if (r < 0) {
1134 ldout(cct, 0) << "ERROR: failed to write latest_epoch" << dendl;
1135 return r;
1136 }
1137 return 0; // return success
1138 }
1139
1140 return -ECANCELED; // fail after max retries
1141 }
1142
1143 int RGWPeriod::delete_obj(optional_yield y)
1144 {
1145 rgw_pool pool(get_pool(cct));
1146
1147 // delete the object for each period epoch
1148 for (epoch_t e = 1; e <= epoch; e++) {
1149 RGWPeriod p{get_id(), e};
1150 rgw_raw_obj oid{pool, p.get_period_oid()};
1151 auto obj_ctx = sysobj_svc->init_obj_ctx();
1152 auto sysobj = sysobj_svc->get_obj(obj_ctx, oid);
1153 int ret = sysobj.wop().remove(y);
1154 if (ret < 0) {
1155 ldout(cct, 0) << "WARNING: failed to delete period object " << oid
1156 << ": " << cpp_strerror(-ret) << dendl;
1157 }
1158 }
1159
1160 // delete the .latest_epoch object
1161 rgw_raw_obj oid{pool, get_period_oid_prefix() + get_latest_epoch_oid()};
1162 auto obj_ctx = sysobj_svc->init_obj_ctx();
1163 auto sysobj = sysobj_svc->get_obj(obj_ctx, oid);
1164 int ret = sysobj.wop().remove(y);
1165 if (ret < 0) {
1166 ldout(cct, 0) << "WARNING: failed to delete period object " << oid
1167 << ": " << cpp_strerror(-ret) << dendl;
1168 }
1169 return ret;
1170 }
1171
1172 int RGWPeriod::read_info(optional_yield y)
1173 {
1174 rgw_pool pool(get_pool(cct));
1175
1176 bufferlist bl;
1177
1178 auto obj_ctx = sysobj_svc->init_obj_ctx();
1179 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, get_period_oid()});
1180 int ret = sysobj.rop().read(&bl, y);
1181 if (ret < 0) {
1182 ldout(cct, 0) << "failed reading obj info from " << pool << ":" << get_period_oid() << ": " << cpp_strerror(-ret) << dendl;
1183 return ret;
1184 }
1185
1186 try {
1187 using ceph::decode;
1188 auto iter = bl.cbegin();
1189 decode(*this, iter);
1190 } catch (buffer::error& err) {
1191 ldout(cct, 0) << "ERROR: failed to decode obj from " << pool << ":" << get_period_oid() << dendl;
1192 return -EIO;
1193 }
1194
1195 return 0;
1196 }
1197
1198 int RGWPeriod::create(optional_yield y, bool exclusive)
1199 {
1200 int ret;
1201
1202 /* create unique id */
1203 uuid_d new_uuid;
1204 char uuid_str[37];
1205 new_uuid.generate_random();
1206 new_uuid.print(uuid_str);
1207 id = uuid_str;
1208
1209 epoch = FIRST_EPOCH;
1210
1211 period_map.id = id;
1212
1213 ret = store_info(exclusive, y);
1214 if (ret < 0) {
1215 ldout(cct, 0) << "ERROR: storing info for " << id << ": " << cpp_strerror(-ret) << dendl;
1216 return ret;
1217 }
1218
1219 ret = set_latest_epoch(y, epoch);
1220 if (ret < 0) {
1221 ldout(cct, 0) << "ERROR: setting latest epoch " << id << ": " << cpp_strerror(-ret) << dendl;
1222 }
1223
1224 return ret;
1225 }
1226
1227 int RGWPeriod::store_info(bool exclusive, optional_yield y)
1228 {
1229 rgw_pool pool(get_pool(cct));
1230
1231 string oid = get_period_oid();
1232 bufferlist bl;
1233 using ceph::encode;
1234 encode(*this, bl);
1235
1236 auto obj_ctx = sysobj_svc->init_obj_ctx();
1237 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
1238 return sysobj.wop()
1239 .set_exclusive(exclusive)
1240 .write(bl, y);
1241 }
1242
1243 rgw_pool RGWPeriod::get_pool(CephContext *cct) const
1244 {
1245 if (cct->_conf->rgw_period_root_pool.empty()) {
1246 return rgw_pool(RGW_DEFAULT_PERIOD_ROOT_POOL);
1247 }
1248 return rgw_pool(cct->_conf->rgw_period_root_pool);
1249 }
1250
1251 int RGWPeriod::add_zonegroup(const RGWZoneGroup& zonegroup, optional_yield y)
1252 {
1253 if (zonegroup.realm_id != realm_id) {
1254 return 0;
1255 }
1256 int ret = period_map.update(zonegroup, cct);
1257 if (ret < 0) {
1258 ldout(cct, 0) << "ERROR: updating period map: " << cpp_strerror(-ret) << dendl;
1259 return ret;
1260 }
1261
1262 return store_info(false, y);
1263 }
1264
1265 int RGWPeriod::update(optional_yield y)
1266 {
1267 auto zone_svc = sysobj_svc->get_zone_svc();
1268 ldout(cct, 20) << __func__ << " realm " << realm_id << " period " << get_id() << dendl;
1269 list<string> zonegroups;
1270 int ret = zone_svc->list_zonegroups(zonegroups);
1271 if (ret < 0) {
1272 ldout(cct, 0) << "ERROR: failed to list zonegroups: " << cpp_strerror(-ret) << dendl;
1273 return ret;
1274 }
1275
1276 // clear zone short ids of removed zones. period_map.update() will add the
1277 // remaining zones back
1278 period_map.short_zone_ids.clear();
1279
1280 for (auto& iter : zonegroups) {
1281 RGWZoneGroup zg(string(), iter);
1282 ret = zg.init(cct, sysobj_svc, y);
1283 if (ret < 0) {
1284 ldout(cct, 0) << "WARNING: zg.init() failed: " << cpp_strerror(-ret) << dendl;
1285 continue;
1286 }
1287
1288 if (zg.realm_id != realm_id) {
1289 ldout(cct, 20) << "skipping zonegroup " << zg.get_name() << " zone realm id " << zg.realm_id << ", not on our realm " << realm_id << dendl;
1290 continue;
1291 }
1292
1293 if (zg.master_zone.empty()) {
1294 ldout(cct, 0) << "ERROR: zonegroup " << zg.get_name() << " should have a master zone " << dendl;
1295 return -EINVAL;
1296 }
1297
1298 if (zg.zones.find(zg.master_zone) == zg.zones.end()) {
1299 ldout(cct,0) << "ERROR: zonegroup " << zg.get_name()
1300 << " has a non existent master zone "<< dendl;
1301 return -EINVAL;
1302 }
1303
1304 if (zg.is_master_zonegroup()) {
1305 master_zonegroup = zg.get_id();
1306 master_zone = zg.master_zone;
1307 }
1308
1309 int ret = period_map.update(zg, cct);
1310 if (ret < 0) {
1311 return ret;
1312 }
1313 }
1314
1315 ret = period_config.read(sysobj_svc, realm_id, y);
1316 if (ret < 0 && ret != -ENOENT) {
1317 ldout(cct, 0) << "ERROR: failed to read period config: "
1318 << cpp_strerror(ret) << dendl;
1319 return ret;
1320 }
1321 return 0;
1322 }
1323
1324 int RGWPeriod::reflect(optional_yield y)
1325 {
1326 for (auto& iter : period_map.zonegroups) {
1327 RGWZoneGroup& zg = iter.second;
1328 zg.reinit_instance(cct, sysobj_svc);
1329 int r = zg.write(false, y);
1330 if (r < 0) {
1331 ldout(cct, 0) << "ERROR: failed to store zonegroup info for zonegroup=" << iter.first << ": " << cpp_strerror(-r) << dendl;
1332 return r;
1333 }
1334 if (zg.is_master_zonegroup()) {
1335 // set master as default if no default exists
1336 r = zg.set_as_default(y, true);
1337 if (r == 0) {
1338 ldout(cct, 1) << "Set the period's master zonegroup " << zg.get_id()
1339 << " as the default" << dendl;
1340 }
1341 }
1342 }
1343
1344 int r = period_config.write(sysobj_svc, realm_id, y);
1345 if (r < 0) {
1346 ldout(cct, 0) << "ERROR: failed to store period config: "
1347 << cpp_strerror(-r) << dendl;
1348 return r;
1349 }
1350 return 0;
1351 }
1352
1353 void RGWPeriod::fork()
1354 {
1355 ldout(cct, 20) << __func__ << " realm " << realm_id << " period " << id << dendl;
1356 predecessor_uuid = id;
1357 id = get_staging_id(realm_id);
1358 period_map.reset();
1359 realm_epoch++;
1360 }
1361
1362 static int read_sync_status(rgw::sal::RGWRadosStore *store, rgw_meta_sync_status *sync_status)
1363 {
1364 // initialize a sync status manager to read the status
1365 RGWMetaSyncStatusManager mgr(store, store->svc()->rados->get_async_processor());
1366 int r = mgr.init();
1367 if (r < 0) {
1368 return r;
1369 }
1370 r = mgr.read_sync_status(sync_status);
1371 mgr.stop();
1372 return r;
1373 }
1374
1375 int RGWPeriod::update_sync_status(rgw::sal::RGWRadosStore *store, /* for now */
1376 const RGWPeriod &current_period,
1377 std::ostream& error_stream,
1378 bool force_if_stale)
1379 {
1380 rgw_meta_sync_status status;
1381 int r = read_sync_status(store, &status);
1382 if (r < 0) {
1383 ldout(cct, 0) << "period failed to read sync status: "
1384 << cpp_strerror(-r) << dendl;
1385 return r;
1386 }
1387
1388 std::vector<std::string> markers;
1389
1390 const auto current_epoch = current_period.get_realm_epoch();
1391 if (current_epoch != status.sync_info.realm_epoch) {
1392 // no sync status markers for the current period
1393 ceph_assert(current_epoch > status.sync_info.realm_epoch);
1394 const int behind = current_epoch - status.sync_info.realm_epoch;
1395 if (!force_if_stale && current_epoch > 1) {
1396 error_stream << "ERROR: This zone is " << behind << " period(s) behind "
1397 "the current master zone in metadata sync. If this zone is promoted "
1398 "to master, any metadata changes during that time are likely to "
1399 "be lost.\n"
1400 "Waiting for this zone to catch up on metadata sync (see "
1401 "'radosgw-admin sync status') is recommended.\n"
1402 "To promote this zone to master anyway, add the flag "
1403 "--yes-i-really-mean-it." << std::endl;
1404 return -EINVAL;
1405 }
1406 // empty sync status markers - other zones will skip this period during
1407 // incremental metadata sync
1408 markers.resize(status.sync_info.num_shards);
1409 } else {
1410 markers.reserve(status.sync_info.num_shards);
1411 for (auto& i : status.sync_markers) {
1412 auto& marker = i.second;
1413 // filter out markers from other periods
1414 if (marker.realm_epoch != current_epoch) {
1415 marker.marker.clear();
1416 }
1417 markers.emplace_back(std::move(marker.marker));
1418 }
1419 }
1420
1421 std::swap(sync_status, markers);
1422 return 0;
1423 }
1424
1425 int RGWPeriod::commit(rgw::sal::RGWRadosStore *store,
1426 RGWRealm& realm, const RGWPeriod& current_period,
1427 std::ostream& error_stream, optional_yield y,
1428 bool force_if_stale)
1429 {
1430 auto zone_svc = sysobj_svc->get_zone_svc();
1431 ldout(cct, 20) << __func__ << " realm " << realm.get_id() << " period " << current_period.get_id() << dendl;
1432 // gateway must be in the master zone to commit
1433 if (master_zone != zone_svc->get_zone_params().get_id()) {
1434 error_stream << "Cannot commit period on zone "
1435 << zone_svc->get_zone_params().get_id() << ", it must be sent to "
1436 "the period's master zone " << master_zone << '.' << std::endl;
1437 return -EINVAL;
1438 }
1439 // period predecessor must match current period
1440 if (predecessor_uuid != current_period.get_id()) {
1441 error_stream << "Period predecessor " << predecessor_uuid
1442 << " does not match current period " << current_period.get_id()
1443 << ". Use 'period pull' to get the latest period from the master, "
1444 "reapply your changes, and try again." << std::endl;
1445 return -EINVAL;
1446 }
1447 // realm epoch must be 1 greater than current period
1448 if (realm_epoch != current_period.get_realm_epoch() + 1) {
1449 error_stream << "Period's realm epoch " << realm_epoch
1450 << " does not come directly after current realm epoch "
1451 << current_period.get_realm_epoch() << ". Use 'realm pull' to get the "
1452 "latest realm and period from the master zone, reapply your changes, "
1453 "and try again." << std::endl;
1454 return -EINVAL;
1455 }
1456 // did the master zone change?
1457 if (master_zone != current_period.get_master_zone()) {
1458 // store the current metadata sync status in the period
1459 int r = update_sync_status(store, current_period, error_stream, force_if_stale);
1460 if (r < 0) {
1461 ldout(cct, 0) << "failed to update metadata sync status: "
1462 << cpp_strerror(-r) << dendl;
1463 return r;
1464 }
1465 // create an object with a new period id
1466 r = create(y, true);
1467 if (r < 0) {
1468 ldout(cct, 0) << "failed to create new period: " << cpp_strerror(-r) << dendl;
1469 return r;
1470 }
1471 // set as current period
1472 r = realm.set_current_period(*this, y);
1473 if (r < 0) {
1474 ldout(cct, 0) << "failed to update realm's current period: "
1475 << cpp_strerror(-r) << dendl;
1476 return r;
1477 }
1478 ldout(cct, 4) << "Promoted to master zone and committed new period "
1479 << id << dendl;
1480 realm.notify_new_period(*this, y);
1481 return 0;
1482 }
1483 // period must be based on current epoch
1484 if (epoch != current_period.get_epoch()) {
1485 error_stream << "Period epoch " << epoch << " does not match "
1486 "predecessor epoch " << current_period.get_epoch()
1487 << ". Use 'period pull' to get the latest epoch from the master zone, "
1488 "reapply your changes, and try again." << std::endl;
1489 return -EINVAL;
1490 }
1491 // set period as next epoch
1492 set_id(current_period.get_id());
1493 set_epoch(current_period.get_epoch() + 1);
1494 set_predecessor(current_period.get_predecessor());
1495 realm_epoch = current_period.get_realm_epoch();
1496 // write the period to rados
1497 int r = store_info(false, y);
1498 if (r < 0) {
1499 ldout(cct, 0) << "failed to store period: " << cpp_strerror(-r) << dendl;
1500 return r;
1501 }
1502 // set as latest epoch
1503 r = update_latest_epoch(epoch, y);
1504 if (r == -EEXIST) {
1505 // already have this epoch (or a more recent one)
1506 return 0;
1507 }
1508 if (r < 0) {
1509 ldout(cct, 0) << "failed to set latest epoch: " << cpp_strerror(-r) << dendl;
1510 return r;
1511 }
1512 r = reflect(y);
1513 if (r < 0) {
1514 ldout(cct, 0) << "failed to update local objects: " << cpp_strerror(-r) << dendl;
1515 return r;
1516 }
1517 ldout(cct, 4) << "Committed new epoch " << epoch
1518 << " for period " << id << dendl;
1519 realm.notify_new_period(*this, y);
1520 return 0;
1521 }
1522
1523 int RGWZoneParams::create_default(optional_yield y, bool old_format)
1524 {
1525 name = default_zone_name;
1526
1527 int r = create(y);
1528 if (r < 0) {
1529 return r;
1530 }
1531
1532 if (old_format) {
1533 name = id;
1534 }
1535
1536 return r;
1537 }
1538
1539
1540 namespace {
1541 int get_zones_pool_set(CephContext* cct,
1542 RGWSI_SysObj* sysobj_svc,
1543 const list<string>& zones,
1544 const string& my_zone_id,
1545 set<rgw_pool>& pool_names,
1546 optional_yield y)
1547 {
1548 for(auto const& iter : zones) {
1549 RGWZoneParams zone(iter);
1550 int r = zone.init(cct, sysobj_svc, y);
1551 if (r < 0) {
1552 ldout(cct, 0) << "Error: init zone " << iter << ":" << cpp_strerror(-r) << dendl;
1553 return r;
1554 }
1555 if (zone.get_id() != my_zone_id) {
1556 pool_names.insert(zone.domain_root);
1557 pool_names.insert(zone.control_pool);
1558 pool_names.insert(zone.gc_pool);
1559 pool_names.insert(zone.log_pool);
1560 pool_names.insert(zone.intent_log_pool);
1561 pool_names.insert(zone.usage_log_pool);
1562 pool_names.insert(zone.user_keys_pool);
1563 pool_names.insert(zone.user_email_pool);
1564 pool_names.insert(zone.user_swift_pool);
1565 pool_names.insert(zone.user_uid_pool);
1566 pool_names.insert(zone.otp_pool);
1567 pool_names.insert(zone.roles_pool);
1568 pool_names.insert(zone.reshard_pool);
1569 pool_names.insert(zone.notif_pool);
1570 for(auto& iter : zone.placement_pools) {
1571 pool_names.insert(iter.second.index_pool);
1572 for (auto& pi : iter.second.storage_classes.get_all()) {
1573 if (pi.second.data_pool) {
1574 pool_names.insert(pi.second.data_pool.get());
1575 }
1576 }
1577 pool_names.insert(iter.second.data_extra_pool);
1578 }
1579 pool_names.insert(zone.oidc_pool);
1580 }
1581 }
1582 return 0;
1583 }
1584
1585 rgw_pool fix_zone_pool_dup(set<rgw_pool> pools,
1586 const string& default_prefix,
1587 const string& default_suffix,
1588 const rgw_pool& suggested_pool)
1589 {
1590 string suggested_name = suggested_pool.to_str();
1591
1592 string prefix = default_prefix;
1593 string suffix = default_suffix;
1594
1595 if (!suggested_pool.empty()) {
1596 prefix = suggested_name.substr(0, suggested_name.find("."));
1597 suffix = suggested_name.substr(prefix.length());
1598 }
1599
1600 rgw_pool pool(prefix + suffix);
1601
1602 if (pools.find(pool) == pools.end()) {
1603 return pool;
1604 } else {
1605 while(true) {
1606 pool = prefix + "_" + std::to_string(std::rand()) + suffix;
1607 if (pools.find(pool) == pools.end()) {
1608 return pool;
1609 }
1610 }
1611 }
1612 }
1613 }
1614
1615 int RGWZoneParams::fix_pool_names(optional_yield y)
1616 {
1617
1618 list<string> zones;
1619 int r = zone_svc->list_zones(zones);
1620 if (r < 0) {
1621 ldout(cct, 10) << "WARNING: store->list_zones() returned r=" << r << dendl;
1622 }
1623
1624 set<rgw_pool> pools;
1625 r = get_zones_pool_set(cct, sysobj_svc, zones, id, pools, y);
1626 if (r < 0) {
1627 ldout(cct, 0) << "Error: get_zones_pool_names" << r << dendl;
1628 return r;
1629 }
1630
1631 domain_root = fix_zone_pool_dup(pools, name, ".rgw.meta:root", domain_root);
1632 control_pool = fix_zone_pool_dup(pools, name, ".rgw.control", control_pool);
1633 gc_pool = fix_zone_pool_dup(pools, name ,".rgw.log:gc", gc_pool);
1634 lc_pool = fix_zone_pool_dup(pools, name ,".rgw.log:lc", lc_pool);
1635 log_pool = fix_zone_pool_dup(pools, name, ".rgw.log", log_pool);
1636 intent_log_pool = fix_zone_pool_dup(pools, name, ".rgw.log:intent", intent_log_pool);
1637 usage_log_pool = fix_zone_pool_dup(pools, name, ".rgw.log:usage", usage_log_pool);
1638 user_keys_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:users.keys", user_keys_pool);
1639 user_email_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:users.email", user_email_pool);
1640 user_swift_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:users.swift", user_swift_pool);
1641 user_uid_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:users.uid", user_uid_pool);
1642 roles_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:roles", roles_pool);
1643 reshard_pool = fix_zone_pool_dup(pools, name, ".rgw.log:reshard", reshard_pool);
1644 otp_pool = fix_zone_pool_dup(pools, name, ".rgw.otp", otp_pool);
1645 oidc_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:oidc", oidc_pool);
1646 notif_pool = fix_zone_pool_dup(pools, name ,".rgw.log:notif", notif_pool);
1647
1648 for(auto& iter : placement_pools) {
1649 iter.second.index_pool = fix_zone_pool_dup(pools, name, "." + default_bucket_index_pool_suffix,
1650 iter.second.index_pool);
1651 for (auto& pi : iter.second.storage_classes.get_all()) {
1652 if (pi.second.data_pool) {
1653 rgw_pool& pool = pi.second.data_pool.get();
1654 pool = fix_zone_pool_dup(pools, name, "." + default_storage_pool_suffix,
1655 pool);
1656 }
1657 }
1658 iter.second.data_extra_pool= fix_zone_pool_dup(pools, name, "." + default_storage_extra_pool_suffix,
1659 iter.second.data_extra_pool);
1660 }
1661
1662 return 0;
1663 }
1664
1665 int RGWZoneParams::create(optional_yield y, bool exclusive)
1666 {
1667 /* check for old pools config */
1668 rgw_raw_obj obj(domain_root, avail_pools);
1669 auto obj_ctx = sysobj_svc->init_obj_ctx();
1670 auto sysobj = sysobj_svc->get_obj(obj_ctx, obj);
1671 int r = sysobj.rop().stat(y);
1672 if (r < 0) {
1673 ldout(cct, 10) << "couldn't find old data placement pools config, setting up new ones for the zone" << dendl;
1674 /* a new system, let's set new placement info */
1675 RGWZonePlacementInfo default_placement;
1676 default_placement.index_pool = name + "." + default_bucket_index_pool_suffix;
1677 rgw_pool pool = name + "." + default_storage_pool_suffix;
1678 default_placement.storage_classes.set_storage_class(RGW_STORAGE_CLASS_STANDARD, &pool, nullptr);
1679 default_placement.data_extra_pool = name + "." + default_storage_extra_pool_suffix;
1680 placement_pools["default-placement"] = default_placement;
1681 }
1682
1683 r = fix_pool_names(y);
1684 if (r < 0) {
1685 ldout(cct, 0) << "ERROR: fix_pool_names returned r=" << r << dendl;
1686 return r;
1687 }
1688
1689 r = RGWSystemMetaObj::create(y, exclusive);
1690 if (r < 0) {
1691 return r;
1692 }
1693
1694 // try to set as default. may race with another create, so pass exclusive=true
1695 // so we don't override an existing default
1696 r = set_as_default(y, true);
1697 if (r < 0 && r != -EEXIST) {
1698 ldout(cct, 10) << "WARNING: failed to set zone as default, r=" << r << dendl;
1699 }
1700
1701 return 0;
1702 }
1703
1704 rgw_pool RGWZoneParams::get_pool(CephContext *cct) const
1705 {
1706 if (cct->_conf->rgw_zone_root_pool.empty()) {
1707 return rgw_pool(RGW_DEFAULT_ZONE_ROOT_POOL);
1708 }
1709
1710 return rgw_pool(cct->_conf->rgw_zone_root_pool);
1711 }
1712
1713 const string RGWZoneParams::get_default_oid(bool old_format) const
1714 {
1715 if (old_format) {
1716 return cct->_conf->rgw_default_zone_info_oid;
1717 }
1718
1719 return cct->_conf->rgw_default_zone_info_oid + "." + realm_id;
1720 }
1721
1722 const string& RGWZoneParams::get_names_oid_prefix() const
1723 {
1724 return zone_names_oid_prefix;
1725 }
1726
1727 const string& RGWZoneParams::get_info_oid_prefix(bool old_format) const
1728 {
1729 return zone_info_oid_prefix;
1730 }
1731
1732 const string& RGWZoneParams::get_predefined_name(CephContext *cct) const {
1733 return cct->_conf->rgw_zone;
1734 }
1735
1736 int RGWZoneParams::init(CephContext *cct, RGWSI_SysObj *sysobj_svc,
1737 optional_yield y, bool setup_obj, bool old_format)
1738 {
1739 if (name.empty()) {
1740 name = cct->_conf->rgw_zone;
1741 }
1742
1743 return RGWSystemMetaObj::init(cct, sysobj_svc, y, setup_obj, old_format);
1744 }
1745
1746 int RGWZoneParams::read_default_id(string& default_id, optional_yield y,
1747 bool old_format)
1748 {
1749 if (realm_id.empty()) {
1750 /* try using default realm */
1751 RGWRealm realm;
1752 int ret = realm.init(cct, sysobj_svc, y);
1753 //no default realm exist
1754 if (ret < 0) {
1755 return read_id(default_zone_name, default_id, y);
1756 }
1757 realm_id = realm.get_id();
1758 }
1759
1760 return RGWSystemMetaObj::read_default_id(default_id, y, old_format);
1761 }
1762
1763
1764 int RGWZoneParams::set_as_default(optional_yield y, bool exclusive)
1765 {
1766 if (realm_id.empty()) {
1767 /* try using default realm */
1768 RGWRealm realm;
1769 int ret = realm.init(cct, sysobj_svc, y);
1770 if (ret < 0) {
1771 ldout(cct, 10) << "could not read realm id: " << cpp_strerror(-ret) << dendl;
1772 return -EINVAL;
1773 }
1774 realm_id = realm.get_id();
1775 }
1776
1777 return RGWSystemMetaObj::set_as_default(y, exclusive);
1778 }
1779
1780 const string& RGWZoneParams::get_compression_type(const rgw_placement_rule& placement_rule) const
1781 {
1782 static const std::string NONE{"none"};
1783 auto p = placement_pools.find(placement_rule.name);
1784 if (p == placement_pools.end()) {
1785 return NONE;
1786 }
1787 const auto& type = p->second.get_compression_type(placement_rule.get_storage_class());
1788 return !type.empty() ? type : NONE;
1789 }
1790
1791 void RGWPeriodMap::encode(bufferlist& bl) const {
1792 ENCODE_START(2, 1, bl);
1793 encode(id, bl);
1794 encode(zonegroups, bl);
1795 encode(master_zonegroup, bl);
1796 encode(short_zone_ids, bl);
1797 ENCODE_FINISH(bl);
1798 }
1799
1800 void RGWPeriodMap::decode(bufferlist::const_iterator& bl) {
1801 DECODE_START(2, bl);
1802 decode(id, bl);
1803 decode(zonegroups, bl);
1804 decode(master_zonegroup, bl);
1805 if (struct_v >= 2) {
1806 decode(short_zone_ids, bl);
1807 }
1808 DECODE_FINISH(bl);
1809
1810 zonegroups_by_api.clear();
1811 for (map<string, RGWZoneGroup>::iterator iter = zonegroups.begin();
1812 iter != zonegroups.end(); ++iter) {
1813 RGWZoneGroup& zonegroup = iter->second;
1814 zonegroups_by_api[zonegroup.api_name] = zonegroup;
1815 if (zonegroup.is_master_zonegroup()) {
1816 master_zonegroup = zonegroup.get_id();
1817 }
1818 }
1819 }
1820
1821 // run an MD5 hash on the zone_id and return the first 32 bits
1822 static uint32_t gen_short_zone_id(const std::string zone_id)
1823 {
1824 unsigned char md5[CEPH_CRYPTO_MD5_DIGESTSIZE];
1825 MD5 hash;
1826 hash.Update((const unsigned char *)zone_id.c_str(), zone_id.size());
1827 hash.Final(md5);
1828
1829 uint32_t short_id;
1830 memcpy((char *)&short_id, md5, sizeof(short_id));
1831 return std::max(short_id, 1u);
1832 }
1833
1834 int RGWPeriodMap::update(const RGWZoneGroup& zonegroup, CephContext *cct)
1835 {
1836 if (zonegroup.is_master_zonegroup() && (!master_zonegroup.empty() && zonegroup.get_id() != master_zonegroup)) {
1837 ldout(cct,0) << "Error updating periodmap, multiple master zonegroups configured "<< dendl;
1838 ldout(cct,0) << "master zonegroup: " << master_zonegroup << " and " << zonegroup.get_id() <<dendl;
1839 return -EINVAL;
1840 }
1841 map<string, RGWZoneGroup>::iterator iter = zonegroups.find(zonegroup.get_id());
1842 if (iter != zonegroups.end()) {
1843 RGWZoneGroup& old_zonegroup = iter->second;
1844 if (!old_zonegroup.api_name.empty()) {
1845 zonegroups_by_api.erase(old_zonegroup.api_name);
1846 }
1847 }
1848 zonegroups[zonegroup.get_id()] = zonegroup;
1849
1850 if (!zonegroup.api_name.empty()) {
1851 zonegroups_by_api[zonegroup.api_name] = zonegroup;
1852 }
1853
1854 if (zonegroup.is_master_zonegroup()) {
1855 master_zonegroup = zonegroup.get_id();
1856 } else if (master_zonegroup == zonegroup.get_id()) {
1857 master_zonegroup = "";
1858 }
1859
1860 for (auto& i : zonegroup.zones) {
1861 auto& zone = i.second;
1862 if (short_zone_ids.find(zone.id) != short_zone_ids.end()) {
1863 continue;
1864 }
1865 // calculate the zone's short id
1866 uint32_t short_id = gen_short_zone_id(zone.id);
1867
1868 // search for an existing zone with the same short id
1869 for (auto& s : short_zone_ids) {
1870 if (s.second == short_id) {
1871 ldout(cct, 0) << "New zone '" << zone.name << "' (" << zone.id
1872 << ") generates the same short_zone_id " << short_id
1873 << " as existing zone id " << s.first << dendl;
1874 return -EEXIST;
1875 }
1876 }
1877
1878 short_zone_ids[zone.id] = short_id;
1879 }
1880
1881 return 0;
1882 }
1883
1884 uint32_t RGWPeriodMap::get_zone_short_id(const string& zone_id) const
1885 {
1886 auto i = short_zone_ids.find(zone_id);
1887 if (i == short_zone_ids.end()) {
1888 return 0;
1889 }
1890 return i->second;
1891 }
1892
1893 int RGWZoneGroupMap::read(CephContext *cct, RGWSI_SysObj *sysobj_svc, optional_yield y)
1894 {
1895
1896 RGWPeriod period;
1897 int ret = period.init(cct, sysobj_svc, y);
1898 if (ret < 0) {
1899 cerr << "failed to read current period info: " << cpp_strerror(ret);
1900 return ret;
1901 }
1902
1903 bucket_quota = period.get_config().bucket_quota;
1904 user_quota = period.get_config().user_quota;
1905 zonegroups = period.get_map().zonegroups;
1906 zonegroups_by_api = period.get_map().zonegroups_by_api;
1907 master_zonegroup = period.get_map().master_zonegroup;
1908
1909 return 0;
1910 }
1911
1912 void RGWRegionMap::encode(bufferlist& bl) const {
1913 ENCODE_START( 3, 1, bl);
1914 encode(regions, bl);
1915 encode(master_region, bl);
1916 encode(bucket_quota, bl);
1917 encode(user_quota, bl);
1918 ENCODE_FINISH(bl);
1919 }
1920
1921 void RGWRegionMap::decode(bufferlist::const_iterator& bl) {
1922 DECODE_START(3, bl);
1923 decode(regions, bl);
1924 decode(master_region, bl);
1925 if (struct_v >= 2)
1926 decode(bucket_quota, bl);
1927 if (struct_v >= 3)
1928 decode(user_quota, bl);
1929 DECODE_FINISH(bl);
1930 }
1931
1932 void RGWZoneGroupMap::encode(bufferlist& bl) const {
1933 ENCODE_START( 3, 1, bl);
1934 encode(zonegroups, bl);
1935 encode(master_zonegroup, bl);
1936 encode(bucket_quota, bl);
1937 encode(user_quota, bl);
1938 ENCODE_FINISH(bl);
1939 }
1940
1941 void RGWZoneGroupMap::decode(bufferlist::const_iterator& bl) {
1942 DECODE_START(3, bl);
1943 decode(zonegroups, bl);
1944 decode(master_zonegroup, bl);
1945 if (struct_v >= 2)
1946 decode(bucket_quota, bl);
1947 if (struct_v >= 3)
1948 decode(user_quota, bl);
1949 DECODE_FINISH(bl);
1950
1951 zonegroups_by_api.clear();
1952 for (map<string, RGWZoneGroup>::iterator iter = zonegroups.begin();
1953 iter != zonegroups.end(); ++iter) {
1954 RGWZoneGroup& zonegroup = iter->second;
1955 zonegroups_by_api[zonegroup.api_name] = zonegroup;
1956 if (zonegroup.is_master_zonegroup()) {
1957 master_zonegroup = zonegroup.get_name();
1958 }
1959 }
1960 }
1961
1962