]> git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/rgw_zone.cc
import ceph 14.2.5
[ceph.git] / ceph / src / rgw / rgw_zone.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "common/errno.h"
5
6 #include "rgw_zone.h"
7 #include "rgw_realm_watcher.h"
8 #include "rgw_meta_sync_status.h"
9 #include "rgw_sync.h"
10
11 #include "services/svc_zone.h"
12 #include "services/svc_sys_obj.h"
13
14 #define dout_subsys ceph_subsys_rgw
15
// Default object-id prefixes, object names and pool names referenced by the
// realm / period / zonegroup / zone code in this file. They are mutable
// std::string objects with external linkage.
namespace rgw_zone_defaults {

// object-id prefixes
std::string zone_info_oid_prefix{"zone_info."};
std::string zone_names_oid_prefix{"zone_names."};
std::string region_info_oid_prefix{"region_info."};
std::string realm_names_oid_prefix{"realms_names."};
std::string zone_group_info_oid_prefix{"zonegroup_info."};
std::string realm_info_oid_prefix{"realms."};

// fixed object ids / suffixes
std::string default_region_info_oid{"default.region"};
std::string default_zone_group_info_oid{"default.zonegroup"};
std::string period_info_oid_prefix{"periods."};
std::string period_latest_epoch_info_oid{".latest_epoch"};
std::string region_map_oid{"region_map"};
std::string default_realm_info_oid{"default.realm"};

// default entity names
std::string default_zonegroup_name{"default"};
std::string default_zone_name{"default"};
std::string zonegroup_names_oid_prefix{"zonegroups_names."};

// root pools used when the corresponding config option is empty
std::string RGW_DEFAULT_ZONE_ROOT_POOL{"rgw.root"};
std::string RGW_DEFAULT_ZONEGROUP_ROOT_POOL{"rgw.root"};
std::string RGW_DEFAULT_REALM_ROOT_POOL{"rgw.root"};
std::string RGW_DEFAULT_PERIOD_ROOT_POOL{"rgw.root"};

// pool name suffixes
std::string default_bucket_index_pool_suffix{"rgw.buckets.index"};
std::string default_storage_extra_pool_suffix{"rgw.buckets.non-ec"};
std::string avail_pools{".pools.avail"};
std::string default_storage_pool_suffix{"rgw.buckets.data"};

}
43
44 using namespace rgw_zone_defaults;
45
46 #define FIRST_EPOCH 1
47
48 void RGWDefaultZoneGroupInfo::dump(Formatter *f) const {
49 encode_json("default_zonegroup", default_zonegroup, f);
50 }
51
52 void RGWDefaultZoneGroupInfo::decode_json(JSONObj *obj) {
53
54 JSONDecoder::decode_json("default_zonegroup", default_zonegroup, obj);
55 /* backward compatability with region */
56 if (default_zonegroup.empty()) {
57 JSONDecoder::decode_json("default_region", default_zonegroup, obj);
58 }
59 }
60
61 rgw_pool RGWZoneGroup::get_pool(CephContext *cct_) const
62 {
63 if (cct_->_conf->rgw_zonegroup_root_pool.empty()) {
64 return rgw_pool(RGW_DEFAULT_ZONEGROUP_ROOT_POOL);
65 }
66
67 return rgw_pool(cct_->_conf->rgw_zonegroup_root_pool);
68 }
69
70 int RGWZoneGroup::create_default(bool old_format)
71 {
72 name = default_zonegroup_name;
73 api_name = default_zonegroup_name;
74 is_master = true;
75
76 RGWZoneGroupPlacementTarget placement_target;
77 placement_target.name = "default-placement";
78 placement_targets[placement_target.name] = placement_target;
79 default_placement.name = "default-placement";
80
81 RGWZoneParams zone_params(default_zone_name);
82
83 int r = zone_params.init(cct, sysobj_svc, false);
84 if (r < 0) {
85 ldout(cct, 0) << "create_default: error initializing zone params: " << cpp_strerror(-r) << dendl;
86 return r;
87 }
88
89 r = zone_params.create_default();
90 if (r < 0 && r != -EEXIST) {
91 ldout(cct, 0) << "create_default: error in create_default zone params: " << cpp_strerror(-r) << dendl;
92 return r;
93 } else if (r == -EEXIST) {
94 ldout(cct, 10) << "zone_params::create_default() returned -EEXIST, we raced with another default zone_params creation" << dendl;
95 zone_params.clear_id();
96 r = zone_params.init(cct, sysobj_svc);
97 if (r < 0) {
98 ldout(cct, 0) << "create_default: error in init existing zone params: " << cpp_strerror(-r) << dendl;
99 return r;
100 }
101 ldout(cct, 20) << "zone_params::create_default() " << zone_params.get_name() << " id " << zone_params.get_id()
102 << dendl;
103 }
104
105 RGWZone& default_zone = zones[zone_params.get_id()];
106 default_zone.name = zone_params.get_name();
107 default_zone.id = zone_params.get_id();
108 master_zone = default_zone.id;
109
110 r = create();
111 if (r < 0 && r != -EEXIST) {
112 ldout(cct, 0) << "error storing zone group info: " << cpp_strerror(-r) << dendl;
113 return r;
114 }
115
116 if (r == -EEXIST) {
117 ldout(cct, 10) << "create_default() returned -EEXIST, we raced with another zonegroup creation" << dendl;
118 id.clear();
119 r = init(cct, sysobj_svc);
120 if (r < 0) {
121 return r;
122 }
123 }
124
125 if (old_format) {
126 name = id;
127 }
128
129 post_process_params();
130
131 return 0;
132 }
133
134 const string RGWZoneGroup::get_default_oid(bool old_region_format) const
135 {
136 if (old_region_format) {
137 if (cct->_conf->rgw_default_region_info_oid.empty()) {
138 return default_region_info_oid;
139 }
140 return cct->_conf->rgw_default_region_info_oid;
141 }
142
143 string default_oid = cct->_conf->rgw_default_zonegroup_info_oid;
144
145 if (cct->_conf->rgw_default_zonegroup_info_oid.empty()) {
146 default_oid = default_zone_group_info_oid;
147 }
148
149 default_oid += "." + realm_id;
150
151 return default_oid;
152 }
153
154 const string& RGWZoneGroup::get_info_oid_prefix(bool old_region_format) const
155 {
156 if (old_region_format) {
157 return region_info_oid_prefix;
158 }
159 return zone_group_info_oid_prefix;
160 }
161
162 const string& RGWZoneGroup::get_names_oid_prefix() const
163 {
164 return zonegroup_names_oid_prefix;
165 }
166
167 const string& RGWZoneGroup::get_predefined_name(CephContext *cct) const {
168 return cct->_conf->rgw_zonegroup;
169 }
170
171 int RGWZoneGroup::equals(const string& other_zonegroup) const
172 {
173 if (is_master && other_zonegroup.empty())
174 return true;
175
176 return (id == other_zonegroup);
177 }
178
179 int RGWZoneGroup::add_zone(const RGWZoneParams& zone_params, bool *is_master, bool *read_only,
180 const list<string>& endpoints, const string *ptier_type,
181 bool *psync_from_all, list<string>& sync_from, list<string>& sync_from_rm,
182 string *predirect_zone, RGWSyncModulesManager *sync_mgr)
183 {
184 auto& zone_id = zone_params.get_id();
185 auto& zone_name = zone_params.get_name();
186
187 // check for duplicate zone name on insert
188 if (!zones.count(zone_id)) {
189 for (const auto& zone : zones) {
190 if (zone.second.name == zone_name) {
191 ldout(cct, 0) << "ERROR: found existing zone name " << zone_name
192 << " (" << zone.first << ") in zonegroup " << get_name() << dendl;
193 return -EEXIST;
194 }
195 }
196 }
197
198 if (is_master) {
199 if (*is_master) {
200 if (!master_zone.empty() && master_zone != zone_id) {
201 ldout(cct, 0) << "NOTICE: overriding master zone: " << master_zone << dendl;
202 }
203 master_zone = zone_id;
204 } else if (master_zone == zone_id) {
205 master_zone.clear();
206 }
207 }
208
209 RGWZone& zone = zones[zone_id];
210 zone.name = zone_name;
211 zone.id = zone_id;
212 if (!endpoints.empty()) {
213 zone.endpoints = endpoints;
214 }
215 if (read_only) {
216 zone.read_only = *read_only;
217 }
218 if (ptier_type) {
219 zone.tier_type = *ptier_type;
220 if (!sync_mgr->get_module(*ptier_type, nullptr)) {
221 ldout(cct, 0) << "ERROR: could not found sync module: " << *ptier_type
222 << ", valid sync modules: "
223 << sync_mgr->get_registered_module_names()
224 << dendl;
225 return -ENOENT;
226 }
227 }
228
229 if (psync_from_all) {
230 zone.sync_from_all = *psync_from_all;
231 }
232
233 if (predirect_zone) {
234 zone.redirect_zone = *predirect_zone;
235 }
236
237 for (auto add : sync_from) {
238 zone.sync_from.insert(add);
239 }
240
241 for (auto rm : sync_from_rm) {
242 zone.sync_from.erase(rm);
243 }
244
245 post_process_params();
246
247 return update();
248 }
249
250
251 int RGWZoneGroup::rename_zone(const RGWZoneParams& zone_params)
252 {
253 RGWZone& zone = zones[zone_params.get_id()];
254 zone.name = zone_params.get_name();
255
256 return update();
257 }
258
259 void RGWZoneGroup::post_process_params()
260 {
261 bool log_data = zones.size() > 1;
262
263 if (master_zone.empty()) {
264 map<string, RGWZone>::iterator iter = zones.begin();
265 if (iter != zones.end()) {
266 master_zone = iter->first;
267 }
268 }
269
270 for (map<string, RGWZone>::iterator iter = zones.begin(); iter != zones.end(); ++iter) {
271 RGWZone& zone = iter->second;
272 zone.log_data = log_data;
273
274 RGWZoneParams zone_params(zone.id, zone.name);
275 int ret = zone_params.init(cct, sysobj_svc);
276 if (ret < 0) {
277 ldout(cct, 0) << "WARNING: could not read zone params for zone id=" << zone.id << " name=" << zone.name << dendl;
278 continue;
279 }
280
281 for (map<string, RGWZonePlacementInfo>::iterator iter = zone_params.placement_pools.begin();
282 iter != zone_params.placement_pools.end(); ++iter) {
283 const string& placement_name = iter->first;
284 if (placement_targets.find(placement_name) == placement_targets.end()) {
285 RGWZoneGroupPlacementTarget placement_target;
286 placement_target.name = placement_name;
287 placement_targets[placement_name] = placement_target;
288 }
289 }
290 }
291
292 if (default_placement.empty() && !placement_targets.empty()) {
293 default_placement.init(placement_targets.begin()->first, RGW_STORAGE_CLASS_STANDARD);
294 }
295 }
296
297 int RGWZoneGroup::remove_zone(const std::string& zone_id)
298 {
299 map<string, RGWZone>::iterator iter = zones.find(zone_id);
300 if (iter == zones.end()) {
301 ldout(cct, 0) << "zone id " << zone_id << " is not a part of zonegroup "
302 << name << dendl;
303 return -ENOENT;
304 }
305
306 zones.erase(iter);
307
308 post_process_params();
309
310 return update();
311 }
312
313 int RGWZoneGroup::read_default_id(string& default_id, bool old_format)
314 {
315 if (realm_id.empty()) {
316 /* try using default realm */
317 RGWRealm realm;
318 int ret = realm.init(cct, sysobj_svc);
319 // no default realm exist
320 if (ret < 0) {
321 return read_id(default_zonegroup_name, default_id);
322 }
323 realm_id = realm.get_id();
324 }
325
326 return RGWSystemMetaObj::read_default_id(default_id, old_format);
327 }
328
329 int RGWZoneGroup::set_as_default(bool exclusive)
330 {
331 if (realm_id.empty()) {
332 /* try using default realm */
333 RGWRealm realm;
334 int ret = realm.init(cct, sysobj_svc);
335 if (ret < 0) {
336 ldout(cct, 10) << "could not read realm id: " << cpp_strerror(-ret) << dendl;
337 return -EINVAL;
338 }
339 realm_id = realm.get_id();
340 }
341
342 return RGWSystemMetaObj::set_as_default(exclusive);
343 }
344
345 void RGWSystemMetaObj::reinit_instance(CephContext *_cct, RGWSI_SysObj *_sysobj_svc)
346 {
347 cct = _cct;
348 sysobj_svc = _sysobj_svc;
349 zone_svc = _sysobj_svc->get_zone_svc();
350 }
351
352 int RGWSystemMetaObj::init(CephContext *_cct, RGWSI_SysObj *_sysobj_svc, bool setup_obj, bool old_format)
353 {
354 reinit_instance(_cct, _sysobj_svc);
355
356 if (!setup_obj)
357 return 0;
358
359 if (old_format && id.empty()) {
360 id = name;
361 }
362
363 if (id.empty()) {
364 int r;
365 if (name.empty()) {
366 name = get_predefined_name(cct);
367 }
368 if (name.empty()) {
369 r = use_default(old_format);
370 if (r < 0) {
371 return r;
372 }
373 } else if (!old_format) {
374 r = read_id(name, id);
375 if (r < 0) {
376 if (r != -ENOENT) {
377 ldout(cct, 0) << "error in read_id for object name: " << name << " : " << cpp_strerror(-r) << dendl;
378 }
379 return r;
380 }
381 }
382 }
383
384 return read_info(id, old_format);
385 }
386
387 int RGWSystemMetaObj::read_default(RGWDefaultSystemMetaObjInfo& default_info, const string& oid)
388 {
389 using ceph::decode;
390 auto pool = get_pool(cct);
391 bufferlist bl;
392
393 auto obj_ctx = sysobj_svc->init_obj_ctx();
394 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
395 int ret = sysobj.rop().read(&bl);
396 if (ret < 0)
397 return ret;
398
399 try {
400 auto iter = bl.cbegin();
401 decode(default_info, iter);
402 } catch (buffer::error& err) {
403 ldout(cct, 0) << "error decoding data from " << pool << ":" << oid << dendl;
404 return -EIO;
405 }
406
407 return 0;
408 }
409
410 int RGWSystemMetaObj::read_default_id(string& default_id, bool old_format)
411 {
412 RGWDefaultSystemMetaObjInfo default_info;
413
414 int ret = read_default(default_info, get_default_oid(old_format));
415 if (ret < 0) {
416 return ret;
417 }
418
419 default_id = default_info.default_id;
420
421 return 0;
422 }
423
424 int RGWSystemMetaObj::use_default(bool old_format)
425 {
426 return read_default_id(id, old_format);
427 }
428
429 int RGWSystemMetaObj::set_as_default(bool exclusive)
430 {
431 using ceph::encode;
432 string oid = get_default_oid();
433
434 rgw_pool pool(get_pool(cct));
435 bufferlist bl;
436
437 RGWDefaultSystemMetaObjInfo default_info;
438 default_info.default_id = id;
439
440 encode(default_info, bl);
441
442 auto obj_ctx = sysobj_svc->init_obj_ctx();
443 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
444 int ret = sysobj.wop()
445 .set_exclusive(exclusive)
446 .write(bl);
447 if (ret < 0)
448 return ret;
449
450 return 0;
451 }
452
453 int RGWSystemMetaObj::read_id(const string& obj_name, string& object_id)
454 {
455 using ceph::decode;
456 rgw_pool pool(get_pool(cct));
457 bufferlist bl;
458
459 string oid = get_names_oid_prefix() + obj_name;
460
461 auto obj_ctx = sysobj_svc->init_obj_ctx();
462 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
463 int ret = sysobj.rop().read(&bl);
464 if (ret < 0) {
465 return ret;
466 }
467
468 RGWNameToId nameToId;
469 try {
470 auto iter = bl.cbegin();
471 decode(nameToId, iter);
472 } catch (buffer::error& err) {
473 ldout(cct, 0) << "ERROR: failed to decode obj from " << pool << ":" << oid << dendl;
474 return -EIO;
475 }
476 object_id = nameToId.obj_id;
477 return 0;
478 }
479
480 int RGWSystemMetaObj::delete_obj(bool old_format)
481 {
482 rgw_pool pool(get_pool(cct));
483
484 auto obj_ctx = sysobj_svc->init_obj_ctx();
485
486 /* check to see if obj is the default */
487 RGWDefaultSystemMetaObjInfo default_info;
488 int ret = read_default(default_info, get_default_oid(old_format));
489 if (ret < 0 && ret != -ENOENT)
490 return ret;
491 if (default_info.default_id == id || (old_format && default_info.default_id == name)) {
492 string oid = get_default_oid(old_format);
493 rgw_raw_obj default_named_obj(pool, oid);
494 auto sysobj = sysobj_svc->get_obj(obj_ctx, default_named_obj);
495 ret = sysobj.wop().remove();
496 if (ret < 0) {
497 ldout(cct, 0) << "Error delete default obj name " << name << ": " << cpp_strerror(-ret) << dendl;
498 return ret;
499 }
500 }
501 if (!old_format) {
502 string oid = get_names_oid_prefix() + name;
503 rgw_raw_obj object_name(pool, oid);
504 auto sysobj = sysobj_svc->get_obj(obj_ctx, object_name);
505 ret = sysobj.wop().remove();
506 if (ret < 0) {
507 ldout(cct, 0) << "Error delete obj name " << name << ": " << cpp_strerror(-ret) << dendl;
508 return ret;
509 }
510 }
511
512 string oid = get_info_oid_prefix(old_format);
513 if (old_format) {
514 oid += name;
515 } else {
516 oid += id;
517 }
518
519 rgw_raw_obj object_id(pool, oid);
520 auto sysobj = sysobj_svc->get_obj(obj_ctx, object_id);
521 ret = sysobj.wop().remove();
522 if (ret < 0) {
523 ldout(cct, 0) << "Error delete object id " << id << ": " << cpp_strerror(-ret) << dendl;
524 }
525
526 return ret;
527 }
528
529 int RGWSystemMetaObj::store_name(bool exclusive)
530 {
531 rgw_pool pool(get_pool(cct));
532 string oid = get_names_oid_prefix() + name;
533
534 RGWNameToId nameToId;
535 nameToId.obj_id = id;
536
537 bufferlist bl;
538 using ceph::encode;
539 encode(nameToId, bl);
540 auto obj_ctx = sysobj_svc->init_obj_ctx();
541 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
542 return sysobj.wop()
543 .set_exclusive(exclusive)
544 .write(bl);
545 }
546
547 int RGWSystemMetaObj::rename(const string& new_name)
548 {
549 string new_id;
550 int ret = read_id(new_name, new_id);
551 if (!ret) {
552 return -EEXIST;
553 }
554 if (ret < 0 && ret != -ENOENT) {
555 ldout(cct, 0) << "Error read_id " << new_name << ": " << cpp_strerror(-ret) << dendl;
556 return ret;
557 }
558 string old_name = name;
559 name = new_name;
560 ret = update();
561 if (ret < 0) {
562 ldout(cct, 0) << "Error storing new obj info " << new_name << ": " << cpp_strerror(-ret) << dendl;
563 return ret;
564 }
565 ret = store_name(true);
566 if (ret < 0) {
567 ldout(cct, 0) << "Error storing new name " << new_name << ": " << cpp_strerror(-ret) << dendl;
568 return ret;
569 }
570 /* delete old name */
571 rgw_pool pool(get_pool(cct));
572 string oid = get_names_oid_prefix() + old_name;
573 rgw_raw_obj old_name_obj(pool, oid);
574 auto obj_ctx = sysobj_svc->init_obj_ctx();
575 auto sysobj = sysobj_svc->get_obj(obj_ctx, old_name_obj);
576 ret = sysobj.wop().remove();
577 if (ret < 0) {
578 ldout(cct, 0) << "Error delete old obj name " << old_name << ": " << cpp_strerror(-ret) << dendl;
579 return ret;
580 }
581
582 return ret;
583 }
584
585 int RGWSystemMetaObj::read_info(const string& obj_id, bool old_format)
586 {
587 rgw_pool pool(get_pool(cct));
588
589 bufferlist bl;
590
591 string oid = get_info_oid_prefix(old_format) + obj_id;
592
593 auto obj_ctx = sysobj_svc->init_obj_ctx();
594 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
595 int ret = sysobj.rop().read(&bl);
596 if (ret < 0) {
597 ldout(cct, 0) << "failed reading obj info from " << pool << ":" << oid << ": " << cpp_strerror(-ret) << dendl;
598 return ret;
599 }
600 using ceph::decode;
601
602 try {
603 auto iter = bl.cbegin();
604 decode(*this, iter);
605 } catch (buffer::error& err) {
606 ldout(cct, 0) << "ERROR: failed to decode obj from " << pool << ":" << oid << dendl;
607 return -EIO;
608 }
609
610 return 0;
611 }
612
613 int RGWSystemMetaObj::read()
614 {
615 int ret = read_id(name, id);
616 if (ret < 0) {
617 return ret;
618 }
619
620 return read_info(id);
621 }
622
623 int RGWSystemMetaObj::create(bool exclusive)
624 {
625 int ret;
626
627 /* check to see the name is not used */
628 ret = read_id(name, id);
629 if (exclusive && ret == 0) {
630 ldout(cct, 10) << "ERROR: name " << name << " already in use for obj id " << id << dendl;
631 return -EEXIST;
632 } else if ( ret < 0 && ret != -ENOENT) {
633 ldout(cct, 0) << "failed reading obj id " << id << ": " << cpp_strerror(-ret) << dendl;
634 return ret;
635 }
636
637 if (id.empty()) {
638 /* create unique id */
639 uuid_d new_uuid;
640 char uuid_str[37];
641 new_uuid.generate_random();
642 new_uuid.print(uuid_str);
643 id = uuid_str;
644 }
645
646 ret = store_info(exclusive);
647 if (ret < 0) {
648 ldout(cct, 0) << "ERROR: storing info for " << id << ": " << cpp_strerror(-ret) << dendl;
649 return ret;
650 }
651
652 return store_name(exclusive);
653 }
654
655 int RGWSystemMetaObj::store_info(bool exclusive)
656 {
657 rgw_pool pool(get_pool(cct));
658
659 string oid = get_info_oid_prefix() + id;
660
661 bufferlist bl;
662 using ceph::encode;
663 encode(*this, bl);
664 auto obj_ctx = sysobj_svc->init_obj_ctx();
665 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
666 return sysobj.wop()
667 .set_exclusive(exclusive)
668 .write(bl);
669 }
670
671 int RGWSystemMetaObj::write(bool exclusive)
672 {
673 int ret = store_info(exclusive);
674 if (ret < 0) {
675 ldout(cct, 20) << __func__ << "(): store_info() returned ret=" << ret << dendl;
676 return ret;
677 }
678 ret = store_name(exclusive);
679 if (ret < 0) {
680 ldout(cct, 20) << __func__ << "(): store_name() returned ret=" << ret << dendl;
681 return ret;
682 }
683 return 0;
684 }
685
686
687 const string& RGWRealm::get_predefined_name(CephContext *cct) const {
688 return cct->_conf->rgw_realm;
689 }
690
691 int RGWRealm::create(bool exclusive)
692 {
693 int ret = RGWSystemMetaObj::create(exclusive);
694 if (ret < 0) {
695 ldout(cct, 0) << "ERROR creating new realm object " << name << ": " << cpp_strerror(-ret) << dendl;
696 return ret;
697 }
698 // create the control object for watch/notify
699 ret = create_control(exclusive);
700 if (ret < 0) {
701 ldout(cct, 0) << "ERROR creating control for new realm " << name << ": " << cpp_strerror(-ret) << dendl;
702 return ret;
703 }
704 RGWPeriod period;
705 if (current_period.empty()) {
706 /* create new period for the realm */
707 ret = period.init(cct, sysobj_svc, id, name, false);
708 if (ret < 0 ) {
709 return ret;
710 }
711 ret = period.create(true);
712 if (ret < 0) {
713 ldout(cct, 0) << "ERROR: creating new period for realm " << name << ": " << cpp_strerror(-ret) << dendl;
714 return ret;
715 }
716 } else {
717 period = RGWPeriod(current_period, 0);
718 int ret = period.init(cct, sysobj_svc, id, name);
719 if (ret < 0) {
720 ldout(cct, 0) << "ERROR: failed to init period " << current_period << dendl;
721 return ret;
722 }
723 }
724 ret = set_current_period(period);
725 if (ret < 0) {
726 ldout(cct, 0) << "ERROR: failed set current period " << current_period << dendl;
727 return ret;
728 }
729 // try to set as default. may race with another create, so pass exclusive=true
730 // so we don't override an existing default
731 ret = set_as_default(true);
732 if (ret < 0 && ret != -EEXIST) {
733 ldout(cct, 0) << "WARNING: failed to set realm as default realm, ret=" << ret << dendl;
734 }
735
736 return 0;
737 }
738
739 int RGWRealm::delete_obj()
740 {
741 int ret = RGWSystemMetaObj::delete_obj();
742 if (ret < 0) {
743 return ret;
744 }
745 return delete_control();
746 }
747
748 int RGWRealm::create_control(bool exclusive)
749 {
750 auto pool = rgw_pool{get_pool(cct)};
751 auto oid = get_control_oid();
752 bufferlist bl;
753 auto obj_ctx = sysobj_svc->init_obj_ctx();
754 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
755 return sysobj.wop()
756 .set_exclusive(exclusive)
757 .write(bl);
758 }
759
760 int RGWRealm::delete_control()
761 {
762 auto pool = rgw_pool{get_pool(cct)};
763 auto obj = rgw_raw_obj{pool, get_control_oid()};
764 auto obj_ctx = sysobj_svc->init_obj_ctx();
765 auto sysobj = sysobj_svc->get_obj(obj_ctx, obj);
766 return sysobj.wop().remove();
767 }
768
769 rgw_pool RGWRealm::get_pool(CephContext *cct) const
770 {
771 if (cct->_conf->rgw_realm_root_pool.empty()) {
772 return rgw_pool(RGW_DEFAULT_REALM_ROOT_POOL);
773 }
774 return rgw_pool(cct->_conf->rgw_realm_root_pool);
775 }
776
777 const string RGWRealm::get_default_oid(bool old_format) const
778 {
779 if (cct->_conf->rgw_default_realm_info_oid.empty()) {
780 return default_realm_info_oid;
781 }
782 return cct->_conf->rgw_default_realm_info_oid;
783 }
784
785 const string& RGWRealm::get_names_oid_prefix() const
786 {
787 return realm_names_oid_prefix;
788 }
789
790 const string& RGWRealm::get_info_oid_prefix(bool old_format) const
791 {
792 return realm_info_oid_prefix;
793 }
794
795 int RGWRealm::set_current_period(RGWPeriod& period)
796 {
797 // update realm epoch to match the period's
798 if (epoch > period.get_realm_epoch()) {
799 ldout(cct, 0) << "ERROR: set_current_period with old realm epoch "
800 << period.get_realm_epoch() << ", current epoch=" << epoch << dendl;
801 return -EINVAL;
802 }
803 if (epoch == period.get_realm_epoch() && current_period != period.get_id()) {
804 ldout(cct, 0) << "ERROR: set_current_period with same realm epoch "
805 << period.get_realm_epoch() << ", but different period id "
806 << period.get_id() << " != " << current_period << dendl;
807 return -EINVAL;
808 }
809
810 epoch = period.get_realm_epoch();
811 current_period = period.get_id();
812
813 int ret = update();
814 if (ret < 0) {
815 ldout(cct, 0) << "ERROR: period update: " << cpp_strerror(-ret) << dendl;
816 return ret;
817 }
818
819 ret = period.reflect();
820 if (ret < 0) {
821 ldout(cct, 0) << "ERROR: period.reflect(): " << cpp_strerror(-ret) << dendl;
822 return ret;
823 }
824
825 return 0;
826 }
827
828 string RGWRealm::get_control_oid() const
829 {
830 return get_info_oid_prefix() + id + ".control";
831 }
832
833 int RGWRealm::notify_zone(bufferlist& bl)
834 {
835 rgw_pool pool{get_pool(cct)};
836 auto obj_ctx = sysobj_svc->init_obj_ctx();
837 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, get_control_oid()});
838 int ret = sysobj.wn().notify(bl, 0, nullptr);
839 if (ret < 0) {
840 return ret;
841 }
842 return 0;
843 }
844
845 int RGWRealm::notify_new_period(const RGWPeriod& period)
846 {
847 bufferlist bl;
848 using ceph::encode;
849 // push the period to dependent zonegroups/zones
850 encode(RGWRealmNotify::ZonesNeedPeriod, bl);
851 encode(period, bl);
852 // reload the gateway with the new period
853 encode(RGWRealmNotify::Reload, bl);
854
855 return notify_zone(bl);
856 }
857
858 std::string RGWPeriodConfig::get_oid(const std::string& realm_id)
859 {
860 if (realm_id.empty()) {
861 return "period_config.default";
862 }
863 return "period_config." + realm_id;
864 }
865
866 rgw_pool RGWPeriodConfig::get_pool(CephContext *cct)
867 {
868 const auto& pool_name = cct->_conf->rgw_period_root_pool;
869 if (pool_name.empty()) {
870 return {RGW_DEFAULT_PERIOD_ROOT_POOL};
871 }
872 return {pool_name};
873 }
874
875 int RGWPeriodConfig::read(RGWSI_SysObj *sysobj_svc, const std::string& realm_id)
876 {
877 const auto& pool = get_pool(sysobj_svc->ctx());
878 const auto& oid = get_oid(realm_id);
879 bufferlist bl;
880
881 auto obj_ctx = sysobj_svc->init_obj_ctx();
882 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
883 int ret = sysobj.rop().read(&bl);
884 if (ret < 0) {
885 return ret;
886 }
887 using ceph::decode;
888 try {
889 auto iter = bl.cbegin();
890 decode(*this, iter);
891 } catch (buffer::error& err) {
892 return -EIO;
893 }
894 return 0;
895 }
896
897 int RGWPeriodConfig::write(RGWSI_SysObj *sysobj_svc, const std::string& realm_id)
898 {
899 const auto& pool = get_pool(sysobj_svc->ctx());
900 const auto& oid = get_oid(realm_id);
901 bufferlist bl;
902 using ceph::encode;
903 encode(*this, bl);
904 auto obj_ctx = sysobj_svc->init_obj_ctx();
905 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
906 return sysobj.wop()
907 .set_exclusive(false)
908 .write(bl);
909 }
910
911 int RGWPeriod::init(CephContext *_cct, RGWSI_SysObj *_sysobj_svc, const string& period_realm_id,
912 const string& period_realm_name, bool setup_obj)
913 {
914 cct = _cct;
915 sysobj_svc = _sysobj_svc;
916
917 realm_id = period_realm_id;
918 realm_name = period_realm_name;
919
920 if (!setup_obj)
921 return 0;
922
923 return init(_cct, _sysobj_svc, setup_obj);
924 }
925
926
927 int RGWPeriod::init(CephContext *_cct, RGWSI_SysObj *_sysobj_svc, bool setup_obj)
928 {
929 cct = _cct;
930 sysobj_svc = _sysobj_svc;
931
932 if (!setup_obj)
933 return 0;
934
935 if (id.empty()) {
936 RGWRealm realm(realm_id, realm_name);
937 int ret = realm.init(cct, sysobj_svc);
938 if (ret < 0) {
939 ldout(cct, 0) << "RGWPeriod::init failed to init realm " << realm_name << " id " << realm_id << " : " <<
940 cpp_strerror(-ret) << dendl;
941 return ret;
942 }
943 id = realm.get_current_period();
944 realm_id = realm.get_id();
945 }
946
947 if (!epoch) {
948 int ret = use_latest_epoch();
949 if (ret < 0) {
950 ldout(cct, 0) << "failed to use_latest_epoch period id " << id << " realm " << realm_name << " id " << realm_id
951 << " : " << cpp_strerror(-ret) << dendl;
952 return ret;
953 }
954 }
955
956 return read_info();
957 }
958
959
960 int RGWPeriod::get_zonegroup(RGWZoneGroup& zonegroup,
961 const string& zonegroup_id) const
962 {
963 map<string, RGWZoneGroup>::const_iterator iter;
964 if (!zonegroup_id.empty()) {
965 iter = period_map.zonegroups.find(zonegroup_id);
966 } else {
967 iter = period_map.zonegroups.find("default");
968 }
969 if (iter != period_map.zonegroups.end()) {
970 zonegroup = iter->second;
971 return 0;
972 }
973
974 return -ENOENT;
975 }
976
977 const string& RGWPeriod::get_latest_epoch_oid() const
978 {
979 if (cct->_conf->rgw_period_latest_epoch_info_oid.empty()) {
980 return period_latest_epoch_info_oid;
981 }
982 return cct->_conf->rgw_period_latest_epoch_info_oid;
983 }
984
985 const string& RGWPeriod::get_info_oid_prefix() const
986 {
987 return period_info_oid_prefix;
988 }
989
990 const string RGWPeriod::get_period_oid_prefix() const
991 {
992 return get_info_oid_prefix() + id;
993 }
994
995 const string RGWPeriod::get_period_oid() const
996 {
997 std::ostringstream oss;
998 oss << get_period_oid_prefix();
999 // skip the epoch for the staging period
1000 if (id != get_staging_id(realm_id))
1001 oss << "." << epoch;
1002 return oss.str();
1003 }
1004
1005 int RGWPeriod::read_latest_epoch(RGWPeriodLatestEpochInfo& info,
1006 RGWObjVersionTracker *objv)
1007 {
1008 string oid = get_period_oid_prefix() + get_latest_epoch_oid();
1009
1010 rgw_pool pool(get_pool(cct));
1011 bufferlist bl;
1012 auto obj_ctx = sysobj_svc->init_obj_ctx();
1013 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
1014 int ret = sysobj.rop().read(&bl);
1015 if (ret < 0) {
1016 ldout(cct, 1) << "error read_lastest_epoch " << pool << ":" << oid << dendl;
1017 return ret;
1018 }
1019 try {
1020 auto iter = bl.cbegin();
1021 using ceph::decode;
1022 decode(info, iter);
1023 } catch (buffer::error& err) {
1024 ldout(cct, 0) << "error decoding data from " << pool << ":" << oid << dendl;
1025 return -EIO;
1026 }
1027
1028 return 0;
1029 }
1030
1031 int RGWPeriod::get_latest_epoch(epoch_t& latest_epoch)
1032 {
1033 RGWPeriodLatestEpochInfo info;
1034
1035 int ret = read_latest_epoch(info);
1036 if (ret < 0) {
1037 return ret;
1038 }
1039
1040 latest_epoch = info.epoch;
1041
1042 return 0;
1043 }
1044
1045 int RGWPeriod::use_latest_epoch()
1046 {
1047 RGWPeriodLatestEpochInfo info;
1048 int ret = read_latest_epoch(info);
1049 if (ret < 0) {
1050 return ret;
1051 }
1052
1053 epoch = info.epoch;
1054
1055 return 0;
1056 }
1057
1058 int RGWPeriod::set_latest_epoch(epoch_t epoch, bool exclusive,
1059 RGWObjVersionTracker *objv)
1060 {
1061 string oid = get_period_oid_prefix() + get_latest_epoch_oid();
1062
1063 rgw_pool pool(get_pool(cct));
1064 bufferlist bl;
1065
1066 RGWPeriodLatestEpochInfo info;
1067 info.epoch = epoch;
1068
1069 using ceph::encode;
1070 encode(info, bl);
1071
1072 auto obj_ctx = sysobj_svc->init_obj_ctx();
1073 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
1074 return sysobj.wop()
1075 .set_exclusive(exclusive)
1076 .write(bl);
1077 }
1078
1079 int RGWPeriod::update_latest_epoch(epoch_t epoch)
1080 {
1081 static constexpr int MAX_RETRIES = 20;
1082
1083 for (int i = 0; i < MAX_RETRIES; i++) {
1084 RGWPeriodLatestEpochInfo info;
1085 RGWObjVersionTracker objv;
1086 bool exclusive = false;
1087
1088 // read existing epoch
1089 int r = read_latest_epoch(info, &objv);
1090 if (r == -ENOENT) {
1091 // use an exclusive create to set the epoch atomically
1092 exclusive = true;
1093 ldout(cct, 20) << "creating initial latest_epoch=" << epoch
1094 << " for period=" << id << dendl;
1095 } else if (r < 0) {
1096 ldout(cct, 0) << "ERROR: failed to read latest_epoch" << dendl;
1097 return r;
1098 } else if (epoch <= info.epoch) {
1099 r = -EEXIST; // fail with EEXIST if epoch is not newer
1100 ldout(cct, 1) << "found existing latest_epoch " << info.epoch
1101 << " >= given epoch " << epoch << ", returning r=" << r << dendl;
1102 return r;
1103 } else {
1104 ldout(cct, 20) << "updating latest_epoch from " << info.epoch
1105 << " -> " << epoch << " on period=" << id << dendl;
1106 }
1107
1108 r = set_latest_epoch(epoch, exclusive, &objv);
1109 if (r == -EEXIST) {
1110 continue; // exclusive create raced with another update, retry
1111 } else if (r == -ECANCELED) {
1112 continue; // write raced with a conflicting version, retry
1113 }
1114 if (r < 0) {
1115 ldout(cct, 0) << "ERROR: failed to write latest_epoch" << dendl;
1116 return r;
1117 }
1118 return 0; // return success
1119 }
1120
1121 return -ECANCELED; // fail after max retries
1122 }
1123
1124 int RGWPeriod::delete_obj()
1125 {
1126 rgw_pool pool(get_pool(cct));
1127
1128 // delete the object for each period epoch
1129 for (epoch_t e = 1; e <= epoch; e++) {
1130 RGWPeriod p{get_id(), e};
1131 rgw_raw_obj oid{pool, p.get_period_oid()};
1132 auto obj_ctx = sysobj_svc->init_obj_ctx();
1133 auto sysobj = sysobj_svc->get_obj(obj_ctx, oid);
1134 int ret = sysobj.wop().remove();
1135 if (ret < 0) {
1136 ldout(cct, 0) << "WARNING: failed to delete period object " << oid
1137 << ": " << cpp_strerror(-ret) << dendl;
1138 }
1139 }
1140
1141 // delete the .latest_epoch object
1142 rgw_raw_obj oid{pool, get_period_oid_prefix() + get_latest_epoch_oid()};
1143 auto obj_ctx = sysobj_svc->init_obj_ctx();
1144 auto sysobj = sysobj_svc->get_obj(obj_ctx, oid);
1145 int ret = sysobj.wop().remove();
1146 if (ret < 0) {
1147 ldout(cct, 0) << "WARNING: failed to delete period object " << oid
1148 << ": " << cpp_strerror(-ret) << dendl;
1149 }
1150 return ret;
1151 }
1152
1153 int RGWPeriod::read_info()
1154 {
1155 rgw_pool pool(get_pool(cct));
1156
1157 bufferlist bl;
1158
1159 auto obj_ctx = sysobj_svc->init_obj_ctx();
1160 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, get_period_oid()});
1161 int ret = sysobj.rop().read(&bl);
1162 if (ret < 0) {
1163 ldout(cct, 0) << "failed reading obj info from " << pool << ":" << get_period_oid() << ": " << cpp_strerror(-ret) << dendl;
1164 return ret;
1165 }
1166
1167 try {
1168 using ceph::decode;
1169 auto iter = bl.cbegin();
1170 decode(*this, iter);
1171 } catch (buffer::error& err) {
1172 ldout(cct, 0) << "ERROR: failed to decode obj from " << pool << ":" << get_period_oid() << dendl;
1173 return -EIO;
1174 }
1175
1176 return 0;
1177 }
1178
1179 int RGWPeriod::create(bool exclusive)
1180 {
1181 int ret;
1182
1183 /* create unique id */
1184 uuid_d new_uuid;
1185 char uuid_str[37];
1186 new_uuid.generate_random();
1187 new_uuid.print(uuid_str);
1188 id = uuid_str;
1189
1190 epoch = FIRST_EPOCH;
1191
1192 period_map.id = id;
1193
1194 ret = store_info(exclusive);
1195 if (ret < 0) {
1196 ldout(cct, 0) << "ERROR: storing info for " << id << ": " << cpp_strerror(-ret) << dendl;
1197 return ret;
1198 }
1199
1200 ret = set_latest_epoch(epoch);
1201 if (ret < 0) {
1202 ldout(cct, 0) << "ERROR: setting latest epoch " << id << ": " << cpp_strerror(-ret) << dendl;
1203 }
1204
1205 return ret;
1206 }
1207
1208 int RGWPeriod::store_info(bool exclusive)
1209 {
1210 rgw_pool pool(get_pool(cct));
1211
1212 string oid = get_period_oid();
1213 bufferlist bl;
1214 using ceph::encode;
1215 encode(*this, bl);
1216
1217 auto obj_ctx = sysobj_svc->init_obj_ctx();
1218 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
1219 return sysobj.wop()
1220 .set_exclusive(exclusive)
1221 .write(bl);
1222 }
1223
1224 rgw_pool RGWPeriod::get_pool(CephContext *cct) const
1225 {
1226 if (cct->_conf->rgw_period_root_pool.empty()) {
1227 return rgw_pool(RGW_DEFAULT_PERIOD_ROOT_POOL);
1228 }
1229 return rgw_pool(cct->_conf->rgw_period_root_pool);
1230 }
1231
1232 int RGWPeriod::add_zonegroup(const RGWZoneGroup& zonegroup)
1233 {
1234 if (zonegroup.realm_id != realm_id) {
1235 return 0;
1236 }
1237 int ret = period_map.update(zonegroup, cct);
1238 if (ret < 0) {
1239 ldout(cct, 0) << "ERROR: updating period map: " << cpp_strerror(-ret) << dendl;
1240 return ret;
1241 }
1242
1243 return store_info(false);
1244 }
1245
1246 int RGWPeriod::update()
1247 {
1248 auto zone_svc = sysobj_svc->get_zone_svc();
1249 ldout(cct, 20) << __func__ << " realm " << realm_id << " period " << get_id() << dendl;
1250 list<string> zonegroups;
1251 int ret = zone_svc->list_zonegroups(zonegroups);
1252 if (ret < 0) {
1253 ldout(cct, 0) << "ERROR: failed to list zonegroups: " << cpp_strerror(-ret) << dendl;
1254 return ret;
1255 }
1256
1257 // clear zone short ids of removed zones. period_map.update() will add the
1258 // remaining zones back
1259 period_map.short_zone_ids.clear();
1260
1261 for (auto& iter : zonegroups) {
1262 RGWZoneGroup zg(string(), iter);
1263 ret = zg.init(cct, sysobj_svc);
1264 if (ret < 0) {
1265 ldout(cct, 0) << "WARNING: zg.init() failed: " << cpp_strerror(-ret) << dendl;
1266 continue;
1267 }
1268
1269 if (zg.realm_id != realm_id) {
1270 ldout(cct, 20) << "skipping zonegroup " << zg.get_name() << " zone realm id " << zg.realm_id << ", not on our realm " << realm_id << dendl;
1271 continue;
1272 }
1273
1274 if (zg.master_zone.empty()) {
1275 ldout(cct, 0) << "ERROR: zonegroup " << zg.get_name() << " should have a master zone " << dendl;
1276 return -EINVAL;
1277 }
1278
1279 if (zg.zones.find(zg.master_zone) == zg.zones.end()) {
1280 ldout(cct,0) << "ERROR: zonegroup " << zg.get_name()
1281 << " has a non existent master zone "<< dendl;
1282 return -EINVAL;
1283 }
1284
1285 if (zg.is_master_zonegroup()) {
1286 master_zonegroup = zg.get_id();
1287 master_zone = zg.master_zone;
1288 }
1289
1290 int ret = period_map.update(zg, cct);
1291 if (ret < 0) {
1292 return ret;
1293 }
1294 }
1295
1296 ret = period_config.read(sysobj_svc, realm_id);
1297 if (ret < 0 && ret != -ENOENT) {
1298 ldout(cct, 0) << "ERROR: failed to read period config: "
1299 << cpp_strerror(ret) << dendl;
1300 return ret;
1301 }
1302 return 0;
1303 }
1304
1305 int RGWPeriod::reflect()
1306 {
1307 for (auto& iter : period_map.zonegroups) {
1308 RGWZoneGroup& zg = iter.second;
1309 zg.reinit_instance(cct, sysobj_svc);
1310 int r = zg.write(false);
1311 if (r < 0) {
1312 ldout(cct, 0) << "ERROR: failed to store zonegroup info for zonegroup=" << iter.first << ": " << cpp_strerror(-r) << dendl;
1313 return r;
1314 }
1315 if (zg.is_master_zonegroup()) {
1316 // set master as default if no default exists
1317 r = zg.set_as_default(true);
1318 if (r == 0) {
1319 ldout(cct, 1) << "Set the period's master zonegroup " << zg.get_id()
1320 << " as the default" << dendl;
1321 }
1322 }
1323 }
1324
1325 int r = period_config.write(sysobj_svc, realm_id);
1326 if (r < 0) {
1327 ldout(cct, 0) << "ERROR: failed to store period config: "
1328 << cpp_strerror(-r) << dendl;
1329 return r;
1330 }
1331 return 0;
1332 }
1333
1334 void RGWPeriod::fork()
1335 {
1336 ldout(cct, 20) << __func__ << " realm " << realm_id << " period " << id << dendl;
1337 predecessor_uuid = id;
1338 id = get_staging_id(realm_id);
1339 period_map.reset();
1340 realm_epoch++;
1341 }
1342
1343 static int read_sync_status(RGWRados *store, rgw_meta_sync_status *sync_status)
1344 {
1345 // initialize a sync status manager to read the status
1346 RGWMetaSyncStatusManager mgr(store, store->get_async_rados());
1347 int r = mgr.init();
1348 if (r < 0) {
1349 return r;
1350 }
1351 r = mgr.read_sync_status(sync_status);
1352 mgr.stop();
1353 return r;
1354 }
1355
1356 int RGWPeriod::update_sync_status(RGWRados *store, /* for now */
1357 const RGWPeriod &current_period,
1358 std::ostream& error_stream,
1359 bool force_if_stale)
1360 {
1361 rgw_meta_sync_status status;
1362 int r = read_sync_status(store, &status);
1363 if (r < 0) {
1364 ldout(cct, 0) << "period failed to read sync status: "
1365 << cpp_strerror(-r) << dendl;
1366 return r;
1367 }
1368
1369 std::vector<std::string> markers;
1370
1371 const auto current_epoch = current_period.get_realm_epoch();
1372 if (current_epoch != status.sync_info.realm_epoch) {
1373 // no sync status markers for the current period
1374 ceph_assert(current_epoch > status.sync_info.realm_epoch);
1375 const int behind = current_epoch - status.sync_info.realm_epoch;
1376 if (!force_if_stale && current_epoch > 1) {
1377 error_stream << "ERROR: This zone is " << behind << " period(s) behind "
1378 "the current master zone in metadata sync. If this zone is promoted "
1379 "to master, any metadata changes during that time are likely to "
1380 "be lost.\n"
1381 "Waiting for this zone to catch up on metadata sync (see "
1382 "'radosgw-admin sync status') is recommended.\n"
1383 "To promote this zone to master anyway, add the flag "
1384 "--yes-i-really-mean-it." << std::endl;
1385 return -EINVAL;
1386 }
1387 // empty sync status markers - other zones will skip this period during
1388 // incremental metadata sync
1389 markers.resize(status.sync_info.num_shards);
1390 } else {
1391 markers.reserve(status.sync_info.num_shards);
1392 for (auto& i : status.sync_markers) {
1393 auto& marker = i.second;
1394 // filter out markers from other periods
1395 if (marker.realm_epoch != current_epoch) {
1396 marker.marker.clear();
1397 }
1398 markers.emplace_back(std::move(marker.marker));
1399 }
1400 }
1401
1402 std::swap(sync_status, markers);
1403 return 0;
1404 }
1405
1406 int RGWPeriod::commit(RGWRados *store,
1407 RGWRealm& realm, const RGWPeriod& current_period,
1408 std::ostream& error_stream, bool force_if_stale)
1409 {
1410 auto zone_svc = sysobj_svc->get_zone_svc();
1411 ldout(cct, 20) << __func__ << " realm " << realm.get_id() << " period " << current_period.get_id() << dendl;
1412 // gateway must be in the master zone to commit
1413 if (master_zone != zone_svc->get_zone_params().get_id()) {
1414 error_stream << "Cannot commit period on zone "
1415 << zone_svc->get_zone_params().get_id() << ", it must be sent to "
1416 "the period's master zone " << master_zone << '.' << std::endl;
1417 return -EINVAL;
1418 }
1419 // period predecessor must match current period
1420 if (predecessor_uuid != current_period.get_id()) {
1421 error_stream << "Period predecessor " << predecessor_uuid
1422 << " does not match current period " << current_period.get_id()
1423 << ". Use 'period pull' to get the latest period from the master, "
1424 "reapply your changes, and try again." << std::endl;
1425 return -EINVAL;
1426 }
1427 // realm epoch must be 1 greater than current period
1428 if (realm_epoch != current_period.get_realm_epoch() + 1) {
1429 error_stream << "Period's realm epoch " << realm_epoch
1430 << " does not come directly after current realm epoch "
1431 << current_period.get_realm_epoch() << ". Use 'realm pull' to get the "
1432 "latest realm and period from the master zone, reapply your changes, "
1433 "and try again." << std::endl;
1434 return -EINVAL;
1435 }
1436 // did the master zone change?
1437 if (master_zone != current_period.get_master_zone()) {
1438 // store the current metadata sync status in the period
1439 int r = update_sync_status(store, current_period, error_stream, force_if_stale);
1440 if (r < 0) {
1441 ldout(cct, 0) << "failed to update metadata sync status: "
1442 << cpp_strerror(-r) << dendl;
1443 return r;
1444 }
1445 // create an object with a new period id
1446 r = create(true);
1447 if (r < 0) {
1448 ldout(cct, 0) << "failed to create new period: " << cpp_strerror(-r) << dendl;
1449 return r;
1450 }
1451 // set as current period
1452 r = realm.set_current_period(*this);
1453 if (r < 0) {
1454 ldout(cct, 0) << "failed to update realm's current period: "
1455 << cpp_strerror(-r) << dendl;
1456 return r;
1457 }
1458 ldout(cct, 4) << "Promoted to master zone and committed new period "
1459 << id << dendl;
1460 realm.notify_new_period(*this);
1461 return 0;
1462 }
1463 // period must be based on current epoch
1464 if (epoch != current_period.get_epoch()) {
1465 error_stream << "Period epoch " << epoch << " does not match "
1466 "predecessor epoch " << current_period.get_epoch()
1467 << ". Use 'period pull' to get the latest epoch from the master zone, "
1468 "reapply your changes, and try again." << std::endl;
1469 return -EINVAL;
1470 }
1471 // set period as next epoch
1472 set_id(current_period.get_id());
1473 set_epoch(current_period.get_epoch() + 1);
1474 set_predecessor(current_period.get_predecessor());
1475 realm_epoch = current_period.get_realm_epoch();
1476 // write the period to rados
1477 int r = store_info(false);
1478 if (r < 0) {
1479 ldout(cct, 0) << "failed to store period: " << cpp_strerror(-r) << dendl;
1480 return r;
1481 }
1482 // set as latest epoch
1483 r = update_latest_epoch(epoch);
1484 if (r == -EEXIST) {
1485 // already have this epoch (or a more recent one)
1486 return 0;
1487 }
1488 if (r < 0) {
1489 ldout(cct, 0) << "failed to set latest epoch: " << cpp_strerror(-r) << dendl;
1490 return r;
1491 }
1492 r = reflect();
1493 if (r < 0) {
1494 ldout(cct, 0) << "failed to update local objects: " << cpp_strerror(-r) << dendl;
1495 return r;
1496 }
1497 ldout(cct, 4) << "Committed new epoch " << epoch
1498 << " for period " << id << dendl;
1499 realm.notify_new_period(*this);
1500 return 0;
1501 }
1502
1503 int RGWZoneParams::create_default(bool old_format)
1504 {
1505 name = default_zone_name;
1506
1507 int r = create();
1508 if (r < 0) {
1509 return r;
1510 }
1511
1512 if (old_format) {
1513 name = id;
1514 }
1515
1516 return r;
1517 }
1518
1519
1520 int get_zones_pool_set(CephContext* cct,
1521 RGWSI_SysObj* sysobj_svc,
1522 const list<string>& zones,
1523 const string& my_zone_id,
1524 set<rgw_pool>& pool_names)
1525 {
1526 for(auto const& iter : zones) {
1527 RGWZoneParams zone(iter);
1528 int r = zone.init(cct, sysobj_svc);
1529 if (r < 0) {
1530 ldout(cct, 0) << "Error: init zone " << iter << ":" << cpp_strerror(-r) << dendl;
1531 return r;
1532 }
1533 if (zone.get_id() != my_zone_id) {
1534 pool_names.insert(zone.domain_root);
1535 pool_names.insert(zone.metadata_heap);
1536 pool_names.insert(zone.control_pool);
1537 pool_names.insert(zone.gc_pool);
1538 pool_names.insert(zone.log_pool);
1539 pool_names.insert(zone.intent_log_pool);
1540 pool_names.insert(zone.usage_log_pool);
1541 pool_names.insert(zone.user_keys_pool);
1542 pool_names.insert(zone.user_email_pool);
1543 pool_names.insert(zone.user_swift_pool);
1544 pool_names.insert(zone.user_uid_pool);
1545 pool_names.insert(zone.otp_pool);
1546 pool_names.insert(zone.roles_pool);
1547 pool_names.insert(zone.reshard_pool);
1548 for(auto& iter : zone.placement_pools) {
1549 pool_names.insert(iter.second.index_pool);
1550 for (auto& pi : iter.second.storage_classes.get_all()) {
1551 if (pi.second.data_pool) {
1552 pool_names.insert(pi.second.data_pool.get());
1553 }
1554 }
1555 pool_names.insert(iter.second.data_extra_pool);
1556 }
1557 }
1558 }
1559 return 0;
1560 }
1561
1562 rgw_pool fix_zone_pool_dup(set<rgw_pool> pools,
1563 const string& default_prefix,
1564 const string& default_suffix,
1565 const rgw_pool& suggested_pool)
1566 {
1567 string suggested_name = suggested_pool.to_str();
1568
1569 string prefix = default_prefix;
1570 string suffix = default_suffix;
1571
1572 if (!suggested_pool.empty()) {
1573 prefix = suggested_name.substr(0, suggested_name.find("."));
1574 suffix = suggested_name.substr(prefix.length());
1575 }
1576
1577 rgw_pool pool(prefix + suffix);
1578
1579 if (pools.find(pool) == pools.end()) {
1580 return pool;
1581 } else {
1582 while(true) {
1583 pool = prefix + "_" + std::to_string(std::rand()) + suffix;
1584 if (pools.find(pool) == pools.end()) {
1585 return pool;
1586 }
1587 }
1588 }
1589 }
1590
1591 int RGWZoneParams::fix_pool_names()
1592 {
1593
1594 list<string> zones;
1595 int r = zone_svc->list_zones(zones);
1596 if (r < 0) {
1597 ldout(cct, 10) << "WARNING: store->list_zones() returned r=" << r << dendl;
1598 }
1599
1600 set<rgw_pool> pools;
1601 r = get_zones_pool_set(cct, sysobj_svc, zones, id, pools);
1602 if (r < 0) {
1603 ldout(cct, 0) << "Error: get_zones_pool_names" << r << dendl;
1604 return r;
1605 }
1606
1607 domain_root = fix_zone_pool_dup(pools, name, ".rgw.meta:root", domain_root);
1608 if (!metadata_heap.name.empty()) {
1609 metadata_heap = fix_zone_pool_dup(pools, name, ".rgw.meta:heap", metadata_heap);
1610 }
1611 control_pool = fix_zone_pool_dup(pools, name, ".rgw.control", control_pool);
1612 gc_pool = fix_zone_pool_dup(pools, name ,".rgw.log:gc", gc_pool);
1613 lc_pool = fix_zone_pool_dup(pools, name ,".rgw.log:lc", lc_pool);
1614 log_pool = fix_zone_pool_dup(pools, name, ".rgw.log", log_pool);
1615 intent_log_pool = fix_zone_pool_dup(pools, name, ".rgw.log:intent", intent_log_pool);
1616 usage_log_pool = fix_zone_pool_dup(pools, name, ".rgw.log:usage", usage_log_pool);
1617 user_keys_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:users.keys", user_keys_pool);
1618 user_email_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:users.email", user_email_pool);
1619 user_swift_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:users.swift", user_swift_pool);
1620 user_uid_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:users.uid", user_uid_pool);
1621 roles_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:roles", roles_pool);
1622 reshard_pool = fix_zone_pool_dup(pools, name, ".rgw.log:reshard", reshard_pool);
1623 otp_pool = fix_zone_pool_dup(pools, name, ".rgw.otp", otp_pool);
1624
1625 for(auto& iter : placement_pools) {
1626 iter.second.index_pool = fix_zone_pool_dup(pools, name, "." + default_bucket_index_pool_suffix,
1627 iter.second.index_pool);
1628 for (auto& pi : iter.second.storage_classes.get_all()) {
1629 if (pi.second.data_pool) {
1630 rgw_pool& pool = pi.second.data_pool.get();
1631 pool = fix_zone_pool_dup(pools, name, "." + default_storage_pool_suffix,
1632 pool);
1633 }
1634 }
1635 iter.second.data_extra_pool= fix_zone_pool_dup(pools, name, "." + default_storage_extra_pool_suffix,
1636 iter.second.data_extra_pool);
1637 }
1638
1639 return 0;
1640 }
1641
1642 int RGWZoneParams::create(bool exclusive)
1643 {
1644 /* check for old pools config */
1645 rgw_raw_obj obj(domain_root, avail_pools);
1646 auto obj_ctx = sysobj_svc->init_obj_ctx();
1647 auto sysobj = sysobj_svc->get_obj(obj_ctx, obj);
1648 int r = sysobj.rop().stat();
1649 if (r < 0) {
1650 ldout(cct, 10) << "couldn't find old data placement pools config, setting up new ones for the zone" << dendl;
1651 /* a new system, let's set new placement info */
1652 RGWZonePlacementInfo default_placement;
1653 default_placement.index_pool = name + "." + default_bucket_index_pool_suffix;
1654 rgw_pool pool = name + "." + default_storage_pool_suffix;
1655 default_placement.storage_classes.set_storage_class(RGW_STORAGE_CLASS_STANDARD, &pool, nullptr);
1656 default_placement.data_extra_pool = name + "." + default_storage_extra_pool_suffix;
1657 placement_pools["default-placement"] = default_placement;
1658 }
1659
1660 r = fix_pool_names();
1661 if (r < 0) {
1662 ldout(cct, 0) << "ERROR: fix_pool_names returned r=" << r << dendl;
1663 return r;
1664 }
1665
1666 r = RGWSystemMetaObj::create(exclusive);
1667 if (r < 0) {
1668 return r;
1669 }
1670
1671 // try to set as default. may race with another create, so pass exclusive=true
1672 // so we don't override an existing default
1673 r = set_as_default(true);
1674 if (r < 0 && r != -EEXIST) {
1675 ldout(cct, 10) << "WARNING: failed to set zone as default, r=" << r << dendl;
1676 }
1677
1678 return 0;
1679 }
1680
1681 rgw_pool RGWZoneParams::get_pool(CephContext *cct) const
1682 {
1683 if (cct->_conf->rgw_zone_root_pool.empty()) {
1684 return rgw_pool(RGW_DEFAULT_ZONE_ROOT_POOL);
1685 }
1686
1687 return rgw_pool(cct->_conf->rgw_zone_root_pool);
1688 }
1689
1690 const string RGWZoneParams::get_default_oid(bool old_format) const
1691 {
1692 if (old_format) {
1693 return cct->_conf->rgw_default_zone_info_oid;
1694 }
1695
1696 return cct->_conf->rgw_default_zone_info_oid + "." + realm_id;
1697 }
1698
1699 const string& RGWZoneParams::get_names_oid_prefix() const
1700 {
1701 return zone_names_oid_prefix;
1702 }
1703
1704 const string& RGWZoneParams::get_info_oid_prefix(bool old_format) const
1705 {
1706 return zone_info_oid_prefix;
1707 }
1708
1709 const string& RGWZoneParams::get_predefined_name(CephContext *cct) const {
1710 return cct->_conf->rgw_zone;
1711 }
1712
1713 int RGWZoneParams::init(CephContext *cct, RGWSI_SysObj *sysobj_svc, bool setup_obj, bool old_format)
1714 {
1715 if (name.empty()) {
1716 name = cct->_conf->rgw_zone;
1717 }
1718
1719 return RGWSystemMetaObj::init(cct, sysobj_svc, setup_obj, old_format);
1720 }
1721
1722 int RGWZoneParams::read_default_id(string& default_id, bool old_format)
1723 {
1724 if (realm_id.empty()) {
1725 /* try using default realm */
1726 RGWRealm realm;
1727 int ret = realm.init(cct, sysobj_svc);
1728 //no default realm exist
1729 if (ret < 0) {
1730 return read_id(default_zone_name, default_id);
1731 }
1732 realm_id = realm.get_id();
1733 }
1734
1735 return RGWSystemMetaObj::read_default_id(default_id, old_format);
1736 }
1737
1738
1739 int RGWZoneParams::set_as_default(bool exclusive)
1740 {
1741 if (realm_id.empty()) {
1742 /* try using default realm */
1743 RGWRealm realm;
1744 int ret = realm.init(cct, sysobj_svc);
1745 if (ret < 0) {
1746 ldout(cct, 10) << "could not read realm id: " << cpp_strerror(-ret) << dendl;
1747 return -EINVAL;
1748 }
1749 realm_id = realm.get_id();
1750 }
1751
1752 return RGWSystemMetaObj::set_as_default(exclusive);
1753 }
1754
1755 const string& RGWZoneParams::get_compression_type(const rgw_placement_rule& placement_rule) const
1756 {
1757 static const std::string NONE{"none"};
1758 auto p = placement_pools.find(placement_rule.name);
1759 if (p == placement_pools.end()) {
1760 return NONE;
1761 }
1762 const auto& type = p->second.get_compression_type(placement_rule.get_storage_class());
1763 return !type.empty() ? type : NONE;
1764 }
1765
1766 void RGWPeriodMap::encode(bufferlist& bl) const {
1767 ENCODE_START(2, 1, bl);
1768 encode(id, bl);
1769 encode(zonegroups, bl);
1770 encode(master_zonegroup, bl);
1771 encode(short_zone_ids, bl);
1772 ENCODE_FINISH(bl);
1773 }
1774
1775 void RGWPeriodMap::decode(bufferlist::const_iterator& bl) {
1776 DECODE_START(2, bl);
1777 decode(id, bl);
1778 decode(zonegroups, bl);
1779 decode(master_zonegroup, bl);
1780 if (struct_v >= 2) {
1781 decode(short_zone_ids, bl);
1782 }
1783 DECODE_FINISH(bl);
1784
1785 zonegroups_by_api.clear();
1786 for (map<string, RGWZoneGroup>::iterator iter = zonegroups.begin();
1787 iter != zonegroups.end(); ++iter) {
1788 RGWZoneGroup& zonegroup = iter->second;
1789 zonegroups_by_api[zonegroup.api_name] = zonegroup;
1790 if (zonegroup.is_master_zonegroup()) {
1791 master_zonegroup = zonegroup.get_id();
1792 }
1793 }
1794 }
1795
1796 // run an MD5 hash on the zone_id and return the first 32 bits
1797 static uint32_t gen_short_zone_id(const std::string zone_id)
1798 {
1799 unsigned char md5[CEPH_CRYPTO_MD5_DIGESTSIZE];
1800 MD5 hash;
1801 hash.Update((const unsigned char *)zone_id.c_str(), zone_id.size());
1802 hash.Final(md5);
1803
1804 uint32_t short_id;
1805 memcpy((char *)&short_id, md5, sizeof(short_id));
1806 return std::max(short_id, 1u);
1807 }
1808
1809 int RGWPeriodMap::update(const RGWZoneGroup& zonegroup, CephContext *cct)
1810 {
1811 if (zonegroup.is_master_zonegroup() && (!master_zonegroup.empty() && zonegroup.get_id() != master_zonegroup)) {
1812 ldout(cct,0) << "Error updating periodmap, multiple master zonegroups configured "<< dendl;
1813 ldout(cct,0) << "master zonegroup: " << master_zonegroup << " and " << zonegroup.get_id() <<dendl;
1814 return -EINVAL;
1815 }
1816 map<string, RGWZoneGroup>::iterator iter = zonegroups.find(zonegroup.get_id());
1817 if (iter != zonegroups.end()) {
1818 RGWZoneGroup& old_zonegroup = iter->second;
1819 if (!old_zonegroup.api_name.empty()) {
1820 zonegroups_by_api.erase(old_zonegroup.api_name);
1821 }
1822 }
1823 zonegroups[zonegroup.get_id()] = zonegroup;
1824
1825 if (!zonegroup.api_name.empty()) {
1826 zonegroups_by_api[zonegroup.api_name] = zonegroup;
1827 }
1828
1829 if (zonegroup.is_master_zonegroup()) {
1830 master_zonegroup = zonegroup.get_id();
1831 } else if (master_zonegroup == zonegroup.get_id()) {
1832 master_zonegroup = "";
1833 }
1834
1835 for (auto& i : zonegroup.zones) {
1836 auto& zone = i.second;
1837 if (short_zone_ids.find(zone.id) != short_zone_ids.end()) {
1838 continue;
1839 }
1840 // calculate the zone's short id
1841 uint32_t short_id = gen_short_zone_id(zone.id);
1842
1843 // search for an existing zone with the same short id
1844 for (auto& s : short_zone_ids) {
1845 if (s.second == short_id) {
1846 ldout(cct, 0) << "New zone '" << zone.name << "' (" << zone.id
1847 << ") generates the same short_zone_id " << short_id
1848 << " as existing zone id " << s.first << dendl;
1849 return -EEXIST;
1850 }
1851 }
1852
1853 short_zone_ids[zone.id] = short_id;
1854 }
1855
1856 return 0;
1857 }
1858
1859 uint32_t RGWPeriodMap::get_zone_short_id(const string& zone_id) const
1860 {
1861 auto i = short_zone_ids.find(zone_id);
1862 if (i == short_zone_ids.end()) {
1863 return 0;
1864 }
1865 return i->second;
1866 }
1867
1868 int RGWZoneGroupMap::read(CephContext *cct, RGWSI_SysObj *sysobj_svc)
1869 {
1870
1871 RGWPeriod period;
1872 int ret = period.init(cct, sysobj_svc);
1873 if (ret < 0) {
1874 cerr << "failed to read current period info: " << cpp_strerror(ret);
1875 return ret;
1876 }
1877
1878 bucket_quota = period.get_config().bucket_quota;
1879 user_quota = period.get_config().user_quota;
1880 zonegroups = period.get_map().zonegroups;
1881 zonegroups_by_api = period.get_map().zonegroups_by_api;
1882 master_zonegroup = period.get_map().master_zonegroup;
1883
1884 return 0;
1885 }
1886
1887 void RGWRegionMap::encode(bufferlist& bl) const {
1888 ENCODE_START( 3, 1, bl);
1889 encode(regions, bl);
1890 encode(master_region, bl);
1891 encode(bucket_quota, bl);
1892 encode(user_quota, bl);
1893 ENCODE_FINISH(bl);
1894 }
1895
1896 void RGWRegionMap::decode(bufferlist::const_iterator& bl) {
1897 DECODE_START(3, bl);
1898 decode(regions, bl);
1899 decode(master_region, bl);
1900 if (struct_v >= 2)
1901 decode(bucket_quota, bl);
1902 if (struct_v >= 3)
1903 decode(user_quota, bl);
1904 DECODE_FINISH(bl);
1905 }
1906
1907 void RGWZoneGroupMap::encode(bufferlist& bl) const {
1908 ENCODE_START( 3, 1, bl);
1909 encode(zonegroups, bl);
1910 encode(master_zonegroup, bl);
1911 encode(bucket_quota, bl);
1912 encode(user_quota, bl);
1913 ENCODE_FINISH(bl);
1914 }
1915
1916 void RGWZoneGroupMap::decode(bufferlist::const_iterator& bl) {
1917 DECODE_START(3, bl);
1918 decode(zonegroups, bl);
1919 decode(master_zonegroup, bl);
1920 if (struct_v >= 2)
1921 decode(bucket_quota, bl);
1922 if (struct_v >= 3)
1923 decode(user_quota, bl);
1924 DECODE_FINISH(bl);
1925
1926 zonegroups_by_api.clear();
1927 for (map<string, RGWZoneGroup>::iterator iter = zonegroups.begin();
1928 iter != zonegroups.end(); ++iter) {
1929 RGWZoneGroup& zonegroup = iter->second;
1930 zonegroups_by_api[zonegroup.api_name] = zonegroup;
1931 if (zonegroup.is_master_zonegroup()) {
1932 master_zonegroup = zonegroup.get_name();
1933 }
1934 }
1935 }
1936
1937