]> git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/rgw_zone.cc
import 15.2.0 Octopus source
[ceph.git] / ceph / src / rgw / rgw_zone.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab ft=cpp
3
4 #include "common/errno.h"
5
6 #include "rgw_zone.h"
7 #include "rgw_realm_watcher.h"
8 #include "rgw_meta_sync_status.h"
9 #include "rgw_sync.h"
10
11 #include "services/svc_zone.h"
12 #include "services/svc_sys_obj.h"
13
14 #define dout_subsys ceph_subsys_rgw
15
// Default object names, oid prefixes/suffixes and pool names shared by the
// realm / period / zonegroup / zone code in this file.
namespace rgw_zone_defaults {

// zone, region and zonegroup metadata objects
std::string zone_info_oid_prefix{"zone_info."};
std::string zone_names_oid_prefix{"zone_names."};
std::string region_info_oid_prefix{"region_info."};
std::string realm_names_oid_prefix{"realms_names."};
std::string zone_group_info_oid_prefix{"zonegroup_info."};
std::string realm_info_oid_prefix{"realms."};
std::string default_region_info_oid{"default.region"};
std::string default_zone_group_info_oid{"default.zonegroup"};

// period metadata objects
std::string period_info_oid_prefix{"periods."};
std::string period_latest_epoch_info_oid{".latest_epoch"};

std::string region_map_oid{"region_map"};
std::string default_realm_info_oid{"default.realm"};

// names used when nothing else is configured
std::string default_zonegroup_name{"default"};
std::string default_zone_name{"default"};
std::string zonegroup_names_oid_prefix{"zonegroups_names."};

// root pools used when the matching *_root_pool option is empty
std::string RGW_DEFAULT_ZONE_ROOT_POOL{"rgw.root"};
std::string RGW_DEFAULT_ZONEGROUP_ROOT_POOL{"rgw.root"};
std::string RGW_DEFAULT_REALM_ROOT_POOL{"rgw.root"};
std::string RGW_DEFAULT_PERIOD_ROOT_POOL{"rgw.root"};

// pool name suffixes
std::string default_bucket_index_pool_suffix{"rgw.buckets.index"};
std::string default_storage_extra_pool_suffix{"rgw.buckets.non-ec"};
std::string avail_pools{".pools.avail"};
std::string default_storage_pool_suffix{"rgw.buckets.data"};

} // namespace rgw_zone_defaults
43
44 using namespace rgw_zone_defaults;
45
46 #define FIRST_EPOCH 1
47
48 void RGWDefaultZoneGroupInfo::dump(Formatter *f) const {
49 encode_json("default_zonegroup", default_zonegroup, f);
50 }
51
52 void RGWDefaultZoneGroupInfo::decode_json(JSONObj *obj) {
53
54 JSONDecoder::decode_json("default_zonegroup", default_zonegroup, obj);
55 /* backward compatability with region */
56 if (default_zonegroup.empty()) {
57 JSONDecoder::decode_json("default_region", default_zonegroup, obj);
58 }
59 }
60
61 rgw_pool RGWZoneGroup::get_pool(CephContext *cct_) const
62 {
63 if (cct_->_conf->rgw_zonegroup_root_pool.empty()) {
64 return rgw_pool(RGW_DEFAULT_ZONEGROUP_ROOT_POOL);
65 }
66
67 return rgw_pool(cct_->_conf->rgw_zonegroup_root_pool);
68 }
69
70 int RGWZoneGroup::create_default(bool old_format)
71 {
72 name = default_zonegroup_name;
73 api_name = default_zonegroup_name;
74 is_master = true;
75
76 RGWZoneGroupPlacementTarget placement_target;
77 placement_target.name = "default-placement";
78 placement_targets[placement_target.name] = placement_target;
79 default_placement.name = "default-placement";
80
81 RGWZoneParams zone_params(default_zone_name);
82
83 int r = zone_params.init(cct, sysobj_svc, false);
84 if (r < 0) {
85 ldout(cct, 0) << "create_default: error initializing zone params: " << cpp_strerror(-r) << dendl;
86 return r;
87 }
88
89 r = zone_params.create_default();
90 if (r < 0 && r != -EEXIST) {
91 ldout(cct, 0) << "create_default: error in create_default zone params: " << cpp_strerror(-r) << dendl;
92 return r;
93 } else if (r == -EEXIST) {
94 ldout(cct, 10) << "zone_params::create_default() returned -EEXIST, we raced with another default zone_params creation" << dendl;
95 zone_params.clear_id();
96 r = zone_params.init(cct, sysobj_svc);
97 if (r < 0) {
98 ldout(cct, 0) << "create_default: error in init existing zone params: " << cpp_strerror(-r) << dendl;
99 return r;
100 }
101 ldout(cct, 20) << "zone_params::create_default() " << zone_params.get_name() << " id " << zone_params.get_id()
102 << dendl;
103 }
104
105 RGWZone& default_zone = zones[zone_params.get_id()];
106 default_zone.name = zone_params.get_name();
107 default_zone.id = zone_params.get_id();
108 master_zone = default_zone.id;
109
110 r = create();
111 if (r < 0 && r != -EEXIST) {
112 ldout(cct, 0) << "error storing zone group info: " << cpp_strerror(-r) << dendl;
113 return r;
114 }
115
116 if (r == -EEXIST) {
117 ldout(cct, 10) << "create_default() returned -EEXIST, we raced with another zonegroup creation" << dendl;
118 id.clear();
119 r = init(cct, sysobj_svc);
120 if (r < 0) {
121 return r;
122 }
123 }
124
125 if (old_format) {
126 name = id;
127 }
128
129 post_process_params();
130
131 return 0;
132 }
133
134 const string RGWZoneGroup::get_default_oid(bool old_region_format) const
135 {
136 if (old_region_format) {
137 if (cct->_conf->rgw_default_region_info_oid.empty()) {
138 return default_region_info_oid;
139 }
140 return cct->_conf->rgw_default_region_info_oid;
141 }
142
143 string default_oid = cct->_conf->rgw_default_zonegroup_info_oid;
144
145 if (cct->_conf->rgw_default_zonegroup_info_oid.empty()) {
146 default_oid = default_zone_group_info_oid;
147 }
148
149 default_oid += "." + realm_id;
150
151 return default_oid;
152 }
153
154 const string& RGWZoneGroup::get_info_oid_prefix(bool old_region_format) const
155 {
156 if (old_region_format) {
157 return region_info_oid_prefix;
158 }
159 return zone_group_info_oid_prefix;
160 }
161
162 const string& RGWZoneGroup::get_names_oid_prefix() const
163 {
164 return zonegroup_names_oid_prefix;
165 }
166
167 const string& RGWZoneGroup::get_predefined_name(CephContext *cct) const {
168 return cct->_conf->rgw_zonegroup;
169 }
170
171 int RGWZoneGroup::equals(const string& other_zonegroup) const
172 {
173 if (is_master && other_zonegroup.empty())
174 return true;
175
176 return (id == other_zonegroup);
177 }
178
179 int RGWZoneGroup::add_zone(const RGWZoneParams& zone_params, bool *is_master, bool *read_only,
180 const list<string>& endpoints, const string *ptier_type,
181 bool *psync_from_all, list<string>& sync_from, list<string>& sync_from_rm,
182 string *predirect_zone, std::optional<int> bucket_index_max_shards,
183 RGWSyncModulesManager *sync_mgr)
184 {
185 auto& zone_id = zone_params.get_id();
186 auto& zone_name = zone_params.get_name();
187
188 // check for duplicate zone name on insert
189 if (!zones.count(zone_id)) {
190 for (const auto& zone : zones) {
191 if (zone.second.name == zone_name) {
192 ldout(cct, 0) << "ERROR: found existing zone name " << zone_name
193 << " (" << zone.first << ") in zonegroup " << get_name() << dendl;
194 return -EEXIST;
195 }
196 }
197 }
198
199 if (is_master) {
200 if (*is_master) {
201 if (!master_zone.empty() && master_zone != zone_id) {
202 ldout(cct, 0) << "NOTICE: overriding master zone: " << master_zone << dendl;
203 }
204 master_zone = zone_id;
205 } else if (master_zone == zone_id) {
206 master_zone.clear();
207 }
208 }
209
210 RGWZone& zone = zones[zone_id];
211 zone.name = zone_name;
212 zone.id = zone_id;
213 if (!endpoints.empty()) {
214 zone.endpoints = endpoints;
215 }
216 if (read_only) {
217 zone.read_only = *read_only;
218 }
219 if (ptier_type) {
220 zone.tier_type = *ptier_type;
221 if (!sync_mgr->get_module(*ptier_type, nullptr)) {
222 ldout(cct, 0) << "ERROR: could not found sync module: " << *ptier_type
223 << ", valid sync modules: "
224 << sync_mgr->get_registered_module_names()
225 << dendl;
226 return -ENOENT;
227 }
228 }
229
230 if (psync_from_all) {
231 zone.sync_from_all = *psync_from_all;
232 }
233
234 if (predirect_zone) {
235 zone.redirect_zone = *predirect_zone;
236 }
237
238 if (bucket_index_max_shards) {
239 zone.bucket_index_max_shards = *bucket_index_max_shards;
240 }
241
242 for (auto add : sync_from) {
243 zone.sync_from.insert(add);
244 }
245
246 for (auto rm : sync_from_rm) {
247 zone.sync_from.erase(rm);
248 }
249
250 post_process_params();
251
252 return update();
253 }
254
255
256 int RGWZoneGroup::rename_zone(const RGWZoneParams& zone_params)
257 {
258 RGWZone& zone = zones[zone_params.get_id()];
259 zone.name = zone_params.get_name();
260
261 return update();
262 }
263
264 void RGWZoneGroup::post_process_params()
265 {
266 bool log_data = zones.size() > 1;
267
268 if (master_zone.empty()) {
269 auto iter = zones.begin();
270 if (iter != zones.end()) {
271 master_zone = iter->first;
272 }
273 }
274
275 for (auto& item : zones) {
276 RGWZone& zone = item.second;
277 zone.log_data = log_data;
278
279 RGWZoneParams zone_params(zone.id, zone.name);
280 int ret = zone_params.init(cct, sysobj_svc);
281 if (ret < 0) {
282 ldout(cct, 0) << "WARNING: could not read zone params for zone id=" << zone.id << " name=" << zone.name << dendl;
283 continue;
284 }
285
286 for (auto& pitem : zone_params.placement_pools) {
287 const string& placement_name = pitem.first;
288 if (placement_targets.find(placement_name) == placement_targets.end()) {
289 RGWZoneGroupPlacementTarget placement_target;
290 placement_target.name = placement_name;
291 placement_targets[placement_name] = placement_target;
292 }
293 }
294 }
295
296 if (default_placement.empty() && !placement_targets.empty()) {
297 default_placement.init(placement_targets.begin()->first, RGW_STORAGE_CLASS_STANDARD);
298 }
299 }
300
301 int RGWZoneGroup::remove_zone(const std::string& zone_id)
302 {
303 auto iter = zones.find(zone_id);
304 if (iter == zones.end()) {
305 ldout(cct, 0) << "zone id " << zone_id << " is not a part of zonegroup "
306 << name << dendl;
307 return -ENOENT;
308 }
309
310 zones.erase(iter);
311
312 post_process_params();
313
314 return update();
315 }
316
317 int RGWZoneGroup::read_default_id(string& default_id, bool old_format)
318 {
319 if (realm_id.empty()) {
320 /* try using default realm */
321 RGWRealm realm;
322 int ret = realm.init(cct, sysobj_svc);
323 // no default realm exist
324 if (ret < 0) {
325 return read_id(default_zonegroup_name, default_id);
326 }
327 realm_id = realm.get_id();
328 }
329
330 return RGWSystemMetaObj::read_default_id(default_id, old_format);
331 }
332
333 int RGWZoneGroup::set_as_default(bool exclusive)
334 {
335 if (realm_id.empty()) {
336 /* try using default realm */
337 RGWRealm realm;
338 int ret = realm.init(cct, sysobj_svc);
339 if (ret < 0) {
340 ldout(cct, 10) << "could not read realm id: " << cpp_strerror(-ret) << dendl;
341 return -EINVAL;
342 }
343 realm_id = realm.get_id();
344 }
345
346 return RGWSystemMetaObj::set_as_default(exclusive);
347 }
348
349 void RGWSystemMetaObj::reinit_instance(CephContext *_cct, RGWSI_SysObj *_sysobj_svc)
350 {
351 cct = _cct;
352 sysobj_svc = _sysobj_svc;
353 zone_svc = _sysobj_svc->get_zone_svc();
354 }
355
356 int RGWSystemMetaObj::init(CephContext *_cct, RGWSI_SysObj *_sysobj_svc, bool setup_obj, bool old_format)
357 {
358 reinit_instance(_cct, _sysobj_svc);
359
360 if (!setup_obj)
361 return 0;
362
363 if (old_format && id.empty()) {
364 id = name;
365 }
366
367 if (id.empty()) {
368 int r;
369 if (name.empty()) {
370 name = get_predefined_name(cct);
371 }
372 if (name.empty()) {
373 r = use_default(old_format);
374 if (r < 0) {
375 return r;
376 }
377 } else if (!old_format) {
378 r = read_id(name, id);
379 if (r < 0) {
380 if (r != -ENOENT) {
381 ldout(cct, 0) << "error in read_id for object name: " << name << " : " << cpp_strerror(-r) << dendl;
382 }
383 return r;
384 }
385 }
386 }
387
388 return read_info(id, old_format);
389 }
390
391 int RGWSystemMetaObj::read_default(RGWDefaultSystemMetaObjInfo& default_info, const string& oid)
392 {
393 using ceph::decode;
394 auto pool = get_pool(cct);
395 bufferlist bl;
396
397 auto obj_ctx = sysobj_svc->init_obj_ctx();
398 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
399 int ret = sysobj.rop().read(&bl, null_yield);
400 if (ret < 0)
401 return ret;
402
403 try {
404 auto iter = bl.cbegin();
405 decode(default_info, iter);
406 } catch (buffer::error& err) {
407 ldout(cct, 0) << "error decoding data from " << pool << ":" << oid << dendl;
408 return -EIO;
409 }
410
411 return 0;
412 }
413
414 int RGWSystemMetaObj::read_default_id(string& default_id, bool old_format)
415 {
416 RGWDefaultSystemMetaObjInfo default_info;
417
418 int ret = read_default(default_info, get_default_oid(old_format));
419 if (ret < 0) {
420 return ret;
421 }
422
423 default_id = default_info.default_id;
424
425 return 0;
426 }
427
428 int RGWSystemMetaObj::use_default(bool old_format)
429 {
430 return read_default_id(id, old_format);
431 }
432
433 int RGWSystemMetaObj::set_as_default(bool exclusive)
434 {
435 using ceph::encode;
436 string oid = get_default_oid();
437
438 rgw_pool pool(get_pool(cct));
439 bufferlist bl;
440
441 RGWDefaultSystemMetaObjInfo default_info;
442 default_info.default_id = id;
443
444 encode(default_info, bl);
445
446 auto obj_ctx = sysobj_svc->init_obj_ctx();
447 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
448 int ret = sysobj.wop()
449 .set_exclusive(exclusive)
450 .write(bl, null_yield);
451 if (ret < 0)
452 return ret;
453
454 return 0;
455 }
456
457 int RGWSystemMetaObj::read_id(const string& obj_name, string& object_id)
458 {
459 using ceph::decode;
460 rgw_pool pool(get_pool(cct));
461 bufferlist bl;
462
463 string oid = get_names_oid_prefix() + obj_name;
464
465 auto obj_ctx = sysobj_svc->init_obj_ctx();
466 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
467 int ret = sysobj.rop().read(&bl, null_yield);
468 if (ret < 0) {
469 return ret;
470 }
471
472 RGWNameToId nameToId;
473 try {
474 auto iter = bl.cbegin();
475 decode(nameToId, iter);
476 } catch (buffer::error& err) {
477 ldout(cct, 0) << "ERROR: failed to decode obj from " << pool << ":" << oid << dendl;
478 return -EIO;
479 }
480 object_id = nameToId.obj_id;
481 return 0;
482 }
483
484 int RGWSystemMetaObj::delete_obj(bool old_format)
485 {
486 rgw_pool pool(get_pool(cct));
487
488 auto obj_ctx = sysobj_svc->init_obj_ctx();
489
490 /* check to see if obj is the default */
491 RGWDefaultSystemMetaObjInfo default_info;
492 int ret = read_default(default_info, get_default_oid(old_format));
493 if (ret < 0 && ret != -ENOENT)
494 return ret;
495 if (default_info.default_id == id || (old_format && default_info.default_id == name)) {
496 string oid = get_default_oid(old_format);
497 rgw_raw_obj default_named_obj(pool, oid);
498 auto sysobj = sysobj_svc->get_obj(obj_ctx, default_named_obj);
499 ret = sysobj.wop().remove(null_yield);
500 if (ret < 0) {
501 ldout(cct, 0) << "Error delete default obj name " << name << ": " << cpp_strerror(-ret) << dendl;
502 return ret;
503 }
504 }
505 if (!old_format) {
506 string oid = get_names_oid_prefix() + name;
507 rgw_raw_obj object_name(pool, oid);
508 auto sysobj = sysobj_svc->get_obj(obj_ctx, object_name);
509 ret = sysobj.wop().remove(null_yield);
510 if (ret < 0) {
511 ldout(cct, 0) << "Error delete obj name " << name << ": " << cpp_strerror(-ret) << dendl;
512 return ret;
513 }
514 }
515
516 string oid = get_info_oid_prefix(old_format);
517 if (old_format) {
518 oid += name;
519 } else {
520 oid += id;
521 }
522
523 rgw_raw_obj object_id(pool, oid);
524 auto sysobj = sysobj_svc->get_obj(obj_ctx, object_id);
525 ret = sysobj.wop().remove(null_yield);
526 if (ret < 0) {
527 ldout(cct, 0) << "Error delete object id " << id << ": " << cpp_strerror(-ret) << dendl;
528 }
529
530 return ret;
531 }
532
533 int RGWSystemMetaObj::store_name(bool exclusive)
534 {
535 rgw_pool pool(get_pool(cct));
536 string oid = get_names_oid_prefix() + name;
537
538 RGWNameToId nameToId;
539 nameToId.obj_id = id;
540
541 bufferlist bl;
542 using ceph::encode;
543 encode(nameToId, bl);
544 auto obj_ctx = sysobj_svc->init_obj_ctx();
545 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
546 return sysobj.wop()
547 .set_exclusive(exclusive)
548 .write(bl, null_yield);
549 }
550
551 int RGWSystemMetaObj::rename(const string& new_name)
552 {
553 string new_id;
554 int ret = read_id(new_name, new_id);
555 if (!ret) {
556 return -EEXIST;
557 }
558 if (ret < 0 && ret != -ENOENT) {
559 ldout(cct, 0) << "Error read_id " << new_name << ": " << cpp_strerror(-ret) << dendl;
560 return ret;
561 }
562 string old_name = name;
563 name = new_name;
564 ret = update();
565 if (ret < 0) {
566 ldout(cct, 0) << "Error storing new obj info " << new_name << ": " << cpp_strerror(-ret) << dendl;
567 return ret;
568 }
569 ret = store_name(true);
570 if (ret < 0) {
571 ldout(cct, 0) << "Error storing new name " << new_name << ": " << cpp_strerror(-ret) << dendl;
572 return ret;
573 }
574 /* delete old name */
575 rgw_pool pool(get_pool(cct));
576 string oid = get_names_oid_prefix() + old_name;
577 rgw_raw_obj old_name_obj(pool, oid);
578 auto obj_ctx = sysobj_svc->init_obj_ctx();
579 auto sysobj = sysobj_svc->get_obj(obj_ctx, old_name_obj);
580 ret = sysobj.wop().remove(null_yield);
581 if (ret < 0) {
582 ldout(cct, 0) << "Error delete old obj name " << old_name << ": " << cpp_strerror(-ret) << dendl;
583 return ret;
584 }
585
586 return ret;
587 }
588
589 int RGWSystemMetaObj::read_info(const string& obj_id, bool old_format)
590 {
591 rgw_pool pool(get_pool(cct));
592
593 bufferlist bl;
594
595 string oid = get_info_oid_prefix(old_format) + obj_id;
596
597 auto obj_ctx = sysobj_svc->init_obj_ctx();
598 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
599 int ret = sysobj.rop().read(&bl, null_yield);
600 if (ret < 0) {
601 ldout(cct, 0) << "failed reading obj info from " << pool << ":" << oid << ": " << cpp_strerror(-ret) << dendl;
602 return ret;
603 }
604 using ceph::decode;
605
606 try {
607 auto iter = bl.cbegin();
608 decode(*this, iter);
609 } catch (buffer::error& err) {
610 ldout(cct, 0) << "ERROR: failed to decode obj from " << pool << ":" << oid << dendl;
611 return -EIO;
612 }
613
614 return 0;
615 }
616
617 int RGWSystemMetaObj::read()
618 {
619 int ret = read_id(name, id);
620 if (ret < 0) {
621 return ret;
622 }
623
624 return read_info(id);
625 }
626
627 int RGWSystemMetaObj::create(bool exclusive)
628 {
629 int ret;
630
631 /* check to see the name is not used */
632 ret = read_id(name, id);
633 if (exclusive && ret == 0) {
634 ldout(cct, 10) << "ERROR: name " << name << " already in use for obj id " << id << dendl;
635 return -EEXIST;
636 } else if ( ret < 0 && ret != -ENOENT) {
637 ldout(cct, 0) << "failed reading obj id " << id << ": " << cpp_strerror(-ret) << dendl;
638 return ret;
639 }
640
641 if (id.empty()) {
642 /* create unique id */
643 uuid_d new_uuid;
644 char uuid_str[37];
645 new_uuid.generate_random();
646 new_uuid.print(uuid_str);
647 id = uuid_str;
648 }
649
650 ret = store_info(exclusive);
651 if (ret < 0) {
652 ldout(cct, 0) << "ERROR: storing info for " << id << ": " << cpp_strerror(-ret) << dendl;
653 return ret;
654 }
655
656 return store_name(exclusive);
657 }
658
659 int RGWSystemMetaObj::store_info(bool exclusive)
660 {
661 rgw_pool pool(get_pool(cct));
662
663 string oid = get_info_oid_prefix() + id;
664
665 bufferlist bl;
666 using ceph::encode;
667 encode(*this, bl);
668 auto obj_ctx = sysobj_svc->init_obj_ctx();
669 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
670 return sysobj.wop()
671 .set_exclusive(exclusive)
672 .write(bl, null_yield);
673 }
674
675 int RGWSystemMetaObj::write(bool exclusive)
676 {
677 int ret = store_info(exclusive);
678 if (ret < 0) {
679 ldout(cct, 20) << __func__ << "(): store_info() returned ret=" << ret << dendl;
680 return ret;
681 }
682 ret = store_name(exclusive);
683 if (ret < 0) {
684 ldout(cct, 20) << __func__ << "(): store_name() returned ret=" << ret << dendl;
685 return ret;
686 }
687 return 0;
688 }
689
690
691 const string& RGWRealm::get_predefined_name(CephContext *cct) const {
692 return cct->_conf->rgw_realm;
693 }
694
695 int RGWRealm::create(bool exclusive)
696 {
697 int ret = RGWSystemMetaObj::create(exclusive);
698 if (ret < 0) {
699 ldout(cct, 0) << "ERROR creating new realm object " << name << ": " << cpp_strerror(-ret) << dendl;
700 return ret;
701 }
702 // create the control object for watch/notify
703 ret = create_control(exclusive);
704 if (ret < 0) {
705 ldout(cct, 0) << "ERROR creating control for new realm " << name << ": " << cpp_strerror(-ret) << dendl;
706 return ret;
707 }
708 RGWPeriod period;
709 if (current_period.empty()) {
710 /* create new period for the realm */
711 ret = period.init(cct, sysobj_svc, id, name, false);
712 if (ret < 0 ) {
713 return ret;
714 }
715 ret = period.create(true);
716 if (ret < 0) {
717 ldout(cct, 0) << "ERROR: creating new period for realm " << name << ": " << cpp_strerror(-ret) << dendl;
718 return ret;
719 }
720 } else {
721 period = RGWPeriod(current_period, 0);
722 int ret = period.init(cct, sysobj_svc, id, name);
723 if (ret < 0) {
724 ldout(cct, 0) << "ERROR: failed to init period " << current_period << dendl;
725 return ret;
726 }
727 }
728 ret = set_current_period(period);
729 if (ret < 0) {
730 ldout(cct, 0) << "ERROR: failed set current period " << current_period << dendl;
731 return ret;
732 }
733 // try to set as default. may race with another create, so pass exclusive=true
734 // so we don't override an existing default
735 ret = set_as_default(true);
736 if (ret < 0 && ret != -EEXIST) {
737 ldout(cct, 0) << "WARNING: failed to set realm as default realm, ret=" << ret << dendl;
738 }
739
740 return 0;
741 }
742
743 int RGWRealm::delete_obj()
744 {
745 int ret = RGWSystemMetaObj::delete_obj();
746 if (ret < 0) {
747 return ret;
748 }
749 return delete_control();
750 }
751
752 int RGWRealm::create_control(bool exclusive)
753 {
754 auto pool = rgw_pool{get_pool(cct)};
755 auto oid = get_control_oid();
756 bufferlist bl;
757 auto obj_ctx = sysobj_svc->init_obj_ctx();
758 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
759 return sysobj.wop()
760 .set_exclusive(exclusive)
761 .write(bl, null_yield);
762 }
763
764 int RGWRealm::delete_control()
765 {
766 auto pool = rgw_pool{get_pool(cct)};
767 auto obj = rgw_raw_obj{pool, get_control_oid()};
768 auto obj_ctx = sysobj_svc->init_obj_ctx();
769 auto sysobj = sysobj_svc->get_obj(obj_ctx, obj);
770 return sysobj.wop().remove(null_yield);
771 }
772
773 rgw_pool RGWRealm::get_pool(CephContext *cct) const
774 {
775 if (cct->_conf->rgw_realm_root_pool.empty()) {
776 return rgw_pool(RGW_DEFAULT_REALM_ROOT_POOL);
777 }
778 return rgw_pool(cct->_conf->rgw_realm_root_pool);
779 }
780
781 const string RGWRealm::get_default_oid(bool old_format) const
782 {
783 if (cct->_conf->rgw_default_realm_info_oid.empty()) {
784 return default_realm_info_oid;
785 }
786 return cct->_conf->rgw_default_realm_info_oid;
787 }
788
789 const string& RGWRealm::get_names_oid_prefix() const
790 {
791 return realm_names_oid_prefix;
792 }
793
794 const string& RGWRealm::get_info_oid_prefix(bool old_format) const
795 {
796 return realm_info_oid_prefix;
797 }
798
799 int RGWRealm::set_current_period(RGWPeriod& period)
800 {
801 // update realm epoch to match the period's
802 if (epoch > period.get_realm_epoch()) {
803 ldout(cct, 0) << "ERROR: set_current_period with old realm epoch "
804 << period.get_realm_epoch() << ", current epoch=" << epoch << dendl;
805 return -EINVAL;
806 }
807 if (epoch == period.get_realm_epoch() && current_period != period.get_id()) {
808 ldout(cct, 0) << "ERROR: set_current_period with same realm epoch "
809 << period.get_realm_epoch() << ", but different period id "
810 << period.get_id() << " != " << current_period << dendl;
811 return -EINVAL;
812 }
813
814 epoch = period.get_realm_epoch();
815 current_period = period.get_id();
816
817 int ret = update();
818 if (ret < 0) {
819 ldout(cct, 0) << "ERROR: period update: " << cpp_strerror(-ret) << dendl;
820 return ret;
821 }
822
823 ret = period.reflect();
824 if (ret < 0) {
825 ldout(cct, 0) << "ERROR: period.reflect(): " << cpp_strerror(-ret) << dendl;
826 return ret;
827 }
828
829 return 0;
830 }
831
832 string RGWRealm::get_control_oid() const
833 {
834 return get_info_oid_prefix() + id + ".control";
835 }
836
837 int RGWRealm::notify_zone(bufferlist& bl)
838 {
839 rgw_pool pool{get_pool(cct)};
840 auto obj_ctx = sysobj_svc->init_obj_ctx();
841 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, get_control_oid()});
842 int ret = sysobj.wn().notify(bl, 0, nullptr, null_yield);
843 if (ret < 0) {
844 return ret;
845 }
846 return 0;
847 }
848
849 int RGWRealm::notify_new_period(const RGWPeriod& period)
850 {
851 bufferlist bl;
852 using ceph::encode;
853 // push the period to dependent zonegroups/zones
854 encode(RGWRealmNotify::ZonesNeedPeriod, bl);
855 encode(period, bl);
856 // reload the gateway with the new period
857 encode(RGWRealmNotify::Reload, bl);
858
859 return notify_zone(bl);
860 }
861
862 std::string RGWPeriodConfig::get_oid(const std::string& realm_id)
863 {
864 if (realm_id.empty()) {
865 return "period_config.default";
866 }
867 return "period_config." + realm_id;
868 }
869
870 rgw_pool RGWPeriodConfig::get_pool(CephContext *cct)
871 {
872 const auto& pool_name = cct->_conf->rgw_period_root_pool;
873 if (pool_name.empty()) {
874 return {RGW_DEFAULT_PERIOD_ROOT_POOL};
875 }
876 return {pool_name};
877 }
878
879 int RGWPeriodConfig::read(RGWSI_SysObj *sysobj_svc, const std::string& realm_id)
880 {
881 const auto& pool = get_pool(sysobj_svc->ctx());
882 const auto& oid = get_oid(realm_id);
883 bufferlist bl;
884
885 auto obj_ctx = sysobj_svc->init_obj_ctx();
886 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
887 int ret = sysobj.rop().read(&bl, null_yield);
888 if (ret < 0) {
889 return ret;
890 }
891 using ceph::decode;
892 try {
893 auto iter = bl.cbegin();
894 decode(*this, iter);
895 } catch (buffer::error& err) {
896 return -EIO;
897 }
898 return 0;
899 }
900
901 int RGWPeriodConfig::write(RGWSI_SysObj *sysobj_svc, const std::string& realm_id)
902 {
903 const auto& pool = get_pool(sysobj_svc->ctx());
904 const auto& oid = get_oid(realm_id);
905 bufferlist bl;
906 using ceph::encode;
907 encode(*this, bl);
908 auto obj_ctx = sysobj_svc->init_obj_ctx();
909 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
910 return sysobj.wop()
911 .set_exclusive(false)
912 .write(bl, null_yield);
913 }
914
915 int RGWPeriod::init(CephContext *_cct, RGWSI_SysObj *_sysobj_svc, const string& period_realm_id,
916 const string& period_realm_name, bool setup_obj)
917 {
918 cct = _cct;
919 sysobj_svc = _sysobj_svc;
920
921 realm_id = period_realm_id;
922 realm_name = period_realm_name;
923
924 if (!setup_obj)
925 return 0;
926
927 return init(_cct, _sysobj_svc, setup_obj);
928 }
929
930
931 int RGWPeriod::init(CephContext *_cct, RGWSI_SysObj *_sysobj_svc, bool setup_obj)
932 {
933 cct = _cct;
934 sysobj_svc = _sysobj_svc;
935
936 if (!setup_obj)
937 return 0;
938
939 if (id.empty()) {
940 RGWRealm realm(realm_id, realm_name);
941 int ret = realm.init(cct, sysobj_svc);
942 if (ret < 0) {
943 ldout(cct, 0) << "RGWPeriod::init failed to init realm " << realm_name << " id " << realm_id << " : " <<
944 cpp_strerror(-ret) << dendl;
945 return ret;
946 }
947 id = realm.get_current_period();
948 realm_id = realm.get_id();
949 }
950
951 if (!epoch) {
952 int ret = use_latest_epoch();
953 if (ret < 0) {
954 ldout(cct, 0) << "failed to use_latest_epoch period id " << id << " realm " << realm_name << " id " << realm_id
955 << " : " << cpp_strerror(-ret) << dendl;
956 return ret;
957 }
958 }
959
960 return read_info();
961 }
962
963
964 int RGWPeriod::get_zonegroup(RGWZoneGroup& zonegroup,
965 const string& zonegroup_id) const
966 {
967 map<string, RGWZoneGroup>::const_iterator iter;
968 if (!zonegroup_id.empty()) {
969 iter = period_map.zonegroups.find(zonegroup_id);
970 } else {
971 iter = period_map.zonegroups.find("default");
972 }
973 if (iter != period_map.zonegroups.end()) {
974 zonegroup = iter->second;
975 return 0;
976 }
977
978 return -ENOENT;
979 }
980
981 const string& RGWPeriod::get_latest_epoch_oid() const
982 {
983 if (cct->_conf->rgw_period_latest_epoch_info_oid.empty()) {
984 return period_latest_epoch_info_oid;
985 }
986 return cct->_conf->rgw_period_latest_epoch_info_oid;
987 }
988
989 const string& RGWPeriod::get_info_oid_prefix() const
990 {
991 return period_info_oid_prefix;
992 }
993
994 const string RGWPeriod::get_period_oid_prefix() const
995 {
996 return get_info_oid_prefix() + id;
997 }
998
999 const string RGWPeriod::get_period_oid() const
1000 {
1001 std::ostringstream oss;
1002 oss << get_period_oid_prefix();
1003 // skip the epoch for the staging period
1004 if (id != get_staging_id(realm_id))
1005 oss << "." << epoch;
1006 return oss.str();
1007 }
1008
1009 int RGWPeriod::read_latest_epoch(RGWPeriodLatestEpochInfo& info,
1010 RGWObjVersionTracker *objv)
1011 {
1012 string oid = get_period_oid_prefix() + get_latest_epoch_oid();
1013
1014 rgw_pool pool(get_pool(cct));
1015 bufferlist bl;
1016 auto obj_ctx = sysobj_svc->init_obj_ctx();
1017 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, oid});
1018 int ret = sysobj.rop().read(&bl, null_yield);
1019 if (ret < 0) {
1020 ldout(cct, 1) << "error read_lastest_epoch " << pool << ":" << oid << dendl;
1021 return ret;
1022 }
1023 try {
1024 auto iter = bl.cbegin();
1025 using ceph::decode;
1026 decode(info, iter);
1027 } catch (buffer::error& err) {
1028 ldout(cct, 0) << "error decoding data from " << pool << ":" << oid << dendl;
1029 return -EIO;
1030 }
1031
1032 return 0;
1033 }
1034
1035 int RGWPeriod::get_latest_epoch(epoch_t& latest_epoch)
1036 {
1037 RGWPeriodLatestEpochInfo info;
1038
1039 int ret = read_latest_epoch(info);
1040 if (ret < 0) {
1041 return ret;
1042 }
1043
1044 latest_epoch = info.epoch;
1045
1046 return 0;
1047 }
1048
1049 int RGWPeriod::use_latest_epoch()
1050 {
1051 RGWPeriodLatestEpochInfo info;
1052 int ret = read_latest_epoch(info);
1053 if (ret < 0) {
1054 return ret;
1055 }
1056
1057 epoch = info.epoch;
1058
1059 return 0;
1060 }
1061
1062 int RGWPeriod::set_latest_epoch(epoch_t epoch, bool exclusive,
1063 RGWObjVersionTracker *objv)
1064 {
1065 string oid = get_period_oid_prefix() + get_latest_epoch_oid();
1066
1067 rgw_pool pool(get_pool(cct));
1068 bufferlist bl;
1069
1070 RGWPeriodLatestEpochInfo info;
1071 info.epoch = epoch;
1072
1073 using ceph::encode;
1074 encode(info, bl);
1075
1076 auto obj_ctx = sysobj_svc->init_obj_ctx();
1077 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
1078 return sysobj.wop()
1079 .set_exclusive(exclusive)
1080 .write(bl, null_yield);
1081 }
1082
1083 int RGWPeriod::update_latest_epoch(epoch_t epoch)
1084 {
1085 static constexpr int MAX_RETRIES = 20;
1086
1087 for (int i = 0; i < MAX_RETRIES; i++) {
1088 RGWPeriodLatestEpochInfo info;
1089 RGWObjVersionTracker objv;
1090 bool exclusive = false;
1091
1092 // read existing epoch
1093 int r = read_latest_epoch(info, &objv);
1094 if (r == -ENOENT) {
1095 // use an exclusive create to set the epoch atomically
1096 exclusive = true;
1097 ldout(cct, 20) << "creating initial latest_epoch=" << epoch
1098 << " for period=" << id << dendl;
1099 } else if (r < 0) {
1100 ldout(cct, 0) << "ERROR: failed to read latest_epoch" << dendl;
1101 return r;
1102 } else if (epoch <= info.epoch) {
1103 r = -EEXIST; // fail with EEXIST if epoch is not newer
1104 ldout(cct, 10) << "found existing latest_epoch " << info.epoch
1105 << " >= given epoch " << epoch << ", returning r=" << r << dendl;
1106 return r;
1107 } else {
1108 ldout(cct, 20) << "updating latest_epoch from " << info.epoch
1109 << " -> " << epoch << " on period=" << id << dendl;
1110 }
1111
1112 r = set_latest_epoch(epoch, exclusive, &objv);
1113 if (r == -EEXIST) {
1114 continue; // exclusive create raced with another update, retry
1115 } else if (r == -ECANCELED) {
1116 continue; // write raced with a conflicting version, retry
1117 }
1118 if (r < 0) {
1119 ldout(cct, 0) << "ERROR: failed to write latest_epoch" << dendl;
1120 return r;
1121 }
1122 return 0; // return success
1123 }
1124
1125 return -ECANCELED; // fail after max retries
1126 }
1127
1128 int RGWPeriod::delete_obj()
1129 {
1130 rgw_pool pool(get_pool(cct));
1131
1132 // delete the object for each period epoch
1133 for (epoch_t e = 1; e <= epoch; e++) {
1134 RGWPeriod p{get_id(), e};
1135 rgw_raw_obj oid{pool, p.get_period_oid()};
1136 auto obj_ctx = sysobj_svc->init_obj_ctx();
1137 auto sysobj = sysobj_svc->get_obj(obj_ctx, oid);
1138 int ret = sysobj.wop().remove(null_yield);
1139 if (ret < 0) {
1140 ldout(cct, 0) << "WARNING: failed to delete period object " << oid
1141 << ": " << cpp_strerror(-ret) << dendl;
1142 }
1143 }
1144
1145 // delete the .latest_epoch object
1146 rgw_raw_obj oid{pool, get_period_oid_prefix() + get_latest_epoch_oid()};
1147 auto obj_ctx = sysobj_svc->init_obj_ctx();
1148 auto sysobj = sysobj_svc->get_obj(obj_ctx, oid);
1149 int ret = sysobj.wop().remove(null_yield);
1150 if (ret < 0) {
1151 ldout(cct, 0) << "WARNING: failed to delete period object " << oid
1152 << ": " << cpp_strerror(-ret) << dendl;
1153 }
1154 return ret;
1155 }
1156
1157 int RGWPeriod::read_info()
1158 {
1159 rgw_pool pool(get_pool(cct));
1160
1161 bufferlist bl;
1162
1163 auto obj_ctx = sysobj_svc->init_obj_ctx();
1164 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj{pool, get_period_oid()});
1165 int ret = sysobj.rop().read(&bl, null_yield);
1166 if (ret < 0) {
1167 ldout(cct, 0) << "failed reading obj info from " << pool << ":" << get_period_oid() << ": " << cpp_strerror(-ret) << dendl;
1168 return ret;
1169 }
1170
1171 try {
1172 using ceph::decode;
1173 auto iter = bl.cbegin();
1174 decode(*this, iter);
1175 } catch (buffer::error& err) {
1176 ldout(cct, 0) << "ERROR: failed to decode obj from " << pool << ":" << get_period_oid() << dendl;
1177 return -EIO;
1178 }
1179
1180 return 0;
1181 }
1182
1183 int RGWPeriod::create(bool exclusive)
1184 {
1185 int ret;
1186
1187 /* create unique id */
1188 uuid_d new_uuid;
1189 char uuid_str[37];
1190 new_uuid.generate_random();
1191 new_uuid.print(uuid_str);
1192 id = uuid_str;
1193
1194 epoch = FIRST_EPOCH;
1195
1196 period_map.id = id;
1197
1198 ret = store_info(exclusive);
1199 if (ret < 0) {
1200 ldout(cct, 0) << "ERROR: storing info for " << id << ": " << cpp_strerror(-ret) << dendl;
1201 return ret;
1202 }
1203
1204 ret = set_latest_epoch(epoch);
1205 if (ret < 0) {
1206 ldout(cct, 0) << "ERROR: setting latest epoch " << id << ": " << cpp_strerror(-ret) << dendl;
1207 }
1208
1209 return ret;
1210 }
1211
1212 int RGWPeriod::store_info(bool exclusive)
1213 {
1214 rgw_pool pool(get_pool(cct));
1215
1216 string oid = get_period_oid();
1217 bufferlist bl;
1218 using ceph::encode;
1219 encode(*this, bl);
1220
1221 auto obj_ctx = sysobj_svc->init_obj_ctx();
1222 auto sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid));
1223 return sysobj.wop()
1224 .set_exclusive(exclusive)
1225 .write(bl, null_yield);
1226 }
1227
1228 rgw_pool RGWPeriod::get_pool(CephContext *cct) const
1229 {
1230 if (cct->_conf->rgw_period_root_pool.empty()) {
1231 return rgw_pool(RGW_DEFAULT_PERIOD_ROOT_POOL);
1232 }
1233 return rgw_pool(cct->_conf->rgw_period_root_pool);
1234 }
1235
1236 int RGWPeriod::add_zonegroup(const RGWZoneGroup& zonegroup)
1237 {
1238 if (zonegroup.realm_id != realm_id) {
1239 return 0;
1240 }
1241 int ret = period_map.update(zonegroup, cct);
1242 if (ret < 0) {
1243 ldout(cct, 0) << "ERROR: updating period map: " << cpp_strerror(-ret) << dendl;
1244 return ret;
1245 }
1246
1247 return store_info(false);
1248 }
1249
1250 int RGWPeriod::update()
1251 {
1252 auto zone_svc = sysobj_svc->get_zone_svc();
1253 ldout(cct, 20) << __func__ << " realm " << realm_id << " period " << get_id() << dendl;
1254 list<string> zonegroups;
1255 int ret = zone_svc->list_zonegroups(zonegroups);
1256 if (ret < 0) {
1257 ldout(cct, 0) << "ERROR: failed to list zonegroups: " << cpp_strerror(-ret) << dendl;
1258 return ret;
1259 }
1260
1261 // clear zone short ids of removed zones. period_map.update() will add the
1262 // remaining zones back
1263 period_map.short_zone_ids.clear();
1264
1265 for (auto& iter : zonegroups) {
1266 RGWZoneGroup zg(string(), iter);
1267 ret = zg.init(cct, sysobj_svc);
1268 if (ret < 0) {
1269 ldout(cct, 0) << "WARNING: zg.init() failed: " << cpp_strerror(-ret) << dendl;
1270 continue;
1271 }
1272
1273 if (zg.realm_id != realm_id) {
1274 ldout(cct, 20) << "skipping zonegroup " << zg.get_name() << " zone realm id " << zg.realm_id << ", not on our realm " << realm_id << dendl;
1275 continue;
1276 }
1277
1278 if (zg.master_zone.empty()) {
1279 ldout(cct, 0) << "ERROR: zonegroup " << zg.get_name() << " should have a master zone " << dendl;
1280 return -EINVAL;
1281 }
1282
1283 if (zg.zones.find(zg.master_zone) == zg.zones.end()) {
1284 ldout(cct,0) << "ERROR: zonegroup " << zg.get_name()
1285 << " has a non existent master zone "<< dendl;
1286 return -EINVAL;
1287 }
1288
1289 if (zg.is_master_zonegroup()) {
1290 master_zonegroup = zg.get_id();
1291 master_zone = zg.master_zone;
1292 }
1293
1294 int ret = period_map.update(zg, cct);
1295 if (ret < 0) {
1296 return ret;
1297 }
1298 }
1299
1300 ret = period_config.read(sysobj_svc, realm_id);
1301 if (ret < 0 && ret != -ENOENT) {
1302 ldout(cct, 0) << "ERROR: failed to read period config: "
1303 << cpp_strerror(ret) << dendl;
1304 return ret;
1305 }
1306 return 0;
1307 }
1308
1309 int RGWPeriod::reflect()
1310 {
1311 for (auto& iter : period_map.zonegroups) {
1312 RGWZoneGroup& zg = iter.second;
1313 zg.reinit_instance(cct, sysobj_svc);
1314 int r = zg.write(false);
1315 if (r < 0) {
1316 ldout(cct, 0) << "ERROR: failed to store zonegroup info for zonegroup=" << iter.first << ": " << cpp_strerror(-r) << dendl;
1317 return r;
1318 }
1319 if (zg.is_master_zonegroup()) {
1320 // set master as default if no default exists
1321 r = zg.set_as_default(true);
1322 if (r == 0) {
1323 ldout(cct, 1) << "Set the period's master zonegroup " << zg.get_id()
1324 << " as the default" << dendl;
1325 }
1326 }
1327 }
1328
1329 int r = period_config.write(sysobj_svc, realm_id);
1330 if (r < 0) {
1331 ldout(cct, 0) << "ERROR: failed to store period config: "
1332 << cpp_strerror(-r) << dendl;
1333 return r;
1334 }
1335 return 0;
1336 }
1337
1338 void RGWPeriod::fork()
1339 {
1340 ldout(cct, 20) << __func__ << " realm " << realm_id << " period " << id << dendl;
1341 predecessor_uuid = id;
1342 id = get_staging_id(realm_id);
1343 period_map.reset();
1344 realm_epoch++;
1345 }
1346
1347 static int read_sync_status(rgw::sal::RGWRadosStore *store, rgw_meta_sync_status *sync_status)
1348 {
1349 // initialize a sync status manager to read the status
1350 RGWMetaSyncStatusManager mgr(store, store->svc()->rados->get_async_processor());
1351 int r = mgr.init();
1352 if (r < 0) {
1353 return r;
1354 }
1355 r = mgr.read_sync_status(sync_status);
1356 mgr.stop();
1357 return r;
1358 }
1359
1360 int RGWPeriod::update_sync_status(rgw::sal::RGWRadosStore *store, /* for now */
1361 const RGWPeriod &current_period,
1362 std::ostream& error_stream,
1363 bool force_if_stale)
1364 {
1365 rgw_meta_sync_status status;
1366 int r = read_sync_status(store, &status);
1367 if (r < 0) {
1368 ldout(cct, 0) << "period failed to read sync status: "
1369 << cpp_strerror(-r) << dendl;
1370 return r;
1371 }
1372
1373 std::vector<std::string> markers;
1374
1375 const auto current_epoch = current_period.get_realm_epoch();
1376 if (current_epoch != status.sync_info.realm_epoch) {
1377 // no sync status markers for the current period
1378 ceph_assert(current_epoch > status.sync_info.realm_epoch);
1379 const int behind = current_epoch - status.sync_info.realm_epoch;
1380 if (!force_if_stale && current_epoch > 1) {
1381 error_stream << "ERROR: This zone is " << behind << " period(s) behind "
1382 "the current master zone in metadata sync. If this zone is promoted "
1383 "to master, any metadata changes during that time are likely to "
1384 "be lost.\n"
1385 "Waiting for this zone to catch up on metadata sync (see "
1386 "'radosgw-admin sync status') is recommended.\n"
1387 "To promote this zone to master anyway, add the flag "
1388 "--yes-i-really-mean-it." << std::endl;
1389 return -EINVAL;
1390 }
1391 // empty sync status markers - other zones will skip this period during
1392 // incremental metadata sync
1393 markers.resize(status.sync_info.num_shards);
1394 } else {
1395 markers.reserve(status.sync_info.num_shards);
1396 for (auto& i : status.sync_markers) {
1397 auto& marker = i.second;
1398 // filter out markers from other periods
1399 if (marker.realm_epoch != current_epoch) {
1400 marker.marker.clear();
1401 }
1402 markers.emplace_back(std::move(marker.marker));
1403 }
1404 }
1405
1406 std::swap(sync_status, markers);
1407 return 0;
1408 }
1409
1410 int RGWPeriod::commit(rgw::sal::RGWRadosStore *store,
1411 RGWRealm& realm, const RGWPeriod& current_period,
1412 std::ostream& error_stream, bool force_if_stale)
1413 {
1414 auto zone_svc = sysobj_svc->get_zone_svc();
1415 ldout(cct, 20) << __func__ << " realm " << realm.get_id() << " period " << current_period.get_id() << dendl;
1416 // gateway must be in the master zone to commit
1417 if (master_zone != zone_svc->get_zone_params().get_id()) {
1418 error_stream << "Cannot commit period on zone "
1419 << zone_svc->get_zone_params().get_id() << ", it must be sent to "
1420 "the period's master zone " << master_zone << '.' << std::endl;
1421 return -EINVAL;
1422 }
1423 // period predecessor must match current period
1424 if (predecessor_uuid != current_period.get_id()) {
1425 error_stream << "Period predecessor " << predecessor_uuid
1426 << " does not match current period " << current_period.get_id()
1427 << ". Use 'period pull' to get the latest period from the master, "
1428 "reapply your changes, and try again." << std::endl;
1429 return -EINVAL;
1430 }
1431 // realm epoch must be 1 greater than current period
1432 if (realm_epoch != current_period.get_realm_epoch() + 1) {
1433 error_stream << "Period's realm epoch " << realm_epoch
1434 << " does not come directly after current realm epoch "
1435 << current_period.get_realm_epoch() << ". Use 'realm pull' to get the "
1436 "latest realm and period from the master zone, reapply your changes, "
1437 "and try again." << std::endl;
1438 return -EINVAL;
1439 }
1440 // did the master zone change?
1441 if (master_zone != current_period.get_master_zone()) {
1442 // store the current metadata sync status in the period
1443 int r = update_sync_status(store, current_period, error_stream, force_if_stale);
1444 if (r < 0) {
1445 ldout(cct, 0) << "failed to update metadata sync status: "
1446 << cpp_strerror(-r) << dendl;
1447 return r;
1448 }
1449 // create an object with a new period id
1450 r = create(true);
1451 if (r < 0) {
1452 ldout(cct, 0) << "failed to create new period: " << cpp_strerror(-r) << dendl;
1453 return r;
1454 }
1455 // set as current period
1456 r = realm.set_current_period(*this);
1457 if (r < 0) {
1458 ldout(cct, 0) << "failed to update realm's current period: "
1459 << cpp_strerror(-r) << dendl;
1460 return r;
1461 }
1462 ldout(cct, 4) << "Promoted to master zone and committed new period "
1463 << id << dendl;
1464 realm.notify_new_period(*this);
1465 return 0;
1466 }
1467 // period must be based on current epoch
1468 if (epoch != current_period.get_epoch()) {
1469 error_stream << "Period epoch " << epoch << " does not match "
1470 "predecessor epoch " << current_period.get_epoch()
1471 << ". Use 'period pull' to get the latest epoch from the master zone, "
1472 "reapply your changes, and try again." << std::endl;
1473 return -EINVAL;
1474 }
1475 // set period as next epoch
1476 set_id(current_period.get_id());
1477 set_epoch(current_period.get_epoch() + 1);
1478 set_predecessor(current_period.get_predecessor());
1479 realm_epoch = current_period.get_realm_epoch();
1480 // write the period to rados
1481 int r = store_info(false);
1482 if (r < 0) {
1483 ldout(cct, 0) << "failed to store period: " << cpp_strerror(-r) << dendl;
1484 return r;
1485 }
1486 // set as latest epoch
1487 r = update_latest_epoch(epoch);
1488 if (r == -EEXIST) {
1489 // already have this epoch (or a more recent one)
1490 return 0;
1491 }
1492 if (r < 0) {
1493 ldout(cct, 0) << "failed to set latest epoch: " << cpp_strerror(-r) << dendl;
1494 return r;
1495 }
1496 r = reflect();
1497 if (r < 0) {
1498 ldout(cct, 0) << "failed to update local objects: " << cpp_strerror(-r) << dendl;
1499 return r;
1500 }
1501 ldout(cct, 4) << "Committed new epoch " << epoch
1502 << " for period " << id << dendl;
1503 realm.notify_new_period(*this);
1504 return 0;
1505 }
1506
1507 int RGWZoneParams::create_default(bool old_format)
1508 {
1509 name = default_zone_name;
1510
1511 int r = create();
1512 if (r < 0) {
1513 return r;
1514 }
1515
1516 if (old_format) {
1517 name = id;
1518 }
1519
1520 return r;
1521 }
1522
1523
1524 int get_zones_pool_set(CephContext* cct,
1525 RGWSI_SysObj* sysobj_svc,
1526 const list<string>& zones,
1527 const string& my_zone_id,
1528 set<rgw_pool>& pool_names)
1529 {
1530 for(auto const& iter : zones) {
1531 RGWZoneParams zone(iter);
1532 int r = zone.init(cct, sysobj_svc);
1533 if (r < 0) {
1534 ldout(cct, 0) << "Error: init zone " << iter << ":" << cpp_strerror(-r) << dendl;
1535 return r;
1536 }
1537 if (zone.get_id() != my_zone_id) {
1538 pool_names.insert(zone.domain_root);
1539 pool_names.insert(zone.control_pool);
1540 pool_names.insert(zone.gc_pool);
1541 pool_names.insert(zone.log_pool);
1542 pool_names.insert(zone.intent_log_pool);
1543 pool_names.insert(zone.usage_log_pool);
1544 pool_names.insert(zone.user_keys_pool);
1545 pool_names.insert(zone.user_email_pool);
1546 pool_names.insert(zone.user_swift_pool);
1547 pool_names.insert(zone.user_uid_pool);
1548 pool_names.insert(zone.otp_pool);
1549 pool_names.insert(zone.roles_pool);
1550 pool_names.insert(zone.reshard_pool);
1551 for(auto& iter : zone.placement_pools) {
1552 pool_names.insert(iter.second.index_pool);
1553 for (auto& pi : iter.second.storage_classes.get_all()) {
1554 if (pi.second.data_pool) {
1555 pool_names.insert(pi.second.data_pool.get());
1556 }
1557 }
1558 pool_names.insert(iter.second.data_extra_pool);
1559 }
1560 }
1561 }
1562 return 0;
1563 }
1564
1565 rgw_pool fix_zone_pool_dup(set<rgw_pool> pools,
1566 const string& default_prefix,
1567 const string& default_suffix,
1568 const rgw_pool& suggested_pool)
1569 {
1570 string suggested_name = suggested_pool.to_str();
1571
1572 string prefix = default_prefix;
1573 string suffix = default_suffix;
1574
1575 if (!suggested_pool.empty()) {
1576 prefix = suggested_name.substr(0, suggested_name.find("."));
1577 suffix = suggested_name.substr(prefix.length());
1578 }
1579
1580 rgw_pool pool(prefix + suffix);
1581
1582 if (pools.find(pool) == pools.end()) {
1583 return pool;
1584 } else {
1585 while(true) {
1586 pool = prefix + "_" + std::to_string(std::rand()) + suffix;
1587 if (pools.find(pool) == pools.end()) {
1588 return pool;
1589 }
1590 }
1591 }
1592 }
1593
1594 int RGWZoneParams::fix_pool_names()
1595 {
1596
1597 list<string> zones;
1598 int r = zone_svc->list_zones(zones);
1599 if (r < 0) {
1600 ldout(cct, 10) << "WARNING: store->list_zones() returned r=" << r << dendl;
1601 }
1602
1603 set<rgw_pool> pools;
1604 r = get_zones_pool_set(cct, sysobj_svc, zones, id, pools);
1605 if (r < 0) {
1606 ldout(cct, 0) << "Error: get_zones_pool_names" << r << dendl;
1607 return r;
1608 }
1609
1610 domain_root = fix_zone_pool_dup(pools, name, ".rgw.meta:root", domain_root);
1611 control_pool = fix_zone_pool_dup(pools, name, ".rgw.control", control_pool);
1612 gc_pool = fix_zone_pool_dup(pools, name ,".rgw.log:gc", gc_pool);
1613 lc_pool = fix_zone_pool_dup(pools, name ,".rgw.log:lc", lc_pool);
1614 log_pool = fix_zone_pool_dup(pools, name, ".rgw.log", log_pool);
1615 intent_log_pool = fix_zone_pool_dup(pools, name, ".rgw.log:intent", intent_log_pool);
1616 usage_log_pool = fix_zone_pool_dup(pools, name, ".rgw.log:usage", usage_log_pool);
1617 user_keys_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:users.keys", user_keys_pool);
1618 user_email_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:users.email", user_email_pool);
1619 user_swift_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:users.swift", user_swift_pool);
1620 user_uid_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:users.uid", user_uid_pool);
1621 roles_pool = fix_zone_pool_dup(pools, name, ".rgw.meta:roles", roles_pool);
1622 reshard_pool = fix_zone_pool_dup(pools, name, ".rgw.log:reshard", reshard_pool);
1623 otp_pool = fix_zone_pool_dup(pools, name, ".rgw.otp", otp_pool);
1624
1625 for(auto& iter : placement_pools) {
1626 iter.second.index_pool = fix_zone_pool_dup(pools, name, "." + default_bucket_index_pool_suffix,
1627 iter.second.index_pool);
1628 for (auto& pi : iter.second.storage_classes.get_all()) {
1629 if (pi.second.data_pool) {
1630 rgw_pool& pool = pi.second.data_pool.get();
1631 pool = fix_zone_pool_dup(pools, name, "." + default_storage_pool_suffix,
1632 pool);
1633 }
1634 }
1635 iter.second.data_extra_pool= fix_zone_pool_dup(pools, name, "." + default_storage_extra_pool_suffix,
1636 iter.second.data_extra_pool);
1637 }
1638
1639 return 0;
1640 }
1641
1642 int RGWZoneParams::create(bool exclusive)
1643 {
1644 /* check for old pools config */
1645 rgw_raw_obj obj(domain_root, avail_pools);
1646 auto obj_ctx = sysobj_svc->init_obj_ctx();
1647 auto sysobj = sysobj_svc->get_obj(obj_ctx, obj);
1648 int r = sysobj.rop().stat(null_yield);
1649 if (r < 0) {
1650 ldout(cct, 10) << "couldn't find old data placement pools config, setting up new ones for the zone" << dendl;
1651 /* a new system, let's set new placement info */
1652 RGWZonePlacementInfo default_placement;
1653 default_placement.index_pool = name + "." + default_bucket_index_pool_suffix;
1654 rgw_pool pool = name + "." + default_storage_pool_suffix;
1655 default_placement.storage_classes.set_storage_class(RGW_STORAGE_CLASS_STANDARD, &pool, nullptr);
1656 default_placement.data_extra_pool = name + "." + default_storage_extra_pool_suffix;
1657 placement_pools["default-placement"] = default_placement;
1658 }
1659
1660 r = fix_pool_names();
1661 if (r < 0) {
1662 ldout(cct, 0) << "ERROR: fix_pool_names returned r=" << r << dendl;
1663 return r;
1664 }
1665
1666 r = RGWSystemMetaObj::create(exclusive);
1667 if (r < 0) {
1668 return r;
1669 }
1670
1671 // try to set as default. may race with another create, so pass exclusive=true
1672 // so we don't override an existing default
1673 r = set_as_default(true);
1674 if (r < 0 && r != -EEXIST) {
1675 ldout(cct, 10) << "WARNING: failed to set zone as default, r=" << r << dendl;
1676 }
1677
1678 return 0;
1679 }
1680
1681 rgw_pool RGWZoneParams::get_pool(CephContext *cct) const
1682 {
1683 if (cct->_conf->rgw_zone_root_pool.empty()) {
1684 return rgw_pool(RGW_DEFAULT_ZONE_ROOT_POOL);
1685 }
1686
1687 return rgw_pool(cct->_conf->rgw_zone_root_pool);
1688 }
1689
1690 const string RGWZoneParams::get_default_oid(bool old_format) const
1691 {
1692 if (old_format) {
1693 return cct->_conf->rgw_default_zone_info_oid;
1694 }
1695
1696 return cct->_conf->rgw_default_zone_info_oid + "." + realm_id;
1697 }
1698
1699 const string& RGWZoneParams::get_names_oid_prefix() const
1700 {
1701 return zone_names_oid_prefix;
1702 }
1703
1704 const string& RGWZoneParams::get_info_oid_prefix(bool old_format) const
1705 {
1706 return zone_info_oid_prefix;
1707 }
1708
1709 const string& RGWZoneParams::get_predefined_name(CephContext *cct) const {
1710 return cct->_conf->rgw_zone;
1711 }
1712
1713 int RGWZoneParams::init(CephContext *cct, RGWSI_SysObj *sysobj_svc, bool setup_obj, bool old_format)
1714 {
1715 if (name.empty()) {
1716 name = cct->_conf->rgw_zone;
1717 }
1718
1719 return RGWSystemMetaObj::init(cct, sysobj_svc, setup_obj, old_format);
1720 }
1721
1722 int RGWZoneParams::read_default_id(string& default_id, bool old_format)
1723 {
1724 if (realm_id.empty()) {
1725 /* try using default realm */
1726 RGWRealm realm;
1727 int ret = realm.init(cct, sysobj_svc);
1728 //no default realm exist
1729 if (ret < 0) {
1730 return read_id(default_zone_name, default_id);
1731 }
1732 realm_id = realm.get_id();
1733 }
1734
1735 return RGWSystemMetaObj::read_default_id(default_id, old_format);
1736 }
1737
1738
1739 int RGWZoneParams::set_as_default(bool exclusive)
1740 {
1741 if (realm_id.empty()) {
1742 /* try using default realm */
1743 RGWRealm realm;
1744 int ret = realm.init(cct, sysobj_svc);
1745 if (ret < 0) {
1746 ldout(cct, 10) << "could not read realm id: " << cpp_strerror(-ret) << dendl;
1747 return -EINVAL;
1748 }
1749 realm_id = realm.get_id();
1750 }
1751
1752 return RGWSystemMetaObj::set_as_default(exclusive);
1753 }
1754
1755 const string& RGWZoneParams::get_compression_type(const rgw_placement_rule& placement_rule) const
1756 {
1757 static const std::string NONE{"none"};
1758 auto p = placement_pools.find(placement_rule.name);
1759 if (p == placement_pools.end()) {
1760 return NONE;
1761 }
1762 const auto& type = p->second.get_compression_type(placement_rule.get_storage_class());
1763 return !type.empty() ? type : NONE;
1764 }
1765
1766 void RGWPeriodMap::encode(bufferlist& bl) const {
1767 ENCODE_START(2, 1, bl);
1768 encode(id, bl);
1769 encode(zonegroups, bl);
1770 encode(master_zonegroup, bl);
1771 encode(short_zone_ids, bl);
1772 ENCODE_FINISH(bl);
1773 }
1774
1775 void RGWPeriodMap::decode(bufferlist::const_iterator& bl) {
1776 DECODE_START(2, bl);
1777 decode(id, bl);
1778 decode(zonegroups, bl);
1779 decode(master_zonegroup, bl);
1780 if (struct_v >= 2) {
1781 decode(short_zone_ids, bl);
1782 }
1783 DECODE_FINISH(bl);
1784
1785 zonegroups_by_api.clear();
1786 for (map<string, RGWZoneGroup>::iterator iter = zonegroups.begin();
1787 iter != zonegroups.end(); ++iter) {
1788 RGWZoneGroup& zonegroup = iter->second;
1789 zonegroups_by_api[zonegroup.api_name] = zonegroup;
1790 if (zonegroup.is_master_zonegroup()) {
1791 master_zonegroup = zonegroup.get_id();
1792 }
1793 }
1794 }
1795
1796 // run an MD5 hash on the zone_id and return the first 32 bits
1797 static uint32_t gen_short_zone_id(const std::string zone_id)
1798 {
1799 unsigned char md5[CEPH_CRYPTO_MD5_DIGESTSIZE];
1800 MD5 hash;
1801 hash.Update((const unsigned char *)zone_id.c_str(), zone_id.size());
1802 hash.Final(md5);
1803
1804 uint32_t short_id;
1805 memcpy((char *)&short_id, md5, sizeof(short_id));
1806 return std::max(short_id, 1u);
1807 }
1808
1809 int RGWPeriodMap::update(const RGWZoneGroup& zonegroup, CephContext *cct)
1810 {
1811 if (zonegroup.is_master_zonegroup() && (!master_zonegroup.empty() && zonegroup.get_id() != master_zonegroup)) {
1812 ldout(cct,0) << "Error updating periodmap, multiple master zonegroups configured "<< dendl;
1813 ldout(cct,0) << "master zonegroup: " << master_zonegroup << " and " << zonegroup.get_id() <<dendl;
1814 return -EINVAL;
1815 }
1816 map<string, RGWZoneGroup>::iterator iter = zonegroups.find(zonegroup.get_id());
1817 if (iter != zonegroups.end()) {
1818 RGWZoneGroup& old_zonegroup = iter->second;
1819 if (!old_zonegroup.api_name.empty()) {
1820 zonegroups_by_api.erase(old_zonegroup.api_name);
1821 }
1822 }
1823 zonegroups[zonegroup.get_id()] = zonegroup;
1824
1825 if (!zonegroup.api_name.empty()) {
1826 zonegroups_by_api[zonegroup.api_name] = zonegroup;
1827 }
1828
1829 if (zonegroup.is_master_zonegroup()) {
1830 master_zonegroup = zonegroup.get_id();
1831 } else if (master_zonegroup == zonegroup.get_id()) {
1832 master_zonegroup = "";
1833 }
1834
1835 for (auto& i : zonegroup.zones) {
1836 auto& zone = i.second;
1837 if (short_zone_ids.find(zone.id) != short_zone_ids.end()) {
1838 continue;
1839 }
1840 // calculate the zone's short id
1841 uint32_t short_id = gen_short_zone_id(zone.id);
1842
1843 // search for an existing zone with the same short id
1844 for (auto& s : short_zone_ids) {
1845 if (s.second == short_id) {
1846 ldout(cct, 0) << "New zone '" << zone.name << "' (" << zone.id
1847 << ") generates the same short_zone_id " << short_id
1848 << " as existing zone id " << s.first << dendl;
1849 return -EEXIST;
1850 }
1851 }
1852
1853 short_zone_ids[zone.id] = short_id;
1854 }
1855
1856 return 0;
1857 }
1858
1859 uint32_t RGWPeriodMap::get_zone_short_id(const string& zone_id) const
1860 {
1861 auto i = short_zone_ids.find(zone_id);
1862 if (i == short_zone_ids.end()) {
1863 return 0;
1864 }
1865 return i->second;
1866 }
1867
1868 int RGWZoneGroupMap::read(CephContext *cct, RGWSI_SysObj *sysobj_svc)
1869 {
1870
1871 RGWPeriod period;
1872 int ret = period.init(cct, sysobj_svc);
1873 if (ret < 0) {
1874 cerr << "failed to read current period info: " << cpp_strerror(ret);
1875 return ret;
1876 }
1877
1878 bucket_quota = period.get_config().bucket_quota;
1879 user_quota = period.get_config().user_quota;
1880 zonegroups = period.get_map().zonegroups;
1881 zonegroups_by_api = period.get_map().zonegroups_by_api;
1882 master_zonegroup = period.get_map().master_zonegroup;
1883
1884 return 0;
1885 }
1886
1887 void RGWRegionMap::encode(bufferlist& bl) const {
1888 ENCODE_START( 3, 1, bl);
1889 encode(regions, bl);
1890 encode(master_region, bl);
1891 encode(bucket_quota, bl);
1892 encode(user_quota, bl);
1893 ENCODE_FINISH(bl);
1894 }
1895
1896 void RGWRegionMap::decode(bufferlist::const_iterator& bl) {
1897 DECODE_START(3, bl);
1898 decode(regions, bl);
1899 decode(master_region, bl);
1900 if (struct_v >= 2)
1901 decode(bucket_quota, bl);
1902 if (struct_v >= 3)
1903 decode(user_quota, bl);
1904 DECODE_FINISH(bl);
1905 }
1906
1907 void RGWZoneGroupMap::encode(bufferlist& bl) const {
1908 ENCODE_START( 3, 1, bl);
1909 encode(zonegroups, bl);
1910 encode(master_zonegroup, bl);
1911 encode(bucket_quota, bl);
1912 encode(user_quota, bl);
1913 ENCODE_FINISH(bl);
1914 }
1915
1916 void RGWZoneGroupMap::decode(bufferlist::const_iterator& bl) {
1917 DECODE_START(3, bl);
1918 decode(zonegroups, bl);
1919 decode(master_zonegroup, bl);
1920 if (struct_v >= 2)
1921 decode(bucket_quota, bl);
1922 if (struct_v >= 3)
1923 decode(user_quota, bl);
1924 DECODE_FINISH(bl);
1925
1926 zonegroups_by_api.clear();
1927 for (map<string, RGWZoneGroup>::iterator iter = zonegroups.begin();
1928 iter != zonegroups.end(); ++iter) {
1929 RGWZoneGroup& zonegroup = iter->second;
1930 zonegroups_by_api[zonegroup.api_name] = zonegroup;
1931 if (zonegroup.is_master_zonegroup()) {
1932 master_zonegroup = zonegroup.get_name();
1933 }
1934 }
1935 }
1936
1937