]> git.proxmox.com Git - ceph.git/blob - ceph/src/tools/rebuild_mondb.cc
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / tools / rebuild_mondb.cc
1 #include "auth/cephx/CephxKeyServer.h"
2 #include "common/errno.h"
3 #include "mon/AuthMonitor.h"
4 #include "mon/MonitorDBStore.h"
5 #include "os/ObjectStore.h"
6 #include "osd/OSD.h"
7
8 static int update_auth(const string& keyring_path,
9 const OSDSuperblock& sb,
10 MonitorDBStore& ms);
11 static int update_monitor(const OSDSuperblock& sb, MonitorDBStore& ms);
12 static int update_osdmap(ObjectStore& fs,
13 OSDSuperblock& sb,
14 MonitorDBStore& ms);
15
16 int update_mon_db(ObjectStore& fs, OSDSuperblock& sb,
17 const string& keyring,
18 const string& store_path)
19 {
20 MonitorDBStore ms(store_path);
21 int r = ms.create_and_open(cerr);
22 if (r < 0) {
23 cerr << "unable to open mon store: " << store_path << std::endl;
24 return r;
25 }
26 if ((r = update_auth(keyring, sb, ms)) < 0) {
27 goto out;
28 }
29 if ((r = update_osdmap(fs, sb, ms)) < 0) {
30 goto out;
31 }
32 if ((r = update_monitor(sb, ms)) < 0) {
33 goto out;
34 }
35 out:
36 ms.close();
37 return r;
38 }
39
40 static void add_auth(KeyServerData::Incremental& auth_inc,
41 MonitorDBStore& ms)
42 {
43 AuthMonitor::Incremental inc;
44 inc.inc_type = AuthMonitor::AUTH_DATA;
45 encode(auth_inc, inc.auth_data);
46 inc.auth_type = CEPH_AUTH_CEPHX;
47
48 bufferlist bl;
49 __u8 v = 1;
50 encode(v, bl);
51 inc.encode(bl, CEPH_FEATURES_ALL);
52
53 const string prefix("auth");
54 auto last_committed = ms.get(prefix, "last_committed") + 1;
55 auto t = make_shared<MonitorDBStore::Transaction>();
56 t->put(prefix, last_committed, bl);
57 t->put(prefix, "last_committed", last_committed);
58 auto first_committed = ms.get(prefix, "first_committed");
59 if (!first_committed) {
60 t->put(prefix, "first_committed", last_committed);
61 }
62 ms.apply_transaction(t);
63 }
64
65 static int get_auth_inc(const string& keyring_path,
66 const OSDSuperblock& sb,
67 KeyServerData::Incremental* auth_inc)
68 {
69 auth_inc->op = KeyServerData::AUTH_INC_ADD;
70
71 // get the name
72 EntityName entity;
73 // assuming the entity name of OSD is "osd.<osd_id>"
74 entity.set(CEPH_ENTITY_TYPE_OSD, std::to_string(sb.whoami));
75 auth_inc->name = entity;
76
77 // read keyring from disk
78 KeyRing keyring;
79 {
80 bufferlist bl;
81 string error;
82 int r = bl.read_file(keyring_path.c_str(), &error);
83 if (r < 0) {
84 if (r == -ENOENT) {
85 cout << "ignoring keyring (" << keyring_path << ")"
86 << ": " << error << std::endl;
87 return 0;
88 } else {
89 cerr << "unable to read keyring (" << keyring_path << ")"
90 << ": " << error << std::endl;
91 return r;
92 }
93 } else if (bl.length() == 0) {
94 cout << "ignoring empty keyring: " << keyring_path << std::endl;
95 return 0;
96 }
97 auto bp = bl.cbegin();
98 try {
99 decode(keyring, bp);
100 } catch (const buffer::error& e) {
101 cerr << "error decoding keyring: " << keyring_path << std::endl;
102 return -EINVAL;
103 }
104 }
105
106 // get the key
107 EntityAuth new_inc;
108 if (!keyring.get_auth(auth_inc->name, new_inc)) {
109 cerr << "key for " << auth_inc->name << " not found in keyring: "
110 << keyring_path << std::endl;
111 return -EINVAL;
112 }
113 auth_inc->auth.key = new_inc.key;
114
115 // get the caps
116 map<string,bufferlist> caps;
117 if (new_inc.caps.empty()) {
118 // fallback to default caps for an OSD
119 // osd 'allow *' mon 'allow rwx'
120 // as suggested by document.
121 encode(string("allow *"), caps["osd"]);
122 encode(string("allow rwx"), caps["mon"]);
123 } else {
124 caps = new_inc.caps;
125 }
126 auth_inc->auth.caps = caps;
127 return 0;
128 }
129
130 // rebuild
131 // - auth/${epoch}
132 // - auth/first_committed
133 // - auth/last_committed
134 static int update_auth(const string& keyring_path,
135 const OSDSuperblock& sb,
136 MonitorDBStore& ms)
137 {
138 // stolen from AuthMonitor::prepare_command(), where prefix is "auth add"
139 KeyServerData::Incremental auth_inc;
140 int r;
141 if ((r = get_auth_inc(keyring_path, sb, &auth_inc))) {
142 return r;
143 }
144 add_auth(auth_inc, ms);
145 return 0;
146 }
147
148 // stolen from Monitor::check_fsid()
149 static int check_fsid(const uuid_d& fsid, MonitorDBStore& ms)
150 {
151 bufferlist bl;
152 int r = ms.get("monitor", "cluster_uuid", bl);
153 if (r == -ENOENT)
154 return r;
155 string uuid(bl.c_str(), bl.length());
156 auto end = uuid.find_first_of('\n');
157 if (end != uuid.npos) {
158 uuid.resize(end);
159 }
160 uuid_d existing;
161 if (!existing.parse(uuid.c_str())) {
162 cerr << "error: unable to parse uuid" << std::endl;
163 return -EINVAL;
164 }
165 if (fsid != existing) {
166 cerr << "error: cluster_uuid " << existing << " != " << fsid << std::endl;
167 return -EEXIST;
168 }
169 return 0;
170 }
171
172 // rebuild
173 // - monitor/cluster_uuid
174 int update_monitor(const OSDSuperblock& sb, MonitorDBStore& ms)
175 {
176 switch (check_fsid(sb.cluster_fsid, ms)) {
177 case -ENOENT:
178 break;
179 case -EINVAL:
180 return -EINVAL;
181 case -EEXIST:
182 return -EEXIST;
183 case 0:
184 return 0;
185 default:
186 ceph_abort();
187 }
188 string uuid = stringify(sb.cluster_fsid) + "\n";
189 bufferlist bl;
190 bl.append(uuid);
191 auto t = make_shared<MonitorDBStore::Transaction>();
192 t->put("monitor", "cluster_uuid", bl);
193 ms.apply_transaction(t);
194 return 0;
195 }
196
197 // rebuild
198 // - osdmap/${epoch}
199 // - osdmap/full_${epoch}
200 // - osdmap/full_latest
201 // - osdmap/first_committed
202 // - osdmap/last_committed
203 int update_osdmap(ObjectStore& fs, OSDSuperblock& sb, MonitorDBStore& ms)
204 {
205 const string prefix("osdmap");
206 const string first_committed_name("first_committed");
207 const string last_committed_name("last_committed");
208 epoch_t first_committed = ms.get(prefix, first_committed_name);
209 epoch_t last_committed = ms.get(prefix, last_committed_name);
210 auto t = make_shared<MonitorDBStore::Transaction>();
211
212 // trim stale maps
213 unsigned ntrimmed = 0;
214 // osdmap starts at 1. if we have a "0" first_committed, then there is nothing
215 // to trim. and "1 osdmaps trimmed" in the output message is misleading. so
216 // let's make it an exception.
217 for (auto e = first_committed; first_committed && e < sb.oldest_map; e++) {
218 t->erase(prefix, e);
219 t->erase(prefix, ms.combine_strings("full", e));
220 ntrimmed++;
221 }
222 // make sure we have a non-zero first_committed. OSDMonitor relies on this.
223 // because PaxosService::put_last_committed() set it to last_committed, if it
224 // is zero. which breaks OSDMonitor::update_from_paxos(), in which we believe
225 // that latest_full should always be greater than last_committed.
226 if (first_committed == 0 && sb.oldest_map < sb.newest_map) {
227 first_committed = 1;
228 } else if (ntrimmed) {
229 first_committed += ntrimmed;
230 }
231 if (first_committed) {
232 t->put(prefix, first_committed_name, first_committed);
233 ms.apply_transaction(t);
234 t = make_shared<MonitorDBStore::Transaction>();
235 }
236
237 unsigned nadded = 0;
238
239 auto ch = fs.open_collection(coll_t::meta());
240 OSDMap osdmap;
241 for (auto e = std::max(last_committed+1, sb.oldest_map);
242 e <= sb.newest_map; e++) {
243 bool have_crc = false;
244 uint32_t crc = -1;
245 uint64_t features = 0;
246 // add inc maps
247 {
248 const auto oid = OSD::get_inc_osdmap_pobject_name(e);
249 bufferlist bl;
250 int nread = fs.read(ch, oid, 0, 0, bl);
251 if (nread <= 0) {
252 cerr << "missing " << oid << std::endl;
253 return -EINVAL;
254 }
255 t->put(prefix, e, bl);
256
257 OSDMap::Incremental inc;
258 auto p = bl.cbegin();
259 inc.decode(p);
260 features = inc.encode_features | CEPH_FEATURE_RESERVED;
261 if (osdmap.get_epoch() && e > 1) {
262 if (osdmap.apply_incremental(inc)) {
263 cerr << "bad fsid: "
264 << osdmap.get_fsid() << " != " << inc.fsid << std::endl;
265 return -EINVAL;
266 }
267 have_crc = inc.have_crc;
268 if (inc.have_crc) {
269 crc = inc.full_crc;
270 bufferlist fbl;
271 osdmap.encode(fbl, features);
272 if (osdmap.get_crc() != inc.full_crc) {
273 cerr << "mismatched inc crc: "
274 << osdmap.get_crc() << " != " << inc.full_crc << std::endl;
275 return -EINVAL;
276 }
277 // inc.decode() verifies `inc_crc`, so it's been taken care of.
278 }
279 }
280 }
281 // add full maps
282 {
283 const auto oid = OSD::get_osdmap_pobject_name(e);
284 bufferlist bl;
285 int nread = fs.read(ch, oid, 0, 0, bl);
286 if (nread <= 0) {
287 cerr << "missing " << oid << std::endl;
288 return -EINVAL;
289 }
290 t->put(prefix, ms.combine_strings("full", e), bl);
291
292 auto p = bl.cbegin();
293 osdmap.decode(p);
294 if (osdmap.have_crc()) {
295 if (have_crc && osdmap.get_crc() != crc) {
296 cerr << "mismatched full/inc crc: "
297 << osdmap.get_crc() << " != " << crc << std::endl;
298 return -EINVAL;
299 }
300 uint32_t saved_crc = osdmap.get_crc();
301 bufferlist fbl;
302 osdmap.encode(fbl, features);
303 if (osdmap.get_crc() != saved_crc) {
304 cerr << "mismatched full crc: "
305 << saved_crc << " != " << osdmap.get_crc() << std::endl;
306 return -EINVAL;
307 }
308 }
309 }
310 nadded++;
311
312 // last_committed
313 t->put(prefix, last_committed_name, e);
314 // full last
315 t->put(prefix, ms.combine_strings("full", "latest"), e);
316
317 // this number comes from the default value of osd_target_transaction_size,
318 // so we won't OOM or stuff too many maps in a single transaction if OSD is
319 // keeping a large series of osdmap
320 static constexpr unsigned TRANSACTION_SIZE = 30;
321 if (t->size() >= TRANSACTION_SIZE) {
322 ms.apply_transaction(t);
323 t = make_shared<MonitorDBStore::Transaction>();
324 }
325 }
326 if (!t->empty()) {
327 ms.apply_transaction(t);
328 }
329 t.reset();
330
331 string osd_name("osd.");
332 osd_name += std::to_string(sb.whoami);
333 cout << std::left << setw(8)
334 << osd_name << ": "
335 << ntrimmed << " osdmaps trimmed, "
336 << nadded << " osdmaps added." << std::endl;
337 return 0;
338 }
339