]> git.proxmox.com Git - ceph.git/blame - ceph/src/tools/rebuild_mondb.cc
import ceph 14.2.5
[ceph.git] / ceph / src / tools / rebuild_mondb.cc
CommitLineData
7c673cae
FG
1#include "auth/cephx/CephxKeyServer.h"
2#include "common/errno.h"
3#include "mon/AuthMonitor.h"
4#include "mon/MonitorDBStore.h"
5#include "os/ObjectStore.h"
6#include "osd/OSD.h"
7
8static int update_auth(const string& keyring_path,
9 const OSDSuperblock& sb,
10 MonitorDBStore& ms);
11static int update_monitor(const OSDSuperblock& sb, MonitorDBStore& ms);
12static int update_osdmap(ObjectStore& fs,
13 OSDSuperblock& sb,
14 MonitorDBStore& ms);
7c673cae
FG
15
16int update_mon_db(ObjectStore& fs, OSDSuperblock& sb,
17 const string& keyring,
18 const string& store_path)
19{
20 MonitorDBStore ms(store_path);
21 int r = ms.create_and_open(cerr);
22 if (r < 0) {
23 cerr << "unable to open mon store: " << store_path << std::endl;
24 return r;
25 }
26 if ((r = update_auth(keyring, sb, ms)) < 0) {
27 goto out;
28 }
29 if ((r = update_osdmap(fs, sb, ms)) < 0) {
30 goto out;
31 }
7c673cae
FG
32 if ((r = update_monitor(sb, ms)) < 0) {
33 goto out;
34 }
35 out:
36 ms.close();
37 return r;
38}
39
40static void add_auth(KeyServerData::Incremental& auth_inc,
41 MonitorDBStore& ms)
42{
43 AuthMonitor::Incremental inc;
44 inc.inc_type = AuthMonitor::AUTH_DATA;
11fdf7f2 45 encode(auth_inc, inc.auth_data);
7c673cae
FG
46 inc.auth_type = CEPH_AUTH_CEPHX;
47
48 bufferlist bl;
49 __u8 v = 1;
11fdf7f2 50 encode(v, bl);
7c673cae
FG
51 inc.encode(bl, CEPH_FEATURES_ALL);
52
53 const string prefix("auth");
54 auto last_committed = ms.get(prefix, "last_committed") + 1;
55 auto t = make_shared<MonitorDBStore::Transaction>();
56 t->put(prefix, last_committed, bl);
57 t->put(prefix, "last_committed", last_committed);
58 auto first_committed = ms.get(prefix, "first_committed");
59 if (!first_committed) {
60 t->put(prefix, "first_committed", last_committed);
61 }
62 ms.apply_transaction(t);
63}
64
65static int get_auth_inc(const string& keyring_path,
66 const OSDSuperblock& sb,
67 KeyServerData::Incremental* auth_inc)
68{
69 auth_inc->op = KeyServerData::AUTH_INC_ADD;
70
71 // get the name
72 EntityName entity;
73 // assuming the entity name of OSD is "osd.<osd_id>"
74 entity.set(CEPH_ENTITY_TYPE_OSD, std::to_string(sb.whoami));
75 auth_inc->name = entity;
76
77 // read keyring from disk
78 KeyRing keyring;
79 {
80 bufferlist bl;
81 string error;
82 int r = bl.read_file(keyring_path.c_str(), &error);
83 if (r < 0) {
84 if (r == -ENOENT) {
85 cout << "ignoring keyring (" << keyring_path << ")"
86 << ": " << error << std::endl;
87 return 0;
88 } else {
89 cerr << "unable to read keyring (" << keyring_path << ")"
90 << ": " << error << std::endl;
91 return r;
92 }
93 } else if (bl.length() == 0) {
94 cout << "ignoring empty keyring: " << keyring_path << std::endl;
95 return 0;
96 }
11fdf7f2 97 auto bp = bl.cbegin();
7c673cae 98 try {
11fdf7f2 99 decode(keyring, bp);
7c673cae
FG
100 } catch (const buffer::error& e) {
101 cerr << "error decoding keyring: " << keyring_path << std::endl;
102 return -EINVAL;
103 }
104 }
105
106 // get the key
107 EntityAuth new_inc;
108 if (!keyring.get_auth(auth_inc->name, new_inc)) {
109 cerr << "key for " << auth_inc->name << " not found in keyring: "
110 << keyring_path << std::endl;
111 return -EINVAL;
112 }
113 auth_inc->auth.key = new_inc.key;
114
115 // get the caps
116 map<string,bufferlist> caps;
117 if (new_inc.caps.empty()) {
118 // fallback to default caps for an OSD
119 // osd 'allow *' mon 'allow rwx'
120 // as suggested by document.
11fdf7f2
TL
121 encode(string("allow *"), caps["osd"]);
122 encode(string("allow rwx"), caps["mon"]);
7c673cae
FG
123 } else {
124 caps = new_inc.caps;
125 }
126 auth_inc->auth.caps = caps;
127 return 0;
128}
129
130// rebuild
131// - auth/${epoch}
132// - auth/first_committed
133// - auth/last_committed
134static int update_auth(const string& keyring_path,
135 const OSDSuperblock& sb,
136 MonitorDBStore& ms)
137{
138 // stolen from AuthMonitor::prepare_command(), where prefix is "auth add"
139 KeyServerData::Incremental auth_inc;
140 int r;
141 if ((r = get_auth_inc(keyring_path, sb, &auth_inc))) {
142 return r;
143 }
144 add_auth(auth_inc, ms);
145 return 0;
146}
147
148// stolen from Monitor::check_fsid()
149static int check_fsid(const uuid_d& fsid, MonitorDBStore& ms)
150{
151 bufferlist bl;
152 int r = ms.get("monitor", "cluster_uuid", bl);
153 if (r == -ENOENT)
154 return r;
155 string uuid(bl.c_str(), bl.length());
156 auto end = uuid.find_first_of('\n');
157 if (end != uuid.npos) {
158 uuid.resize(end);
159 }
160 uuid_d existing;
161 if (!existing.parse(uuid.c_str())) {
162 cerr << "error: unable to parse uuid" << std::endl;
163 return -EINVAL;
164 }
165 if (fsid != existing) {
166 cerr << "error: cluster_uuid " << existing << " != " << fsid << std::endl;
167 return -EEXIST;
168 }
169 return 0;
170}
171
172// rebuild
173// - monitor/cluster_uuid
174int update_monitor(const OSDSuperblock& sb, MonitorDBStore& ms)
175{
176 switch (check_fsid(sb.cluster_fsid, ms)) {
177 case -ENOENT:
178 break;
179 case -EINVAL:
180 return -EINVAL;
181 case -EEXIST:
182 return -EEXIST;
183 case 0:
184 return 0;
185 default:
186 ceph_abort();
187 }
188 string uuid = stringify(sb.cluster_fsid) + "\n";
189 bufferlist bl;
190 bl.append(uuid);
191 auto t = make_shared<MonitorDBStore::Transaction>();
192 t->put("monitor", "cluster_uuid", bl);
193 ms.apply_transaction(t);
194 return 0;
195}
196
197// rebuild
198// - osdmap/${epoch}
199// - osdmap/full_${epoch}
200// - osdmap/full_latest
201// - osdmap/first_committed
202// - osdmap/last_committed
203int update_osdmap(ObjectStore& fs, OSDSuperblock& sb, MonitorDBStore& ms)
204{
205 const string prefix("osdmap");
206 const string first_committed_name("first_committed");
207 const string last_committed_name("last_committed");
208 epoch_t first_committed = ms.get(prefix, first_committed_name);
209 epoch_t last_committed = ms.get(prefix, last_committed_name);
210 auto t = make_shared<MonitorDBStore::Transaction>();
211
212 // trim stale maps
213 unsigned ntrimmed = 0;
214 // osdmap starts at 1. if we have a "0" first_committed, then there is nothing
215 // to trim. and "1 osdmaps trimmed" in the output message is misleading. so
216 // let's make it an exception.
217 for (auto e = first_committed; first_committed && e < sb.oldest_map; e++) {
218 t->erase(prefix, e);
219 t->erase(prefix, ms.combine_strings("full", e));
220 ntrimmed++;
221 }
222 // make sure we have a non-zero first_committed. OSDMonitor relies on this.
223 // because PaxosService::put_last_committed() set it to last_committed, if it
224 // is zero. which breaks OSDMonitor::update_from_paxos(), in which we believe
225 // that latest_full should always be greater than last_committed.
226 if (first_committed == 0 && sb.oldest_map < sb.newest_map) {
227 first_committed = 1;
228 } else if (ntrimmed) {
229 first_committed += ntrimmed;
230 }
231 if (first_committed) {
232 t->put(prefix, first_committed_name, first_committed);
233 ms.apply_transaction(t);
234 t = make_shared<MonitorDBStore::Transaction>();
235 }
236
237 unsigned nadded = 0;
238
11fdf7f2 239 auto ch = fs.open_collection(coll_t::meta());
7c673cae 240 OSDMap osdmap;
11fdf7f2 241 for (auto e = std::max(last_committed+1, sb.oldest_map);
7c673cae
FG
242 e <= sb.newest_map; e++) {
243 bool have_crc = false;
244 uint32_t crc = -1;
245 uint64_t features = 0;
246 // add inc maps
eafe8130 247 auto add_inc_result = [&] {
7c673cae
FG
248 const auto oid = OSD::get_inc_osdmap_pobject_name(e);
249 bufferlist bl;
11fdf7f2 250 int nread = fs.read(ch, oid, 0, 0, bl);
7c673cae 251 if (nread <= 0) {
eafe8130
TL
252 cout << "missing " << oid << std::endl;
253 return -ENOENT;
7c673cae
FG
254 }
255 t->put(prefix, e, bl);
256
257 OSDMap::Incremental inc;
11fdf7f2 258 auto p = bl.cbegin();
7c673cae
FG
259 inc.decode(p);
260 features = inc.encode_features | CEPH_FEATURE_RESERVED;
261 if (osdmap.get_epoch() && e > 1) {
262 if (osdmap.apply_incremental(inc)) {
263 cerr << "bad fsid: "
264 << osdmap.get_fsid() << " != " << inc.fsid << std::endl;
265 return -EINVAL;
266 }
267 have_crc = inc.have_crc;
268 if (inc.have_crc) {
269 crc = inc.full_crc;
270 bufferlist fbl;
271 osdmap.encode(fbl, features);
272 if (osdmap.get_crc() != inc.full_crc) {
273 cerr << "mismatched inc crc: "
274 << osdmap.get_crc() << " != " << inc.full_crc << std::endl;
275 return -EINVAL;
276 }
277 // inc.decode() verifies `inc_crc`, so it's been taken care of.
278 }
279 }
eafe8130
TL
280 return 0;
281 }();
282 switch (add_inc_result) {
283 case -ENOENT:
284 // no worries, we always have full map
285 break;
286 case -EINVAL:
287 return -EINVAL;
288 case 0:
289 break;
290 default:
291 assert(0);
7c673cae
FG
292 }
293 // add full maps
294 {
295 const auto oid = OSD::get_osdmap_pobject_name(e);
296 bufferlist bl;
11fdf7f2 297 int nread = fs.read(ch, oid, 0, 0, bl);
7c673cae
FG
298 if (nread <= 0) {
299 cerr << "missing " << oid << std::endl;
300 return -EINVAL;
301 }
302 t->put(prefix, ms.combine_strings("full", e), bl);
303
11fdf7f2 304 auto p = bl.cbegin();
7c673cae
FG
305 osdmap.decode(p);
306 if (osdmap.have_crc()) {
307 if (have_crc && osdmap.get_crc() != crc) {
308 cerr << "mismatched full/inc crc: "
309 << osdmap.get_crc() << " != " << crc << std::endl;
310 return -EINVAL;
311 }
312 uint32_t saved_crc = osdmap.get_crc();
313 bufferlist fbl;
314 osdmap.encode(fbl, features);
315 if (osdmap.get_crc() != saved_crc) {
316 cerr << "mismatched full crc: "
317 << saved_crc << " != " << osdmap.get_crc() << std::endl;
318 return -EINVAL;
319 }
320 }
321 }
322 nadded++;
323
324 // last_committed
325 t->put(prefix, last_committed_name, e);
326 // full last
327 t->put(prefix, ms.combine_strings("full", "latest"), e);
328
329 // this number comes from the default value of osd_target_transaction_size,
330 // so we won't OOM or stuff too many maps in a single transaction if OSD is
331 // keeping a large series of osdmap
332 static constexpr unsigned TRANSACTION_SIZE = 30;
333 if (t->size() >= TRANSACTION_SIZE) {
334 ms.apply_transaction(t);
335 t = make_shared<MonitorDBStore::Transaction>();
336 }
337 }
338 if (!t->empty()) {
339 ms.apply_transaction(t);
340 }
341 t.reset();
342
343 string osd_name("osd.");
344 osd_name += std::to_string(sb.whoami);
345 cout << std::left << setw(8)
346 << osd_name << ": "
347 << ntrimmed << " osdmaps trimmed, "
348 << nadded << " osdmaps added." << std::endl;
349 return 0;
350}
11fdf7f2 351