]> git.proxmox.com Git - ceph.git/blame - ceph/src/tools/rebuild_mondb.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / tools / rebuild_mondb.cc
CommitLineData
7c673cae
FG
1#include "auth/cephx/CephxKeyServer.h"
2#include "common/errno.h"
3#include "mon/AuthMonitor.h"
4#include "mon/MonitorDBStore.h"
5#include "os/ObjectStore.h"
6#include "osd/OSD.h"
7
20effc67
TL
8using namespace std;
9
7c673cae
FG
10static int update_auth(const string& keyring_path,
11 const OSDSuperblock& sb,
12 MonitorDBStore& ms);
13static int update_monitor(const OSDSuperblock& sb, MonitorDBStore& ms);
14static int update_osdmap(ObjectStore& fs,
15 OSDSuperblock& sb,
16 MonitorDBStore& ms);
7c673cae
FG
17
18int update_mon_db(ObjectStore& fs, OSDSuperblock& sb,
19 const string& keyring,
20 const string& store_path)
21{
22 MonitorDBStore ms(store_path);
23 int r = ms.create_and_open(cerr);
24 if (r < 0) {
25 cerr << "unable to open mon store: " << store_path << std::endl;
26 return r;
27 }
28 if ((r = update_auth(keyring, sb, ms)) < 0) {
29 goto out;
30 }
31 if ((r = update_osdmap(fs, sb, ms)) < 0) {
32 goto out;
33 }
7c673cae
FG
34 if ((r = update_monitor(sb, ms)) < 0) {
35 goto out;
36 }
37 out:
38 ms.close();
39 return r;
40}
41
42static void add_auth(KeyServerData::Incremental& auth_inc,
43 MonitorDBStore& ms)
44{
45 AuthMonitor::Incremental inc;
46 inc.inc_type = AuthMonitor::AUTH_DATA;
11fdf7f2 47 encode(auth_inc, inc.auth_data);
7c673cae
FG
48 inc.auth_type = CEPH_AUTH_CEPHX;
49
50 bufferlist bl;
51 __u8 v = 1;
11fdf7f2 52 encode(v, bl);
7c673cae
FG
53 inc.encode(bl, CEPH_FEATURES_ALL);
54
55 const string prefix("auth");
56 auto last_committed = ms.get(prefix, "last_committed") + 1;
57 auto t = make_shared<MonitorDBStore::Transaction>();
58 t->put(prefix, last_committed, bl);
59 t->put(prefix, "last_committed", last_committed);
60 auto first_committed = ms.get(prefix, "first_committed");
61 if (!first_committed) {
62 t->put(prefix, "first_committed", last_committed);
63 }
64 ms.apply_transaction(t);
65}
66
67static int get_auth_inc(const string& keyring_path,
68 const OSDSuperblock& sb,
69 KeyServerData::Incremental* auth_inc)
70{
71 auth_inc->op = KeyServerData::AUTH_INC_ADD;
72
73 // get the name
74 EntityName entity;
75 // assuming the entity name of OSD is "osd.<osd_id>"
76 entity.set(CEPH_ENTITY_TYPE_OSD, std::to_string(sb.whoami));
77 auth_inc->name = entity;
78
79 // read keyring from disk
80 KeyRing keyring;
81 {
82 bufferlist bl;
83 string error;
84 int r = bl.read_file(keyring_path.c_str(), &error);
85 if (r < 0) {
86 if (r == -ENOENT) {
87 cout << "ignoring keyring (" << keyring_path << ")"
88 << ": " << error << std::endl;
89 return 0;
90 } else {
91 cerr << "unable to read keyring (" << keyring_path << ")"
92 << ": " << error << std::endl;
93 return r;
94 }
95 } else if (bl.length() == 0) {
96 cout << "ignoring empty keyring: " << keyring_path << std::endl;
97 return 0;
98 }
11fdf7f2 99 auto bp = bl.cbegin();
7c673cae 100 try {
11fdf7f2 101 decode(keyring, bp);
7c673cae
FG
102 } catch (const buffer::error& e) {
103 cerr << "error decoding keyring: " << keyring_path << std::endl;
104 return -EINVAL;
105 }
106 }
107
108 // get the key
109 EntityAuth new_inc;
110 if (!keyring.get_auth(auth_inc->name, new_inc)) {
111 cerr << "key for " << auth_inc->name << " not found in keyring: "
112 << keyring_path << std::endl;
113 return -EINVAL;
114 }
115 auth_inc->auth.key = new_inc.key;
116
117 // get the caps
118 map<string,bufferlist> caps;
119 if (new_inc.caps.empty()) {
120 // fallback to default caps for an OSD
121 // osd 'allow *' mon 'allow rwx'
122 // as suggested by document.
11fdf7f2
TL
123 encode(string("allow *"), caps["osd"]);
124 encode(string("allow rwx"), caps["mon"]);
7c673cae
FG
125 } else {
126 caps = new_inc.caps;
127 }
128 auth_inc->auth.caps = caps;
129 return 0;
130}
131
132// rebuild
133// - auth/${epoch}
134// - auth/first_committed
135// - auth/last_committed
136static int update_auth(const string& keyring_path,
137 const OSDSuperblock& sb,
138 MonitorDBStore& ms)
139{
140 // stolen from AuthMonitor::prepare_command(), where prefix is "auth add"
141 KeyServerData::Incremental auth_inc;
142 int r;
143 if ((r = get_auth_inc(keyring_path, sb, &auth_inc))) {
144 return r;
145 }
146 add_auth(auth_inc, ms);
147 return 0;
148}
149
150// stolen from Monitor::check_fsid()
151static int check_fsid(const uuid_d& fsid, MonitorDBStore& ms)
152{
153 bufferlist bl;
154 int r = ms.get("monitor", "cluster_uuid", bl);
155 if (r == -ENOENT)
156 return r;
157 string uuid(bl.c_str(), bl.length());
158 auto end = uuid.find_first_of('\n');
159 if (end != uuid.npos) {
160 uuid.resize(end);
161 }
162 uuid_d existing;
163 if (!existing.parse(uuid.c_str())) {
164 cerr << "error: unable to parse uuid" << std::endl;
165 return -EINVAL;
166 }
167 if (fsid != existing) {
168 cerr << "error: cluster_uuid " << existing << " != " << fsid << std::endl;
169 return -EEXIST;
170 }
171 return 0;
172}
173
174// rebuild
175// - monitor/cluster_uuid
176int update_monitor(const OSDSuperblock& sb, MonitorDBStore& ms)
177{
178 switch (check_fsid(sb.cluster_fsid, ms)) {
179 case -ENOENT:
180 break;
181 case -EINVAL:
182 return -EINVAL;
183 case -EEXIST:
184 return -EEXIST;
185 case 0:
186 return 0;
187 default:
188 ceph_abort();
189 }
190 string uuid = stringify(sb.cluster_fsid) + "\n";
191 bufferlist bl;
192 bl.append(uuid);
193 auto t = make_shared<MonitorDBStore::Transaction>();
194 t->put("monitor", "cluster_uuid", bl);
195 ms.apply_transaction(t);
196 return 0;
197}
198
199// rebuild
200// - osdmap/${epoch}
201// - osdmap/full_${epoch}
202// - osdmap/full_latest
203// - osdmap/first_committed
204// - osdmap/last_committed
205int update_osdmap(ObjectStore& fs, OSDSuperblock& sb, MonitorDBStore& ms)
206{
207 const string prefix("osdmap");
208 const string first_committed_name("first_committed");
209 const string last_committed_name("last_committed");
210 epoch_t first_committed = ms.get(prefix, first_committed_name);
211 epoch_t last_committed = ms.get(prefix, last_committed_name);
212 auto t = make_shared<MonitorDBStore::Transaction>();
213
214 // trim stale maps
215 unsigned ntrimmed = 0;
216 // osdmap starts at 1. if we have a "0" first_committed, then there is nothing
217 // to trim. and "1 osdmaps trimmed" in the output message is misleading. so
218 // let's make it an exception.
219 for (auto e = first_committed; first_committed && e < sb.oldest_map; e++) {
220 t->erase(prefix, e);
221 t->erase(prefix, ms.combine_strings("full", e));
222 ntrimmed++;
223 }
224 // make sure we have a non-zero first_committed. OSDMonitor relies on this.
225 // because PaxosService::put_last_committed() set it to last_committed, if it
226 // is zero. which breaks OSDMonitor::update_from_paxos(), in which we believe
227 // that latest_full should always be greater than last_committed.
228 if (first_committed == 0 && sb.oldest_map < sb.newest_map) {
229 first_committed = 1;
230 } else if (ntrimmed) {
231 first_committed += ntrimmed;
232 }
233 if (first_committed) {
234 t->put(prefix, first_committed_name, first_committed);
235 ms.apply_transaction(t);
236 t = make_shared<MonitorDBStore::Transaction>();
237 }
238
239 unsigned nadded = 0;
240
11fdf7f2 241 auto ch = fs.open_collection(coll_t::meta());
7c673cae 242 OSDMap osdmap;
11fdf7f2 243 for (auto e = std::max(last_committed+1, sb.oldest_map);
7c673cae
FG
244 e <= sb.newest_map; e++) {
245 bool have_crc = false;
246 uint32_t crc = -1;
247 uint64_t features = 0;
248 // add inc maps
eafe8130 249 auto add_inc_result = [&] {
7c673cae
FG
250 const auto oid = OSD::get_inc_osdmap_pobject_name(e);
251 bufferlist bl;
11fdf7f2 252 int nread = fs.read(ch, oid, 0, 0, bl);
7c673cae 253 if (nread <= 0) {
eafe8130
TL
254 cout << "missing " << oid << std::endl;
255 return -ENOENT;
7c673cae
FG
256 }
257 t->put(prefix, e, bl);
258
259 OSDMap::Incremental inc;
11fdf7f2 260 auto p = bl.cbegin();
7c673cae
FG
261 inc.decode(p);
262 features = inc.encode_features | CEPH_FEATURE_RESERVED;
263 if (osdmap.get_epoch() && e > 1) {
264 if (osdmap.apply_incremental(inc)) {
265 cerr << "bad fsid: "
266 << osdmap.get_fsid() << " != " << inc.fsid << std::endl;
267 return -EINVAL;
268 }
269 have_crc = inc.have_crc;
270 if (inc.have_crc) {
271 crc = inc.full_crc;
272 bufferlist fbl;
273 osdmap.encode(fbl, features);
274 if (osdmap.get_crc() != inc.full_crc) {
275 cerr << "mismatched inc crc: "
276 << osdmap.get_crc() << " != " << inc.full_crc << std::endl;
277 return -EINVAL;
278 }
279 // inc.decode() verifies `inc_crc`, so it's been taken care of.
280 }
281 }
eafe8130
TL
282 return 0;
283 }();
284 switch (add_inc_result) {
285 case -ENOENT:
286 // no worries, we always have full map
287 break;
288 case -EINVAL:
289 return -EINVAL;
290 case 0:
291 break;
292 default:
293 assert(0);
7c673cae
FG
294 }
295 // add full maps
296 {
297 const auto oid = OSD::get_osdmap_pobject_name(e);
298 bufferlist bl;
11fdf7f2 299 int nread = fs.read(ch, oid, 0, 0, bl);
7c673cae
FG
300 if (nread <= 0) {
301 cerr << "missing " << oid << std::endl;
302 return -EINVAL;
303 }
304 t->put(prefix, ms.combine_strings("full", e), bl);
305
11fdf7f2 306 auto p = bl.cbegin();
7c673cae
FG
307 osdmap.decode(p);
308 if (osdmap.have_crc()) {
309 if (have_crc && osdmap.get_crc() != crc) {
310 cerr << "mismatched full/inc crc: "
311 << osdmap.get_crc() << " != " << crc << std::endl;
312 return -EINVAL;
313 }
314 uint32_t saved_crc = osdmap.get_crc();
315 bufferlist fbl;
316 osdmap.encode(fbl, features);
317 if (osdmap.get_crc() != saved_crc) {
318 cerr << "mismatched full crc: "
319 << saved_crc << " != " << osdmap.get_crc() << std::endl;
320 return -EINVAL;
321 }
322 }
323 }
324 nadded++;
325
326 // last_committed
327 t->put(prefix, last_committed_name, e);
328 // full last
329 t->put(prefix, ms.combine_strings("full", "latest"), e);
330
331 // this number comes from the default value of osd_target_transaction_size,
332 // so we won't OOM or stuff too many maps in a single transaction if OSD is
333 // keeping a large series of osdmap
334 static constexpr unsigned TRANSACTION_SIZE = 30;
335 if (t->size() >= TRANSACTION_SIZE) {
336 ms.apply_transaction(t);
337 t = make_shared<MonitorDBStore::Transaction>();
338 }
339 }
340 if (!t->empty()) {
341 ms.apply_transaction(t);
342 }
343 t.reset();
344
345 string osd_name("osd.");
346 osd_name += std::to_string(sb.whoami);
347 cout << std::left << setw(8)
348 << osd_name << ": "
349 << ntrimmed << " osdmaps trimmed, "
350 << nadded << " osdmaps added." << std::endl;
351 return 0;
352}
11fdf7f2 353