// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 * Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com>
 *
 * Author: Loic Dachary <loic@dachary.org>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 *
 */

/* Object Store Device (OSD) Monitor
 */

#ifndef CEPH_OSDMONITOR_H
#define CEPH_OSDMONITOR_H

#include <map>
#include <set>

#include "include/types.h"
#include "include/encoding.h"
#include "common/simple_cache.hpp"
#include "msg/Messenger.h"

#include "osd/OSDMap.h"
#include "osd/OSDMapMapping.h"

#include "CreatingPGs.h"
#include "PaxosService.h"

class Monitor;
class PGMap;
class MonSession;
class MOSDMap;

#include "erasure-code/ErasureCodeInterface.h"
#include "mon/MonOpRequest.h"
#include <boost/functional/hash.hpp>
// re-include our assert to clobber the system one; fix dout:
#include "include/ceph_assert.h"
/// information about a particular peer's failure reports for one osd
struct failure_reporter_t {
  utime_t failed_since;     ///< when they think it failed
  MonOpRequestRef op;       ///< failure op request

  failure_reporter_t() {}
  explicit failure_reporter_t(utime_t s) : failed_since(s) {}
  ~failure_reporter_t() { }
};

/// information about all failure reports for one osd
struct failure_info_t {
  map<int, failure_reporter_t> reporters;  ///< reporter -> failed_since etc
  utime_t max_failed_since;                ///< most recent failed_since

  failure_info_t() {}

  utime_t get_failed_since() {
    if (max_failed_since == utime_t() && !reporters.empty()) {
      // the old max must have been canceled; recalculate.
      for (map<int, failure_reporter_t>::iterator p = reporters.begin();
           p != reporters.end();
           ++p)
        if (p->second.failed_since > max_failed_since)
          max_failed_since = p->second.failed_since;
    }
    return max_failed_since;
  }

  // set the message for the latest report.  return any old op request we had,
  // if any, so we can discard it.
  MonOpRequestRef add_report(int who, utime_t failed_since,
                             MonOpRequestRef op) {
    map<int, failure_reporter_t>::iterator p = reporters.find(who);
    if (p == reporters.end()) {
      if (max_failed_since != utime_t() && max_failed_since < failed_since)
        max_failed_since = failed_since;
      p = reporters.insert(
        map<int, failure_reporter_t>::value_type(
          who, failure_reporter_t(failed_since))).first;
    }

    MonOpRequestRef ret = p->second.op;
    p->second.op = op;
    return ret;
  }

  void take_report_messages(list<MonOpRequestRef>& ls) {
    for (map<int, failure_reporter_t>::iterator p = reporters.begin();
         p != reporters.end();
         ++p) {
      if (p->second.op) {
        ls.push_back(p->second.op);
        p->second.op.reset();
      }
    }
  }

  MonOpRequestRef cancel_report(int who) {
    map<int, failure_reporter_t>::iterator p = reporters.find(who);
    if (p == reporters.end())
      return MonOpRequestRef();
    MonOpRequestRef ret = p->second.op;
    reporters.erase(p);
    // invalidate the cached max; get_failed_since() will recalculate it.
    max_failed_since = utime_t();
    return ret;
  }
};
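
// A minimal usage sketch (names hypothetical): prepare_failure() accumulates
// reports per target osd roughly like this, and later acks or retires them:
//
//   failure_info_t& fi = failure_info[target_osd];
//   MonOpRequestRef old_op = fi.add_report(reporter, failed_since, op);
//   utime_t ts = fi.get_failed_since();          // latest failed_since seen
//   MonOpRequestRef gone = fi.cancel_report(reporter);  // report withdrawn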
/// Tracks the last-epoch-clean reported for each pg, per pool, so the
/// monitor can compute a cluster-wide lower bound.
class LastEpochClean {
  struct Lec {
    vector<epoch_t> epoch_by_pg;
    ps_t next_missing = 0;
    epoch_t floor = std::numeric_limits<epoch_t>::max();
    void report(ps_t pg, epoch_t last_epoch_clean);
  };
  std::map<uint64_t, Lec> report_by_pool;
public:
  void report(const pg_t& pg, epoch_t last_epoch_clean);
  void remove_pool(uint64_t pool);
  epoch_t get_lower_bound(const OSDMap& latest) const;
};
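
// Sketch of the intended flow, as implied by the interface above (see
// preprocess_beacon()/prepare_beacon() and get_min_last_epoch_clean() below):
// osd beacons report pgs along with a last-clean epoch; report() folds those
// into a per-pool floor, and get_lower_bound() yields the oldest epoch any pg
// may still need, which bounds how far back old osdmaps can be trimmed.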
struct osdmap_manifest_t {
  // all the maps we have pinned -- i.e., won't be removed unless
  // they are inside a trim interval.
  set<version_t> pinned;

  osdmap_manifest_t() {}

  version_t get_last_pinned() const
  {
    set<version_t>::const_reverse_iterator it = pinned.crbegin();
    if (it == pinned.crend()) {
      return 0;
    }
    return *it;
  }

  version_t get_first_pinned() const
  {
    set<version_t>::const_iterator it = pinned.cbegin();
    if (it == pinned.cend()) {
      return 0;
    }
    return *it;
  }

  bool is_pinned(version_t v) const
  {
    return pinned.find(v) != pinned.end();
  }

  void pin(version_t v)
  {
    pinned.insert(v);
  }

  version_t get_lower_closest_pinned(version_t v) const {
    set<version_t>::const_iterator p = pinned.lower_bound(v);
    if (p == pinned.cend()) {
      return 0;
    } else if (*p > v) {
      if (p == pinned.cbegin()) {
        return 0;
      }
      --p;
    }
    return *p;
  }

  void encode(bufferlist& bl) const
  {
    ENCODE_START(1, 1, bl);
    encode(pinned, bl);
    ENCODE_FINISH(bl);
  }

  void decode(bufferlist::const_iterator& bl)
  {
    DECODE_START(1, bl);
    decode(pinned, bl);
    DECODE_FINISH(bl);
  }

  void decode(bufferlist& bl) {
    auto p = bl.cbegin();
    decode(p);
  }

  void dump(Formatter *f) {
    f->dump_unsigned("first_pinned", get_first_pinned());
    f->dump_unsigned("last_pinned", get_last_pinned());
    f->open_array_section("pinned_maps");
    for (auto& i : pinned) {
      f->dump_unsigned("epoch", i);
    }
    f->close_section();
  }
};
WRITE_CLASS_ENCODER(osdmap_manifest_t);
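
// Worked example for get_lower_closest_pinned(), matching the code above:
// with pinned = {10, 20, 30},
//   get_lower_closest_pinned(25) == 20  // greatest pinned epoch <= 25
//   get_lower_closest_pinned(20) == 20  // exact hits are returned as-is
//   get_lower_closest_pinned(5)  == 0   // nothing pinned at or below 5
//   get_lower_closest_pinned(35) == 0   // lower_bound() past the end also
//                                       // yields 0, not 30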
class OSDMonitor : public PaxosService {
  CephContext *cct;

public:
  OSDMap osdmap;

  // [leader]
  OSDMap::Incremental pending_inc;
  map<int, bufferlist> pending_metadata;
  set<int> pending_metadata_rm;
  map<int, failure_info_t> failure_info;
  map<int,utime_t> down_pending_out;  // osd down -> out
  bool priority_convert = false;

  map<int,double> osd_weight;

  using osdmap_key_t = std::pair<version_t, uint64_t>;
  using osdmap_cache_t = SimpleLRU<osdmap_key_t,
                                   bufferlist,
                                   std::less<osdmap_key_t>,
                                   boost::hash<osdmap_key_t>>;
  osdmap_cache_t inc_osd_cache;
  osdmap_cache_t full_osd_cache;
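
  // Note: the caches are keyed by (epoch, features), not epoch alone, because
  // the same map is encoded differently for clients with different feature
  // bits (see reencode_incremental_map()/reencode_full_map() and the
  // feature-aware get_version()/get_version_full() overloads below).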
  bool has_osdmap_manifest;
  osdmap_manifest_t osdmap_manifest;

  bool check_failures(utime_t now);
  bool check_failure(utime_t now, int target_osd, failure_info_t& fi);
  void force_failure(int target_osd, int by);

  bool _have_pending_crush();
  CrushWrapper &_get_stable_crush();
  void _get_pending_crush(CrushWrapper& newcrush);

  enum FastReadType {
    FAST_READ_OFF,
    FAST_READ_ON,
    FAST_READ_DEFAULT
  };

  struct CleanUpmapJob : public ParallelPGMapper::Job {
    CephContext *cct;
    const OSDMap& osdmap;
    OSDMap::Incremental& pending_inc;
    // lock to protect pending_inc from changing
    // while checking is done
    Mutex pending_inc_lock = {"CleanUpmapJob::pending_inc_lock"};

    CleanUpmapJob(CephContext *cct, const OSDMap& om, OSDMap::Incremental& pi)
      : ParallelPGMapper::Job(&om),
        cct(cct),
        osdmap(om),
        pending_inc(pi) {}

    void process(const vector<pg_t>& to_check) override {
      vector<pg_t> to_cancel;
      map<pg_t, mempool::osdmap::vector<pair<int,int>>> to_remap;
      osdmap.check_pg_upmaps(cct, to_check, &to_cancel, &to_remap);
      // don't bother taking the lock if nothing changes
      if (!to_cancel.empty() || !to_remap.empty()) {
        std::lock_guard l(pending_inc_lock);
        osdmap.clean_pg_upmaps(cct, &pending_inc, to_cancel, to_remap);
      }
    }

    void process(int64_t poolid, unsigned ps_begin, unsigned ps_end) override {}
    void complete() override {}
  }; // public as this will need to be accessible from TestOSDMap.cc
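
  // Design note on CleanUpmapJob: check_pg_upmaps() runs in parallel worker
  // threads with no lock held, collecting cancellations/remaps per chunk of
  // pgs; only a chunk that actually found work takes pending_inc_lock to fold
  // its result into the shared pending_inc, so clean chunks stay lock-free.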
  // svc
public:
  void create_initial() override;
  void get_store_prefixes(std::set<string>& s) const override;

private:
  void update_from_paxos(bool *need_bootstrap) override;
  void create_pending() override;  // prepare a new pending
  void encode_pending(MonitorDBStore::TransactionRef t) override;
  void on_active() override;
  void on_restart() override;
  void on_shutdown() override;

  /* osdmap full map prune */
  void load_osdmap_manifest();
  bool should_prune() const;
  void _prune_update_trimmed(
    MonitorDBStore::TransactionRef tx,
    version_t first);
  void prune_init(osdmap_manifest_t& manifest);
  bool _prune_sanitize_options() const;
  bool is_prune_enabled() const;
  bool is_prune_supported() const;
  bool do_prune(MonitorDBStore::TransactionRef tx);

  /**
   * we haven't delegated full version stashing to paxosservice for some time
   * now, making this function useless in the current context.
   */
  void encode_full(MonitorDBStore::TransactionRef t) override { }
  /**
   * do not let paxosservice periodically stash full osdmaps, or we will break
   * our locally-managed full maps.  (update_from_paxos loads the latest and
   * writes them out going forward from there, but if we just synced that may
   * mean we skip some.)
   */
  bool should_stash_full() override {
    return false;
  }

  /**
   * hook into trim to include the oldest full map in the trim transaction
   *
   * This ensures that anyone post-sync will have enough to rebuild their
   * full osdmaps.
   */
  void encode_trim_extra(MonitorDBStore::TransactionRef tx, version_t first) override;

  void update_msgr_features();
  int check_cluster_features(uint64_t features, stringstream &ss);
  /**
   * check if the cluster supports the features required by the
   * given crush map. Outputs the daemons which don't support it
   * to the stringstream.
   *
   * @returns true if the map is passable, false otherwise
   */
  bool validate_crush_against_features(const CrushWrapper *newcrush,
                                       stringstream &ss);
  void check_osdmap_subs();
  void share_map_with_random_osd();

  Mutex prime_pg_temp_lock = {"OSDMonitor::prime_pg_temp_lock"};
  // parallel job to prime pg_temp entries for pgs whose mapping is about to
  // change in the next map (see maybe_prime_pg_temp() below)
  struct PrimeTempJob : public ParallelPGMapper::Job {
    OSDMonitor *osdmon;
    PrimeTempJob(const OSDMap& om, OSDMonitor *m)
      : ParallelPGMapper::Job(&om), osdmon(m) {}
    void process(int64_t pool, unsigned ps_begin, unsigned ps_end) override {
      for (unsigned ps = ps_begin; ps < ps_end; ++ps) {
        pg_t pgid(ps, pool);
        osdmon->prime_pg_temp(*osdmap, pgid);
      }
    }
    void process(const vector<pg_t>& pgs) override {}
    void complete() override {}
  };
  void maybe_prime_pg_temp();
  void prime_pg_temp(const OSDMap& next, pg_t pgid);

  ParallelPGMapper mapper;                        ///< for background pg work
  OSDMapMapping mapping;                          ///< pg <-> osd mappings
  unique_ptr<ParallelPGMapper::Job> mapping_job;  ///< background mapping job
  void start_mapping();

  void update_logger();

  void handle_query(PaxosServiceMessage *m);
  bool preprocess_query(MonOpRequestRef op) override;  // true if processed.
  bool prepare_update(MonOpRequestRef op) override;
  bool should_propose(double &delay) override;

  version_t get_trim_to() const override;

  bool can_mark_down(int o);
  bool can_mark_up(int o);
  bool can_mark_out(int o);
  bool can_mark_in(int o);

  // ...
  MOSDMap *build_latest_full(uint64_t features);
  MOSDMap *build_incremental(epoch_t first, epoch_t last, uint64_t features);
  void send_full(MonOpRequestRef op);
  void send_incremental(MonOpRequestRef op, epoch_t first);
public:
  // @param req an optional op request, if the osdmaps are replies to it, so
  // that @c Monitor::send_reply() can mark_event with it.
  void send_incremental(epoch_t first, MonSession *session, bool onetime,
                        MonOpRequestRef req = MonOpRequestRef());
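
  // Convention for the handlers below: preprocess_*() examines a message
  // against the committed osdmap and returns true if it was fully handled
  // read-only; prepare_*() stages the resulting change in pending_inc for the
  // next paxos proposal.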

private:
  void print_utilization(ostream &out, Formatter *f, bool tree) const;

  bool check_source(MonOpRequestRef op, uuid_d fsid);

  bool preprocess_get_osdmap(MonOpRequestRef op);

  bool preprocess_mark_me_down(MonOpRequestRef op);

  friend class C_AckMarkedDown;
  bool preprocess_failure(MonOpRequestRef op);
  bool prepare_failure(MonOpRequestRef op);
  bool prepare_mark_me_down(MonOpRequestRef op);
  void process_failures();
  void take_all_failures(list<MonOpRequestRef>& ls);

  bool preprocess_full(MonOpRequestRef op);
  bool prepare_full(MonOpRequestRef op);

  bool preprocess_boot(MonOpRequestRef op);
  bool prepare_boot(MonOpRequestRef op);
  void _booted(MonOpRequestRef op, bool logit);

  void update_up_thru(int from, epoch_t up_thru);
  bool preprocess_alive(MonOpRequestRef op);
  bool prepare_alive(MonOpRequestRef op);
  void _reply_map(MonOpRequestRef op, epoch_t e);

  bool preprocess_pgtemp(MonOpRequestRef op);
  bool prepare_pgtemp(MonOpRequestRef op);

  bool preprocess_pg_created(MonOpRequestRef op);
  bool prepare_pg_created(MonOpRequestRef op);

  bool preprocess_pg_ready_to_merge(MonOpRequestRef op);
  bool prepare_pg_ready_to_merge(MonOpRequestRef op);

  int _check_remove_pool(int64_t pool_id, const pg_pool_t &pool, ostream *ss);
  bool _check_become_tier(
    int64_t tier_pool_id, const pg_pool_t *tier_pool,
    int64_t base_pool_id, const pg_pool_t *base_pool,
    int *err, ostream *ss) const;
  bool _check_remove_tier(
    int64_t base_pool_id, const pg_pool_t *base_pool, const pg_pool_t *tier_pool,
    int *err, ostream *ss) const;

  int _prepare_remove_pool(int64_t pool, ostream *ss, bool no_fake);
  int _prepare_rename_pool(int64_t pool, string newname);

  bool enforce_pool_op_caps(MonOpRequestRef op);
  bool preprocess_pool_op(MonOpRequestRef op);
  bool preprocess_pool_op_create(MonOpRequestRef op);
  bool prepare_pool_op(MonOpRequestRef op);
  bool prepare_pool_op_create(MonOpRequestRef op);
  bool prepare_pool_op_delete(MonOpRequestRef op);
  int crush_rename_bucket(const string& srcname,
                          const string& dstname,
                          ostream *ss);
  void check_legacy_ec_plugin(const string& plugin,
                              const string& profile) const;
  int normalize_profile(const string& profilename,
                        ErasureCodeProfile &profile,
                        bool force,
                        ostream *ss);
  int crush_rule_create_erasure(const string &name,
                                const string &profile,
                                int *rule,
                                ostream *ss);
  int get_crush_rule(const string &rule_name,
                     int *crush_rule,
                     ostream *ss);
  int get_erasure_code(const string &erasure_code_profile,
                       ErasureCodeInterfaceRef *erasure_code,
                       ostream *ss) const;
  int prepare_pool_crush_rule(const unsigned pool_type,
                              const string &erasure_code_profile,
                              const string &rule_name,
                              int *crush_rule,
                              ostream *ss);
  bool erasure_code_profile_in_use(
    const mempool::osdmap::map<int64_t, pg_pool_t> &pools,
    const string &profile,
    ostream *ss);
  int parse_erasure_code_profile(const vector<string> &erasure_code_profile,
                                 map<string,string> *erasure_code_profile_map,
                                 ostream *ss);
  int prepare_pool_size(const unsigned pool_type,
                        const string &erasure_code_profile,
                        uint8_t repl_size,
                        unsigned *size, unsigned *min_size,
                        ostream *ss);
  int prepare_pool_stripe_width(const unsigned pool_type,
                                const string &erasure_code_profile,
                                unsigned *stripe_width,
                                ostream *ss);
  int check_pg_num(int64_t pool, int pg_num, int size, ostream* ss);
  int prepare_new_pool(string& name,
                       int crush_rule,
                       const string &crush_rule_name,
                       unsigned pg_num, unsigned pgp_num,
                       unsigned pg_num_min,
                       uint64_t repl_size,
                       const uint64_t target_size_bytes,
                       const float target_size_ratio,
                       const string &erasure_code_profile,
                       const unsigned pool_type,
                       const uint64_t expected_num_objects,
                       FastReadType fast_read,
                       ostream *ss);
  int prepare_new_pool(MonOpRequestRef op);

  void set_pool_flags(int64_t pool_id, uint64_t flags);
  void clear_pool_flags(int64_t pool_id, uint64_t flags);
  bool update_pools_status();

  string make_snap_epoch_key(int64_t pool, epoch_t epoch);
  string make_snap_key(int64_t pool, snapid_t snap);
  string make_snap_key_value(int64_t pool, snapid_t snap, snapid_t num,
                             epoch_t epoch, bufferlist *v);
  string make_snap_purged_key(int64_t pool, snapid_t snap);
  string make_snap_purged_key_value(int64_t pool, snapid_t snap, snapid_t num,
                                    epoch_t epoch, bufferlist *v);
  bool try_prune_purged_snaps();
  int lookup_pruned_snap(int64_t pool, snapid_t snap,
                         snapid_t *begin, snapid_t *end);

  bool prepare_set_flag(MonOpRequestRef op, int flag);
  bool prepare_unset_flag(MonOpRequestRef op, int flag);

  void _pool_op_reply(MonOpRequestRef op,
                      int ret, epoch_t epoch, bufferlist *blp=NULL);

  struct C_Booted : public C_MonOp {
    OSDMonitor *cmon;
    bool logit;
    C_Booted(OSDMonitor *cm, MonOpRequestRef op_, bool l=true) :
      C_MonOp(op_), cmon(cm), logit(l) {}
    void _finish(int r) override {
      if (r >= 0)
        cmon->_booted(op, logit);
      else if (r == -ECANCELED)
        return;
      else if (r == -EAGAIN)
        cmon->dispatch(op);
      else
        ceph_abort_msg("bad C_Booted return value");
    }
  };

  struct C_ReplyMap : public C_MonOp {
    OSDMonitor *osdmon;
    epoch_t e;
    C_ReplyMap(OSDMonitor *o, MonOpRequestRef op_, epoch_t ee)
      : C_MonOp(op_), osdmon(o), e(ee) {}
    void _finish(int r) override {
      if (r >= 0)
        osdmon->_reply_map(op, e);
      else if (r == -ECANCELED)
        return;
      else if (r == -EAGAIN)
        osdmon->dispatch(op);
      else
        ceph_abort_msg("bad C_ReplyMap return value");
    }
  };

  struct C_PoolOp : public C_MonOp {
    OSDMonitor *osdmon;
    int replyCode;
    int epoch;
    bufferlist reply_data;
    C_PoolOp(OSDMonitor *osd, MonOpRequestRef op_, int rc, int e, bufferlist *rd=NULL) :
      C_MonOp(op_), osdmon(osd), replyCode(rc), epoch(e) {
      if (rd)
        reply_data = *rd;
    }
    void _finish(int r) override {
      if (r >= 0)
        osdmon->_pool_op_reply(op, replyCode, epoch, &reply_data);
      else if (r == -ECANCELED)
        return;
      else if (r == -EAGAIN)
        osdmon->dispatch(op);
      else
        ceph_abort_msg("bad C_PoolOp return value");
    }
  };
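
  // All three completion contexts above follow the same convention for the
  // callback result: r >= 0 means the proposal committed, so send the reply;
  // -ECANCELED means the op was discarded (e.g. across an election) and is
  // dropped silently; -EAGAIN re-dispatches the op so it is re-evaluated
  // against the new state; anything else is a programming error.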

  bool preprocess_remove_snaps(MonOpRequestRef op);
  bool prepare_remove_snaps(MonOpRequestRef op);

  int load_metadata(int osd, map<string, string>& m, ostream *err);
  void count_metadata(const string& field, Formatter *f);

  void reencode_incremental_map(bufferlist& bl, uint64_t features);
  void reencode_full_map(bufferlist& bl, uint64_t features);
public:
  void count_metadata(const string& field, map<string,int> *out);
protected:
  int get_osd_objectstore_type(int osd, std::string *type);
  bool is_pool_currently_all_bluestore(int64_t pool_id, const pg_pool_t &pool,
                                       ostream *err);

  // when we last received PG stats from each osd
  map<int,utime_t> last_osd_report;
  // TODO: use last_osd_report to store the osd report epochs, once we don't
  // need to upgrade from pre-luminous releases.
  map<int,epoch_t> osd_epochs;
  LastEpochClean last_epoch_clean;
  bool preprocess_beacon(MonOpRequestRef op);
  bool prepare_beacon(MonOpRequestRef op);
  epoch_t get_min_last_epoch_clean() const;

  friend class C_UpdateCreatingPGs;
  std::map<int, std::map<epoch_t, std::set<spg_t>>> creating_pgs_by_osd_epoch;
  std::vector<pg_t> pending_created_pgs;
  // the epoch when the pg mapping was calculated
  epoch_t creating_pgs_epoch = 0;
  creating_pgs_t creating_pgs;
  mutable std::mutex creating_pgs_lock;

  creating_pgs_t update_pending_pgs(const OSDMap::Incremental& inc,
                                    const OSDMap& nextmap);
  unsigned scan_for_creating_pgs(
    const mempool::osdmap::map<int64_t,pg_pool_t>& pools,
    const mempool::osdmap::set<int64_t>& removed_pools,
    utime_t modified,
    creating_pgs_t* creating_pgs) const;
  pair<int32_t, pg_t> get_parent_pg(pg_t pgid) const;
  void update_creating_pgs();
  void check_pg_creates_subs();
  epoch_t send_pg_creates(int osd, Connection *con, epoch_t next) const;

  int32_t _allocate_osd_id(int32_t* existing_id);

public:
  OSDMonitor(CephContext *cct, Monitor *mn, Paxos *p, const string& service_name);

  void tick() override;  // check state, take actions

  bool preprocess_command(MonOpRequestRef op);
  bool prepare_command(MonOpRequestRef op);
  bool prepare_command_impl(MonOpRequestRef op, const cmdmap_t& cmdmap);

  int validate_osd_create(
    const int32_t id,
    const uuid_d& uuid,
    const bool check_osd_exists,
    int32_t* existing_id,
    stringstream& ss);
  int prepare_command_osd_create(
    const int32_t id,
    const uuid_d& uuid,
    int32_t* existing_id,
    stringstream& ss);
  void do_osd_create(const int32_t id, const uuid_d& uuid,
                     const string& device_class,
                     int32_t* new_id);
  int prepare_command_osd_purge(int32_t id, stringstream& ss);
  int prepare_command_osd_destroy(int32_t id, stringstream& ss);
  int _prepare_command_osd_crush_remove(
    CrushWrapper &newcrush,
    int32_t id,
    int32_t ancestor,
    bool has_ancestor,
    bool unlink_only);
  void do_osd_crush_remove(CrushWrapper& newcrush);
  int prepare_command_osd_crush_remove(
    CrushWrapper &newcrush,
    int32_t id,
    int32_t ancestor,
    bool has_ancestor,
    bool unlink_only);
  int prepare_command_osd_remove(int32_t id);
  int prepare_command_osd_new(
    MonOpRequestRef op,
    const cmdmap_t& cmdmap,
    const map<string,string>& secrets,
    stringstream &ss,
    Formatter *f);

  int prepare_command_pool_set(const cmdmap_t& cmdmap,
                               stringstream& ss);

  int prepare_command_pool_application(const string &prefix,
                                       const cmdmap_t& cmdmap,
                                       stringstream& ss);
  int preprocess_command_pool_application(const string &prefix,
                                          const cmdmap_t& cmdmap,
                                          stringstream& ss,
                                          bool *modified);
  int _command_pool_application(const string &prefix,
                                const cmdmap_t& cmdmap,
                                stringstream& ss,
                                bool *modified,
                                bool preparing);

  bool handle_osd_timeouts(const utime_t &now,
                           std::map<int,utime_t> &last_osd_report);

  void send_latest(MonOpRequestRef op, epoch_t start=0);
  void send_latest_now_nodelete(MonOpRequestRef op, epoch_t start=0) {
    op->mark_osdmon_event(__func__);
    send_incremental(op, start);
  }

  void get_removed_snaps_range(
    epoch_t start, epoch_t end,
    mempool::osdmap::map<int64_t,OSDMap::snap_interval_set_t> *gap_removed_snaps);

  int get_version(version_t ver, bufferlist& bl) override;
  int get_version(version_t ver, uint64_t feature, bufferlist& bl);

  int get_version_full(version_t ver, uint64_t feature, bufferlist& bl);
  int get_version_full(version_t ver, bufferlist& bl) override;
  int get_inc(version_t ver, OSDMap::Incremental& inc);
  int get_full_from_pinned_map(version_t ver, bufferlist& bl);

  epoch_t blacklist(const entity_addrvec_t& av, utime_t until);
  epoch_t blacklist(entity_addr_t a, utime_t until);

  void dump_info(Formatter *f);
  int dump_osd_metadata(int osd, Formatter *f, ostream *err);
  void print_nodes(Formatter *f);

  void check_osdmap_sub(Subscription *sub);
  void check_pg_creates_sub(Subscription *sub);

  void do_application_enable(int64_t pool_id, const std::string &app_name,
                             const std::string &app_key="",
                             const std::string &app_value="");
  void do_set_pool_opt(int64_t pool_id, pool_opts_t::key_t opt,
                       pool_opts_t::value_t);

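  // The two helpers below stage cluster-flag changes in pending_inc:
  // new_flags starts out unset (-1) and is lazily seeded from the current
  // osdmap's flags on first use, so e.g. add_flag(CEPH_OSDMAP_NOOUT) takes
  // effect only once the pending incremental commits.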
  void add_flag(int flag) {
    if (!(osdmap.flags & flag)) {
      if (pending_inc.new_flags < 0)
        pending_inc.new_flags = osdmap.flags;
      pending_inc.new_flags |= flag;
    }
  }

  void remove_flag(int flag) {
    if (osdmap.flags & flag) {
      if (pending_inc.new_flags < 0)
        pending_inc.new_flags = osdmap.flags;
      pending_inc.new_flags &= ~flag;
    }
  }
  void convert_pool_priorities(void);
};

#endif