]>
Commit | Line | Data |
---|---|---|
3efd9988 | 1 | import ceph_module # noqa |
3efd9988 | 2 | |
494da23a | 3 | try: |
9f95a23c | 4 | from typing import Set, Tuple, Iterator, Any, Dict, Optional, Callable, List |
494da23a TL |
5 | except ImportError: |
6 | # just for type checking | |
7 | pass | |
7c673cae | 8 | import logging |
9f95a23c | 9 | import errno |
11fdf7f2 | 10 | import json |
1adf2230 | 11 | import six |
7c673cae | 12 | import threading |
11fdf7f2 TL |
13 | from collections import defaultdict, namedtuple |
14 | import rados | |
81eedcae | 15 | import re |
11fdf7f2 | 16 | import time |
f6b5b4d7 | 17 | from mgr_util import profile_method |
11fdf7f2 | 18 | |
f6b5b4d7 | 19 | # Full list of strings in "osd_types.cc:pg_state_string()" |
11fdf7f2 TL |
20 | PG_STATES = [ |
21 | "active", | |
22 | "clean", | |
23 | "down", | |
24 | "recovery_unfound", | |
25 | "backfill_unfound", | |
26 | "scrubbing", | |
27 | "degraded", | |
28 | "inconsistent", | |
29 | "peering", | |
30 | "repair", | |
31 | "recovering", | |
32 | "forced_recovery", | |
33 | "backfill_wait", | |
34 | "incomplete", | |
35 | "stale", | |
36 | "remapped", | |
37 | "deep", | |
38 | "backfilling", | |
39 | "forced_backfill", | |
40 | "backfill_toofull", | |
41 | "recovery_wait", | |
42 | "recovery_toofull", | |
43 | "undersized", | |
44 | "activating", | |
45 | "peered", | |
46 | "snaptrim", | |
47 | "snaptrim_wait", | |
48 | "snaptrim_error", | |
49 | "creating", | |
f6b5b4d7 TL |
50 | "unknown", |
51 | "premerge", | |
52 | "failed_repair", | |
53 | "laggy", | |
54 | "wait", | |
55 | ] | |
3efd9988 FG |
56 | |
57 | ||
7c673cae FG |
58 | class CommandResult(object): |
59 | """ | |
60 | Use with MgrModule.send_command | |
61 | """ | |
11fdf7f2 TL |
62 | |
63 | def __init__(self, tag=None): | |
7c673cae FG |
64 | self.ev = threading.Event() |
65 | self.outs = "" | |
66 | self.outb = "" | |
67 | self.r = 0 | |
68 | ||
69 | # This is just a convenience for notifications from | |
70 | # C++ land, to avoid passing addresses around in messages. | |
11fdf7f2 | 71 | self.tag = tag if tag else "" |
7c673cae FG |
72 | |
73 | def complete(self, r, outb, outs): | |
74 | self.r = r | |
75 | self.outb = outb | |
76 | self.outs = outs | |
77 | self.ev.set() | |
78 | ||
79 | def wait(self): | |
80 | self.ev.wait() | |
81 | return self.r, self.outb, self.outs | |
82 | ||
83 | ||
11fdf7f2 TL |
84 | class HandleCommandResult(namedtuple('HandleCommandResult', ['retval', 'stdout', 'stderr'])): |
85 | def __new__(cls, retval=0, stdout="", stderr=""): | |
86 | """ | |
87 | Tuple containing the result of `handle_command()` | |
88 | ||
89 | Only write to stderr if there is an error, or in extraordinary circumstances | |
90 | ||
91 | Avoid having `ceph foo bar` commands say "did foo bar" on success unless there | |
92 | is critical information to include there. | |
93 | ||
94 | Everything programmatically consumable should be put on stdout | |
95 | ||
96 | :param retval: return code. E.g. 0 or -errno.EINVAL | |
97 | :type retval: int | |
98 | :param stdout: data of this result. | |
99 | :type stdout: str | |
100 | :param stderr: Typically used for error messages. | |
101 | :type stderr: str | |
102 | """ | |
103 | return super(HandleCommandResult, cls).__new__(cls, retval, stdout, stderr) | |
104 | ||
105 | ||
e306af50 TL |
106 | class MonCommandFailed(RuntimeError): pass |
107 | ||
108 | ||
3efd9988 FG |
109 | class OSDMap(ceph_module.BasePyOSDMap): |
110 | def get_epoch(self): | |
111 | return self._get_epoch() | |
112 | ||
113 | def get_crush_version(self): | |
114 | return self._get_crush_version() | |
115 | ||
116 | def dump(self): | |
117 | return self._dump() | |
118 | ||
11fdf7f2 TL |
119 | def get_pools(self): |
120 | # FIXME: efficient implementation | |
121 | d = self._dump() | |
122 | return dict([(p['pool'], p) for p in d['pools']]) | |
123 | ||
124 | def get_pools_by_name(self): | |
125 | # FIXME: efficient implementation | |
126 | d = self._dump() | |
127 | return dict([(p['pool_name'], p) for p in d['pools']]) | |
128 | ||
3efd9988 FG |
129 | def new_incremental(self): |
130 | return self._new_incremental() | |
131 | ||
132 | def apply_incremental(self, inc): | |
133 | return self._apply_incremental(inc) | |
134 | ||
135 | def get_crush(self): | |
136 | return self._get_crush() | |
137 | ||
138 | def get_pools_by_take(self, take): | |
139 | return self._get_pools_by_take(take).get('pools', []) | |
140 | ||
141 | def calc_pg_upmaps(self, inc, | |
11fdf7f2 TL |
142 | max_deviation=.01, max_iterations=10, pools=None): |
143 | if pools is None: | |
144 | pools = [] | |
3efd9988 FG |
145 | return self._calc_pg_upmaps( |
146 | inc, | |
147 | max_deviation, max_iterations, pools) | |
148 | ||
149 | def map_pool_pgs_up(self, poolid): | |
150 | return self._map_pool_pgs_up(poolid) | |
151 | ||
11fdf7f2 TL |
152 | def pg_to_up_acting_osds(self, pool_id, ps): |
153 | return self._pg_to_up_acting_osds(pool_id, ps) | |
154 | ||
155 | def pool_raw_used_rate(self, pool_id): | |
156 | return self._pool_raw_used_rate(pool_id) | |
157 | ||
158 | def get_ec_profile(self, name): | |
159 | # FIXME: efficient implementation | |
160 | d = self._dump() | |
161 | return d['erasure_code_profiles'].get(name, None) | |
162 | ||
9f95a23c TL |
163 | def get_require_osd_release(self): |
164 | d = self._dump() | |
165 | return d['require_osd_release'] | |
166 | ||
11fdf7f2 | 167 | |
3efd9988 FG |
168 | class OSDMapIncremental(ceph_module.BasePyOSDMapIncremental): |
169 | def get_epoch(self): | |
170 | return self._get_epoch() | |
171 | ||
172 | def dump(self): | |
173 | return self._dump() | |
174 | ||
175 | def set_osd_reweights(self, weightmap): | |
176 | """ | |
177 | weightmap is a dict, int to float. e.g. { 0: .9, 1: 1.0, 3: .997 } | |
178 | """ | |
179 | return self._set_osd_reweights(weightmap) | |
180 | ||
181 | def set_crush_compat_weight_set_weights(self, weightmap): | |
182 | """ | |
183 | weightmap is a dict, int to float. devices only. e.g., | |
184 | { 0: 3.4, 1: 3.3, 2: 3.334 } | |
185 | """ | |
186 | return self._set_crush_compat_weight_set_weights(weightmap) | |
187 | ||
11fdf7f2 | 188 | |
3efd9988 | 189 | class CRUSHMap(ceph_module.BasePyCRUSH): |
b32b8144 | 190 | ITEM_NONE = 0x7fffffff |
11fdf7f2 | 191 | DEFAULT_CHOOSE_ARGS = '-1' |
b32b8144 | 192 | |
3efd9988 FG |
193 | def dump(self): |
194 | return self._dump() | |
195 | ||
196 | def get_item_weight(self, item): | |
197 | return self._get_item_weight(item) | |
198 | ||
199 | def get_item_name(self, item): | |
200 | return self._get_item_name(item) | |
201 | ||
202 | def find_takes(self): | |
203 | return self._find_takes().get('takes', []) | |
204 | ||
205 | def get_take_weight_osd_map(self, root): | |
206 | uglymap = self._get_take_weight_osd_map(root) | |
11fdf7f2 TL |
207 | return {int(k): v for k, v in six.iteritems(uglymap.get('weights', {}))} |
208 | ||
209 | @staticmethod | |
210 | def have_default_choose_args(dump): | |
211 | return CRUSHMap.DEFAULT_CHOOSE_ARGS in dump.get('choose_args', {}) | |
212 | ||
213 | @staticmethod | |
214 | def get_default_choose_args(dump): | |
215 | return dump.get('choose_args').get(CRUSHMap.DEFAULT_CHOOSE_ARGS, []) | |
216 | ||
217 | def get_rule(self, rule_name): | |
218 | # TODO efficient implementation | |
219 | for rule in self.dump()['rules']: | |
220 | if rule_name == rule['rule_name']: | |
221 | return rule | |
222 | ||
223 | return None | |
224 | ||
225 | def get_rule_by_id(self, rule_id): | |
226 | for rule in self.dump()['rules']: | |
227 | if rule['rule_id'] == rule_id: | |
228 | return rule | |
229 | ||
230 | return None | |
231 | ||
232 | def get_rule_root(self, rule_name): | |
233 | rule = self.get_rule(rule_name) | |
234 | if rule is None: | |
235 | return None | |
236 | ||
237 | try: | |
238 | first_take = [s for s in rule['steps'] if s['op'] == 'take'][0] | |
239 | except IndexError: | |
9f95a23c | 240 | logging.warning("CRUSH rule '{0}' has no 'take' step".format( |
11fdf7f2 TL |
241 | rule_name)) |
242 | return None | |
243 | else: | |
244 | return first_take['item'] | |
245 | ||
246 | def get_osds_under(self, root_id): | |
247 | # TODO don't abuse dump like this | |
248 | d = self.dump() | |
249 | buckets = dict([(b['id'], b) for b in d['buckets']]) | |
250 | ||
251 | osd_list = [] | |
252 | ||
253 | def accumulate(b): | |
254 | for item in b['items']: | |
255 | if item['id'] >= 0: | |
256 | osd_list.append(item['id']) | |
257 | else: | |
258 | try: | |
259 | accumulate(buckets[item['id']]) | |
260 | except KeyError: | |
261 | pass | |
262 | ||
263 | accumulate(buckets[root_id]) | |
264 | ||
265 | return osd_list | |
266 | ||
267 | def device_class_counts(self): | |
9f95a23c | 268 | result = defaultdict(int) # type: Dict[str, int] |
11fdf7f2 TL |
269 | # TODO don't abuse dump like this |
270 | d = self.dump() | |
271 | for device in d['devices']: | |
272 | cls = device.get('class', None) | |
273 | result[cls] += 1 | |
274 | ||
275 | return dict(result) | |
276 | ||
277 | ||
278 | class CLICommand(object): | |
9f95a23c | 279 | COMMANDS = {} # type: Dict[str, CLICommand] |
11fdf7f2 TL |
280 | |
281 | def __init__(self, prefix, args="", desc="", perm="rw"): | |
282 | self.prefix = prefix | |
283 | self.args = args | |
284 | self.args_dict = {} | |
285 | self.desc = desc | |
286 | self.perm = perm | |
9f95a23c | 287 | self.func = None # type: Optional[Callable] |
11fdf7f2 TL |
288 | self._parse_args() |
289 | ||
290 | def _parse_args(self): | |
291 | if not self.args: | |
292 | return | |
293 | args = self.args.split(" ") | |
294 | for arg in args: | |
295 | arg_desc = arg.strip().split(",") | |
296 | arg_d = {} | |
297 | for kv in arg_desc: | |
298 | k, v = kv.split("=") | |
299 | if k != "name": | |
300 | arg_d[k] = v | |
301 | else: | |
302 | self.args_dict[v] = arg_d | |
303 | ||
304 | def __call__(self, func): | |
305 | self.func = func | |
306 | self.COMMANDS[self.prefix] = self | |
307 | return self.func | |
308 | ||
309 | def call(self, mgr, cmd_dict, inbuf): | |
310 | kwargs = {} | |
311 | for a, d in self.args_dict.items(): | |
312 | if 'req' in d and d['req'] == "false" and a not in cmd_dict: | |
313 | continue | |
314 | kwargs[a.replace("-", "_")] = cmd_dict[a] | |
315 | if inbuf: | |
316 | kwargs['inbuf'] = inbuf | |
9f95a23c | 317 | assert self.func |
11fdf7f2 TL |
318 | return self.func(mgr, **kwargs) |
319 | ||
9f95a23c TL |
320 | def dump_cmd(self): |
321 | return { | |
322 | 'cmd': '{} {}'.format(self.prefix, self.args), | |
323 | 'desc': self.desc, | |
324 | 'perm': self.perm | |
325 | } | |
326 | ||
11fdf7f2 TL |
327 | @classmethod |
328 | def dump_cmd_list(cls): | |
9f95a23c | 329 | return [cmd.dump_cmd() for cmd in cls.COMMANDS.values()] |
11fdf7f2 TL |
330 | |
331 | ||
332 | def CLIReadCommand(prefix, args="", desc=""): | |
333 | return CLICommand(prefix, args, desc, "r") | |
334 | ||
335 | ||
336 | def CLIWriteCommand(prefix, args="", desc=""): | |
337 | return CLICommand(prefix, args, desc, "w") | |
338 | ||
339 | ||
340 | def _get_localized_key(prefix, key): | |
341 | return '{}/{}'.format(prefix, key) | |
342 | ||
343 | ||
344 | class Option(dict): | |
345 | """ | |
346 | Helper class to declare options for MODULE_OPTIONS list. | |
347 | ||
348 | Caveat: it uses argument names matching Python keywords (type, min, max), | |
349 | so any further processing should happen in a separate method. | |
350 | ||
351 | TODO: type validation. | |
352 | """ | |
353 | ||
354 | def __init__( | |
355 | self, name, | |
356 | default=None, | |
357 | type='str', | |
358 | desc=None, longdesc=None, | |
359 | min=None, max=None, | |
360 | enum_allowed=None, | |
361 | see_also=None, | |
362 | tags=None, | |
363 | runtime=False, | |
364 | ): | |
365 | super(Option, self).__init__( | |
366 | (k, v) for k, v in vars().items() | |
367 | if k != 'self' and v is not None) | |
368 | ||
92f5a8d4 TL |
369 | class Command(dict): |
370 | """ | |
371 | Helper class to declare options for COMMANDS list. | |
372 | ||
373 | It also allows to specify prefix and args separately, as well as storing a | |
374 | handler callable. | |
375 | ||
376 | Usage: | |
377 | >>> Command(prefix="example", | |
378 | ... args="name=arg,type=CephInt", | |
379 | ... perm='w', | |
380 | ... desc="Blah") | |
381 | {'poll': False, 'cmd': 'example name=arg,type=CephInt', 'perm': 'w', 'desc': 'Blah'} | |
382 | """ | |
383 | ||
384 | def __init__( | |
385 | self, | |
386 | prefix, | |
387 | args=None, | |
388 | perm="rw", | |
389 | desc=None, | |
390 | poll=False, | |
391 | handler=None | |
392 | ): | |
393 | super(Command, self).__init__( | |
394 | cmd=prefix + (' ' + args if args else ''), | |
395 | perm=perm, | |
396 | desc=desc, | |
397 | poll=poll) | |
398 | self.prefix = prefix | |
399 | self.args = args | |
400 | self.handler = handler | |
401 | ||
402 | def register(self, instance=False): | |
403 | """ | |
404 | Register a CLICommand handler. It allows an instance to register bound | |
405 | methods. In that case, the mgr instance is not passed, and it's expected | |
406 | to be available in the class instance. | |
407 | It also uses HandleCommandResult helper to return a wrapped a tuple of 3 | |
408 | items. | |
409 | """ | |
410 | return CLICommand( | |
411 | prefix=self.prefix, | |
412 | args=self.args, | |
413 | desc=self['desc'], | |
414 | perm=self['perm'] | |
415 | )( | |
416 | func=lambda mgr, *args, **kwargs: HandleCommandResult(*self.handler( | |
417 | *((instance or mgr,) + args), **kwargs)) | |
418 | ) | |
419 | ||
3efd9988 | 420 | |
9f95a23c TL |
421 | class CPlusPlusHandler(logging.Handler): |
422 | def __init__(self, module_inst): | |
423 | super(CPlusPlusHandler, self).__init__() | |
424 | self._module = module_inst | |
425 | self.setFormatter(logging.Formatter("[{} %(levelname)-4s %(name)s] %(message)s" | |
426 | .format(module_inst.module_name))) | |
427 | ||
428 | def emit(self, record): | |
429 | if record.levelno >= self.level: | |
430 | self._module._ceph_log(self.format(record)) | |
431 | ||
432 | class ClusterLogHandler(logging.Handler): | |
433 | def __init__(self, module_inst): | |
434 | super().__init__() | |
435 | self._module = module_inst | |
436 | self.setFormatter(logging.Formatter("%(message)s")) | |
437 | ||
438 | def emit(self, record): | |
439 | levelmap = { | |
440 | 'DEBUG': MgrModule.CLUSTER_LOG_PRIO_DEBUG, | |
441 | 'INFO': MgrModule.CLUSTER_LOG_PRIO_INFO, | |
442 | 'WARNING': MgrModule.CLUSTER_LOG_PRIO_WARN, | |
443 | 'ERROR': MgrModule.CLUSTER_LOG_PRIO_ERROR, | |
444 | 'CRITICAL': MgrModule.CLUSTER_LOG_PRIO_ERROR, | |
445 | } | |
446 | level = levelmap[record.levelname] | |
447 | if record.levelno >= self.level: | |
448 | self._module.cluster_log(self._module.module_name, | |
449 | level, | |
450 | self.format(record)) | |
451 | ||
452 | class FileHandler(logging.FileHandler): | |
453 | def __init__(self, module_inst): | |
454 | path = module_inst.get_ceph_option("log_file") | |
455 | idx = path.rfind(".log") | |
456 | if idx != -1: | |
457 | self.path = "{}.{}.log".format(path[:idx], module_inst.module_name) | |
458 | else: | |
459 | self.path = "{}.{}".format(path, module_inst.module_name) | |
460 | super(FileHandler, self).__init__(self.path, delay=True) | |
461 | self.setFormatter(logging.Formatter("%(asctime)s [%(threadName)s] [%(levelname)-4s] [%(name)s] %(message)s")) | |
462 | ||
463 | ||
464 | class MgrModuleLoggingMixin(object): | |
465 | def _configure_logging(self, mgr_level, module_level, cluster_level, | |
466 | log_to_file, log_to_cluster): | |
467 | self._mgr_level = None | |
468 | self._module_level = None | |
469 | self._root_logger = logging.getLogger() | |
470 | ||
471 | self._unconfigure_logging() | |
472 | ||
473 | # the ceph log handler is initialized only once | |
474 | self._mgr_log_handler = CPlusPlusHandler(self) | |
475 | self._cluster_log_handler = ClusterLogHandler(self) | |
476 | self._file_log_handler = FileHandler(self) | |
477 | ||
478 | self.log_to_file = log_to_file | |
479 | self.log_to_cluster = log_to_cluster | |
480 | ||
481 | self._root_logger.addHandler(self._mgr_log_handler) | |
482 | if log_to_file: | |
483 | self._root_logger.addHandler(self._file_log_handler) | |
484 | if log_to_cluster: | |
485 | self._root_logger.addHandler(self._cluster_log_handler) | |
486 | ||
487 | self._root_logger.setLevel(logging.NOTSET) | |
488 | self._set_log_level(mgr_level, module_level, cluster_level) | |
489 | ||
490 | ||
491 | def _unconfigure_logging(self): | |
492 | # remove existing handlers: | |
493 | rm_handlers = [ | |
494 | h for h in self._root_logger.handlers if isinstance(h, CPlusPlusHandler) or isinstance(h, FileHandler) or isinstance(h, ClusterLogHandler)] | |
495 | for h in rm_handlers: | |
496 | self._root_logger.removeHandler(h) | |
497 | self.log_to_file = False | |
498 | self.log_to_cluster = False | |
499 | ||
500 | def _set_log_level(self, mgr_level, module_level, cluster_level): | |
501 | self._cluster_log_handler.setLevel(cluster_level.upper()) | |
502 | ||
503 | module_level = module_level.upper() if module_level else '' | |
504 | if not self._module_level: | |
505 | # using debug_mgr level | |
506 | if not module_level and self._mgr_level == mgr_level: | |
507 | # no change in module level neither in debug_mgr | |
508 | return | |
509 | else: | |
510 | if self._module_level == module_level: | |
511 | # no change in module level | |
512 | return | |
513 | ||
514 | if not self._module_level and not module_level: | |
515 | level = self._ceph_log_level_to_python(mgr_level) | |
516 | self.getLogger().debug("setting log level based on debug_mgr: %s (%s)", level, mgr_level) | |
517 | elif self._module_level and not module_level: | |
518 | level = self._ceph_log_level_to_python(mgr_level) | |
519 | self.getLogger().warning("unsetting module log level, falling back to " | |
520 | "debug_mgr level: %s (%s)", level, mgr_level) | |
521 | elif module_level: | |
522 | level = module_level | |
523 | self.getLogger().debug("setting log level: %s", level) | |
524 | ||
525 | self._module_level = module_level | |
526 | self._mgr_level = mgr_level | |
527 | ||
528 | self._mgr_log_handler.setLevel(level) | |
529 | self._file_log_handler.setLevel(level) | |
530 | ||
531 | def _enable_file_log(self): | |
532 | # enable file log | |
533 | self.getLogger().warning("enabling logging to file") | |
534 | self.log_to_file = True | |
535 | self._root_logger.addHandler(self._file_log_handler) | |
536 | ||
537 | def _disable_file_log(self): | |
538 | # disable file log | |
539 | self.getLogger().warning("disabling logging to file") | |
540 | self.log_to_file = False | |
541 | self._root_logger.removeHandler(self._file_log_handler) | |
542 | ||
543 | def _enable_cluster_log(self): | |
544 | # enable cluster log | |
545 | self.getLogger().warning("enabling logging to cluster") | |
546 | self.log_to_cluster = True | |
547 | self._root_logger.addHandler(self._cluster_log_handler) | |
548 | ||
549 | def _disable_cluster_log(self): | |
550 | # disable cluster log | |
551 | self.getLogger().warning("disabling logging to cluster") | |
552 | self.log_to_cluster = False | |
553 | self._root_logger.removeHandler(self._cluster_log_handler) | |
554 | ||
555 | def _ceph_log_level_to_python(self, ceph_log_level): | |
556 | if ceph_log_level: | |
557 | try: | |
558 | ceph_log_level = int(ceph_log_level.split("/", 1)[0]) | |
559 | except ValueError: | |
560 | ceph_log_level = 0 | |
561 | else: | |
562 | ceph_log_level = 0 | |
563 | ||
564 | log_level = "DEBUG" | |
565 | if ceph_log_level <= 0: | |
566 | log_level = "CRITICAL" | |
567 | elif ceph_log_level <= 1: | |
568 | log_level = "WARNING" | |
569 | elif ceph_log_level <= 4: | |
570 | log_level = "INFO" | |
571 | return log_level | |
572 | ||
573 | def getLogger(self, name=None): | |
574 | return logging.getLogger(name) | |
575 | ||
576 | ||
577 | class MgrStandbyModule(ceph_module.BaseMgrStandbyModule, MgrModuleLoggingMixin): | |
3efd9988 FG |
578 | """ |
579 | Standby modules only implement a serve and shutdown method, they | |
580 | are not permitted to implement commands and they do not receive | |
581 | any notifications. | |
582 | ||
b32b8144 | 583 | They only have access to the mgrmap (for accessing service URI info |
3efd9988 FG |
584 | from their active peer), and to configuration settings (read only). |
585 | """ | |
586 | ||
9f95a23c TL |
587 | MODULE_OPTIONS = [] # type: List[Dict[str, Any]] |
588 | MODULE_OPTION_DEFAULTS = {} # type: Dict[str, Any] | |
11fdf7f2 | 589 | |
3efd9988 FG |
590 | def __init__(self, module_name, capsule): |
591 | super(MgrStandbyModule, self).__init__(capsule) | |
592 | self.module_name = module_name | |
9f95a23c | 593 | |
11fdf7f2 TL |
594 | # see also MgrModule.__init__() |
595 | for o in self.MODULE_OPTIONS: | |
596 | if 'default' in o: | |
597 | if 'type' in o: | |
598 | self.MODULE_OPTION_DEFAULTS[o['name']] = o['default'] | |
599 | else: | |
600 | self.MODULE_OPTION_DEFAULTS[o['name']] = str(o['default']) | |
3efd9988 | 601 | |
9f95a23c TL |
602 | mgr_level = self.get_ceph_option("debug_mgr") |
603 | log_level = self.get_module_option("log_level") | |
604 | cluster_level = self.get_module_option('log_to_cluster_level') | |
605 | self._configure_logging(mgr_level, log_level, cluster_level, | |
606 | False, False) | |
607 | ||
608 | # for backwards compatibility | |
609 | self._logger = self.getLogger() | |
610 | ||
3efd9988 | 611 | def __del__(self): |
9f95a23c TL |
612 | self._unconfigure_logging() |
613 | ||
614 | @classmethod | |
615 | def _register_options(cls, module_name): | |
616 | cls.MODULE_OPTIONS.append( | |
617 | Option(name='log_level', type='str', default="", runtime=True, | |
618 | enum_allowed=['info', 'debug', 'critical', 'error', | |
619 | 'warning', ''])) | |
620 | cls.MODULE_OPTIONS.append( | |
621 | Option(name='log_to_file', type='bool', default=False, runtime=True)) | |
622 | if not [x for x in cls.MODULE_OPTIONS if x['name'] == 'log_to_cluster']: | |
623 | cls.MODULE_OPTIONS.append( | |
624 | Option(name='log_to_cluster', type='bool', default=False, | |
625 | runtime=True)) | |
626 | cls.MODULE_OPTIONS.append( | |
627 | Option(name='log_to_cluster_level', type='str', default='info', | |
628 | runtime=True, | |
629 | enum_allowed=['info', 'debug', 'critical', 'error', | |
630 | 'warning', ''])) | |
3efd9988 FG |
631 | |
632 | @property | |
633 | def log(self): | |
634 | return self._logger | |
635 | ||
636 | def serve(self): | |
637 | """ | |
638 | The serve method is mandatory for standby modules. | |
639 | :return: | |
640 | """ | |
641 | raise NotImplementedError() | |
642 | ||
643 | def get_mgr_id(self): | |
644 | return self._ceph_get_mgr_id() | |
645 | ||
11fdf7f2 | 646 | def get_module_option(self, key, default=None): |
b32b8144 FG |
647 | """ |
648 | Retrieve the value of a persistent configuration setting | |
649 | ||
650 | :param str key: | |
651 | :param default: the default value of the config if it is not found | |
652 | :return: str | |
653 | """ | |
11fdf7f2 | 654 | r = self._ceph_get_module_option(key) |
b32b8144 | 655 | if r is None: |
11fdf7f2 | 656 | return self.MODULE_OPTION_DEFAULTS.get(key, default) |
b32b8144 FG |
657 | else: |
658 | return r | |
659 | ||
11fdf7f2 TL |
660 | def get_ceph_option(self, key): |
661 | return self._ceph_get_option(key) | |
662 | ||
663 | def get_store(self, key): | |
664 | """ | |
665 | Retrieve the value of a persistent KV store entry | |
666 | ||
667 | :param key: String | |
668 | :return: Byte string or None | |
669 | """ | |
670 | return self._ceph_get_store(key) | |
3efd9988 FG |
671 | |
672 | def get_active_uri(self): | |
673 | return self._ceph_get_active_uri() | |
674 | ||
11fdf7f2 TL |
675 | def get_localized_module_option(self, key, default=None): |
676 | r = self._ceph_get_module_option(key, self.get_mgr_id()) | |
3efd9988 | 677 | if r is None: |
11fdf7f2 TL |
678 | return self.MODULE_OPTION_DEFAULTS.get(key, default) |
679 | else: | |
680 | return r | |
3efd9988 | 681 | |
3efd9988 | 682 | |
9f95a23c TL |
683 | class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): |
684 | COMMANDS = [] # type: List[Any] | |
685 | MODULE_OPTIONS = [] # type: List[dict] | |
686 | MODULE_OPTION_DEFAULTS = {} # type: Dict[str, Any] | |
7c673cae | 687 | |
3efd9988 FG |
688 | # Priority definitions for perf counters |
689 | PRIO_CRITICAL = 10 | |
690 | PRIO_INTERESTING = 8 | |
691 | PRIO_USEFUL = 5 | |
692 | PRIO_UNINTERESTING = 2 | |
693 | PRIO_DEBUGONLY = 0 | |
694 | ||
695 | # counter value types | |
696 | PERFCOUNTER_TIME = 1 | |
697 | PERFCOUNTER_U64 = 2 | |
698 | ||
699 | # counter types | |
700 | PERFCOUNTER_LONGRUNAVG = 4 | |
701 | PERFCOUNTER_COUNTER = 8 | |
702 | PERFCOUNTER_HISTOGRAM = 0x10 | |
28e407b8 | 703 | PERFCOUNTER_TYPE_MASK = ~3 |
7c673cae | 704 | |
1adf2230 AA |
705 | # units supported |
706 | BYTES = 0 | |
707 | NONE = 1 | |
11fdf7f2 TL |
708 | |
709 | # Cluster log priorities | |
710 | CLUSTER_LOG_PRIO_DEBUG = 0 | |
711 | CLUSTER_LOG_PRIO_INFO = 1 | |
712 | CLUSTER_LOG_PRIO_SEC = 2 | |
713 | CLUSTER_LOG_PRIO_WARN = 3 | |
714 | CLUSTER_LOG_PRIO_ERROR = 4 | |
715 | ||
3efd9988 FG |
716 | def __init__(self, module_name, py_modules_ptr, this_ptr): |
717 | self.module_name = module_name | |
3efd9988 | 718 | super(MgrModule, self).__init__(py_modules_ptr, this_ptr) |
7c673cae | 719 | |
11fdf7f2 TL |
720 | for o in self.MODULE_OPTIONS: |
721 | if 'default' in o: | |
722 | if 'type' in o: | |
723 | # we'll assume the declared type matches the | |
724 | # supplied default value's type. | |
725 | self.MODULE_OPTION_DEFAULTS[o['name']] = o['default'] | |
726 | else: | |
727 | # module not declaring it's type, so normalize the | |
728 | # default value to be a string for consistent behavior | |
729 | # with default and user-supplied option values. | |
730 | self.MODULE_OPTION_DEFAULTS[o['name']] = str(o['default']) | |
731 | ||
9f95a23c TL |
732 | mgr_level = self.get_ceph_option("debug_mgr") |
733 | log_level = self.get_module_option("log_level") | |
734 | cluster_level = self.get_module_option('log_to_cluster_level') | |
735 | log_to_file = self.get_module_option("log_to_file") | |
736 | log_to_cluster = self.get_module_option("log_to_cluster") | |
737 | self._configure_logging(mgr_level, log_level, cluster_level, | |
738 | log_to_file, log_to_cluster) | |
739 | ||
740 | # for backwards compatibility | |
741 | self._logger = self.getLogger() | |
742 | ||
743 | self._version = self._ceph_get_version() | |
744 | ||
745 | self._perf_schema_cache = None | |
746 | ||
747 | # Keep a librados instance for those that need it. | |
748 | self._rados = None | |
749 | ||
750 | ||
3efd9988 | 751 | def __del__(self): |
9f95a23c TL |
752 | self._unconfigure_logging() |
753 | ||
754 | @classmethod | |
755 | def _register_options(cls, module_name): | |
756 | cls.MODULE_OPTIONS.append( | |
757 | Option(name='log_level', type='str', default="", runtime=True, | |
758 | enum_allowed=['info', 'debug', 'critical', 'error', | |
759 | 'warning', ''])) | |
760 | cls.MODULE_OPTIONS.append( | |
761 | Option(name='log_to_file', type='bool', default=False, runtime=True)) | |
762 | if not [x for x in cls.MODULE_OPTIONS if x['name'] == 'log_to_cluster']: | |
763 | cls.MODULE_OPTIONS.append( | |
764 | Option(name='log_to_cluster', type='bool', default=False, | |
765 | runtime=True)) | |
766 | cls.MODULE_OPTIONS.append( | |
767 | Option(name='log_to_cluster_level', type='str', default='info', | |
768 | runtime=True, | |
769 | enum_allowed=['info', 'debug', 'critical', 'error', | |
770 | 'warning', ''])) | |
3efd9988 | 771 | |
11fdf7f2 | 772 | @classmethod |
9f95a23c | 773 | def _register_commands(cls, module_name): |
11fdf7f2 | 774 | cls.COMMANDS.extend(CLICommand.dump_cmd_list()) |
7c673cae FG |
775 | |
776 | @property | |
777 | def log(self): | |
778 | return self._logger | |
779 | ||
11fdf7f2 TL |
780 | def cluster_log(self, channel, priority, message): |
781 | """ | |
782 | :param channel: The log channel. This can be 'cluster', 'audit', ... | |
783 | :type channel: str | |
784 | :param priority: The log message priority. This can be | |
785 | CLUSTER_LOG_PRIO_DEBUG, CLUSTER_LOG_PRIO_INFO, | |
786 | CLUSTER_LOG_PRIO_SEC, CLUSTER_LOG_PRIO_WARN or | |
787 | CLUSTER_LOG_PRIO_ERROR. | |
788 | :type priority: int | |
789 | :param message: The message to log. | |
790 | :type message: str | |
791 | """ | |
792 | self._ceph_cluster_log(channel, priority, message) | |
793 | ||
7c673cae FG |
794 | @property |
795 | def version(self): | |
796 | return self._version | |
797 | ||
92f5a8d4 TL |
798 | @property |
799 | def release_name(self): | |
800 | """ | |
801 | Get the release name of the Ceph version, e.g. 'nautilus' or 'octopus'. | |
802 | :return: Returns the release name of the Ceph version in lower case. | |
803 | :rtype: str | |
804 | """ | |
805 | return self._ceph_get_release_name() | |
806 | ||
3efd9988 FG |
807 | def get_context(self): |
808 | """ | |
809 | :return: a Python capsule containing a C++ CephContext pointer | |
810 | """ | |
811 | return self._ceph_get_context() | |
812 | ||
7c673cae FG |
813 | def notify(self, notify_type, notify_id): |
814 | """ | |
815 | Called by the ceph-mgr service to notify the Python plugin | |
816 | that new state is available. | |
11fdf7f2 TL |
817 | |
818 | :param notify_type: string indicating what kind of notification, | |
819 | such as osd_map, mon_map, fs_map, mon_status, | |
820 | health, pg_summary, command, service_map | |
821 | :param notify_id: string (may be empty) that optionally specifies | |
822 | which entity is being notified about. With | |
823 | "command" notifications this is set to the tag | |
824 | ``from send_command``. | |
825 | """ | |
826 | pass | |
827 | ||
9f95a23c TL |
828 | def _config_notify(self): |
829 | # check logging options for changes | |
830 | mgr_level = self.get_ceph_option("debug_mgr") | |
831 | module_level = self.get_module_option("log_level") | |
832 | cluster_level = self.get_module_option("log_to_cluster_level") | |
833 | log_to_file = self.get_module_option("log_to_file", False) | |
834 | log_to_cluster = self.get_module_option("log_to_cluster", False) | |
835 | ||
836 | self._set_log_level(mgr_level, module_level, cluster_level) | |
837 | ||
838 | if log_to_file != self.log_to_file: | |
839 | if log_to_file: | |
840 | self._enable_file_log() | |
841 | else: | |
842 | self._disable_file_log() | |
843 | if log_to_cluster != self.log_to_cluster: | |
844 | if log_to_cluster: | |
845 | self._enable_cluster_log() | |
846 | else: | |
847 | self._disable_cluster_log() | |
848 | ||
849 | # call module subclass implementations | |
850 | self.config_notify() | |
851 | ||
11fdf7f2 TL |
852 | def config_notify(self): |
853 | """ | |
854 | Called by the ceph-mgr service to notify the Python plugin | |
855 | that the configuration may have changed. Modules will want to | |
856 | refresh any configuration values stored in config variables. | |
7c673cae FG |
857 | """ |
858 | pass | |
859 | ||
860 | def serve(self): | |
861 | """ | |
862 | Called by the ceph-mgr service to start any server that | |
863 | is provided by this Python plugin. The implementation | |
864 | of this function should block until ``shutdown`` is called. | |
865 | ||
866 | You *must* implement ``shutdown`` if you implement ``serve`` | |
867 | """ | |
868 | pass | |
869 | ||
870 | def shutdown(self): | |
871 | """ | |
872 | Called by the ceph-mgr service to request that this | |
873 | module drop out of its serve() function. You do not | |
874 | need to implement this if you do not implement serve() | |
875 | ||
876 | :return: None | |
877 | """ | |
11fdf7f2 | 878 | if self._rados: |
9f95a23c | 879 | addrs = self._rados.get_addrs() |
11fdf7f2 | 880 | self._rados.shutdown() |
9f95a23c | 881 | self._ceph_unregister_client(addrs) |
7c673cae FG |
882 | |
883 | def get(self, data_name): | |
884 | """ | |
11fdf7f2 TL |
885 | Called by the plugin to fetch named cluster-wide objects from ceph-mgr. |
886 | ||
887 | :param str data_name: Valid things to fetch are osd_crush_map_text, | |
888 | osd_map, osd_map_tree, osd_map_crush, config, mon_map, fs_map, | |
889 | osd_metadata, pg_summary, io_rate, pg_dump, df, osd_stats, | |
9f95a23c TL |
890 | health, mon_status, devices, device <devid>, pg_stats, |
891 | pool_stats, pg_ready, osd_ping_times. | |
11fdf7f2 TL |
892 | |
893 | Note: | |
894 | All these structures have their own JSON representations: experiment | |
895 | or look at the C++ ``dump()`` methods to learn about them. | |
7c673cae | 896 | """ |
3efd9988 | 897 | return self._ceph_get(data_name) |
7c673cae | 898 | |
94b18763 | 899 | def _stattype_to_str(self, stattype): |
11fdf7f2 | 900 | |
94b18763 FG |
901 | typeonly = stattype & self.PERFCOUNTER_TYPE_MASK |
902 | if typeonly == 0: | |
903 | return 'gauge' | |
904 | if typeonly == self.PERFCOUNTER_LONGRUNAVG: | |
905 | # this lie matches the DaemonState decoding: only val, no counts | |
906 | return 'counter' | |
907 | if typeonly == self.PERFCOUNTER_COUNTER: | |
908 | return 'counter' | |
909 | if typeonly == self.PERFCOUNTER_HISTOGRAM: | |
910 | return 'histogram' | |
11fdf7f2 | 911 | |
94b18763 FG |
912 | return '' |
913 | ||
81eedcae | 914 | def _perfpath_to_path_labels(self, daemon, path): |
9f95a23c TL |
915 | # type: (str, str) -> Tuple[str, Tuple[str, ...], Tuple[str, ...]] |
916 | label_names = ("ceph_daemon",) # type: Tuple[str, ...] | |
917 | labels = (daemon,) # type: Tuple[str, ...] | |
81eedcae TL |
918 | |
919 | if daemon.startswith('rbd-mirror.'): | |
920 | match = re.match( | |
9f95a23c | 921 | r'^rbd_mirror_image_([^/]+)/(?:(?:([^/]+)/)?)(.*)\.(replay(?:_bytes|_latency)?)$', |
81eedcae TL |
922 | path |
923 | ) | |
924 | if match: | |
9f95a23c | 925 | path = 'rbd_mirror_image_' + match.group(4) |
81eedcae TL |
926 | pool = match.group(1) |
927 | namespace = match.group(2) or '' | |
928 | image = match.group(3) | |
929 | label_names += ('pool', 'namespace', 'image') | |
930 | labels += (pool, namespace, image) | |
931 | ||
932 | return path, label_names, labels, | |
933 | ||
28e407b8 AA |
934 | def _perfvalue_to_value(self, stattype, value): |
935 | if stattype & self.PERFCOUNTER_TIME: | |
936 | # Convert from ns to seconds | |
937 | return value / 1000000000.0 | |
938 | else: | |
939 | return value | |
940 | ||
1adf2230 AA |
941 | def _unit_to_str(self, unit): |
942 | if unit == self.NONE: | |
943 | return "/s" | |
944 | elif unit == self.BYTES: | |
11fdf7f2 TL |
945 | return "B/s" |
946 | ||
947 | @staticmethod | |
948 | def to_pretty_iec(n): | |
949 | for bits, suffix in [(60, 'Ei'), (50, 'Pi'), (40, 'Ti'), (30, 'Gi'), | |
950 | (20, 'Mi'), (10, 'Ki')]: | |
951 | if n > 10 << bits: | |
952 | return str(n >> bits) + ' ' + suffix | |
953 | return str(n) + ' ' | |
954 | ||
955 | @staticmethod | |
956 | def get_pretty_row(elems, width): | |
957 | """ | |
958 | Takes an array of elements and returns a string with those elements | |
959 | formatted as a table row. Useful for polling modules. | |
960 | ||
961 | :param elems: the elements to be printed | |
962 | :param width: the width of the terminal | |
963 | """ | |
964 | n = len(elems) | |
965 | column_width = int(width / n) | |
966 | ||
967 | ret = '|' | |
968 | for elem in elems: | |
969 | ret += '{0:>{w}} |'.format(elem, w=column_width - 2) | |
970 | ||
971 | return ret | |
972 | ||
973 | def get_pretty_header(self, elems, width): | |
974 | """ | |
975 | Like ``get_pretty_row`` but adds dashes, to be used as a table title. | |
976 | ||
977 | :param elems: the elements to be printed | |
978 | :param width: the width of the terminal | |
979 | """ | |
980 | n = len(elems) | |
981 | column_width = int(width / n) | |
982 | ||
983 | # dash line | |
984 | ret = '+' | |
985 | for i in range(0, n): | |
986 | ret += '-' * (column_width - 1) + '+' | |
987 | ret += '\n' | |
988 | ||
989 | # title | |
990 | ret += self.get_pretty_row(elems, width) | |
991 | ret += '\n' | |
992 | ||
993 | # dash line | |
994 | ret += '+' | |
995 | for i in range(0, n): | |
996 | ret += '-' * (column_width - 1) + '+' | |
997 | ret += '\n' | |
998 | ||
999 | return ret | |
1000 | ||
7c673cae FG |
1001 | def get_server(self, hostname): |
1002 | """ | |
11fdf7f2 TL |
1003 | Called by the plugin to fetch metadata about a particular hostname from |
1004 | ceph-mgr. | |
1005 | ||
1006 | This is information that ceph-mgr has gleaned from the daemon metadata | |
1007 | reported by daemons running on a particular server. | |
7c673cae | 1008 | |
11fdf7f2 | 1009 | :param hostname: a hostname |
7c673cae | 1010 | """ |
3efd9988 | 1011 | return self._ceph_get_server(hostname) |
7c673cae | 1012 | |
c07f9fc5 FG |
1013 | def get_perf_schema(self, svc_type, svc_name): |
1014 | """ | |
1015 | Called by the plugin to fetch perf counter schema info. | |
1016 | svc_name can be nullptr, as can svc_type, in which case | |
1017 | they are wildcards | |
1018 | ||
11fdf7f2 TL |
1019 | :param str svc_type: |
1020 | :param str svc_name: | |
c07f9fc5 FG |
1021 | :return: list of dicts describing the counters requested |
1022 | """ | |
3efd9988 | 1023 | return self._ceph_get_perf_schema(svc_type, svc_name) |
c07f9fc5 | 1024 | |
7c673cae FG |
1025 | def get_counter(self, svc_type, svc_name, path): |
1026 | """ | |
11fdf7f2 TL |
1027 | Called by the plugin to fetch the latest performance counter data for a |
1028 | particular counter on a particular service. | |
7c673cae | 1029 | |
11fdf7f2 TL |
1030 | :param str svc_type: |
1031 | :param str svc_name: | |
1032 | :param str path: a period-separated concatenation of the subsystem and the | |
1033 | counter name, for example "mds.inodes". | |
1034 | :return: A list of two-tuples of (timestamp, value) is returned. This may be | |
1035 | empty if no data is available. | |
7c673cae | 1036 | """ |
3efd9988 | 1037 | return self._ceph_get_counter(svc_type, svc_name, path) |
7c673cae | 1038 | |
11fdf7f2 TL |
1039 | def get_latest_counter(self, svc_type, svc_name, path): |
1040 | """ | |
1041 | Called by the plugin to fetch only the newest performance counter data | |
1042 | pointfor a particular counter on a particular service. | |
1043 | ||
1044 | :param str svc_type: | |
1045 | :param str svc_name: | |
1046 | :param str path: a period-separated concatenation of the subsystem and the | |
1047 | counter name, for example "mds.inodes". | |
1048 | :return: A list of two-tuples of (timestamp, value) is returned. This may be | |
1049 | empty if no data is available. | |
1050 | """ | |
1051 | return self._ceph_get_latest_counter(svc_type, svc_name, path) | |
1052 | ||
7c673cae FG |
1053 | def list_servers(self): |
1054 | """ | |
11fdf7f2 TL |
1055 | Like ``get_server``, but gives information about all servers (i.e. all |
1056 | unique hostnames that have been mentioned in daemon metadata) | |
1057 | ||
1058 | :return: a list of information about all servers | |
1059 | :rtype: list | |
7c673cae | 1060 | """ |
3efd9988 | 1061 | return self._ceph_get_server(None) |
7c673cae | 1062 | |
f6b5b4d7 | 1063 | def get_metadata(self, svc_type, svc_id, default=None): |
7c673cae | 1064 | """ |
11fdf7f2 | 1065 | Fetch the daemon metadata for a particular service. |
7c673cae | 1066 | |
11fdf7f2 TL |
1067 | ceph-mgr fetches metadata asynchronously, so are windows of time during |
1068 | addition/removal of services where the metadata is not available to | |
1069 | modules. ``None`` is returned if no metadata is available. | |
1070 | ||
1071 | :param str svc_type: service type (e.g., 'mds', 'osd', 'mon') | |
1072 | :param str svc_id: service id. convert OSD integer IDs to strings when | |
1073 | calling this | |
1074 | :rtype: dict, or None if no metadata found | |
7c673cae | 1075 | """ |
f6b5b4d7 TL |
1076 | metadata = self._ceph_get_metadata(svc_type, svc_id) |
1077 | if metadata is None: | |
1078 | return default | |
1079 | return metadata | |
7c673cae | 1080 | |
224ce89b WB |
1081 | def get_daemon_status(self, svc_type, svc_id): |
1082 | """ | |
1083 | Fetch the latest status for a particular service daemon. | |
1084 | ||
11fdf7f2 TL |
1085 | This method may return ``None`` if no status information is |
1086 | available, for example because the daemon hasn't fully started yet. | |
1087 | ||
224ce89b WB |
1088 | :param svc_type: string (e.g., 'rgw') |
1089 | :param svc_id: string | |
11fdf7f2 | 1090 | :return: dict, or None if the service is not found |
224ce89b | 1091 | """ |
3efd9988 | 1092 | return self._ceph_get_daemon_status(svc_type, svc_id) |
224ce89b | 1093 | |
e306af50 TL |
1094 | def check_mon_command(self, cmd_dict: dict) -> HandleCommandResult: |
1095 | """ | |
1096 | Wrapper around :func:`~mgr_module.MgrModule.mon_command`, but raises, | |
1097 | if ``retval != 0``. | |
1098 | """ | |
1099 | ||
1100 | r = HandleCommandResult(*self.mon_command(cmd_dict)) | |
1101 | if r.retval: | |
1102 | raise MonCommandFailed(f'{cmd_dict["prefix"]} failed: {r.stderr} retval: {r.retval}') | |
1103 | return r | |
1104 | ||
11fdf7f2 TL |
1105 | def mon_command(self, cmd_dict): |
1106 | """ | |
1107 | Helper for modules that do simple, synchronous mon command | |
1108 | execution. | |
1109 | ||
1110 | See send_command for general case. | |
1111 | ||
1112 | :return: status int, out std, err str | |
1113 | """ | |
1114 | ||
1115 | t1 = time.time() | |
1116 | result = CommandResult() | |
1117 | self.send_command(result, "mon", "", json.dumps(cmd_dict), "") | |
1118 | r = result.wait() | |
1119 | t2 = time.time() | |
1120 | ||
1121 | self.log.debug("mon_command: '{0}' -> {1} in {2:.3f}s".format( | |
1122 | cmd_dict['prefix'], r[0], t2 - t1 | |
1123 | )) | |
1124 | ||
1125 | return r | |
1126 | ||
7c673cae FG |
1127 | def send_command(self, *args, **kwargs): |
1128 | """ | |
1129 | Called by the plugin to send a command to the mon | |
1130 | cluster. | |
11fdf7f2 TL |
1131 | |
1132 | :param CommandResult result: an instance of the ``CommandResult`` | |
1133 | class, defined in the same module as MgrModule. This acts as a | |
1134 | completion and stores the output of the command. Use | |
1135 | ``CommandResult.wait()`` if you want to block on completion. | |
1136 | :param str svc_type: | |
1137 | :param str svc_id: | |
1138 | :param str command: a JSON-serialized command. This uses the same | |
1139 | format as the ceph command line, which is a dictionary of command | |
1140 | arguments, with the extra ``prefix`` key containing the command | |
1141 | name itself. Consult MonCommands.h for available commands and | |
1142 | their expected arguments. | |
1143 | :param str tag: used for nonblocking operation: when a command | |
1144 | completes, the ``notify()`` callback on the MgrModule instance is | |
1145 | triggered, with notify_type set to "command", and notify_id set to | |
1146 | the tag of the command. | |
7c673cae | 1147 | """ |
3efd9988 | 1148 | self._ceph_send_command(*args, **kwargs) |
7c673cae | 1149 | |
c07f9fc5 FG |
1150 | def set_health_checks(self, checks): |
1151 | """ | |
c07f9fc5 FG |
1152 | Set the module's current map of health checks. Argument is a |
1153 | dict of check names to info, in this form: | |
1154 | ||
11fdf7f2 TL |
1155 | :: |
1156 | ||
c07f9fc5 FG |
1157 | { |
1158 | 'CHECK_FOO': { | |
1159 | 'severity': 'warning', # or 'error' | |
1160 | 'summary': 'summary string', | |
9f95a23c | 1161 | 'count': 4, # quantify badness |
c07f9fc5 FG |
1162 | 'detail': [ 'list', 'of', 'detail', 'strings' ], |
1163 | }, | |
1164 | 'CHECK_BAR': { | |
1165 | 'severity': 'error', | |
1166 | 'summary': 'bars are bad', | |
1167 | 'detail': [ 'too hard' ], | |
1168 | }, | |
1169 | } | |
1170 | ||
1171 | :param list: dict of health check dicts | |
1172 | """ | |
3efd9988 | 1173 | self._ceph_set_health_checks(checks) |
c07f9fc5 | 1174 | |
11fdf7f2 TL |
1175 | def _handle_command(self, inbuf, cmd): |
1176 | if cmd['prefix'] not in CLICommand.COMMANDS: | |
1177 | return self.handle_command(inbuf, cmd) | |
9f95a23c | 1178 | |
11fdf7f2 TL |
1179 | return CLICommand.COMMANDS[cmd['prefix']].call(self, cmd, inbuf) |
1180 | ||
1181 | def handle_command(self, inbuf, cmd): | |
7c673cae FG |
1182 | """ |
1183 | Called by ceph-mgr to request the plugin to handle one | |
1184 | of the commands that it declared in self.COMMANDS | |
1185 | ||
1186 | Return a status code, an output buffer, and an | |
1187 | output string. The output buffer is for data results, | |
1188 | the output string is for informative text. | |
1189 | ||
11fdf7f2 TL |
1190 | :param inbuf: content of any "-i <file>" supplied to ceph cli |
1191 | :type inbuf: str | |
1192 | :param cmd: from Ceph's cmdmap_t | |
1193 | :type cmd: dict | |
7c673cae | 1194 | |
11fdf7f2 | 1195 | :return: HandleCommandResult or a 3-tuple of (int, str, str) |
7c673cae FG |
1196 | """ |
1197 | ||
1198 | # Should never get called if they didn't declare | |
1199 | # any ``COMMANDS`` | |
1200 | raise NotImplementedError() | |
1201 | ||
31f18b77 FG |
1202 | def get_mgr_id(self): |
1203 | """ | |
11fdf7f2 TL |
1204 | Retrieve the name of the manager daemon where this plugin |
1205 | is currently being executed (i.e. the active manager). | |
31f18b77 FG |
1206 | |
1207 | :return: str | |
1208 | """ | |
3efd9988 | 1209 | return self._ceph_get_mgr_id() |
31f18b77 | 1210 | |
11fdf7f2 TL |
1211 | def get_ceph_option(self, key): |
1212 | return self._ceph_get_option(key) | |
7c673cae | 1213 | |
11fdf7f2 TL |
1214 | def _validate_module_option(self, key): |
1215 | """ | |
1216 | Helper: don't allow get/set config callers to | |
1217 | access config options that they didn't declare | |
1218 | in their schema. | |
7c673cae | 1219 | """ |
11fdf7f2 TL |
1220 | if key not in [o['name'] for o in self.MODULE_OPTIONS]: |
1221 | raise RuntimeError("Config option '{0}' is not in {1}.MODULE_OPTIONS". | |
1222 | format(key, self.__class__.__name__)) | |
1223 | ||
1224 | def _get_module_option(self, key, default, localized_prefix=""): | |
1225 | r = self._ceph_get_module_option(self.module_name, key, | |
1226 | localized_prefix) | |
3efd9988 | 1227 | if r is None: |
11fdf7f2 | 1228 | return self.MODULE_OPTION_DEFAULTS.get(key, default) |
3efd9988 FG |
1229 | else: |
1230 | return r | |
7c673cae | 1231 | |
11fdf7f2 TL |
1232 | def get_module_option(self, key, default=None): |
1233 | """ | |
1234 | Retrieve the value of a persistent configuration setting | |
1235 | ||
1236 | :param str key: | |
1237 | :param str default: | |
1238 | :return: str | |
1239 | """ | |
1240 | self._validate_module_option(key) | |
1241 | return self._get_module_option(key, default) | |
1242 | ||
1243 | def get_module_option_ex(self, module, key, default=None): | |
1244 | """ | |
1245 | Retrieve the value of a persistent configuration setting | |
1246 | for the specified module. | |
1247 | ||
1248 | :param str module: The name of the module, e.g. 'dashboard' | |
1249 | or 'telemetry'. | |
1250 | :param str key: The configuration key, e.g. 'server_addr'. | |
1251 | :param str,None default: The default value to use when the | |
1252 | returned value is ``None``. Defaults to ``None``. | |
1253 | :return: str,int,bool,float,None | |
1254 | """ | |
1255 | if module == self.module_name: | |
1256 | self._validate_module_option(key) | |
1257 | r = self._ceph_get_module_option(module, key) | |
1258 | return default if r is None else r | |
1259 | ||
1260 | def get_store_prefix(self, key_prefix): | |
31f18b77 | 1261 | """ |
11fdf7f2 TL |
1262 | Retrieve a dict of KV store keys to values, where the keys |
1263 | have the given prefix | |
31f18b77 | 1264 | |
11fdf7f2 | 1265 | :param str key_prefix: |
31f18b77 FG |
1266 | :return: str |
1267 | """ | |
11fdf7f2 | 1268 | return self._ceph_get_store_prefix(key_prefix) |
31f18b77 | 1269 | |
11fdf7f2 TL |
1270 | def _set_localized(self, key, val, setter): |
1271 | return setter(_get_localized_key(self.get_mgr_id(), key), val) | |
1272 | ||
1273 | def get_localized_module_option(self, key, default=None): | |
224ce89b WB |
1274 | """ |
1275 | Retrieve localized configuration for this ceph-mgr instance | |
11fdf7f2 TL |
1276 | :param str key: |
1277 | :param str default: | |
224ce89b WB |
1278 | :return: str |
1279 | """ | |
11fdf7f2 TL |
1280 | self._validate_module_option(key) |
1281 | return self._get_module_option(key, default, self.get_mgr_id()) | |
224ce89b | 1282 | |
11fdf7f2 | 1283 | def _set_module_option(self, key, val): |
eafe8130 TL |
1284 | return self._ceph_set_module_option(self.module_name, key, |
1285 | None if val is None else str(val)) | |
224ce89b | 1286 | |
11fdf7f2 | 1287 | def set_module_option(self, key, val): |
7c673cae FG |
1288 | """ |
1289 | Set the value of a persistent configuration setting | |
1290 | ||
11fdf7f2 TL |
1291 | :param str key: |
1292 | :type val: str | None | |
7c673cae | 1293 | """ |
11fdf7f2 TL |
1294 | self._validate_module_option(key) |
1295 | return self._set_module_option(key, val) | |
7c673cae | 1296 | |
11fdf7f2 TL |
1297 | def set_module_option_ex(self, module, key, val): |
1298 | """ | |
1299 | Set the value of a persistent configuration setting | |
1300 | for the specified module. | |
1301 | ||
1302 | :param str module: | |
1303 | :param str key: | |
1304 | :param str val: | |
1305 | """ | |
1306 | if module == self.module_name: | |
1307 | self._validate_module_option(key) | |
1308 | return self._ceph_set_module_option(module, key, str(val)) | |
1309 | ||
1310 | def set_localized_module_option(self, key, val): | |
224ce89b WB |
1311 | """ |
1312 | Set localized configuration for this ceph-mgr instance | |
11fdf7f2 TL |
1313 | :param str key: |
1314 | :param str val: | |
224ce89b WB |
1315 | :return: str |
1316 | """ | |
11fdf7f2 TL |
1317 | self._validate_module_option(key) |
1318 | return self._set_localized(key, val, self._set_module_option) | |
224ce89b | 1319 | |
11fdf7f2 | 1320 | def set_store(self, key, val): |
7c673cae | 1321 | """ |
11fdf7f2 TL |
1322 | Set a value in this module's persistent key value store. |
1323 | If val is None, remove key from store | |
7c673cae | 1324 | |
11fdf7f2 TL |
1325 | :param str key: |
1326 | :param str val: | |
7c673cae | 1327 | """ |
11fdf7f2 | 1328 | self._ceph_set_store(key, val) |
7c673cae | 1329 | |
11fdf7f2 | 1330 | def get_store(self, key, default=None): |
7c673cae | 1331 | """ |
11fdf7f2 | 1332 | Get a value from this module's persistent key value store |
7c673cae | 1333 | """ |
11fdf7f2 TL |
1334 | r = self._ceph_get_store(key) |
1335 | if r is None: | |
1336 | return default | |
7c673cae | 1337 | else: |
11fdf7f2 TL |
1338 | return r |
1339 | ||
1340 | def get_localized_store(self, key, default=None): | |
1341 | r = self._ceph_get_store(_get_localized_key(self.get_mgr_id(), key)) | |
1342 | if r is None: | |
1343 | r = self._ceph_get_store(key) | |
1344 | if r is None: | |
1345 | r = default | |
1346 | return r | |
1347 | ||
1348 | def set_localized_store(self, key, val): | |
1349 | return self._set_localized(key, val, self.set_store) | |
224ce89b WB |
1350 | |
1351 | def self_test(self): | |
1352 | """ | |
1353 | Run a self-test on the module. Override this function and implement | |
1354 | a best as possible self-test for (automated) testing of the module | |
11fdf7f2 TL |
1355 | |
1356 | Indicate any failures by raising an exception. This does not have | |
1357 | to be pretty, it's mainly for picking up regressions during | |
1358 | development, rather than use in the field. | |
1359 | ||
1360 | :return: None, or an advisory string for developer interest, such | |
1361 | as a json dump of some state. | |
224ce89b WB |
1362 | """ |
1363 | pass | |
3efd9988 FG |
1364 | |
1365 | def get_osdmap(self): | |
1366 | """ | |
1367 | Get a handle to an OSDMap. If epoch==0, get a handle for the latest | |
1368 | OSDMap. | |
1369 | :return: OSDMap | |
1370 | """ | |
1371 | return self._ceph_get_osdmap() | |
1372 | ||
11fdf7f2 TL |
1373 | def get_latest(self, daemon_type, daemon_name, counter): |
1374 | data = self.get_latest_counter( | |
1375 | daemon_type, daemon_name, counter)[counter] | |
1376 | if data: | |
1377 | return data[1] | |
1378 | else: | |
1379 | return 0 | |
1380 | ||
1381 | def get_latest_avg(self, daemon_type, daemon_name, counter): | |
1382 | data = self.get_latest_counter( | |
1383 | daemon_type, daemon_name, counter)[counter] | |
1384 | if data: | |
1385 | return data[1], data[2] | |
1386 | else: | |
1387 | return 0, 0 | |
1388 | ||
f6b5b4d7 | 1389 | @profile_method() |
11fdf7f2 TL |
1390 | def get_all_perf_counters(self, prio_limit=PRIO_USEFUL, |
1391 | services=("mds", "mon", "osd", | |
81eedcae | 1392 | "rbd-mirror", "rgw", "tcmu-runner")): |
3efd9988 FG |
1393 | """ |
1394 | Return the perf counters currently known to this ceph-mgr | |
1395 | instance, filtered by priority equal to or greater than `prio_limit`. | |
1396 | ||
11fdf7f2 | 1397 | The result is a map of string to dict, associating services |
3efd9988 FG |
1398 | (like "osd.123") with their counters. The counter |
1399 | dict for each service maps counter paths to a counter | |
1400 | info structure, which is the information from | |
1401 | the schema, plus an additional "value" member with the latest | |
1402 | value. | |
1403 | """ | |
1404 | ||
9f95a23c | 1405 | result = defaultdict(dict) # type: Dict[str, dict] |
3efd9988 | 1406 | |
3efd9988 FG |
1407 | for server in self.list_servers(): |
1408 | for service in server['services']: | |
11fdf7f2 | 1409 | if service['type'] not in services: |
3efd9988 FG |
1410 | continue |
1411 | ||
1412 | schema = self.get_perf_schema(service['type'], service['id']) | |
1413 | if not schema: | |
e306af50 | 1414 | self.log.warning("No perf counter schema for {0}.{1}".format( |
3efd9988 FG |
1415 | service['type'], service['id'] |
1416 | )) | |
1417 | continue | |
1418 | ||
1419 | # Value is returned in a potentially-multi-service format, | |
1420 | # get just the service we're asking about | |
11fdf7f2 TL |
1421 | svc_full_name = "{0}.{1}".format( |
1422 | service['type'], service['id']) | |
3efd9988 FG |
1423 | schema = schema[svc_full_name] |
1424 | ||
1425 | # Populate latest values | |
1426 | for counter_path, counter_schema in schema.items(): | |
1427 | # self.log.debug("{0}: {1}".format( | |
1428 | # counter_path, json.dumps(counter_schema) | |
1429 | # )) | |
1430 | if counter_schema['priority'] < prio_limit: | |
1431 | continue | |
1432 | ||
28e407b8 AA |
1433 | counter_info = dict(counter_schema) |
1434 | ||
1435 | # Also populate count for the long running avgs | |
1436 | if counter_schema['type'] & self.PERFCOUNTER_LONGRUNAVG: | |
11fdf7f2 | 1437 | v, c = self.get_latest_avg( |
28e407b8 AA |
1438 | service['type'], |
1439 | service['id'], | |
1440 | counter_path | |
1441 | ) | |
1442 | counter_info['value'], counter_info['count'] = v, c | |
1443 | result[svc_full_name][counter_path] = counter_info | |
1444 | else: | |
11fdf7f2 | 1445 | counter_info['value'] = self.get_latest( |
28e407b8 AA |
1446 | service['type'], |
1447 | service['id'], | |
1448 | counter_path | |
1449 | ) | |
1450 | ||
3efd9988 FG |
1451 | result[svc_full_name][counter_path] = counter_info |
1452 | ||
1453 | self.log.debug("returning {0} counter".format(len(result))) | |
1454 | ||
1455 | return result | |
1456 | ||
1457 | def set_uri(self, uri): | |
1458 | """ | |
1459 | If the module exposes a service, then call this to publish the | |
1460 | address once it is available. | |
1461 | ||
1462 | :return: a string | |
1463 | """ | |
1464 | return self._ceph_set_uri(uri) | |
94b18763 FG |
1465 | |
1466 | def have_mon_connection(self): | |
1467 | """ | |
1468 | Check whether this ceph-mgr daemon has an open connection | |
1469 | to a monitor. If it doesn't, then it's likely that the | |
1470 | information we have about the cluster is out of date, | |
1471 | and/or the monitor cluster is down. | |
1472 | """ | |
1473 | ||
28e407b8 | 1474 | return self._ceph_have_mon_connection() |
11fdf7f2 TL |
1475 | |
1476 | def update_progress_event(self, evid, desc, progress): | |
9f95a23c TL |
1477 | return self._ceph_update_progress_event(str(evid), |
1478 | str(desc), | |
1479 | float(progress)) | |
11fdf7f2 TL |
1480 | |
1481 | def complete_progress_event(self, evid): | |
1482 | return self._ceph_complete_progress_event(str(evid)) | |
1483 | ||
1484 | def clear_all_progress_events(self): | |
1485 | return self._ceph_clear_all_progress_events() | |
1486 | ||
1487 | @property | |
1488 | def rados(self): | |
1489 | """ | |
1490 | A librados instance to be shared by any classes within | |
1491 | this mgr module that want one. | |
1492 | """ | |
1493 | if self._rados: | |
1494 | return self._rados | |
1495 | ||
1496 | ctx_capsule = self.get_context() | |
1497 | self._rados = rados.Rados(context=ctx_capsule) | |
1498 | self._rados.connect() | |
9f95a23c | 1499 | self._ceph_register_client(self._rados.get_addrs()) |
11fdf7f2 TL |
1500 | return self._rados |
1501 | ||
1502 | @staticmethod | |
1503 | def can_run(): | |
1504 | """ | |
1505 | Implement this function to report whether the module's dependencies | |
1506 | are met. For example, if the module needs to import a particular | |
1507 | dependency to work, then use a try/except around the import at | |
1508 | file scope, and then report here if the import failed. | |
1509 | ||
1510 | This will be called in a blocking way from the C++ code, so do not | |
1511 | do any I/O that could block in this function. | |
1512 | ||
1513 | :return a 2-tuple consisting of a boolean and explanatory string | |
1514 | """ | |
1515 | ||
1516 | return True, "" | |
1517 | ||
1518 | def remote(self, module_name, method_name, *args, **kwargs): | |
1519 | """ | |
1520 | Invoke a method on another module. All arguments, and the return | |
1521 | value from the other module must be serializable. | |
1522 | ||
1523 | Limitation: Do not import any modules within the called method. | |
1524 | Otherwise you will get an error in Python 2:: | |
1525 | ||
1526 | RuntimeError('cannot unmarshal code objects in restricted execution mode',) | |
1527 | ||
1528 | ||
1529 | ||
1530 | :param module_name: Name of other module. If module isn't loaded, | |
1531 | an ImportError exception is raised. | |
1532 | :param method_name: Method name. If it does not exist, a NameError | |
1533 | exception is raised. | |
1534 | :param args: Argument tuple | |
1535 | :param kwargs: Keyword argument dict | |
1536 | :raises RuntimeError: **Any** error raised within the method is converted to a RuntimeError | |
1537 | :raises ImportError: No such module | |
1538 | """ | |
1539 | return self._ceph_dispatch_remote(module_name, method_name, | |
1540 | args, kwargs) | |
1541 | ||
1542 | def add_osd_perf_query(self, query): | |
1543 | """ | |
1544 | Register an OSD perf query. Argument is a | |
1545 | dict of the query parameters, in this form: | |
1546 | ||
1547 | :: | |
1548 | ||
1549 | { | |
1550 | 'key_descriptor': [ | |
1551 | {'type': subkey_type, 'regex': regex_pattern}, | |
1552 | ... | |
1553 | ], | |
1554 | 'performance_counter_descriptors': [ | |
1555 | list, of, descriptor, types | |
1556 | ], | |
1557 | 'limit': {'order_by': performance_counter_type, 'max_count': n}, | |
1558 | } | |
1559 | ||
1560 | Valid subkey types: | |
1561 | 'client_id', 'client_address', 'pool_id', 'namespace', 'osd_id', | |
1562 | 'pg_id', 'object_name', 'snap_id' | |
1563 | Valid performance counter types: | |
1564 | 'ops', 'write_ops', 'read_ops', 'bytes', 'write_bytes', 'read_bytes', | |
1565 | 'latency', 'write_latency', 'read_latency' | |
1566 | ||
1567 | :param object query: query | |
1568 | :rtype: int (query id) | |
1569 | """ | |
1570 | return self._ceph_add_osd_perf_query(query) | |
1571 | ||
1572 | def remove_osd_perf_query(self, query_id): | |
1573 | """ | |
1574 | Unregister an OSD perf query. | |
1575 | ||
1576 | :param int query_id: query ID | |
1577 | """ | |
1578 | return self._ceph_remove_osd_perf_query(query_id) | |
1579 | ||
1580 | def get_osd_perf_counters(self, query_id): | |
1581 | """ | |
1582 | Get stats collected for an OSD perf query. | |
1583 | ||
1584 | :param int query_id: query ID | |
1585 | """ | |
1586 | return self._ceph_get_osd_perf_counters(query_id) | |
494da23a | 1587 | |
92f5a8d4 TL |
1588 | def is_authorized(self, arguments): |
1589 | """ | |
1590 | Verifies that the current session caps permit executing the py service | |
1591 | or current module with the provided arguments. This provides a generic | |
1592 | way to allow modules to restrict by more fine-grained controls (e.g. | |
1593 | pools). | |
1594 | ||
1595 | :param arguments: dict of key/value arguments to test | |
1596 | """ | |
1597 | return self._ceph_is_authorized(arguments) |