# ceph/src/pybind/mgr/telemetry/module.py (ceph quincy 17.2.1)
"""
Telemetry module for ceph-mgr

Collect statistics from the Ceph cluster and send them back to the Ceph
project when the user has opted in.
"""
7 import logging
8 import numbers
9 import enum
10 import errno
11 import hashlib
12 import json
13 import rbd
14 import requests
15 import uuid
16 import time
17 from datetime import datetime, timedelta
18 from prettytable import PrettyTable
19 from threading import Event, Lock
20 from collections import defaultdict
21 from typing import cast, Any, DefaultDict, Dict, List, Optional, Tuple, TypeVar, TYPE_CHECKING, Union
22
23 from mgr_module import CLICommand, CLIReadCommand, MgrModule, Option, OptionValue, ServiceInfoT
24
25
# All channel names a cluster can opt in to; mirrored by the channel_* options.
ALL_CHANNELS = ['basic', 'ident', 'crash', 'device', 'perf']

# License the user must explicitly acknowledge when opting in.
LICENSE = 'sharing-1-0'
LICENSE_NAME = 'Community Data License Agreement - Sharing - Version 1.0'
LICENSE_URL = 'https://cdla.io/sharing-1-0/'
# Module-level counter used to build throwaway fallback salts when no
# cluster salt is available (see anonymize_entity_name).
NO_SALT_CNT = 0

# Latest revision of the telemetry report. Bump this each time we make
# *any* change.
REVISION = 3
36
37 # History of revisions
38 # --------------------
39 #
40 # Version 1:
41 # Mimic and/or nautilus are lumped together here, since
42 # we didn't track revisions yet.
43 #
44 # Version 2:
45 # - added revision tracking, nagging, etc.
46 # - added config option changes
47 # - added channels
48 # - added explicit license acknowledgement to the opt-in process
49 #
50 # Version 3:
51 # - added device health metrics (i.e., SMART data, minus serial number)
52 # - remove crush_rule
53 # - added CephFS metadata (how many MDSs, fs features, how many data pools,
54 # how much metadata is cached, rfiles, rbytes, rsnapshots)
55 # - added more pool metadata (rep vs ec, cache tiering mode, ec profile)
56 # - added host count, and counts for hosts with each of (mon, osd, mds, mgr)
57 # - whether an OSD cluster network is in use
58 # - rbd pool and image count, and rbd mirror mode (pool-level)
59 # - rgw daemons, zones, zonegroups; which rgw frontends
60 # - crush map stats
61
class Collection(str, enum.Enum):
    """Named telemetry data collections a cluster can opt in to.

    Member names follow a '<channel>_<name>' convention; see
    MODULE_COLLECTION for each collection's channel, description and
    nag flag.
    """
    basic_base = 'basic_base'
    device_base = 'device_base'
    crash_base = 'crash_base'
    ident_base = 'ident_base'
    perf_perf = 'perf_perf'
    basic_mds_metadata = 'basic_mds_metadata'
    basic_pool_usage = 'basic_pool_usage'
    basic_usage_by_class = 'basic_usage_by_class'
    basic_rook_v01 = 'basic_rook_v01'
72
# Registry of all collections: the Collection member, a human-readable
# description, the channel that carries the collection, and whether the
# user should be nagged to opt in ("nag").
MODULE_COLLECTION: List[Dict[str, Any]] = [
    {
        "name": Collection.basic_base,
        "description": "Basic information about the cluster (capacity, number and type of daemons, version, etc.)",
        "channel": "basic",
        "nag": False
    },
    {
        "name": Collection.device_base,
        "description": "Information about device health metrics",
        "channel": "device",
        "nag": False
    },
    {
        "name": Collection.crash_base,
        "description": "Information about daemon crashes (daemon type and version, backtrace, etc.)",
        "channel": "crash",
        "nag": False
    },
    {
        "name": Collection.ident_base,
        "description": "User-provided identifying information about the cluster",
        "channel": "ident",
        "nag": False
    },
    {
        "name": Collection.perf_perf,
        "description": "Information about performance counters of the cluster",
        "channel": "perf",
        "nag": True
    },
    {
        "name": Collection.basic_mds_metadata,
        "description": "MDS metadata",
        "channel": "basic",
        "nag": False
    },
    {
        "name": Collection.basic_pool_usage,
        "description": "Default pool application and usage statistics",
        "channel": "basic",
        "nag": False
    },
    {
        "name": Collection.basic_usage_by_class,
        "description": "Default device class usage statistics",
        "channel": "basic",
        "nag": False
    },
    {
        "name": Collection.basic_rook_v01,
        "description": "Basic Rook deployment data",
        "channel": "basic",
        "nag": True
    },
]
129
# KV-store keys reported for Rook deployments, each tagged with the
# collection that gates its reporting (currently all basic_rook_v01).
ROOK_KEYS_BY_COLLECTION: List[Tuple[str, Collection]] = [
    # Note: a key cannot be both a node and a leaf, e.g.
    # "rook/a/b"
    # "rook/a/b/c"
    ("rook/version", Collection.basic_rook_v01),
    ("rook/kubernetes/version", Collection.basic_rook_v01),
    ("rook/csi/version", Collection.basic_rook_v01),
    ("rook/node/count/kubernetes-total", Collection.basic_rook_v01),
    ("rook/node/count/with-ceph-daemons", Collection.basic_rook_v01),
    ("rook/node/count/with-csi-rbd-plugin", Collection.basic_rook_v01),
    ("rook/node/count/with-csi-cephfs-plugin", Collection.basic_rook_v01),
    ("rook/node/count/with-csi-nfs-plugin", Collection.basic_rook_v01),
    ("rook/usage/storage-class/count/total", Collection.basic_rook_v01),
    ("rook/usage/storage-class/count/rbd", Collection.basic_rook_v01),
    ("rook/usage/storage-class/count/cephfs", Collection.basic_rook_v01),
    ("rook/usage/storage-class/count/nfs", Collection.basic_rook_v01),
    ("rook/usage/storage-class/count/bucket", Collection.basic_rook_v01),
    ("rook/cluster/storage/device-set/count/total", Collection.basic_rook_v01),
    ("rook/cluster/storage/device-set/count/portable", Collection.basic_rook_v01),
    ("rook/cluster/storage/device-set/count/non-portable", Collection.basic_rook_v01),
    ("rook/cluster/mon/count", Collection.basic_rook_v01),
    ("rook/cluster/mon/allow-multiple-per-node", Collection.basic_rook_v01),
    ("rook/cluster/mon/max-id", Collection.basic_rook_v01),
    ("rook/cluster/mon/pvc/enabled", Collection.basic_rook_v01),
    ("rook/cluster/mon/stretch/enabled", Collection.basic_rook_v01),
    ("rook/cluster/network/provider", Collection.basic_rook_v01),
    ("rook/cluster/external-mode", Collection.basic_rook_v01),
]
158
159 class Module(MgrModule):
    # Daemon metadata fields histogrammed by the gather_*_metadata() helpers.
    metadata_keys = [
        "arch",
        "ceph_version",
        "os",
        "cpu",
        "kernel_description",
        "kernel_version",
        "distro_description",
        "distro"
    ]
170
    # Config options for this module; config_update_module_option() mirrors
    # each one onto an attribute of the same name.
    MODULE_OPTIONS = [
        Option(name='url',
               type='str',
               default='https://telemetry.ceph.com/report'),
        Option(name='device_url',
               type='str',
               default='https://telemetry.ceph.com/device'),
        Option(name='enabled',
               type='bool',
               default=False),
        Option(name='last_opt_revision',
               type='int',
               default=1),
        Option(name='leaderboard',
               type='bool',
               default=False),
        Option(name='description',
               type='str',
               default=None),
        Option(name='contact',
               type='str',
               default=None),
        Option(name='organization',
               type='str',
               default=None),
        Option(name='proxy',
               type='str',
               default=None),
        Option(name='interval',
               type='int',
               default=24,
               min=8),
        Option(name='channel_basic',
               type='bool',
               default=True,
               desc='Share basic cluster information (size, version)'),
        Option(name='channel_ident',
               type='bool',
               default=False,
               desc='Share a user-provided description and/or contact email for the cluster'),
        Option(name='channel_crash',
               type='bool',
               default=True,
               desc='Share metadata about Ceph daemon crashes (version, stack straces, etc)'),
        Option(name='channel_device',
               type='bool',
               default=True,
               desc=('Share device health metrics '
                     '(e.g., SMART data, minus potentially identifying info like serial numbers)')),
        Option(name='channel_perf',
               type='bool',
               default=False,
               desc='Share various performance metrics of a cluster'),
    ]
225
226 @property
227 def config_keys(self) -> Dict[str, OptionValue]:
228 return dict((o['name'], o.get('default', None)) for o in self.MODULE_OPTIONS)
229
    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Initialize in-memory state; persisted values are filled in by load()."""
        super(Module, self).__init__(*args, **kwargs)
        self.event = Event()  # wakes the serve() thread on config changes
        self.run = False
        # None means "not loaded from the store yet"
        self.db_collection: Optional[List[str]] = None
        self.last_opted_in_ceph_version: Optional[int] = None
        self.last_opted_out_ceph_version: Optional[int] = None
        self.last_upload: Optional[int] = None
        self.last_report: Dict[str, Any] = dict()
        self.report_id: Optional[str] = None
        self.salt: Optional[str] = None
        self.get_report_lock = Lock()
        self.config_update_module_option()
        # for mypy which does not run the code
        if TYPE_CHECKING:
            self.url = ''
            self.device_url = ''
            self.enabled = False
            self.last_opt_revision = 0
            self.leaderboard = ''
            self.interval = 0
            self.proxy = ''
            self.channel_basic = True
            self.channel_ident = False
            self.channel_crash = True
            self.channel_device = True
            self.channel_perf = False
            self.db_collection = ['basic_base', 'device_base']
            self.last_opted_in_ceph_version = 17
            self.last_opted_out_ceph_version = 0
260
261 def config_update_module_option(self) -> None:
262 for opt in self.MODULE_OPTIONS:
263 setattr(self,
264 opt['name'],
265 self.get_module_option(opt['name']))
266 self.log.debug(' %s = %s', opt['name'], getattr(self, opt['name']))
267
268 def config_notify(self) -> None:
269 self.config_update_module_option()
270 # wake up serve() thread
271 self.event.set()
272
273 def load(self) -> None:
274 last_upload = self.get_store('last_upload', None)
275 if last_upload is None:
276 self.last_upload = None
277 else:
278 self.last_upload = int(last_upload)
279
280 report_id = self.get_store('report_id', None)
281 if report_id is None:
282 self.report_id = str(uuid.uuid4())
283 self.set_store('report_id', self.report_id)
284 else:
285 self.report_id = report_id
286
287 salt = self.get_store('salt', None)
288 if salt is None:
289 self.salt = str(uuid.uuid4())
290 self.set_store('salt', self.salt)
291 else:
292 self.salt = salt
293
294 self.init_collection()
295
296 last_opted_in_ceph_version = self.get_store('last_opted_in_ceph_version', None)
297 if last_opted_in_ceph_version is None:
298 self.last_opted_in_ceph_version = None
299 else:
300 self.last_opted_in_ceph_version = int(last_opted_in_ceph_version)
301
302 last_opted_out_ceph_version = self.get_store('last_opted_out_ceph_version', None)
303 if last_opted_out_ceph_version is None:
304 self.last_opted_out_ceph_version = None
305 else:
306 self.last_opted_out_ceph_version = int(last_opted_out_ceph_version)
307
308 def gather_osd_metadata(self,
309 osd_map: Dict[str, List[Dict[str, int]]]) -> Dict[str, Dict[str, int]]:
310 keys = ["osd_objectstore", "rotational"]
311 keys += self.metadata_keys
312
313 metadata: Dict[str, Dict[str, int]] = dict()
314 for key in keys:
315 metadata[key] = defaultdict(int)
316
317 for osd in osd_map['osds']:
318 res = self.get_metadata('osd', str(osd['osd']))
319 if res is None:
320 self.log.debug('Could not get metadata for osd.%s' % str(osd['osd']))
321 continue
322 for k, v in res.items():
323 if k not in keys:
324 continue
325
326 metadata[k][v] += 1
327
328 return metadata
329
330 def gather_mon_metadata(self,
331 mon_map: Dict[str, List[Dict[str, str]]]) -> Dict[str, Dict[str, int]]:
332 keys = list()
333 keys += self.metadata_keys
334
335 metadata: Dict[str, Dict[str, int]] = dict()
336 for key in keys:
337 metadata[key] = defaultdict(int)
338
339 for mon in mon_map['mons']:
340 res = self.get_metadata('mon', mon['name'])
341 if res is None:
342 self.log.debug('Could not get metadata for mon.%s' % (mon['name']))
343 continue
344 for k, v in res.items():
345 if k not in keys:
346 continue
347
348 metadata[k][v] += 1
349
350 return metadata
351
352 def gather_mds_metadata(self) -> Dict[str, Dict[str, int]]:
353 metadata: Dict[str, Dict[str, int]] = dict()
354
355 res = self.get('mds_metadata') # metadata of *all* mds daemons
356 if res is None or not res:
357 self.log.debug('Could not get metadata for mds daemons')
358 return metadata
359
360 keys = list()
361 keys += self.metadata_keys
362
363 for key in keys:
364 metadata[key] = defaultdict(int)
365
366 for mds in res.values():
367 for k, v in mds.items():
368 if k not in keys:
369 continue
370
371 metadata[k][v] += 1
372
373 return metadata
374
    def gather_crush_info(self) -> Dict[str, Union[int,
                                                   bool,
                                                   List[int],
                                                   Dict[str, int],
                                                   Dict[int, int]]]:
        """Summarize the CRUSH map: device/type/bucket/rule counts and histograms."""
        osdmap = self.get_osdmap()
        crush_raw = osdmap.get_crush()
        crush = crush_raw.dump()

        BucketKeyT = TypeVar('BucketKeyT', int, str)

        def inc(d: Dict[BucketKeyT, int], k: BucketKeyT) -> None:
            # defaultdict-style increment without changing the dict type
            if k in d:
                d[k] += 1
            else:
                d[k] = 1

        device_classes: Dict[str, int] = {}
        for dev in crush['devices']:
            inc(device_classes, dev.get('class', ''))

        bucket_algs: Dict[str, int] = {}
        # NOTE(review): keys here come from bucket['type_id'], which looks
        # like an int, so Dict[int, int] is presumably the right annotation
        # (the return annotation's Union already allows it) — TODO confirm.
        bucket_types: Dict[str, int] = {}
        bucket_sizes: Dict[int, int] = {}
        for bucket in crush['buckets']:
            if '~' in bucket['name']:  # ignore shadow buckets
                continue
            inc(bucket_algs, bucket['alg'])
            inc(bucket_types, bucket['type_id'])
            inc(bucket_sizes, len(bucket['items']))

        return {
            'num_devices': len(crush['devices']),
            'num_types': len(crush['types']),
            'num_buckets': len(crush['buckets']),
            'num_rules': len(crush['rules']),
            # only per-class counts are reported, not the class names
            'device_classes': list(device_classes.values()),
            'tunables': crush['tunables'],
            'compat_weight_set': '-1' in crush['choose_args'],
            'num_weight_sets': len(crush['choose_args']),
            'bucket_algs': bucket_algs,
            'bucket_sizes': bucket_sizes,
            'bucket_types': bucket_types,
        }
419
420 def gather_configs(self) -> Dict[str, List[str]]:
421 # cluster config options
422 cluster = set()
423 r, outb, outs = self.mon_command({
424 'prefix': 'config dump',
425 'format': 'json'
426 })
427 if r != 0:
428 return {}
429 try:
430 dump = json.loads(outb)
431 except json.decoder.JSONDecodeError:
432 return {}
433 for opt in dump:
434 name = opt.get('name')
435 if name:
436 cluster.add(name)
437 # daemon-reported options (which may include ceph.conf)
438 active = set()
439 ls = self.get("modified_config_options")
440 for opt in ls.get('options', {}):
441 active.add(opt)
442 return {
443 'cluster_changed': sorted(list(cluster)),
444 'active_changed': sorted(list(active)),
445 }
446
447 def anonymize_entity_name(self, entity_name:str) -> str:
448 if '.' not in entity_name:
449 self.log.debug(f"Cannot split entity name ({entity_name}), no '.' is found")
450 return entity_name
451
452 (etype, eid) = entity_name.split('.', 1)
453 m = hashlib.sha1()
454 salt = ''
455 if self.salt is not None:
456 salt = self.salt
457 # avoid asserting that salt exists
458 if not self.salt:
459 # do not set self.salt to a temp value
460 salt = f"no_salt_found_{NO_SALT_CNT}"
461 NO_SALT_CNT += 1
462 self.log.debug(f"No salt found, created a temp one: {salt}")
463 m.update(salt.encode('utf-8'))
464 m.update(eid.encode('utf-8'))
465 m.update(salt.encode('utf-8'))
466
467 return etype + '.' + m.hexdigest()
468
    def get_heap_stats(self) -> Dict[str, dict]:
        """Collect tcmalloc heap stats from every OSD via the 'heap stats' command.

        Returns {'osd.<id>': {category: value}}; OSDs that fail the command
        or whose output does not contain tcmalloc stats are skipped.
        """
        # Initialize result dict
        result: Dict[str, dict] = defaultdict(lambda: defaultdict(int))

        # Get list of osd ids from the metadata
        osd_metadata = self.get('osd_metadata')

        # Grab output from the "osd.x heap stats" command
        for osd_id in osd_metadata:
            cmd_dict = {
                'prefix': 'heap',
                'heapcmd': 'stats',
                'id': str(osd_id),
            }
            r, outb, outs = self.osd_command(cmd_dict)
            if r != 0:
                self.log.debug("Invalid command dictionary.")
                continue
            else:
                if 'tcmalloc heap stats' in outs:
                    # pull every integer token out of the textual report, in order
                    values = [int(i) for i in outs.split() if i.isdigit()]
                    # `categories` must be ordered this way for the correct output to be parsed
                    categories = ['use_by_application',
                                  'page_heap_freelist',
                                  'central_cache_freelist',
                                  'transfer_cache_freelist',
                                  'thread_cache_freelists',
                                  'malloc_metadata',
                                  'actual_memory_used',
                                  'released_to_os',
                                  'virtual_address_space_used',
                                  'spans_in_use',
                                  'thread_heaps_in_use',
                                  'tcmalloc_page_size']
                    if len(values) != len(categories):
                        self.log.debug('Received unexpected output from osd.{}; number of values should match the number of expected categories:\n' \
                                       'values: len={} {} ~ categories: len={} {} ~ outs: {}'.format(osd_id, len(values), values, len(categories), categories, outs))
                        continue
                    osd = 'osd.' + str(osd_id)
                    result[osd] = dict(zip(categories, values))
                else:
                    self.log.debug('No heap stats available on osd.{}: {}'.format(osd_id, outs))
                    continue

        return result
514
515 def get_mempool(self, mode: str = 'separated') -> Dict[str, dict]:
516 # Initialize result dict
517 result: Dict[str, dict] = defaultdict(lambda: defaultdict(int))
518
519 # Get list of osd ids from the metadata
520 osd_metadata = self.get('osd_metadata')
521
522 # Grab output from the "osd.x dump_mempools" command
523 for osd_id in osd_metadata:
524 cmd_dict = {
525 'prefix': 'dump_mempools',
526 'id': str(osd_id),
527 'format': 'json'
528 }
529 r, outb, outs = self.osd_command(cmd_dict)
530 if r != 0:
531 self.log.debug("Invalid command dictionary.")
532 continue
533 else:
534 try:
535 # This is where the mempool will land.
536 dump = json.loads(outb)
537 if mode == 'separated':
538 result["osd." + str(osd_id)] = dump['mempool']['by_pool']
539 elif mode == 'aggregated':
540 for mem_type in dump['mempool']['by_pool']:
541 result[mem_type]['bytes'] += dump['mempool']['by_pool'][mem_type]['bytes']
542 result[mem_type]['items'] += dump['mempool']['by_pool'][mem_type]['items']
543 else:
544 self.log.debug("Incorrect mode specified in get_mempool")
545 except (json.decoder.JSONDecodeError, KeyError) as e:
546 self.log.debug("Error caught on osd.{}: {}".format(osd_id, e))
547 continue
548
549 return result
550
    def get_osd_histograms(self, mode: str = 'separated') -> List[Dict[str, dict]]:
        """Collect 2D perf histograms from every OSD via 'perf histogram dump'.

        Histograms are grouped by their axis configuration: the stringified
        axes description is used as a grouping key while building the result
        and dropped before returning (only the values are kept).
        mode='separated' records per-OSD value grids; mode='aggregated' sums
        the grids element-wise across OSDs. Returns [] on an invalid mode.
        """
        # Initialize result dict
        result: Dict[str, dict] = defaultdict(lambda: defaultdict(
                                              lambda: defaultdict(
                                              lambda: defaultdict(
                                              lambda: defaultdict(
                                              lambda: defaultdict(int))))))

        # Get list of osd ids from the metadata
        osd_metadata = self.get('osd_metadata')

        # Grab output from the "osd.x perf histogram dump" command
        for osd_id in osd_metadata:
            cmd_dict = {
                'prefix': 'perf histogram dump',
                'id': str(osd_id),
                'format': 'json'
            }
            r, outb, outs = self.osd_command(cmd_dict)
            # Check for invalid calls
            if r != 0:
                self.log.debug("Invalid command dictionary.")
                continue
            else:
                try:
                    # This is where the histograms will land if there are any.
                    dump = json.loads(outb)

                    for histogram in dump['osd']:
                        # Log axis information. There are two axes, each represented
                        # as a dictionary. Both dictionaries are contained inside a
                        # list called 'axes'.
                        axes = []
                        for axis in dump['osd'][histogram]['axes']:

                            # This is the dict that contains information for an individual
                            # axis. It will be appended to the 'axes' list at the end.
                            axis_dict: Dict[str, Any] = defaultdict()

                            # Collecting information for buckets, min, name, etc.
                            axis_dict['buckets'] = axis['buckets']
                            axis_dict['min'] = axis['min']
                            axis_dict['name'] = axis['name']
                            axis_dict['quant_size'] = axis['quant_size']
                            axis_dict['scale_type'] = axis['scale_type']

                            # Collecting ranges; placing them in lists to
                            # improve readability later on.
                            ranges = []
                            for _range in axis['ranges']:
                                _max, _min = None, None
                                if 'max' in _range:
                                    _max = _range['max']
                                if 'min' in _range:
                                    _min = _range['min']
                                ranges.append([_min, _max])
                            axis_dict['ranges'] = ranges

                            # Now that 'axis_dict' contains all the appropriate
                            # information for the current axis, append it to the 'axes' list.
                            # There will end up being two axes in the 'axes' list, since the
                            # histograms are 2D.
                            axes.append(axis_dict)

                        # Add the 'axes' list, containing both axes, to result.
                        # At this point, you will see that the name of the key is the string
                        # form of our axes list (str(axes)). This is there so that histograms
                        # with different axis configs will not be combined.
                        # These key names are later dropped when only the values are returned.
                        result[str(axes)][histogram]['axes'] = axes

                        # Collect current values and make sure they are in
                        # integer form.
                        values = []
                        for value_list in dump['osd'][histogram]['values']:
                            values.append([int(v) for v in value_list])

                        if mode == 'separated':
                            if 'osds' not in result[str(axes)][histogram]:
                                result[str(axes)][histogram]['osds'] = []
                            result[str(axes)][histogram]['osds'].append({'osd_id': int(osd_id), 'values': values})

                        elif mode == 'aggregated':
                            # Aggregate values. If 'values' have already been initialized,
                            # we can safely add.
                            if 'values' in result[str(axes)][histogram]:
                                for i in range(0, len(values)):
                                    for j in range(0, len(values[i])):
                                        values[i][j] += result[str(axes)][histogram]['values'][i][j]

                            # Add the values to result.
                            result[str(axes)][histogram]['values'] = values

                            # Update num_combined_osds
                            if 'num_combined_osds' not in result[str(axes)][histogram]:
                                result[str(axes)][histogram]['num_combined_osds'] = 1
                            else:
                                result[str(axes)][histogram]['num_combined_osds'] += 1
                        else:
                            self.log.error('Incorrect mode specified in get_osd_histograms: {}'.format(mode))
                            return list()

                # Sometimes, json errors occur if you give it an empty string.
                # I am also putting in a catch for a KeyError since it could
                # happen where the code is assuming that a key exists in the
                # schema when it doesn't. In either case, we'll handle that
                # by continuing and collecting what we can from other osds.
                except (json.decoder.JSONDecodeError, KeyError) as e:
                    self.log.debug("Error caught on osd.{}: {}".format(osd_id, e))
                    continue

        return list(result.values())
663
664 def get_io_rate(self) -> dict:
665 return self.get('io_rate')
666
667 def get_stats_per_pool(self) -> dict:
668 result = self.get('pg_dump')['pool_stats']
669
670 # collect application metadata from osd_map
671 osd_map = self.get('osd_map')
672 application_metadata = {pool['pool']: pool['application_metadata'] for pool in osd_map['pools']}
673
674 # add application to each pool from pg_dump
675 for pool in result:
676 pool['application'] = []
677 # Only include default applications
678 for application in application_metadata[pool['poolid']]:
679 if application in ['cephfs', 'mgr', 'rbd', 'rgw']:
680 pool['application'].append(application)
681
682 return result
683
684 def get_stats_per_pg(self) -> dict:
685 return self.get('pg_dump')['pg_stats']
686
687 def get_rocksdb_stats(self) -> Dict[str, str]:
688 # Initalizers
689 result: Dict[str, str] = defaultdict()
690 version = self.get_rocksdb_version()
691
692 # Update result
693 result['version'] = version
694
695 return result
696
697 def gather_crashinfo(self) -> List[Dict[str, str]]:
698 crashlist: List[Dict[str, str]] = list()
699 errno, crashids, err = self.remote('crash', 'ls')
700 if errno:
701 return crashlist
702 for crashid in crashids.split():
703 errno, crashinfo, err = self.remote('crash', 'do_info', crashid)
704 if errno:
705 continue
706 c = json.loads(crashinfo)
707
708 # redact hostname
709 del c['utsname_hostname']
710
711 # entity_name might have more than one '.', beware
712 (etype, eid) = c.get('entity_name', '').split('.', 1)
713 m = hashlib.sha1()
714 assert self.salt
715 m.update(self.salt.encode('utf-8'))
716 m.update(eid.encode('utf-8'))
717 m.update(self.salt.encode('utf-8'))
718 c['entity_name'] = etype + '.' + m.hexdigest()
719
720 # redact final line of python tracebacks, as the exception
721 # payload may contain identifying information
722 if 'mgr_module' in c and 'backtrace' in c:
723 # backtrace might be empty
724 if len(c['backtrace']) > 0:
725 c['backtrace'][-1] = '<redacted>'
726
727 crashlist.append(c)
728 return crashlist
729
    def gather_perf_counters(self, mode: str = 'separated') -> Dict[str, dict]:
        """Collect perf counters of all daemons via get_all_perf_counters().

        mode='separated' keys the result by daemon name (anonymized for all
        daemon types except osds); mode='aggregated' sums counters per
        daemon type. Returns {} on an invalid mode.
        """
        # Extract perf counter data with get_all_perf_counters(), a method
        # from mgr/mgr_module.py. This method returns a nested dictionary that
        # looks a lot like perf schema, except with some additional fields.
        #
        # Example of output, a snapshot of a mon daemon:
        #   "mon.b": {
        #       "bluestore.kv_flush_lat": {
        #           "count": 2431,
        #           "description": "Average kv_thread flush latency",
        #           "nick": "fl_l",
        #           "priority": 8,
        #           "type": 5,
        #           "units": 1,
        #           "value": 88814109
        #       },
        #   },
        all_perf_counters = self.get_all_perf_counters()

        # Initialize 'result' dict
        result: Dict[str, dict] = defaultdict(lambda: defaultdict(
            lambda: defaultdict(lambda: defaultdict(int))))

        # 'separated' mode
        anonymized_daemon_dict = {}

        for daemon, all_perf_counters_by_daemon in all_perf_counters.items():
            daemon_type = daemon[0:3]  # i.e. 'mds', 'osd', 'rgw'

            if mode == 'separated':
                # anonymize individual daemon names except osds
                if (daemon_type != 'osd'):
                    anonymized_daemon = self.anonymize_entity_name(daemon)
                    anonymized_daemon_dict[anonymized_daemon] = daemon
                    daemon = anonymized_daemon

            # Calculate num combined daemon types if in aggregated mode
            if mode == 'aggregated':
                if 'num_combined_daemons' not in result[daemon_type]:
                    result[daemon_type]['num_combined_daemons'] = 1
                else:
                    result[daemon_type]['num_combined_daemons'] += 1

            for collection in all_perf_counters_by_daemon:
                # Split the collection to avoid redundancy in final report; i.e.:
                #   bluestore.kv_flush_lat, bluestore.kv_final_lat -->
                #   bluestore: kv_flush_lat, kv_final_lat
                col_0, col_1 = collection.split('.')

                # Debug log for empty keys. This initially was a problem for prioritycache
                # perf counters, where the col_0 was empty for certain mon counters:
                #
                # "mon.a": {                             instead of    "mon.a": {
                #      "": {                                               "prioritycache": {
                #        "cache_bytes": {...},                                   "cache_bytes": {...},
                #
                # This log is here to detect any future instances of a similar issue.
                if (daemon == "") or (col_0 == "") or (col_1 == ""):
                    self.log.debug("Instance of an empty key: {}{}".format(daemon, collection))

                if mode == 'separated':
                    # Add value to result
                    result[daemon][col_0][col_1]['value'] = \
                        all_perf_counters_by_daemon[collection]['value']

                    # Check that 'count' exists, as not all counters have a count field.
                    if 'count' in all_perf_counters_by_daemon[collection]:
                        result[daemon][col_0][col_1]['count'] = \
                            all_perf_counters_by_daemon[collection]['count']
                elif mode == 'aggregated':
                    # Not every rgw daemon has the same schema. Specifically, each rgw daemon
                    # has a uniquely-named collection that starts off identically (i.e.
                    # "objecter-0x...") then diverges (i.e. "...55f4e778e140.op_rmw").
                    # This bit of code combines these unique counters all under one rgw instance.
                    # Without this check, the schema would remain separeted out in the final report.
                    if col_0[0:11] == "objecter-0x":
                        col_0 = "objecter-0x"

                    # Check that the value can be incremented. In some cases,
                    # the files are of type 'pair' (real-integer-pair, integer-integer pair).
                    # In those cases, the value is a dictionary, and not a number.
                    #   i.e. throttle-msgr_dispatch_throttler-hbserver["wait"]
                    if isinstance(all_perf_counters_by_daemon[collection]['value'], numbers.Number):
                        result[daemon_type][col_0][col_1]['value'] += \
                            all_perf_counters_by_daemon[collection]['value']

                    # Check that 'count' exists, as not all counters have a count field.
                    if 'count' in all_perf_counters_by_daemon[collection]:
                        result[daemon_type][col_0][col_1]['count'] += \
                            all_perf_counters_by_daemon[collection]['count']
                else:
                    self.log.error('Incorrect mode specified in gather_perf_counters: {}'.format(mode))
                    return {}

        if mode == 'separated':
            # for debugging purposes only, this data is never reported
            self.log.debug('Anonymized daemon mapping for telemetry perf_counters (anonymized: real): {}'.format(anonymized_daemon_dict))

        return result
829
830 def get_active_channels(self) -> List[str]:
831 r = []
832 if self.channel_basic:
833 r.append('basic')
834 if self.channel_crash:
835 r.append('crash')
836 if self.channel_device:
837 r.append('device')
838 if self.channel_ident:
839 r.append('ident')
840 if self.channel_perf:
841 r.append('perf')
842 return r
843
    def gather_device_report(self) -> Dict[str, Dict[str, Dict[str, str]]]:
        """Collect recent device health metrics, anonymized.

        Host and device ids are replaced by random UUIDs persisted in the
        module store (so they stay stable across reports), and the device
        serial number is scrubbed from the SMART payload. Returns
        {anon-host-id: {anon-devid: {timestamp: record}}}, or {} when the
        devicehealth module or device list is unavailable.
        """
        try:
            time_format = self.remote('devicehealth', 'get_time_format')
        except Exception as e:
            self.log.debug('Unable to format time: {}'.format(e))
            return {}
        # only look back twice the reporting interval
        cutoff = datetime.utcnow() - timedelta(hours=self.interval * 2)
        min_sample = cutoff.strftime(time_format)

        devices = self.get('devices')['devices']
        if not devices:
            self.log.debug('Unable to get device info from the mgr.')
            return {}

        # anon-host-id -> anon-devid -> { timestamp -> record }
        res: Dict[str, Dict[str, Dict[str, str]]] = {}
        for d in devices:
            devid = d['devid']
            try:
                # this is a map of stamp -> {device info}
                m = self.remote('devicehealth', 'get_recent_device_metrics',
                                devid, min_sample)
            except Exception as e:
                self.log.debug('Unable to get recent metrics from device with id "{}": {}'.format(devid, e))
                continue

            # anonymize host id
            try:
                host = d['location'][0]['host']
            except (KeyError, IndexError) as e:
                self.log.debug('Unable to get host from device with id "{}": {}'.format(devid, e))
                continue
            anon_host = self.get_store('host-id/%s' % host)
            if not anon_host:
                anon_host = str(uuid.uuid1())
                self.set_store('host-id/%s' % host, anon_host)
            serial = None
            for dev, rep in m.items():
                rep['host_id'] = anon_host
                if serial is None and 'serial_number' in rep:
                    serial = rep['serial_number']

            # anonymize device id
            anon_devid = self.get_store('devid-id/%s' % devid)
            if not anon_devid:
                # ideally devid is 'vendor_model_serial',
                # but can also be 'model_serial', 'serial'
                if '_' in devid:
                    anon_devid = f"{devid.rsplit('_', 1)[0]}_{uuid.uuid1()}"
                else:
                    anon_devid = str(uuid.uuid1())
                self.set_store('devid-id/%s' % devid, anon_devid)
            self.log.info('devid %s / %s, host %s / %s' % (devid, anon_devid,
                                                           host, anon_host))

            # anonymize the smartctl report itself
            if serial:
                m_str = json.dumps(m)
                m = json.loads(m_str.replace(serial, 'deleted'))

            if anon_host not in res:
                res[anon_host] = {}
            res[anon_host][anon_devid] = m
        return res
908
909 def get_latest(self, daemon_type: str, daemon_name: str, stat: str) -> int:
910 data = self.get_counter(daemon_type, daemon_name, stat)[stat]
911 if data:
912 return data[-1][1]
913 else:
914 return 0
915
    def compile_report(self, channels: Optional[List[str]] = None) -> Dict[str, Any]:
        """
        Assemble the telemetry report dict for the given channels.

        :param channels: channel names to include; defaults to the currently
                         active channels.
        :returns: the report dict. The 'device' channel is intentionally
                  excluded here -- device data is sent to a different endpoint
                  (see the NOTE at the bottom).
        """
        if not channels:
            channels = self.get_active_channels()
        # header fields common to every report, regardless of channels
        report = {
            'leaderboard': self.leaderboard,
            'report_version': 1,
            'report_timestamp': datetime.utcnow().isoformat(),
            'report_id': self.report_id,
            'channels': channels,
            'channels_available': ALL_CHANNELS,
            'license': LICENSE,
            'collections_available': [c['name'].name for c in MODULE_COLLECTION],
            'collections_opted_in': [c['name'].name for c in MODULE_COLLECTION if self.is_enabled_collection(c['name'])],
        }

        # ident channel: free-form contact info the admin chose to share
        if 'ident' in channels:
            for option in ['description', 'contact', 'organization']:
                report[option] = getattr(self, option)

        # basic channel: cluster composition (mons, pools, osds, fs, hosts,
        # services) and usage totals
        if 'basic' in channels:
            mon_map = self.get('mon_map')
            osd_map = self.get('osd_map')
            service_map = self.get('service_map')
            fs_map = self.get('fs_map')
            df = self.get('df')
            # index df pool stats by pool id for the per-pool loop below
            df_pools = {pool['id']: pool for pool in df['pools']}

            report['created'] = mon_map['created']

            # mons: tally address protocol (v1/v2) and IP family per
            # addrvec entry (a mon may contribute to more than one counter)
            v1_mons = 0
            v2_mons = 0
            ipv4_mons = 0
            ipv6_mons = 0
            for mon in mon_map['mons']:
                for a in mon['public_addrs']['addrvec']:
                    if a['type'] == 'v2':
                        v2_mons += 1
                    elif a['type'] == 'v1':
                        v1_mons += 1
                    # IPv6 addresses are bracketed, e.g. "[::1]:6789"
                    if a['addr'].startswith('['):
                        ipv6_mons += 1
                    else:
                        ipv4_mons += 1
            report['mon'] = {
                'count': len(mon_map['mons']),
                'features': mon_map['features'],
                'min_mon_release': mon_map['min_mon_release'],
                'v1_addr_mons': v1_mons,
                'v2_addr_mons': v2_mons,
                'ipv4_addr_mons': ipv4_mons,
                'ipv6_addr_mons': ipv6_mons,
            }

            report['config'] = self.gather_configs()

            # pools

            rbd_num_pools = 0
            rbd_num_images_by_pool = []
            rbd_mirroring_by_pool = []
            num_pg = 0
            report['pools'] = list()
            for pool in osd_map['pools']:
                num_pg += pool['pg_num']
                ec_profile = {}
                if pool['erasure_code_profile']:
                    orig = osd_map['erasure_code_profiles'].get(
                        pool['erasure_code_profile'], {})
                    # only report a whitelisted subset of EC profile keys
                    ec_profile = {
                        k: orig[k] for k in orig.keys()
                        if k in ['k', 'm', 'plugin', 'technique',
                                 'crush-failure-domain', 'l']
                    }
                pool_data = {
                    'pool': pool['pool'],
                    'pg_num': pool['pg_num'],
                    'pgp_num': pool['pg_placement_num'],
                    'size': pool['size'],
                    'min_size': pool['min_size'],
                    'pg_autoscale_mode': pool['pg_autoscale_mode'],
                    'target_max_bytes': pool['target_max_bytes'],
                    'target_max_objects': pool['target_max_objects'],
                    # osd_map pool type codes: 1=replicated, 3=erasure
                    'type': ['', 'replicated', '', 'erasure'][pool['type']],
                    'erasure_code_profile': ec_profile,
                    'cache_mode': pool['cache_mode'],
                }

                # basic_pool_usage collection
                if self.is_enabled_collection(Collection.basic_pool_usage):
                    pool_data['application'] = []
                    for application in pool['application_metadata']:
                        # Only include default applications
                        if application in ['cephfs', 'mgr', 'rbd', 'rgw']:
                            pool_data['application'].append(application)
                    pool_stats = df_pools[pool['pool']]['stats']
                    pool_data['stats'] = {  # filter out kb_used
                        'avail_raw': pool_stats['avail_raw'],
                        'bytes_used': pool_stats['bytes_used'],
                        'compress_bytes_used': pool_stats['compress_bytes_used'],
                        'compress_under_bytes': pool_stats['compress_under_bytes'],
                        'data_bytes_used': pool_stats['data_bytes_used'],
                        'dirty': pool_stats['dirty'],
                        'max_avail': pool_stats['max_avail'],
                        'objects': pool_stats['objects'],
                        'omap_bytes_used': pool_stats['omap_bytes_used'],
                        'percent_used': pool_stats['percent_used'],
                        'quota_bytes': pool_stats['quota_bytes'],
                        'quota_objects': pool_stats['quota_objects'],
                        'rd': pool_stats['rd'],
                        'rd_bytes': pool_stats['rd_bytes'],
                        'stored': pool_stats['stored'],
                        'stored_data': pool_stats['stored_data'],
                        'stored_omap': pool_stats['stored_omap'],
                        'stored_raw': pool_stats['stored_raw'],
                        'wr': pool_stats['wr'],
                        'wr_bytes': pool_stats['wr_bytes']
                    }

                cast(List[Dict[str, Any]], report['pools']).append(pool_data)
                # per-pool RBD image counts and mirroring state
                if 'rbd' in pool['application_metadata']:
                    rbd_num_pools += 1
                    ioctx = self.rados.open_ioctx(pool['pool_name'])
                    rbd_num_images_by_pool.append(
                        sum(1 for _ in rbd.RBD().list2(ioctx)))
                    rbd_mirroring_by_pool.append(
                        rbd.RBD().mirror_mode_get(ioctx) != rbd.RBD_MIRROR_MODE_DISABLED)
            report['rbd'] = {
                'num_pools': rbd_num_pools,
                'num_images_by_pool': rbd_num_images_by_pool,
                'mirroring_by_pool': rbd_mirroring_by_pool}

            # osds: a separate cluster network is inferred when any up OSD
            # has differing public and cluster front IPs
            cluster_network = False
            for osd in osd_map['osds']:
                if osd['up'] and not cluster_network:
                    front_ip = osd['public_addrs']['addrvec'][0]['addr'].split(':')[0]
                    back_ip = osd['cluster_addrs']['addrvec'][0]['addr'].split(':')[0]
                    if front_ip != back_ip:
                        cluster_network = True
            report['osd'] = {
                'count': len(osd_map['osds']),
                'require_osd_release': osd_map['require_osd_release'],
                'require_min_compat_client': osd_map['require_min_compat_client'],
                'cluster_network': cluster_network,
            }

            # crush
            report['crush'] = self.gather_crush_info()

            # cephfs
            report['fs'] = {
                'count': len(fs_map['filesystems']),
                'feature_flags': fs_map['feature_flags'],
                'num_standby_mds': len(fs_map['standbys']),
                'filesystems': [],
            }
            # standbys count toward the MDS total
            num_mds = len(fs_map['standbys'])
            for fsm in fs_map['filesystems']:
                fs = fsm['mdsmap']
                num_sessions = 0
                cached_ino = 0
                cached_dn = 0
                cached_cap = 0
                subtrees = 0
                rfiles = 0
                rbytes = 0
                rsnaps = 0
                # aggregate perf counters across this filesystem's MDS daemons
                for gid, mds in fs['info'].items():
                    num_sessions += self.get_latest('mds', mds['name'],
                                                    'mds_sessions.session_count')
                    cached_ino += self.get_latest('mds', mds['name'],
                                                  'mds_mem.ino')
                    cached_dn += self.get_latest('mds', mds['name'],
                                                 'mds_mem.dn')
                    cached_cap += self.get_latest('mds', mds['name'],
                                                  'mds_mem.cap')
                    subtrees += self.get_latest('mds', mds['name'],
                                                'mds.subtrees')
                    # root recursive stats are only meaningful on rank 0
                    if mds['rank'] == 0:
                        rfiles = self.get_latest('mds', mds['name'],
                                                 'mds.root_rfiles')
                        rbytes = self.get_latest('mds', mds['name'],
                                                 'mds.root_rbytes')
                        rsnaps = self.get_latest('mds', mds['name'],
                                                 'mds.root_rsnaps')
                report['fs']['filesystems'].append({  # type: ignore
                    'max_mds': fs['max_mds'],
                    'ever_allowed_features': fs['ever_allowed_features'],
                    'explicitly_allowed_features': fs['explicitly_allowed_features'],
                    'num_in': len(fs['in']),
                    'num_up': len(fs['up']),
                    'num_standby_replay': len(
                        [mds for gid, mds in fs['info'].items()
                         if mds['state'] == 'up:standby-replay']),
                    'num_mds': len(fs['info']),
                    'num_sessions': num_sessions,
                    'cached_inos': cached_ino,
                    'cached_dns': cached_dn,
                    'cached_caps': cached_cap,
                    'cached_subtrees': subtrees,
                    'balancer_enabled': len(fs['balancer']) > 0,
                    'num_data_pools': len(fs['data_pools']),
                    'standby_count_wanted': fs['standby_count_wanted'],
                    # only year-month ("YYYY-MM") of the creation timestamp
                    'approx_ctime': fs['created'][0:7],
                    'files': rfiles,
                    'bytes': rbytes,
                    'snaps': rsnaps,
                })
                num_mds += len(fs['info'])
            report['fs']['total_num_mds'] = num_mds  # type: ignore

            # daemons
            report['metadata'] = dict(osd=self.gather_osd_metadata(osd_map),
                                      mon=self.gather_mon_metadata(mon_map))

            if self.is_enabled_collection(Collection.basic_mds_metadata):
                report['metadata']['mds'] = self.gather_mds_metadata()  # type: ignore

            # host counts
            servers = self.list_servers()
            self.log.debug('servers %s' % servers)
            hosts = {
                'num': len([h for h in servers if h['hostname']]),
            }
            # how many hosts run at least one daemon of each core type
            for t in ['mon', 'mds', 'osd', 'mgr']:
                nr_services = sum(1 for host in servers if
                                  any(service for service in cast(List[ServiceInfoT],
                                                                  host['services'])
                                      if service['type'] == t))
                hosts['num_with_' + t] = nr_services
            report['hosts'] = hosts

            report['usage'] = {
                'pools': len(df['pools']),
                'pg_num': num_pg,
                'total_used_bytes': df['stats']['total_used_bytes'],
                'total_bytes': df['stats']['total_bytes'],
                'total_avail_bytes': df['stats']['total_avail_bytes']
            }
            # basic_usage_by_class collection
            if self.is_enabled_collection(Collection.basic_usage_by_class):
                report['usage']['stats_by_class'] = {}  # type: ignore
                # only the well-known device classes; custom classes are
                # omitted to avoid leaking user-chosen names
                for device_class in df['stats_by_class']:
                    if device_class in ['hdd', 'ssd', 'nvme']:
                        report['usage']['stats_by_class'][device_class] = df['stats_by_class'][device_class]  # type: ignore

            services: DefaultDict[str, int] = defaultdict(int)
            for key, value in service_map['services'].items():
                services[key] += 1
                # rgw gets extra detail: zone/zonegroup/frontend summary
                if key == 'rgw':
                    rgw = {}
                    zones = set()
                    zonegroups = set()
                    frontends = set()
                    count = 0
                    d = value.get('daemons', dict())
                    for k, v in d.items():
                        if k == 'summary' and v:
                            rgw[k] = v
                        elif isinstance(v, dict) and 'metadata' in v:
                            count += 1
                            zones.add(v['metadata']['zone_id'])
                            zonegroups.add(v['metadata']['zonegroup_id'])
                            frontends.add(v['metadata']['frontend_type#0'])

                            # we could actually iterate over all the keys of
                            # the dict and check for how many frontends there
                            # are, but it is unlikely that one would be running
                            # more than 2 supported ones
                            f2 = v['metadata'].get('frontend_type#1', None)
                            if f2:
                                frontends.add(f2)

                    rgw['count'] = count
                    rgw['zones'] = len(zones)
                    rgw['zonegroups'] = len(zonegroups)
                    rgw['frontends'] = list(frontends)  # sets aren't json-serializable
                    report['rgw'] = rgw
            report['services'] = services

            # balancer module may not be loadable; report inactive in that case
            try:
                report['balancer'] = self.remote('balancer', 'gather_telemetry')
            except ImportError:
                report['balancer'] = {
                    'active': False
                }

            # Rook
            self.get_rook_data(report)

        if 'crash' in channels:
            report['crashes'] = self.gather_crashinfo()

        if 'perf' in channels:
            if self.is_enabled_collection(Collection.perf_perf):
                report['perf_counters'] = self.gather_perf_counters('separated')
                report['stats_per_pool'] = self.get_stats_per_pool()
                report['stats_per_pg'] = self.get_stats_per_pg()
                report['io_rate'] = self.get_io_rate()
                report['osd_perf_histograms'] = self.get_osd_histograms('separated')
                report['mempool'] = self.get_mempool('separated')
                report['heap_stats'] = self.get_heap_stats()
                report['rocksdb_stats'] = self.get_rocksdb_stats()

        # NOTE: We do not include the 'device' channel in this report; it is
        # sent to a different endpoint.

        return report
1225
1226 def get_rook_data(self, report: Dict[str, object]) -> None:
1227 r, outb, outs = self.mon_command({
1228 'prefix': 'config-key dump',
1229 'format': 'json'
1230 })
1231 if r != 0:
1232 return
1233 try:
1234 config_kv_dump = json.loads(outb)
1235 except json.decoder.JSONDecodeError:
1236 return
1237
1238 for elem in ROOK_KEYS_BY_COLLECTION:
1239 # elem[0] is the full key path (e.g. "rook/node/count/with-csi-nfs-plugin")
1240 # elem[1] is the Collection this key belongs to
1241 if self.is_enabled_collection(elem[1]):
1242 self.add_kv_to_report(report, elem[0], config_kv_dump.get(elem[0]))
1243
1244 def add_kv_to_report(self, report: Dict[str, object], key_path: str, value: Any) -> None:
1245 last_node = key_path.split('/')[-1]
1246 for node in key_path.split('/')[0:-1]:
1247 if node not in report:
1248 report[node] = {}
1249 report = report[node] # type: ignore
1250
1251 # sanity check of keys correctness
1252 if not isinstance(report, dict):
1253 self.log.error(f"'{key_path}' is an invalid key, expected type 'dict' but got {type(report)}")
1254 return
1255
1256 if last_node in report:
1257 self.log.error(f"'{key_path}' is an invalid key, last part must not exist at this point")
1258 return
1259
1260 report[last_node] = value
1261
1262 def _try_post(self, what: str, url: str, report: Dict[str, Dict[str, str]]) -> Optional[str]:
1263 self.log.info('Sending %s to: %s' % (what, url))
1264 proxies = dict()
1265 if self.proxy:
1266 self.log.info('Send using HTTP(S) proxy: %s', self.proxy)
1267 proxies['http'] = self.proxy
1268 proxies['https'] = self.proxy
1269 try:
1270 resp = requests.put(url=url, json=report, proxies=proxies)
1271 resp.raise_for_status()
1272 except Exception as e:
1273 fail_reason = 'Failed to send %s to %s: %s' % (what, url, str(e))
1274 self.log.error(fail_reason)
1275 return fail_reason
1276 return None
1277
    class EndPoint(enum.Enum):
        """Upload destinations for send(): the main telemetry endpoint
        ('ceph') or the separate device-report endpoint ('device')."""
        ceph = 'ceph'
        device = 'device'
1281
1282 def collection_delta(self, channels: Optional[List[str]] = None) -> Optional[List[Collection]]:
1283 '''
1284 Find collections that are available in the module, but are not in the db
1285 '''
1286 if self.db_collection is None:
1287 return None
1288
1289 if not channels:
1290 channels = ALL_CHANNELS
1291 else:
1292 for ch in channels:
1293 if ch not in ALL_CHANNELS:
1294 self.log.debug(f"invalid channel name: {ch}")
1295 return None
1296
1297 new_collection : List[Collection] = []
1298
1299 for c in MODULE_COLLECTION:
1300 if c['name'].name not in self.db_collection:
1301 if c['channel'] in channels:
1302 new_collection.append(c['name'])
1303
1304 return new_collection
1305
1306 def is_major_upgrade(self) -> bool:
1307 '''
1308 Returns True only if the user last opted-in to an older major
1309 '''
1310 if self.last_opted_in_ceph_version is None or self.last_opted_in_ceph_version == 0:
1311 # we do not know what Ceph version was when the user last opted-in,
1312 # thus we do not wish to nag in case of a major upgrade
1313 return False
1314
1315 mon_map = self.get('mon_map')
1316 mon_min = mon_map.get("min_mon_release", 0)
1317
1318 if mon_min - self.last_opted_in_ceph_version > 0:
1319 self.log.debug(f"major upgrade: mon_min is: {mon_min} and user last opted-in in {self.last_opted_in_ceph_version}")
1320 return True
1321
1322 return False
1323
1324 def is_opted_in(self) -> bool:
1325 # If len is 0 it means that the user is either opted-out (never
1326 # opted-in, or invoked `telemetry off`), or they upgraded from a
1327 # telemetry revision 1 or 2, which required to re-opt in to revision 3,
1328 # regardless, hence is considered as opted-out
1329 if self.db_collection is None:
1330 return False
1331 return len(self.db_collection) > 0
1332
1333 def should_nag(self) -> bool:
1334 # Find delta between opted-in collections and module collections;
1335 # nag only if module has a collection which is not in db, and nag == True.
1336
1337 # We currently do not nag if the user is opted-out (or never opted-in).
1338 # If we wish to do this in the future, we need to have a tri-mode state
1339 # (opted in, opted out, no action yet), and it needs to be guarded by a
1340 # config option (so that nagging can be turned off via config).
1341 # We also need to add a last_opted_out_ceph_version variable, for the
1342 # major upgrade check.
1343
1344 # check if there are collections the user is not opt-in to
1345 # that we should nag about
1346 if self.db_collection is not None:
1347 for c in MODULE_COLLECTION:
1348 if c['name'].name not in self.db_collection:
1349 if c['nag'] == True:
1350 self.log.debug(f"The collection: {c['name']} is not reported")
1351 return True
1352
1353 # user might be opted-in to the most recent collection, or there is no
1354 # new collection which requires nagging about; thus nag in case it's a
1355 # major upgrade and there are new collections
1356 # (which their own nag == False):
1357 new_collections = False
1358 col_delta = self.collection_delta()
1359 if col_delta is not None and len(col_delta) > 0:
1360 new_collections = True
1361
1362 return self.is_major_upgrade() and new_collections
1363
    def init_collection(self) -> None:
        """
        Initialize self.db_collection from the persistent 'collection' key,
        performing the one-time migration from the pre-collection (revision-
        based) opt-in scheme when the key does not exist yet.
        """
        # We fetch from db the collections the user had already opted-in to.
        # During the transition the results will be empty, but the user might
        # be opted-in to an older version (e.g. revision = 3)

        collection = self.get_store('collection')

        if collection is not None:
            self.db_collection = json.loads(collection)

        if self.db_collection is None:
            # happens once on upgrade
            if not self.enabled:
                # user is not opted-in
                self.set_store('collection', json.dumps([]))
                self.log.debug("user is not opted-in")
            else:
                # user is opted-in, verify the revision:
                if self.last_opt_revision == REVISION:
                    self.log.debug(f"telemetry revision is {REVISION}")
                    # carry the opt-in over to the base collections
                    base_collection = [Collection.basic_base.name, Collection.device_base.name, Collection.crash_base.name, Collection.ident_base.name]
                    self.set_store('collection', json.dumps(base_collection))
                else:
                    # user is opted-in to an older version, meaning they need
                    # to re-opt in regardless
                    self.set_store('collection', json.dumps([]))
                    self.log.debug(f"user is opted-in but revision is old ({self.last_opt_revision}), needs to re-opt-in")

            # reload collection after setting
            collection = self.get_store('collection')
            if collection is not None:
                self.db_collection = json.loads(collection)
            else:
                # we just wrote the key above, so this should be impossible
                raise RuntimeError('collection is None after initial setting')
        else:
            # user has already upgraded
            self.log.debug(f"user has upgraded already: collection: {self.db_collection}")
1401
1402 def is_enabled_collection(self, collection: Collection) -> bool:
1403 if self.db_collection is None:
1404 return False
1405 return collection.name in self.db_collection
1406
1407 def opt_in_all_collections(self) -> None:
1408 """
1409 Opt-in to all collections; Update db with the currently available collections in the module
1410 """
1411 if self.db_collection is None:
1412 raise RuntimeError('db_collection is None after initial setting')
1413
1414 for c in MODULE_COLLECTION:
1415 if c['name'].name not in self.db_collection:
1416 self.db_collection.append(c['name'])
1417
1418 self.set_store('collection', json.dumps(self.db_collection))
1419
    def send(self,
             report: Dict[str, Dict[str, str]],
             endpoint: Optional[List[EndPoint]] = None) -> Tuple[int, str, str]:
        """
        Upload *report* to the requested endpoint(s) (both by default).

        The main report goes to self.url; when the 'device' channel is
        active, per-host device reports go to self.device_url.

        :returns: (retval, stdout, stderr)-style tuple; retval is 1 if any
                  upload failed, 0 otherwise, with a summary message in the
                  last element.
        """
        if not endpoint:
            endpoint = [self.EndPoint.ceph, self.EndPoint.device]
        failed = []
        success = []
        self.log.debug('Send endpoints %s' % endpoint)
        for e in endpoint:
            if e == self.EndPoint.ceph:
                fail_reason = self._try_post('ceph report', self.url, report)
                if fail_reason:
                    failed.append(fail_reason)
                else:
                    # remember when the last successful upload happened
                    now = int(time.time())
                    self.last_upload = now
                    self.set_store('last_upload', str(now))
                    success.append('Ceph report sent to {0}'.format(self.url))
                    self.log.info('Sent report to {0}'.format(self.url))
            elif e == self.EndPoint.device:
                if 'device' in self.get_active_channels():
                    devices = self.gather_device_report()
                    if devices:
                        num_devs = 0
                        num_hosts = 0
                        # one POST per host; hosts with no devices are skipped
                        for host, ls in devices.items():
                            self.log.debug('host %s devices %s' % (host, ls))
                            if not len(ls):
                                continue
                            fail_reason = self._try_post('devices', self.device_url,
                                                         ls)
                            if fail_reason:
                                failed.append(fail_reason)
                            else:
                                num_devs += len(ls)
                                num_hosts += 1
                        if num_devs:
                            success.append('Reported %d devices from %d hosts across a total of %d hosts' % (
                                num_devs, num_hosts, len(devices)))
                    else:
                        fail_reason = 'Unable to send device report: Device channel is on, but the generated report was empty.'
                        failed.append(fail_reason)
                        self.log.error(fail_reason)
        if failed:
            return 1, '', '\n'.join(success + failed)
        return 0, '', '\n'.join(success)
1466
1467 def format_perf_histogram(self, report: Dict[str, Any]) -> None:
1468 # Formatting the perf histograms so they are human-readable. This will change the
1469 # ranges and values, which are currently in list form, into strings so that
1470 # they are displayed horizontally instead of vertically.
1471 try:
1472 # Formatting ranges and values in osd_perf_histograms
1473 mode = 'osd_perf_histograms'
1474 for config in report[mode]:
1475 for histogram in config:
1476 # Adjust ranges by converting lists into strings
1477 for axis in config[histogram]['axes']:
1478 for i in range(0, len(axis['ranges'])):
1479 axis['ranges'][i] = str(axis['ranges'][i])
1480
1481 for osd in config[histogram]['osds']:
1482 for i in range(0, len(osd['values'])):
1483 osd['values'][i] = str(osd['values'][i])
1484 except KeyError:
1485 # If the perf channel is not enabled, there should be a KeyError since
1486 # 'osd_perf_histograms' would not be present in the report. In that case,
1487 # the show function should pass as usual without trying to format the
1488 # histograms.
1489 pass
1490
1491 def toggle_channel(self, action: str, channels: Optional[List[str]] = None) -> Tuple[int, str, str]:
1492 '''
1493 Enable or disable a list of channels
1494 '''
1495 if not self.enabled:
1496 # telemetry should be on for channels to be toggled
1497 msg = 'Telemetry is off. Please consider opting-in with `ceph telemetry on`.\n' \
1498 'Preview sample reports with `ceph telemetry preview`.'
1499 return 0, msg, ''
1500
1501 if channels is None:
1502 msg = f'Please provide a channel name. Available channels: {ALL_CHANNELS}.'
1503 return 0, msg, ''
1504
1505 state = action == 'enable'
1506 msg = ''
1507 for c in channels:
1508 if c not in ALL_CHANNELS:
1509 msg = f"{msg}{c} is not a valid channel name. "\
1510 f"Available channels: {ALL_CHANNELS}.\n"
1511 else:
1512 self.set_module_option(f"channel_{c}", state)
1513 setattr(self,
1514 f"channel_{c}",
1515 state)
1516 msg = f"{msg}channel_{c} is {action}d\n"
1517
1518 return 0, msg, ''
1519
1520 @CLIReadCommand('telemetry status')
1521 def status(self) -> Tuple[int, str, str]:
1522 '''
1523 Show current configuration
1524 '''
1525 r = {}
1526 for opt in self.MODULE_OPTIONS:
1527 r[opt['name']] = getattr(self, opt['name'])
1528 r['last_upload'] = (time.ctime(self.last_upload)
1529 if self.last_upload else self.last_upload)
1530 return 0, json.dumps(r, indent=4, sort_keys=True), ''
1531
1532 @CLIReadCommand('telemetry diff')
1533 def diff(self) -> Tuple[int, str, str]:
1534 '''
1535 Show the diff between opted-in collection and available collection
1536 '''
1537 diff = []
1538 keys = ['nag']
1539
1540 for c in MODULE_COLLECTION:
1541 if not self.is_enabled_collection(c['name']):
1542 diff.append({key: val for key, val in c.items() if key not in keys})
1543
1544 r = None
1545 if diff == []:
1546 r = "Telemetry is up to date"
1547 else:
1548 r = json.dumps(diff, indent=4, sort_keys=True)
1549
1550 return 0, r, ''
1551
1552 @CLICommand('telemetry on')
1553 def on(self, license: Optional[str] = None) -> Tuple[int, str, str]:
1554 '''
1555 Enable telemetry reports from this cluster
1556 '''
1557 if license != LICENSE:
1558 return -errno.EPERM, '', f'''Telemetry data is licensed under the {LICENSE_NAME} ({LICENSE_URL}).
1559 To enable, add '--license {LICENSE}' to the 'ceph telemetry on' command.'''
1560 else:
1561 self.set_module_option('enabled', True)
1562 self.enabled = True
1563 self.opt_in_all_collections()
1564
1565 # for major releases upgrade nagging
1566 mon_map = self.get('mon_map')
1567 mon_min = mon_map.get("min_mon_release", 0)
1568 self.set_store('last_opted_in_ceph_version', str(mon_min))
1569 self.last_opted_in_ceph_version = mon_min
1570
1571 msg = 'Telemetry is on.'
1572 disabled_channels = ''
1573 active_channels = self.get_active_channels()
1574 for c in ALL_CHANNELS:
1575 if c not in active_channels and c != 'ident':
1576 disabled_channels = f"{disabled_channels} {c}"
1577
1578 if len(disabled_channels) > 0:
1579 msg = f"{msg}\nSome channels are disabled, please enable with:\n"\
1580 f"`ceph telemetry enable channel{disabled_channels}`"
1581
1582 return 0, msg, ''
1583
1584 @CLICommand('telemetry off')
1585 def off(self) -> Tuple[int, str, str]:
1586 '''
1587 Disable telemetry reports from this cluster
1588 '''
1589 if not self.enabled:
1590 # telemetry is already off
1591 msg = 'Telemetry is currently not enabled, nothing to turn off. '\
1592 'Please consider opting-in with `ceph telemetry on`.\n' \
1593 'Preview sample reports with `ceph telemetry preview`.'
1594 return 0, msg, ''
1595
1596 self.set_module_option('enabled', False)
1597 self.enabled = False
1598 self.set_store('collection', json.dumps([]))
1599 self.db_collection = []
1600
1601 # we might need this info in the future, in case
1602 # of nagging when user is opted-out
1603 mon_map = self.get('mon_map')
1604 mon_min = mon_map.get("min_mon_release", 0)
1605 self.set_store('last_opted_out_ceph_version', str(mon_min))
1606 self.last_opted_out_ceph_version = mon_min
1607
1608 msg = 'Telemetry is now disabled.'
1609 return 0, msg, ''
1610
    @CLIReadCommand('telemetry enable channel all')
    def enable_channel_all(self, channels: List[str] = ALL_CHANNELS) -> Tuple[int, str, str]:
        '''
        Enable all channels
        '''
        # NOTE(review): the default binds the module-level ALL_CHANNELS list;
        # safe as long as it is never mutated -- confirm.
        return self.toggle_channel('enable', channels)
1617
    @CLIReadCommand('telemetry enable channel')
    def enable_channel(self, channels: Optional[List[str]] = None) -> Tuple[int, str, str]:
        '''
        Enable a list of channels
        '''
        # delegate to the shared toggle helper (handles validation/messages)
        return self.toggle_channel('enable', channels)
1624
    @CLIReadCommand('telemetry disable channel all')
    def disable_channel_all(self, channels: List[str] = ALL_CHANNELS) -> Tuple[int, str, str]:
        '''
        Disable all channels
        '''
        # NOTE(review): the default binds the module-level ALL_CHANNELS list;
        # safe as long as it is never mutated -- confirm.
        return self.toggle_channel('disable', channels)
1631
    @CLIReadCommand('telemetry disable channel')
    def disable_channel(self, channels: Optional[List[str]] = None) -> Tuple[int, str, str]:
        '''
        Disable a list of channels
        '''
        # delegate to the shared toggle helper (handles validation/messages)
        return self.toggle_channel('disable', channels)
1638
1639 @CLIReadCommand('telemetry channel ls')
1640 def channel_ls(self) -> Tuple[int, str, str]:
1641 '''
1642 List all channels
1643 '''
1644 table = PrettyTable(
1645 [
1646 'NAME', 'ENABLED', 'DEFAULT', 'DESC',
1647 ],
1648 border=False)
1649 table.align['NAME'] = 'l'
1650 table.align['ENABLED'] = 'l'
1651 table.align['DEFAULT'] = 'l'
1652 table.align['DESC'] = 'l'
1653 table.left_padding_width = 0
1654 table.right_padding_width = 4
1655
1656 for c in ALL_CHANNELS:
1657 enabled = "ON" if getattr(self, f"channel_{c}") else "OFF"
1658 for o in self.MODULE_OPTIONS:
1659 if o['name'] == f"channel_{c}":
1660 default = "ON" if o.get('default', None) else "OFF"
1661 desc = o.get('desc', None)
1662
1663 table.add_row((
1664 c,
1665 enabled,
1666 default,
1667 desc,
1668 ))
1669
1670 return 0, table.get_string(sortby="NAME"), ''
1671
    @CLIReadCommand('telemetry collection ls')
    def collection_ls(self) -> Tuple[int, str, str]:
        '''
        List all collections
        '''
        # lead with a notice when new collections exist that the user has
        # not opted in to yet
        col_delta = self.collection_delta()
        msg = ''
        if col_delta is not None and len(col_delta) > 0:
            msg = f"New collections are available:\n" \
                  f"{sorted([c.name for c in col_delta])}\n" \
                  f"Run `ceph telemetry on` to opt-in to these collections.\n"

        table = PrettyTable(
            [
                'NAME', 'STATUS', 'DESC',
            ],
            border=False)
        table.align['NAME'] = 'l'
        table.align['STATUS'] = 'l'
        table.align['DESC'] = 'l'
        table.left_padding_width = 0
        table.right_padding_width = 4

        for c in MODULE_COLLECTION:
            name = c['name']
            opted_in = self.is_enabled_collection(name)
            channel_enabled = getattr(self, f"channel_{c['channel']}")

            # a collection is reported only when both the user opted in to it
            # AND its channel is enabled; otherwise explain which is missing
            status = ''
            if channel_enabled and opted_in:
                status = "REPORTING"
            else:
                why = ''
                delimiter = ''

                if not opted_in:
                    why += "NOT OPTED-IN"
                    delimiter = ', '
                if not channel_enabled:
                    why += f"{delimiter}CHANNEL {c['channel']} IS OFF"

                status = f"NOT REPORTING: {why}"

            desc = c['description']

            table.add_row((
                name,
                status,
                desc,
            ))

        if len(msg):
            # add a new line between message and table output
            msg = f"{msg} \n"

        return 0, f'{msg}{table.get_string(sortby="NAME")}', ''
1728
1729 @CLICommand('telemetry send')
1730 def do_send(self,
1731 endpoint: Optional[List[EndPoint]] = None,
1732 license: Optional[str] = None) -> Tuple[int, str, str]:
1733 '''
1734 Send a sample report
1735 '''
1736 if not self.is_opted_in() and license != LICENSE:
1737 self.log.debug(('A telemetry send attempt while opted-out. '
1738 'Asking for license agreement'))
1739 return -errno.EPERM, '', f'''Telemetry data is licensed under the {LICENSE_NAME} ({LICENSE_URL}).
1740 To manually send telemetry data, add '--license {LICENSE}' to the 'ceph telemetry send' command.
1741 Please consider enabling the telemetry module with 'ceph telemetry on'.'''
1742 else:
1743 self.last_report = self.compile_report()
1744 return self.send(self.last_report, endpoint)
1745
1746 @CLIReadCommand('telemetry show')
1747 def show(self, channels: Optional[List[str]] = None) -> Tuple[int, str, str]:
1748 '''
1749 Show a sample report of opted-in collections (except for 'device')
1750 '''
1751 if not self.enabled:
1752 # if telemetry is off, no report is being sent, hence nothing to show
1753 msg = 'Telemetry is off. Please consider opting-in with `ceph telemetry on`.\n' \
1754 'Preview sample reports with `ceph telemetry preview`.'
1755 return 0, msg, ''
1756
1757 report = self.get_report_locked(channels=channels)
1758 self.format_perf_histogram(report)
1759 report = json.dumps(report, indent=4, sort_keys=True)
1760
1761 if self.channel_device:
1762 report += '''\nDevice report is generated separately. To see it run 'ceph telemetry show-device'.'''
1763
1764 return 0, report, ''
1765
    @CLIReadCommand('telemetry preview')
    def preview(self, channels: Optional[List[str]] = None) -> Tuple[int, str, str]:
        '''
        Preview a sample report of the most recent collections available (except for 'device')
        '''
        report = {}

        # We use a lock to prevent a scenario where the user wishes to preview
        # the report, and at the same time the module hits the interval of
        # sending a report with the opted-in collection, which has less data
        # than in the preview report.
        col_delta = self.collection_delta()
        with self.get_report_lock:
            if col_delta is not None and len(col_delta) == 0:
                # user is already opted-in to the most recent collection
                msg = 'Telemetry is up to date, see report with `ceph telemetry show`.'
                return 0, msg, ''
            else:
                # there are collections the user is not opted-in to
                next_collection = []

                for c in MODULE_COLLECTION:
                    next_collection.append(c['name'].name)

                # temporarily swap in the full collection list so the report
                # includes everything, then restore the user's real opt-in
                # state; the lock above guards this swap
                opted_in_collection = self.db_collection
                self.db_collection = next_collection
                report = self.get_report(channels=channels)
                self.db_collection = opted_in_collection

        self.format_perf_histogram(report)
        report = json.dumps(report, indent=4, sort_keys=True)

        if self.channel_device:
            report += '''\nDevice report is generated separately. To see it run 'ceph telemetry preview-device'.'''

        return 0, report, ''
1802
1803 @CLIReadCommand('telemetry show-device')
1804 def show_device(self) -> Tuple[int, str, str]:
1805 '''
1806 Show a sample device report
1807 '''
1808 if not self.enabled:
1809 # if telemetry is off, no report is being sent, hence nothing to show
1810 msg = 'Telemetry is off. Please consider opting-in with `ceph telemetry on`.\n' \
1811 'Preview sample device reports with `ceph telemetry preview-device`.'
1812 return 0, msg, ''
1813
1814 if not self.channel_device:
1815 # if device channel is off, device report is not being sent, hence nothing to show
1816 msg = 'device channel is off. Please enable with `ceph telemetry enable channel device`.\n' \
1817 'Preview sample device reports with `ceph telemetry preview-device`.'
1818 return 0, msg, ''
1819
1820 return 0, json.dumps(self.get_report_locked('device'), indent=4, sort_keys=True), ''
1821
    @CLIReadCommand('telemetry preview-device')
    def preview_device(self) -> Tuple[int, str, str]:
        '''
        Preview a sample device report of the most recent device collection
        '''
        report = {}

        # lock for the same reason as preview(): avoid racing an interval
        # send while db_collection is temporarily swapped below
        device_col_delta = self.collection_delta(['device'])
        with self.get_report_lock:
            if device_col_delta is not None and len(device_col_delta) == 0 and self.channel_device:
                # user is already opted-in to the most recent device collection,
                # and device channel is on, thus `show-device` should be called
                msg = 'device channel is on and up to date, see report with `ceph telemetry show-device`.'
                return 0, msg, ''

            # either the user is not opted-in at all, or there are collections
            # they are not opted-in to
            next_collection = []

            for c in MODULE_COLLECTION:
                next_collection.append(c['name'].name)

            # temporarily swap in the full collection list, generate the
            # preview, then restore the user's real opt-in state
            opted_in_collection = self.db_collection
            self.db_collection = next_collection
            report = self.get_report('device')
            self.db_collection = opted_in_collection

        report = json.dumps(report, indent=4, sort_keys=True)
        return 0, report, ''
1851
1852 @CLIReadCommand('telemetry show-all')
1853 def show_all(self) -> Tuple[int, str, str]:
1854 '''
1855 Show a sample report of all enabled channels (including 'device' channel)
1856 '''
1857 if not self.enabled:
1858 # if telemetry is off, no report is being sent, hence nothing to show
1859 msg = 'Telemetry is off. Please consider opting-in with `ceph telemetry on`.\n' \
1860 'Preview sample reports with `ceph telemetry preview`.'
1861 return 0, msg, ''
1862
1863 if not self.channel_device:
1864 # device channel is off, no need to display its report
1865 return 0, json.dumps(self.get_report_locked('default'), indent=4, sort_keys=True), ''
1866
1867 # telemetry is on and device channel is enabled, show both
1868 return 0, json.dumps(self.get_report_locked('all'), indent=4, sort_keys=True), ''
1869
1870 @CLIReadCommand('telemetry preview-all')
1871 def preview_all(self) -> Tuple[int, str, str]:
1872 '''
1873 Preview a sample report of the most recent collections available of all channels (including 'device')
1874 '''
1875 report = {}
1876
1877 col_delta = self.collection_delta()
1878 with self.get_report_lock:
1879 if col_delta is not None and len(col_delta) == 0:
1880 # user is already opted-in to the most recent collection
1881 msg = 'Telemetry is up to date, see report with `ceph telemetry show`.'
1882 return 0, msg, ''
1883
1884 # there are collections the user is not opted-in to
1885 next_collection = []
1886
1887 for c in MODULE_COLLECTION:
1888 next_collection.append(c['name'].name)
1889
1890 opted_in_collection = self.db_collection
1891 self.db_collection = next_collection
1892 report = self.get_report('all')
1893 self.db_collection = opted_in_collection
1894
1895 self.format_perf_histogram(report)
1896 report = json.dumps(report, indent=4, sort_keys=True)
1897
1898 return 0, report, ''
1899
1900 def get_report_locked(self,
1901 report_type: str = 'default',
1902 channels: Optional[List[str]] = None) -> Dict[str, Any]:
1903 '''
1904 A wrapper around get_report to allow for compiling a report of the most recent module collections
1905 '''
1906 with self.get_report_lock:
1907 return self.get_report(report_type, channels)
1908
1909 def get_report(self,
1910 report_type: str = 'default',
1911 channels: Optional[List[str]] = None) -> Dict[str, Any]:
1912 if report_type == 'default':
1913 return self.compile_report(channels=channels)
1914 elif report_type == 'device':
1915 return self.gather_device_report()
1916 elif report_type == 'all':
1917 return {'report': self.compile_report(channels=channels),
1918 'device_report': self.gather_device_report()}
1919 return {}
1920
1921 def self_test(self) -> None:
1922 report = self.compile_report()
1923 if len(report) == 0:
1924 raise RuntimeError('Report is empty')
1925
1926 if 'report_id' not in report:
1927 raise RuntimeError('report_id not found in report')
1928
1929 def shutdown(self) -> None:
1930 self.run = False
1931 self.event.set()
1932
1933 def refresh_health_checks(self) -> None:
1934 health_checks = {}
1935 # TODO do we want to nag also in case the user is not opted-in?
1936 if self.enabled and self.should_nag():
1937 health_checks['TELEMETRY_CHANGED'] = {
1938 'severity': 'warning',
1939 'summary': 'Telemetry requires re-opt-in',
1940 'detail': [
1941 'telemetry module includes new collections; please re-opt-in to new collections with `ceph telemetry on`'
1942 ]
1943 }
1944 self.set_health_checks(health_checks)
1945
1946 def serve(self) -> None:
1947 self.load()
1948 self.run = True
1949
1950 self.log.debug('Waiting for mgr to warm up')
1951 time.sleep(10)
1952
1953 while self.run:
1954 self.event.clear()
1955
1956 self.refresh_health_checks()
1957
1958 if not self.is_opted_in():
1959 self.log.debug('Not sending report until user re-opts-in')
1960 self.event.wait(1800)
1961 continue
1962 if not self.enabled:
1963 self.log.debug('Not sending report until configured to do so')
1964 self.event.wait(1800)
1965 continue
1966
1967 now = int(time.time())
1968 if not self.last_upload or \
1969 (now - self.last_upload) > self.interval * 3600:
1970 self.log.info('Compiling and sending report to %s',
1971 self.url)
1972
1973 try:
1974 self.last_report = self.compile_report()
1975 except Exception:
1976 self.log.exception('Exception while compiling report:')
1977
1978 self.send(self.last_report)
1979 else:
1980 self.log.debug('Interval for sending new report has not expired')
1981
1982 sleep = 3600
1983 self.log.debug('Sleeping for %d seconds', sleep)
1984 self.event.wait(sleep)
1985
1986 @staticmethod
1987 def can_run() -> Tuple[bool, str]:
1988 return True, ''