]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | |
2 | import logging | |
3 | import json | |
4 | ||
5 | from teuthology.task import Task | |
6 | from teuthology import misc | |
7c673cae FG |
7 | |
8 | log = logging.getLogger(__name__) | |
9 | ||
10 | ||
class CheckCounter(Task):
    """
    Use this task to validate that some daemon perf counters were
    incremented by the nested tasks.

    Config:
     'cluster_name': optional, specify which cluster.  When omitted, the
                     first key of ``ctx.managers`` is used.
     'counters': dictionary of daemon type to list of performance counters,
                 each written as "<subsystem>.<counter>".
     'dry_run': just log the value of the counters, don't fail if they
                aren't nonzero.

    A 'check-counter' section under the job's top-level 'overrides' is
    merged into this configuration before the counters are checked.

    Success condition is that for all of the named counters, at least
    one of the daemons of that type has the counter nonzero.

    Example to check cephfs dirfrag splits are happening:
    - install:
    - ceph:
    - ceph-fuse:
    - check-counter:
        counters:
            mds:
                - "mds.dir_split"
    - workunit: ...
    """

    def start(self):
        """Log that the task has started; the checks themselves run in end()."""
        log.info("START")

    @staticmethod
    def _split_counter(counter):
        """
        Split a "<subsystem>.<counter>" name into (subsystem, counter).

        :raises ValueError: if the name does not contain exactly one "."
        """
        parts = counter.split(".")
        if len(parts) != 2:
            raise ValueError(
                "Invalid counter name '{0}', expected "
                "'<subsystem>.<counter>'".format(counter))
        return parts[0], parts[1]

    @staticmethod
    def _is_set(value):
        """
        Return True if a perf counter value shows the counter was updated.

        Plain counters are compared against zero directly.  Averaged
        counters are reported by Ceph as a dict carrying 'avgcount' and
        'sum'; for those the number of samples ('avgcount') is compared,
        since a dict cannot be ordered against an int.
        """
        if isinstance(value, dict):
            value = value.get("avgcount", 0)
        return value > 0

    def end(self):
        """
        Dump the perf counters of every running daemon of each configured
        type and check that each named counter is nonzero on at least one
        of them.

        :raises ValueError: if a configured counter name is not of the form
                            "<subsystem>.<counter>"
        :raises RuntimeError: if 'dry_run' is not set and some counter is
                              not nonzero on any daemon of its type
        """
        overrides = self.ctx.config.get('overrides', {})
        misc.deep_merge(self.config, overrides.get('check-counter', {}))

        cluster_name = self.config.get('cluster_name', None)
        dry_run = self.config.get('dry_run', False)
        targets = self.config.get('counters', {})

        if cluster_name is None:
            # No cluster configured: fall back to the first known manager.
            cluster_name = next(iter(self.ctx.managers.keys()))

        for daemon_type, counters in targets.items():
            # List of 'a', 'b', 'c'...
            daemon_ids = list(misc.all_roles_of_type(self.ctx.cluster, daemon_type))
            daemons = {
                daemon_id: self.ctx.daemons.get_daemon(daemon_type, daemon_id)
                for daemon_id in daemon_ids
            }

            # Counters observed nonzero on at least one daemon of this type.
            seen = set()

            for daemon_id, daemon in daemons.items():
                if not daemon.running():
                    log.info("Ignoring daemon {0}, it isn't running".format(daemon_id))
                    continue
                log.debug("Getting stats from {0}".format(daemon_id))

                manager = self.ctx.managers[cluster_name]
                proc = manager.admin_socket(daemon_type, daemon_id, ["perf", "dump"])
                response_data = proc.stdout.getvalue().strip()
                if not response_data:
                    log.warning("No admin socket response from {0}, skipping".format(daemon_id))
                    continue
                perf_dump = json.loads(response_data)

                for counter in counters:
                    subsys, counter_id = self._split_counter(counter)
                    if subsys not in perf_dump or counter_id not in perf_dump[subsys]:
                        log.warning("Counter '{0}' not found on daemon {1}.{2}".format(
                            counter, daemon_type, daemon_id))
                        continue
                    value = perf_dump[subsys][counter_id]

                    log.info("Daemon {0}.{1} {2}={3}".format(
                        daemon_type, daemon_id, counter, value
                    ))

                    if self._is_set(value):
                        seen.add(counter)

            if not dry_run:
                unseen = set(counters) - seen
                if unseen:
                    raise RuntimeError("The following counters failed to be set "
                                       "on {0} daemons: {1}".format(
                                           daemon_type, unseen
                                       ))
97 | ||
# Module-level alias for the task class (presumably the name the teuthology
# task loader resolves in this module -- TODO confirm against the loader).
task = CheckCounter