]> git.proxmox.com Git - ceph.git/blame - ceph/qa/tasks/check_counter.py
import 15.2.4
[ceph.git] / ceph / qa / tasks / check_counter.py
CommitLineData
7c673cae
FG
1
2import logging
3import json
4
5from teuthology.task import Task
6from teuthology import misc
7c673cae
FG
7
# Module-level logger, named after this module so its output can be
# attributed to the check-counter task.
log = logging.getLogger(__name__)
9
10
class CheckCounter(Task):
    """
    Use this task to validate that some daemon perf counters were
    incremented by the nested tasks.

    Config:
     'cluster_name': optional, specify which cluster
     'counters': dictionary of daemon type to list of performance counters,
                 each named as "<subsystem>.<counter>".
     'dry_run': just log the value of the counters, don't fail if they
                aren't nonzero.

    Success condition is that for all of the named counters, at least
    one of the daemons of that type has the counter nonzero.

    Example to check cephfs dirfrag splits are happening:
    - install:
    - ceph:
    - ceph-fuse:
    - check-counter:
        counters:
            mds:
                - "mds.dir_split"
    - workunit: ...
    """

    def start(self):
        """Log that the task has started; the actual check runs in end()."""
        log.info("START")

    @staticmethod
    def _split_counter(counter):
        """
        Split a "<subsystem>.<counter>" name into its two components.

        Raises ValueError (the same exception type the bare tuple-unpacking
        used to raise) with a message that names the offending entry.
        """
        parts = counter.split(".")
        if len(parts) != 2:
            raise ValueError(
                "Counter '{0}' must be of the form "
                "'<subsystem>.<counter>'".format(counter))
        return parts[0], parts[1]

    @staticmethod
    def _is_nonzero(value):
        """
        Return True if a perf dump entry shows the counter was incremented.

        Plain numeric counters are compared against zero.  'perf dump' can
        also report a counter as a mapping (e.g. averaged counters carrying
        'avgcount' and 'sum'); comparing a dict with 0 raises TypeError on
        Python 3, so such a counter is considered set when any of its
        numeric members is positive.
        """
        if isinstance(value, dict):
            return any(isinstance(member, (int, float)) and member > 0
                       for member in value.values())
        return value > 0

    def end(self):
        """
        Inspect the configured perf counters once the nested tasks are done.

        For every daemon type listed under 'counters', each running daemon
        of that type is asked for 'perf dump' over its admin socket and the
        requested counters are logged.

        Raises:
            RuntimeError: if 'dry_run' is not set and some counter was not
                nonzero on any daemon of its type.
            ValueError: if a counter name is not "<subsystem>.<counter>".
        """
        # Apply any job-level overrides for this task on top of its config.
        overrides = self.ctx.config.get('overrides', {})
        misc.deep_merge(self.config, overrides.get('check-counter', {}))

        cluster_name = self.config.get('cluster_name', None)
        dry_run = self.config.get('dry_run', False)
        targets = self.config.get('counters', {})

        if cluster_name is None:
            # No cluster requested: fall back to the first known manager.
            cluster_name = next(iter(self.ctx.managers.keys()))

        # The manager does not depend on the daemon being inspected, so
        # look it up once instead of once per daemon.
        manager = self.ctx.managers[cluster_name]

        for daemon_type, counters in targets.items():
            # List of 'a', 'b', 'c'...
            daemon_ids = list(misc.all_roles_of_type(self.ctx.cluster, daemon_type))
            daemons = {
                daemon_id: self.ctx.daemons.get_daemon(daemon_type, daemon_id)
                for daemon_id in daemon_ids
            }

            # Counters observed nonzero on at least one daemon of this type.
            seen = set()

            for daemon_id, daemon in daemons.items():
                if not daemon.running():
                    log.info("Ignoring daemon {0}, it isn't running".format(daemon_id))
                    continue
                log.debug("Getting stats from {0}".format(daemon_id))

                proc = manager.admin_socket(daemon_type, daemon_id, ["perf", "dump"])
                response_data = proc.stdout.getvalue().strip()
                if not response_data:
                    log.warning("No admin socket response from {0}, skipping".format(daemon_id))
                    continue
                perf_dump = json.loads(response_data)

                for counter in counters:
                    subsys, counter_id = self._split_counter(counter)
                    if subsys not in perf_dump or counter_id not in perf_dump[subsys]:
                        log.warning("Counter '{0}' not found on daemon {1}.{2}".format(
                            counter, daemon_type, daemon_id))
                        continue
                    value = perf_dump[subsys][counter_id]

                    log.info("Daemon {0}.{1} {2}={3}".format(
                        daemon_type, daemon_id, counter, value
                    ))

                    if self._is_nonzero(value):
                        seen.add(counter)

            if not dry_run:
                unseen = set(counters) - seen
                if unseen:
                    raise RuntimeError("The following counters failed to be set "
                                       "on {0} daemons: {1}".format(
                                           daemon_type, unseen
                                       ))
97
# Expose the task class under the module-level name `task`
# (presumably the name the teuthology task loader looks up; confirm).
task = CheckCounter