]>
git.proxmox.com Git - ceph.git/blob - ceph/qa/tasks/check_counter.py
5 from teuthology
.task
import Task
6 from teuthology
import misc
8 from tasks
import ceph_manager
10 log
= logging
.getLogger(__name__
)
13 class CheckCounter(Task
):
15 Use this task to validate that some daemon perf counters were
16 incremented by the nested tasks.
19 'cluster_name': optional, specify which cluster
20 'counters': dictionary of daemon type to list of performance counters.
21 'dry_run': just log the value of the counters, don't fail if they do not meet the success condition.
24 Success condition is that for all of the named counters, at least
25 one of the daemons of that type has the counter nonzero.
27 Example to check cephfs dirfrag splits are happening:
36 name: "mds.dir_update"
41 def admin_remote(self
):
42 first_mon
= misc
.get_first_mon(self
.ctx
, None)
43 (result
,) = self
.ctx
.cluster
.only(first_mon
).remotes
.keys()
50 overrides
= self
.ctx
.config
.get('overrides', {})
51 misc
.deep_merge(self
.config
, overrides
.get('check-counter', {}))
53 cluster_name
= self
.config
.get('cluster_name', None)
54 dry_run
= self
.config
.get('dry_run', False)
55 targets
= self
.config
.get('counters', {})
57 if cluster_name
is None:
58 cluster_name
= next(iter(self
.ctx
.managers
.keys()))
61 mon_manager
= ceph_manager
.CephManager(self
.admin_remote
, ctx
=self
.ctx
, logger
=log
.getChild('ceph_manager'))
62 active_mgr
= json
.loads(mon_manager
.raw_cluster_cmd("mgr", "dump", "--format=json-pretty"))["active_name"]
64 for daemon_type
, counters
in targets
.items():
65 # List of 'a', 'b', 'c'...
66 daemon_ids
= list(misc
.all_roles_of_type(self
.ctx
.cluster
, daemon_type
))
67 daemons
= dict([(daemon_id
,
68 self
.ctx
.daemons
.get_daemon(daemon_type
, daemon_id
))
69 for daemon_id
in daemon_ids
])
74 for daemon_id
, daemon
in daemons
.items():
75 if not daemon
.running():
76 log
.info("Ignoring daemon {0}, it isn't running".format(daemon_id
))
78 elif daemon_type
== 'mgr' and daemon_id
!= active_mgr
:
81 log
.debug("Getting stats from {0}".format(daemon_id
))
83 manager
= self
.ctx
.managers
[cluster_name
]
84 proc
= manager
.admin_socket(daemon_type
, daemon_id
, ["perf", "dump"])
85 response_data
= proc
.stdout
.getvalue().strip()
87 perf_dump
= json
.loads(response_data
)
89 log
.warning("No admin socket response from {0}, skipping".format(daemon_id
))
94 for counter
in counters
:
95 if isinstance(counter
, dict):
96 name
= counter
['name']
98 minval
= counter
['min']
99 if 'expected_val' in counter
:
100 expected_val
= counter
['expected_val']
107 for key
in name
.split('.'):
109 log
.warning(f
"Counter '{name}' not found on daemon {daemon_type}.{daemon_id}")
116 log
.info(f
"Daemon {daemon_type}.{daemon_id} {name}={val}")
117 if isinstance(minval
, int) and val
>= minval
:
119 elif isinstance(expected_val
, int) and val
== expected_val
:
123 unseen
= set(expected
) - set(seen
)
125 raise RuntimeError("The following counters failed to be set "
126 "on {0} daemons: {1}".format(