]>
git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/cephadm/configchecks.py
5 from mgr_module
import ServiceInfoT
7 from typing
import TYPE_CHECKING
, Any
, Dict
, List
, Optional
, cast
, Tuple
, Callable
10 from cephadm
.module
import CephadmOrchestrator
12 logger
= logging
.getLogger(__name__
)
def __init__(self) -> None:
    """Declare every host fact attribute, initially unset (``None``).

    ``load_facts`` checks that each public (non-underscore) attribute named
    here is a key of the incoming JSON and then copies values in with
    ``setattr``, so this list also acts as the set of keys a facts payload
    is expected to carry.
    """
    self.arch: Optional[str] = None
    self.bios_date: Optional[str] = None
    self.bios_version: Optional[str] = None
    self.cpu_cores: Optional[int] = None
    self.cpu_count: Optional[int] = None
    self.cpu_load: Optional[Dict[str, float]] = None
    self.cpu_model: Optional[str] = None
    self.cpu_threads: Optional[int] = None
    self.flash_capacity: Optional[str] = None
    self.flash_capacity_bytes: Optional[int] = None
    self.flash_count: Optional[int] = None
    self.flash_list: Optional[List[Dict[str, Any]]] = None
    self.hdd_capacity: Optional[str] = None
    self.hdd_capacity_bytes: Optional[int] = None
    self.hdd_count: Optional[int] = None
    self.hdd_list: Optional[List[Dict[str, Any]]] = None
    self.hostname: Optional[str] = None
    # Keyed by interface name; consumers in this file read the per-interface
    # keys 'ipv4_address', 'ipv6_address', 'mtu' and 'speed'.
    self.interfaces: Optional[Dict[str, Dict[str, Any]]] = None
    # Only the first two dot-separated components are used when hosts are
    # grouped by kernel version.
    self.kernel: Optional[str] = None
    self.kernel_parameters: Optional[Dict[str, Any]] = None
    # The 'description' entry is what hosts are grouped by for the LSM check.
    self.kernel_security: Optional[Dict[str, str]] = None
    self.memory_available_kb: Optional[int] = None
    self.memory_free_kb: Optional[int] = None
    self.memory_total_kb: Optional[int] = None
    self.model: Optional[str] = None
    self.nic_count: Optional[int] = None
    self.operating_system: Optional[str] = None
    # Lower-cased and used as a key into the checker's `subscribed` map
    # ('yes' / 'no' / 'unknown' are the keys that map is reset with).
    self.subscribed: Optional[str] = None
    self.system_uptime: Optional[float] = None
    self.timestamp: Optional[float] = None
    self.vendor: Optional[str] = None
51 def load_facts(self
, json_data
: Dict
[str, Any
]) -> None:
53 if isinstance(json_data
, dict):
54 keys
= json_data
.keys()
55 if all([k
in keys
for k
in self
.__dict
__ if not k
.startswith('_')]):
57 for k
in json_data
.keys():
59 setattr(self
, k
, json_data
[k
])
65 def subnet_to_nic(self
, subnet
: str) -> Optional
[str]:
66 ip_version
= ipaddress
.ip_network(subnet
).version
67 logger
.debug(f
"subnet {subnet} is IP version {ip_version}")
68 interfaces
= cast(Dict
[str, Dict
[str, Any
]], self
.interfaces
)
70 for iface
in interfaces
.keys():
73 addr
= interfaces
[iface
].get('ipv4_address', '')
75 addr
= interfaces
[iface
].get('ipv6_address', '')
77 a
= addr
.split('/')[0]
78 if ipaddress
.ip_address(a
) in ipaddress
.ip_network(subnet
):
85 def __init__(self
, subnet
: str, hostname
: str, mtu
: str, speed
: str):
def host_list(self) -> List[str]:
    """Return the hostnames recorded in ``mtu_map`` as a single flat list.

    The per-MTU host lists are concatenated in the map's iteration order.
    A hostname listed under more than one MTU appears once per listing.

    NOTE(review): callers in this file read ``host_list`` as an attribute,
    so a ``@property`` decorator presumably precedes this definition - it
    is not visible in this view.
    """
    return [name for members in self.mtu_map.values() for name in members]
def update(self, hostname: str, mtu: str, speed: str) -> None:
    """Record that *hostname* has a NIC on this subnet with the given MTU and speed.

    Args:
        hostname: host to register.
        mtu: MTU value reported for the NIC; used as the ``mtu_map`` key.
        speed: link speed reported for the NIC; used as the ``speed_map`` key.

    The host is appended to the list for each key unless it is already
    there. Re-registering a host (for example when it has two NICs on the
    same subnet) must leave the existing list untouched: replacing the list
    with ``[hostname]`` in that case would drop every other host already
    recorded under the same MTU or speed.
    """
    mtu_hosts = self.mtu_map.setdefault(mtu, [])
    if hostname not in mtu_hosts:
        mtu_hosts.append(hostname)

    speed_hosts = self.speed_map.setdefault(speed, [])
    if hostname not in speed_hosts:
        speed_hosts.append(hostname)
112 def __repr__(self
) -> str:
114 "subnet": self
.subnet
,
115 "mtu_map": self
.mtu_map
,
116 "speed_map": self
.speed_map
120 class CephadmCheckDefinition
:
121 def __init__(self
, mgr
: "CephadmOrchestrator", healthcheck_name
: str, description
: str, name
: str, func
: Callable
) -> None:
124 self
.healthcheck_name
= healthcheck_name
125 self
.description
= description
130 def status(self
) -> str:
131 check_states
: Dict
[str, str] = {}
132 # Issuing a get each time, since the value could be set at the CLI
133 raw_states
= self
.mgr
.get_store('config_checks')
136 "config_checks setting is not defined - unable to determine healthcheck state")
140 check_states
= json
.loads(raw_states
)
141 except json
.JSONDecodeError
:
142 self
.log
.error("Unable to serialize the config_checks settings to JSON")
145 return check_states
.get(self
.name
, 'Missing')
147 def to_json(self
) -> Dict
[str, Any
]:
149 "healthcheck_name": self
.healthcheck_name
,
150 "description": self
.description
,
152 "status": self
.status
,
153 "valid": True if self
.func
else False
157 class CephadmConfigChecks
:
158 def __init__(self
, mgr
: "CephadmOrchestrator"):
159 self
.mgr
: "CephadmOrchestrator" = mgr
160 self
.health_checks
: List
[CephadmCheckDefinition
] = [
161 CephadmCheckDefinition(mgr
, "CEPHADM_CHECK_KERNEL_LSM",
162 "checks SELINUX/Apparmor profiles are consistent across cluster hosts",
164 self
._check
_kernel
_lsm
),
165 CephadmCheckDefinition(mgr
, "CEPHADM_CHECK_SUBSCRIPTION",
166 "checks subscription states are consistent for all cluster hosts",
168 self
._check
_subscription
),
169 CephadmCheckDefinition(mgr
, "CEPHADM_CHECK_PUBLIC_MEMBERSHIP",
170 "check that all hosts have a NIC on the Ceph public_netork",
172 self
._check
_public
_network
),
173 CephadmCheckDefinition(mgr
, "CEPHADM_CHECK_MTU",
174 "check that OSD hosts share a common MTU setting",
176 self
._check
_osd
_mtu
),
177 CephadmCheckDefinition(mgr
, "CEPHADM_CHECK_LINKSPEED",
178 "check that OSD hosts share a common linkspeed",
180 self
._check
_osd
_linkspeed
),
181 CephadmCheckDefinition(mgr
, "CEPHADM_CHECK_NETWORK_MISSING",
182 "checks that the cluster/public networks defined exist on the Ceph hosts",
184 self
._check
_network
_missing
),
185 CephadmCheckDefinition(mgr
, "CEPHADM_CHECK_CEPH_RELEASE",
186 "check for Ceph version consistency - ceph daemons should be on the same release (unless upgrade is active)",
188 self
._check
_release
_parity
),
189 CephadmCheckDefinition(mgr
, "CEPHADM_CHECK_KERNEL_VERSION",
190 "checks that the MAJ.MIN of the kernel on Ceph hosts is consistent",
192 self
._check
_kernel
_version
),
195 self
.host_facts
: Dict
[str, HostFacts
] = {}
196 self
.subnet_lookup
: Dict
[str, SubnetLookup
] = {} # subnet CIDR -> SubnetLookup Object
197 self
.lsm_to_host
: Dict
[str, List
[str]] = {}
198 self
.subscribed
: Dict
[str, List
[str]] = {
203 self
.host_to_role
: Dict
[str, List
[str]] = {}
204 self
.kernel_to_hosts
: Dict
[str, List
[str]] = {}
206 self
.public_network_list
: List
[str] = []
207 self
.cluster_network_list
: List
[str] = []
208 self
.health_check_raised
= False
209 self
.active_checks
: List
[str] = [] # checks enabled and executed
210 self
.skipped_checks
: List
[str] = [] # checks enabled, but skipped due to a pre-req failure
212 raw_checks
= self
.mgr
.get_store('config_checks')
214 # doesn't exist, so seed the checks
215 self
.seed_config_checks()
217 # setting is there, so ensure there is an entry for each of the checks that
218 # this module supports (account for upgrades/changes)
220 config_checks
= json
.loads(raw_checks
)
221 except json
.JSONDecodeError
:
222 self
.log
.error("Unable to serialize config_checks config. Reset to defaults")
223 self
.seed_config_checks()
225 # Ensure the config_checks setting is consistent with this module
226 from_config
= set(config_checks
.keys())
227 from_module
= set([c
.name
for c
in self
.health_checks
])
228 old_checks
= from_config
.difference(from_module
)
229 new_checks
= from_module
.difference(from_config
)
232 self
.log
.debug(f
"old checks being removed from config_checks: {old_checks}")
236 self
.log
.debug(f
"new checks being added to config_checks: {new_checks}")
238 config_checks
[i
] = 'enabled'
240 if old_checks
or new_checks
:
242 f
"config_checks updated: {len(old_checks)} removed, {len(new_checks)} added")
243 self
.mgr
.set_store('config_checks', json
.dumps(config_checks
))
245 self
.log
.debug("config_checks match module definition")
def lookup_check(self, key_value: str, key_name: str = 'name') -> Optional[CephadmCheckDefinition]:
    """Find the first registered check whose *key_name* attribute equals *key_value*.

    Args:
        key_value: value to look for.
        key_name: attribute of the check definition to compare against;
            defaults to ``'name'``.

    Returns:
        The first matching entry of ``health_checks`` in list order, or
        ``None`` when no entry matches.
    """
    matches = (
        definition
        for definition in self.health_checks
        if getattr(definition, key_name) == key_value
    )
    return next(matches, None)
def defined_checks(self) -> int:
    """Return how many check definitions are registered in ``health_checks``."""
    registered = self.health_checks
    return len(registered)
def active_checks_count(self) -> int:
    """Return the number of entries currently held in ``active_checks``."""
    executed = self.active_checks
    return len(executed)
def seed_config_checks(self) -> None:
    """Store a ``config_checks`` setting that marks every known check as enabled.

    The value written through ``mgr.set_store`` is a JSON object mapping
    each registered check's ``name`` to the string ``'enabled'``.
    """
    names = [definition.name for definition in self.health_checks]
    payload = json.dumps(dict.fromkeys(names, 'enabled'))
    self.mgr.set_store('config_checks', payload)
def skipped_checks_count(self) -> int:
    """Return the number of entries currently held in ``skipped_checks``."""
    bypassed = self.skipped_checks
    return len(bypassed)
def to_json(self) -> List[Dict[str, str]]:
    """Return the ``to_json()`` result of every registered check, in list order."""
    rendered = []
    for definition in self.health_checks:
        rendered.append(definition.to_json())
    return rendered
273 def load_network_config(self
) -> None:
274 ret
, out
, _err
= self
.mgr
.check_mon_command({
275 'prefix': 'config dump',
281 if item
['name'] == "cluster_network":
282 self
.cluster_network_list
= item
['value'].strip().split(',')
283 if item
['name'] == "public_network":
284 self
.public_network_list
= item
['value'].strip().split(',')
286 self
.log
.debug(f
"public networks {self.public_network_list}")
287 self
.log
.debug(f
"cluster networks {self.cluster_network_list}")
289 def _update_subnet(self
, subnet
: str, hostname
: str, nic
: Dict
[str, Any
]) -> None:
290 mtu
= nic
.get('mtu', None)
291 speed
= nic
.get('speed', None)
292 if not mtu
or not speed
:
295 this_subnet
= self
.subnet_lookup
.get(subnet
, None)
297 this_subnet
.update(hostname
, mtu
, speed
)
299 self
.subnet_lookup
[subnet
] = SubnetLookup(subnet
, hostname
, mtu
, speed
)
301 def _update_subnet_lookups(self
, hostname
: str, devname
: str, nic
: Dict
[str, Any
]) -> None:
302 if nic
['ipv4_address']:
304 iface4
= ipaddress
.IPv4Interface(nic
['ipv4_address'])
305 subnet
= str(iface4
.network
)
306 except ipaddress
.AddressValueError
as e
:
307 self
.log
.exception(f
"Invalid network on {hostname}, interface {devname} : {str(e)}")
309 self
._update
_subnet
(subnet
, hostname
, nic
)
311 if nic
['ipv6_address']:
313 iface6
= ipaddress
.IPv6Interface(nic
['ipv6_address'])
314 subnet
= str(iface6
.network
)
315 except ipaddress
.AddressValueError
as e
:
316 self
.log
.exception(f
"Invalid network on {hostname}, interface {devname} : {str(e)}")
318 self
._update
_subnet
(subnet
, hostname
, nic
)
320 def hosts_with_role(self
, role
: str) -> List
[str]:
322 for hostname
, roles
in self
.host_to_role
.items():
324 host_list
.append(hostname
)
def reset(self) -> None:
    """Empty the per-run lookup maps so they can be rebuilt from host facts.

    The four dict-based maps are cleared in place (the dict objects are
    kept). The ``'yes'``, ``'no'`` and ``'unknown'`` entries of
    ``subscribed`` are each rebound to a new, separate empty list; any
    other key in ``subscribed`` is left alone.
    """
    for lookup in (self.subnet_lookup, self.lsm_to_host):
        lookup.clear()

    for state in ('yes', 'no', 'unknown'):
        self.subscribed[state] = []

    for lookup in (self.host_to_role, self.kernel_to_hosts):
        lookup.clear()
336 def _get_majority(self
, data
: Dict
[str, List
[str]]) -> Tuple
[str, int]:
337 assert isinstance(data
, dict)
342 if len(data
[key
]) > majority_count
:
343 majority_count
= len(data
[key
])
345 return majority_key
, majority_count
347 def get_ceph_metadata(self
) -> Dict
[str, Optional
[Dict
[str, str]]]:
348 """Build a map of service -> service metadata"""
349 service_map
: Dict
[str, Optional
[Dict
[str, str]]] = {}
351 for server
in self
.mgr
.list_servers():
352 for service
in cast(List
[ServiceInfoT
], server
.get('services', [])):
356 f
"{service['type']}.{service['id']}":
357 self
.mgr
.get_metadata(service
['type'], service
['id'])
362 def _check_kernel_lsm(self
) -> None:
363 if len(self
.lsm_to_host
.keys()) > 1:
365 majority_hosts_ptr
, majority_hosts_count
= self
._get
_majority
(self
.lsm_to_host
)
366 lsm_copy
= self
.lsm_to_host
.copy()
367 del lsm_copy
[majority_hosts_ptr
]
369 for lsm_key
in lsm_copy
.keys():
370 for host
in lsm_copy
[lsm_key
]:
372 f
"{host} has inconsistent KSM settings compared to the "
373 f
"majority of hosts({majority_hosts_count}) in the cluster")
374 host_sfx
= 's' if len(details
) > 1 else ''
375 self
.mgr
.health_checks
['CEPHADM_CHECK_KERNEL_LSM'] = {
376 'severity': 'warning',
377 'summary': f
"Kernel Security Module (SELinux/AppArmor) is inconsistent for "
378 f
"{len(details)} host{host_sfx}",
379 'count': len(details
),
382 self
.health_check_raised
= True
384 self
.mgr
.health_checks
.pop('CEPHADM_CHECK_KERNEL_LSM', None)
386 def _check_subscription(self
) -> None:
387 if len(self
.subscribed
['yes']) > 0 and len(self
.subscribed
['no']) > 0:
388 # inconsistent subscription states - CEPHADM_CHECK_SUBSCRIPTION
390 for host
in self
.subscribed
['no']:
391 details
.append(f
"{host} does not have an active subscription")
392 self
.mgr
.health_checks
['CEPHADM_CHECK_SUBSCRIPTION'] = {
393 'severity': 'warning',
394 'summary': f
"Support subscriptions inactive on {len(details)} host(s)"
395 f
"({len(self.subscribed['yes'])} subscriptions active)",
396 'count': len(details
),
399 self
.health_check_raised
= True
401 self
.mgr
.health_checks
.pop('CEPHADM_CHECK_SUBSCRIPTION', None)
403 def _check_public_network(self
) -> None:
404 hosts_remaining
: List
[str] = list(self
.mgr
.cache
.facts
.keys())
405 hosts_removed
: List
[str] = []
406 self
.log
.debug(f
"checking public network membership for: {hosts_remaining}")
408 for p_net
in self
.public_network_list
:
409 self
.log
.debug(f
"checking network {p_net}")
410 subnet_data
= self
.subnet_lookup
.get(p_net
, None)
411 self
.log
.debug(f
"subnet data - {subnet_data}")
414 hosts_in_subnet
= subnet_data
.host_list
415 for host
in hosts_in_subnet
:
416 if host
in hosts_remaining
:
417 hosts_remaining
.remove(host
)
418 hosts_removed
.append(host
)
420 if host
not in hosts_removed
:
421 self
.log
.debug(f
"host={host}, subnet={p_net}")
423 "Host listed for a subnet but not present in the host facts?")
425 # Ideally all hosts will have been removed since they have an IP on at least
426 # one of the public networks
428 if len(hosts_remaining
) != len(self
.mgr
.cache
.facts
):
429 # public network is visible on some hosts
431 f
"{host} does not have an interface on any public network" for host
in hosts_remaining
]
433 self
.mgr
.health_checks
['CEPHADM_CHECK_PUBLIC_MEMBERSHIP'] = {
434 'severity': 'warning',
435 'summary': f
"Public network(s) is not directly accessible from {len(hosts_remaining)} "
437 'count': len(details
),
440 self
.health_check_raised
= True
442 self
.mgr
.health_checks
.pop('CEPHADM_CHECK_PUBLIC_MEMBERSHIP', None)
444 def _check_osd_mtu(self
) -> None:
445 osd_hosts
= set(self
.hosts_with_role('osd'))
446 osd_network_list
= self
.cluster_network_list
or self
.public_network_list
447 mtu_errors
: List
[str] = []
449 for osd_net
in osd_network_list
:
450 subnet_data
= self
.subnet_lookup
.get(osd_net
, None)
454 self
.log
.debug(f
"processing mtu map : {json.dumps(subnet_data.mtu_map)}")
459 for mtu
, host_list
in subnet_data
.mtu_map
.items():
460 mtu_hosts
= set(host_list
)
461 mtu_count
[mtu
] = len(mtu_hosts
)
462 errors
= osd_hosts
.difference(mtu_hosts
)
465 if len(errors
) > max_hosts
:
469 self
.log
.debug("MTU problems detected")
470 self
.log
.debug(f
"most hosts using {mtu_ptr}")
471 mtu_copy
= subnet_data
.mtu_map
.copy()
472 del mtu_copy
[mtu_ptr
]
473 for bad_mtu
in mtu_copy
:
474 for h
in mtu_copy
[bad_mtu
]:
476 host
.load_facts(self
.mgr
.cache
.facts
[h
])
478 f
"host {h}({host.subnet_to_nic(osd_net)}) is using MTU "
479 f
"{bad_mtu} on {osd_net}, NICs on other hosts use {mtu_ptr}")
482 self
.mgr
.health_checks
['CEPHADM_CHECK_MTU'] = {
483 'severity': 'warning',
484 'summary': f
"MTU setting inconsistent on osd network NICs on {len(mtu_errors)} host(s)",
485 'count': len(mtu_errors
),
486 'detail': mtu_errors
,
488 self
.health_check_raised
= True
490 self
.mgr
.health_checks
.pop('CEPHADM_CHECK_MTU', None)
492 def _check_osd_linkspeed(self
) -> None:
493 osd_hosts
= set(self
.hosts_with_role('osd'))
494 osd_network_list
= self
.cluster_network_list
or self
.public_network_list
496 linkspeed_errors
= []
498 for osd_net
in osd_network_list
:
499 subnet_data
= self
.subnet_lookup
.get(osd_net
, None)
503 self
.log
.debug(f
"processing subnet : {subnet_data}")
509 for speed
, host_list
in subnet_data
.speed_map
.items():
510 speed_hosts
= set(host_list
)
511 speed_count
[speed
] = len(speed_hosts
)
512 errors
= osd_hosts
.difference(speed_hosts
)
514 diffs
[speed
] = errors
515 if len(errors
) > max_hosts
:
519 self
.log
.debug("linkspeed issue(s) detected")
520 self
.log
.debug(f
"most hosts using {speed_ptr}")
521 speed_copy
= subnet_data
.speed_map
.copy()
522 del speed_copy
[speed_ptr
]
523 for bad_speed
in speed_copy
:
524 if bad_speed
> speed_ptr
:
525 # skip speed is better than most...it can stay!
527 for h
in speed_copy
[bad_speed
]:
529 host
.load_facts(self
.mgr
.cache
.facts
[h
])
530 linkspeed_errors
.append(
531 f
"host {h}({host.subnet_to_nic(osd_net)}) has linkspeed of "
532 f
"{bad_speed} on {osd_net}, NICs on other hosts use {speed_ptr}")
535 self
.mgr
.health_checks
['CEPHADM_CHECK_LINKSPEED'] = {
536 'severity': 'warning',
537 'summary': "Link speed is inconsistent on osd network NICs for "
538 f
"{len(linkspeed_errors)} host(s)",
539 'count': len(linkspeed_errors
),
540 'detail': linkspeed_errors
,
542 self
.health_check_raised
= True
544 self
.mgr
.health_checks
.pop('CEPHADM_CHECK_LINKSPEED', None)
546 def _check_network_missing(self
) -> None:
547 all_networks
= self
.public_network_list
.copy()
548 all_networks
.extend(self
.cluster_network_list
)
550 missing_networks
= []
551 for subnet
in all_networks
:
552 subnet_data
= self
.subnet_lookup
.get(subnet
, None)
555 missing_networks
.append(f
"{subnet} not found on any host in the cluster")
557 f
"Network {subnet} has been defined, but is not present on any host")
560 net_sfx
= 's' if len(missing_networks
) > 1 else ''
561 self
.mgr
.health_checks
['CEPHADM_CHECK_NETWORK_MISSING'] = {
562 'severity': 'warning',
563 'summary': f
"Public/cluster network{net_sfx} defined, but can not be found on "
565 'count': len(missing_networks
),
566 'detail': missing_networks
,
568 self
.health_check_raised
= True
570 self
.mgr
.health_checks
.pop('CEPHADM_CHECK_NETWORK_MISSING', None)
572 def _check_release_parity(self
) -> None:
573 upgrade_status
= self
.mgr
.upgrade
.upgrade_status()
574 if upgrade_status
.in_progress
:
575 # skip version consistency checks during an upgrade cycle
576 self
.skipped_checks
.append('ceph_release')
579 services
= self
.get_ceph_metadata()
580 self
.log
.debug(json
.dumps(services
))
581 version_to_svcs
: Dict
[str, List
[str]] = {}
585 metadata
= cast(Dict
[str, str], services
[svc
])
586 v
= metadata
.get('ceph_release', '')
587 if v
in version_to_svcs
:
588 version_to_svcs
[v
].append(svc
)
590 version_to_svcs
[v
] = [svc
]
592 if len(version_to_svcs
) > 1:
593 majority_ptr
, _majority_count
= self
._get
_majority
(version_to_svcs
)
594 ver_copy
= version_to_svcs
.copy()
595 del ver_copy
[majority_ptr
]
598 for svc
in ver_copy
[v
]:
600 f
"{svc} is running {v} (majority of cluster is using {majority_ptr})")
602 self
.mgr
.health_checks
['CEPHADM_CHECK_CEPH_RELEASE'] = {
603 'severity': 'warning',
604 'summary': 'Ceph cluster running mixed ceph releases',
605 'count': len(details
),
608 self
.health_check_raised
= True
610 f
"running with {len(version_to_svcs)} different ceph releases within this cluster")
612 self
.mgr
.health_checks
.pop('CEPHADM_CHECK_CEPH_RELEASE', None)
614 def _check_kernel_version(self
) -> None:
615 if len(self
.kernel_to_hosts
.keys()) > 1:
616 majority_hosts_ptr
, majority_hosts_count
= self
._get
_majority
(self
.kernel_to_hosts
)
617 kver_copy
= self
.kernel_to_hosts
.copy()
618 del kver_copy
[majority_hosts_ptr
]
621 for h
in kver_copy
[k
]:
623 f
"host {h} running kernel {k}, majority of hosts({majority_hosts_count}) "
624 f
"running {majority_hosts_ptr}")
626 self
.log
.warning("mixed kernel versions detected")
627 self
.mgr
.health_checks
['CEPHADM_CHECK_KERNEL_VERSION'] = {
628 'severity': 'warning',
629 'summary': f
"{len(details)} host(s) running different kernel versions",
630 'count': len(details
),
633 self
.health_check_raised
= True
635 self
.mgr
.health_checks
.pop('CEPHADM_CHECK_KERNEL_VERSION', None)
637 def _process_hosts(self
) -> None:
638 self
.log
.debug(f
"processing data from {len(self.mgr.cache.facts)} hosts")
639 for hostname
in self
.mgr
.cache
.facts
:
641 host
.load_facts(self
.mgr
.cache
.facts
[hostname
])
643 self
.log
.warning(f
"skipping {hostname} - incompatible host facts")
646 kernel_lsm
= cast(Dict
[str, str], host
.kernel_security
)
647 lsm_desc
= kernel_lsm
.get('description', '')
649 if lsm_desc
in self
.lsm_to_host
:
650 self
.lsm_to_host
[lsm_desc
].append(hostname
)
652 self
.lsm_to_host
[lsm_desc
] = [hostname
]
654 subscription_state
= host
.subscribed
.lower() if host
.subscribed
else None
655 if subscription_state
:
656 self
.subscribed
[subscription_state
].append(hostname
)
658 interfaces
= cast(Dict
[str, Dict
[str, Any
]], host
.interfaces
)
659 for name
in interfaces
.keys():
662 self
._update
_subnet
_lookups
(hostname
, name
, interfaces
[name
])
665 kernel_maj_min
= '.'.join(host
.kernel
.split('.')[0:2])
666 if kernel_maj_min
in self
.kernel_to_hosts
:
667 self
.kernel_to_hosts
[kernel_maj_min
].append(hostname
)
669 self
.kernel_to_hosts
[kernel_maj_min
] = [hostname
]
671 self
.log
.warning(f
"Host gather facts for {hostname} is missing kernel information")
673 # NOTE: if daemondescription had systemd enabled state, we could check for systemd 'tampering'
674 self
.host_to_role
[hostname
] = self
.mgr
.cache
.get_daemon_types(hostname
)
676 def run_checks(self
) -> None:
677 checks_enabled
= self
.mgr
.get_module_option('config_checks_enabled')
678 if checks_enabled
is not True:
683 check_config
: Dict
[str, str] = {}
684 checks_raw
: Optional
[str] = self
.mgr
.get_store('config_checks')
687 check_config
.update(json
.loads(checks_raw
))
688 except json
.JSONDecodeError
:
690 "mgr/cephadm/config_checks is not JSON serializable - all checks will run")
692 # build lookup "maps" by walking the host facts, once
693 self
._process
_hosts
()
695 self
.health_check_raised
= False
696 self
.active_checks
= []
697 self
.skipped_checks
= []
699 # process all healthchecks that are not explicitly disabled
700 for health_check
in self
.health_checks
:
701 if check_config
.get(health_check
.name
, '') != 'disabled':
702 self
.active_checks
.append(health_check
.name
)
705 if self
.health_check_raised
:
706 self
.log
.warning("CEPHADM checks have detected configuration anomalies")
709 f
"CEPHADM {self.active_checks_count}/{self.defined_checks} checks enabled "
710 f
"and executed ({self.skipped_checks_count} bypassed, "
711 f
"{self.defined_checks - self.active_checks_count} disabled). No issues detected")
713 self
.mgr
.set_health_checks(self
.mgr
.health_checks
)