]>
git.proxmox.com Git - ceph.git/blob - ceph/src/pybind/mgr/cephadm/tests/test_configchecks.py
8 from time
import time
as now
10 from ..configchecks
import CephadmConfigChecks
11 from ..inventory
import HostCache
12 from ..upgrade
import CephadmUpgrade
, UpgradeState
13 from orchestrator
import DaemonDescription
15 from typing
import List
, Dict
, Any
, Optional
17 logger
= logging
.getLogger(__name__
)
18 logger
.setLevel(logging
.DEBUG
)
22 "bios_date": "04/01/2014",
31 "cpu_model": "Intel® Xeon® Processor E5-2698 v3",
33 "flash_capacity": "4.0TB",
34 "flash_capacity_bytes": 4000797868032,
38 "description": "ATA CT2000MX500SSD1 (2.0TB)",
40 "disk_size_bytes": 2000398934016,
41 "model": "CT2000MX500SSD1",
44 "wwid": "t10.ATA CT2000MX500SSD1 193023156DE0"
47 "description": "ATA CT2000MX500SSD1 (2.0TB)",
49 "disk_size_bytes": 2000398934016,
50 "model": "CT2000MX500SSD1",
53 "wwid": "t10.ATA CT2000MX500SSD1 193023156DE0"
56 "hdd_capacity": "16.0TB",
57 "hdd_capacity_bytes": 16003148120064,
61 "description": "ST4000VN008-2DR1 (4.0TB)",
63 "disk_size_bytes": 4000787030016,
64 "model": "ST4000VN008-2DR1",
67 "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ"
70 "description": "ST4000VN008-2DR1 (4.0TB)",
72 "disk_size_bytes": 4000787030016,
73 "model": "ST4000VN008-2DR1",
76 "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ"
79 "description": "ST4000VN008-2DR1 (4.0TB)",
81 "disk_size_bytes": 4000787030016,
82 "model": "ST4000VN008-2DR1",
85 "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ"
88 "description": "ST4000VN008-2DR1 (4.0TB)",
90 "disk_size_bytes": 4000787030016,
91 "model": "ST4000VN008-2DR1",
94 "wwid": "t10.ATA ST4000VN008-2DR1 Z340EPBJ"
101 "iftype": "physical",
102 "ipv4_address": "10.7.17.1/24",
103 "ipv6_address": "fe80::215:17ff:feab:50e2/64",
104 "lower_devs_list": [],
106 "nic_type": "ethernet",
109 "upper_devs_list": [],
113 "iftype": "physical",
114 "ipv4_address": "10.7.18.1/24",
115 "ipv6_address": "fe80::215:17ff:feab:50e2/64",
116 "lower_devs_list": [],
118 "nic_type": "ethernet",
121 "upper_devs_list": [],
125 "iftype": "physical",
126 "ipv4_address": "10.7.19.1/24",
127 "ipv6_address": "fe80::76d4:35ff:fe58:9a79/64",
128 "lower_devs_list": [],
130 "nic_type": "ethernet",
133 "upper_devs_list": []
136 "kernel": "4.18.0-240.10.1.el8_3.x86_64",
137 "kernel_parameters": {
138 "net.ipv4.ip_nonlocal_bind": "0",
141 "SELINUX": "enforcing",
142 "SELINUXTYPE": "targeted",
143 "description": "SELinux: Enabled(enforcing, targeted)",
146 "memory_available_kb": 19489212,
147 "memory_free_kb": 245164,
148 "memory_total_kb": 32900916,
149 "model": "StorageHeavy",
151 "operating_system": "Red Hat Enterprise Linux 8.3 (Ootpa)",
153 "system_uptime": 777600.0,
155 "vendor": "Ceph Servers Inc",
159 def role_list(n
: int) -> List
[str]:
161 return ['mon', 'mgr', 'osd']
163 return ['mon', 'mds', 'osd']
168 def generate_testdata(count
: int = 10, public_network
: str = '10.7.17.0/24', cluster_network
: str = '10.7.18.0/24'):
169 # public network = eth0, cluster_network = eth1
171 assert public_network
172 num_disks
= host_sample
['hdd_count']
177 public_netmask
= public_network
.split('/')[1]
181 public_ip_list
= [str(i
) for i
in list(ipaddress
.ip_network(public_network
).hosts())]
183 cluster_ip_list
= [str(i
) for i
in list(ipaddress
.ip_network(cluster_network
).hosts())]
184 cluster_netmask
= cluster_network
.split('/')[1]
186 for n
in range(1, count
+ 1, 1):
188 new_host
= copy
.deepcopy(host_sample
)
189 hostname
= f
"node-{n}.ceph.com"
191 new_host
['hostname'] = hostname
192 new_host
['interfaces']['eth0']['ipv4_address'] = f
"{public_ip_list.pop(0)}/{public_netmask}"
194 new_host
['interfaces']['eth1']['ipv4_address'] = f
"{cluster_ip_list.pop(0)}/{cluster_netmask}"
196 new_host
['interfaces']['eth1']['ipv4_address'] = ''
198 hosts
[hostname
] = new_host
199 daemons
[hostname
] = {}
200 for r
in role_list(n
):
203 for n
in range(num_disks
):
204 osd
= DaemonDescription(
205 hostname
=hostname
, daemon_type
='osd', daemon_id
=osd_num
)
206 name
= f
"osd.{osd_num}"
207 daemons
[hostname
][name
] = osd
208 daemon_to_host
[name
] = hostname
211 name
= f
"{r}.{hostname}"
212 daemons
[hostname
][name
] = DaemonDescription(
213 hostname
=hostname
, daemon_type
=r
, daemon_id
=hostname
)
214 daemon_to_host
[name
] = hostname
216 logger
.debug(f
"daemon to host lookup - {json.dumps(daemon_to_host)}")
217 return hosts
, daemons
, daemon_to_host
222 """Provide a fake ceph mgr object preloaded with a configuration"""
224 mgr
.cache
.facts
, mgr
.cache
.daemons
, mgr
.daemon_to_host
= \
225 generate_testdata(public_network
='10.9.64.0/24', cluster_network
='')
226 mgr
.module_option
.update({
227 "config_checks_enabled": True,
236 self
.module_option
= {}
237 self
.health_checks
= {}
238 self
.default_version
= 'quincy'
239 self
.version_overrides
= {}
240 self
.daemon_to_host
= {}
242 self
.cache
= HostCache(self
)
243 self
.upgrade
= CephadmUpgrade(self
)
245 def set_health_checks(self
, checks
: dict):
248 def get_module_option(self
, keyname
: str) -> Optional
[str]:
249 return self
.module_option
.get(keyname
, None)
251 def set_module_option(self
, keyname
: str, value
: str) -> None:
254 def get_store(self
, keyname
: str, default
=None) -> Optional
[str]:
255 return self
.datastore
.get(keyname
, None)
257 def set_store(self
, keyname
: str, value
: str) -> None:
258 self
.datastore
[keyname
] = value
261 def _ceph_get_server(self
) -> None:
264 def get_metadata(self
, daemon_type
: str, daemon_id
: str) -> Dict
[str, Any
]:
265 key
= f
"{daemon_type}.{daemon_id}"
266 if key
in self
.version_overrides
:
267 logger
.debug(f
"override applied for {key}")
268 version_str
= self
.version_overrides
[key
]
270 version_str
= self
.default_version
272 return {"ceph_release": version_str
, "hostname": self
.daemon_to_host
[key
]}
274 def list_servers(self
) -> List
[Dict
[str, List
[Dict
[str, str]]]]:
275 num_disks
= host_sample
['hdd_count']
279 for hostname
in self
.cache
.facts
:
281 host_num
= int(hostname
.split('.')[0].split('-')[1])
283 for r
in role_list(host_num
):
285 for _n
in range(num_disks
):
297 service_map
.append({"services": svc_list
})
298 logger
.debug(f
"services map - {json.dumps(service_map)}")
301 def use_repo_digest(self
) -> None:
305 class TestConfigCheck
:
307 def test_to_json(self
, mgr
):
308 checker
= CephadmConfigChecks(mgr
)
309 out
= checker
.to_json()
311 assert len(out
) == len(checker
.health_checks
)
313 def test_lookup_check(self
, mgr
):
314 checker
= CephadmConfigChecks(mgr
)
315 check
= checker
.lookup_check('osd_mtu_size')
316 logger
.debug(json
.dumps(check
.to_json()))
318 assert check
.healthcheck_name
== "CEPHADM_CHECK_MTU"
320 def test_old_checks_removed(self
, mgr
):
321 mgr
.datastore
.update({
322 "config_checks": '{"bogus_one": "enabled", "bogus_two": "enabled", '
323 '"kernel_security": "enabled", "public_network": "enabled", '
324 '"kernel_version": "enabled", "network_missing": "enabled", '
325 '"osd_mtu_size": "enabled", "osd_linkspeed": "enabled", '
326 '"os_subscription": "enabled", "ceph_release": "enabled"}'
328 checker
= CephadmConfigChecks(mgr
)
329 raw
= mgr
.get_store('config_checks')
330 checks
= json
.loads(raw
)
331 assert "bogus_one" not in checks
332 assert "bogus_two" not in checks
333 assert len(checks
) == len(checker
.health_checks
)
335 def test_new_checks(self
, mgr
):
336 mgr
.datastore
.update({
337 "config_checks": '{"kernel_security": "enabled", "public_network": "enabled", '
338 '"osd_mtu_size": "enabled", "osd_linkspeed": "enabled"}'
340 checker
= CephadmConfigChecks(mgr
)
341 raw
= mgr
.get_store('config_checks')
342 checks
= json
.loads(raw
)
343 assert len(checks
) == len(checker
.health_checks
)
345 def test_no_issues(self
, mgr
):
346 checker
= CephadmConfigChecks(mgr
)
347 checker
.cluster_network_list
= []
348 checker
.public_network_list
= ['10.9.64.0/24']
351 assert not mgr
.health_checks
353 def test_no_public_network(self
, mgr
):
354 bad_node
= mgr
.cache
.facts
['node-1.ceph.com']
355 bad_node
['interfaces']['eth0']['ipv4_address'] = "192.168.1.20/24"
356 checker
= CephadmConfigChecks(mgr
)
357 checker
.cluster_network_list
= []
358 checker
.public_network_list
= ['10.9.64.0/24']
360 logger
.debug(mgr
.health_checks
)
361 assert len(mgr
.health_checks
) == 1
362 assert 'CEPHADM_CHECK_PUBLIC_MEMBERSHIP' in mgr
.health_checks
363 assert mgr
.health_checks
['CEPHADM_CHECK_PUBLIC_MEMBERSHIP']['detail'][0] == \
364 'node-1.ceph.com does not have an interface on any public network'
366 def test_missing_networks(self
, mgr
):
368 checker
= CephadmConfigChecks(mgr
)
369 checker
.cluster_network_list
= []
370 checker
.public_network_list
= ['10.9.66.0/24']
373 logger
.info(json
.dumps(mgr
.health_checks
))
374 logger
.info(checker
.subnet_lookup
)
375 assert len(mgr
.health_checks
) == 1
376 assert 'CEPHADM_CHECK_NETWORK_MISSING' in mgr
.health_checks
377 assert mgr
.health_checks
['CEPHADM_CHECK_NETWORK_MISSING']['detail'][0] == \
378 "10.9.66.0/24 not found on any host in the cluster"
380 def test_bad_mtu_single(self
, mgr
):
382 bad_node
= mgr
.cache
.facts
['node-1.ceph.com']
383 bad_node
['interfaces']['eth0']['mtu'] = 1500
385 checker
= CephadmConfigChecks(mgr
)
386 checker
.cluster_network_list
= []
387 checker
.public_network_list
= ['10.9.64.0/24']
390 logger
.info(json
.dumps(mgr
.health_checks
))
391 logger
.info(checker
.subnet_lookup
)
392 assert "CEPHADM_CHECK_MTU" in mgr
.health_checks
and len(mgr
.health_checks
) == 1
393 assert mgr
.health_checks
['CEPHADM_CHECK_MTU']['detail'][0] == \
394 'host node-1.ceph.com(eth0) is using MTU 1500 on 10.9.64.0/24, NICs on other hosts use 9000'
396 def test_bad_mtu_multiple(self
, mgr
):
399 bad_node
= mgr
.cache
.facts
[f
'node-{n}.ceph.com']
400 bad_node
['interfaces']['eth0']['mtu'] = 1500
402 checker
= CephadmConfigChecks(mgr
)
403 checker
.cluster_network_list
= []
404 checker
.public_network_list
= ['10.9.64.0/24']
407 logger
.info(json
.dumps(mgr
.health_checks
))
408 logger
.info(checker
.subnet_lookup
)
409 assert "CEPHADM_CHECK_MTU" in mgr
.health_checks
and len(mgr
.health_checks
) == 1
410 assert mgr
.health_checks
['CEPHADM_CHECK_MTU']['count'] == 2
412 def test_bad_linkspeed_single(self
, mgr
):
414 bad_node
= mgr
.cache
.facts
['node-1.ceph.com']
415 bad_node
['interfaces']['eth0']['speed'] = 100
417 checker
= CephadmConfigChecks(mgr
)
418 checker
.cluster_network_list
= []
419 checker
.public_network_list
= ['10.9.64.0/24']
422 logger
.info(json
.dumps(mgr
.health_checks
))
423 logger
.info(checker
.subnet_lookup
)
424 assert mgr
.health_checks
425 assert "CEPHADM_CHECK_LINKSPEED" in mgr
.health_checks
and len(mgr
.health_checks
) == 1
426 assert mgr
.health_checks
['CEPHADM_CHECK_LINKSPEED']['detail'][0] == \
427 'host node-1.ceph.com(eth0) has linkspeed of 100 on 10.9.64.0/24, NICs on other hosts use 1000'
429 def test_super_linkspeed_single(self
, mgr
):
431 bad_node
= mgr
.cache
.facts
['node-1.ceph.com']
432 bad_node
['interfaces']['eth0']['speed'] = 10000
434 checker
= CephadmConfigChecks(mgr
)
435 checker
.cluster_network_list
= []
436 checker
.public_network_list
= ['10.9.64.0/24']
439 logger
.info(json
.dumps(mgr
.health_checks
))
440 logger
.info(checker
.subnet_lookup
)
441 assert not mgr
.health_checks
443 def test_release_mismatch_single(self
, mgr
):
445 mgr
.version_overrides
= {
449 checker
= CephadmConfigChecks(mgr
)
450 checker
.cluster_network_list
= []
451 checker
.public_network_list
= ['10.9.64.0/24']
454 logger
.info(json
.dumps(mgr
.health_checks
))
455 assert mgr
.health_checks
456 assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr
.health_checks
and len(mgr
.health_checks
) == 1
457 assert mgr
.health_checks
['CEPHADM_CHECK_CEPH_RELEASE']['detail'][0] == \
458 'osd.1 is running pacific (majority of cluster is using quincy)'
460 def test_release_mismatch_multi(self
, mgr
):
462 mgr
.version_overrides
= {
467 checker
= CephadmConfigChecks(mgr
)
468 checker
.cluster_network_list
= []
469 checker
.public_network_list
= ['10.9.64.0/24']
472 logger
.info(json
.dumps(mgr
.health_checks
))
473 assert mgr
.health_checks
474 assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr
.health_checks
and len(mgr
.health_checks
) == 1
475 assert len(mgr
.health_checks
['CEPHADM_CHECK_CEPH_RELEASE']['detail']) == 2
477 def test_kernel_mismatch(self
, mgr
):
479 bad_host
= mgr
.cache
.facts
['node-1.ceph.com']
480 bad_host
['kernel'] = "5.10.18.0-241.10.1.el8.x86_64"
482 checker
= CephadmConfigChecks(mgr
)
483 checker
.cluster_network_list
= []
484 checker
.public_network_list
= ['10.9.64.0/24']
487 logger
.info(json
.dumps(mgr
.health_checks
))
488 assert len(mgr
.health_checks
) == 1
489 assert 'CEPHADM_CHECK_KERNEL_VERSION' in mgr
.health_checks
490 assert mgr
.health_checks
['CEPHADM_CHECK_KERNEL_VERSION']['detail'][0] == \
491 "host node-1.ceph.com running kernel 5.10, majority of hosts(9) running 4.18"
492 assert mgr
.health_checks
['CEPHADM_CHECK_KERNEL_VERSION']['count'] == 1
494 def test_inconsistent_subscription(self
, mgr
):
496 bad_host
= mgr
.cache
.facts
['node-5.ceph.com']
497 bad_host
['subscribed'] = "no"
499 checker
= CephadmConfigChecks(mgr
)
500 checker
.cluster_network_list
= []
501 checker
.public_network_list
= ['10.9.64.0/24']
504 logger
.info(json
.dumps(mgr
.health_checks
))
505 assert len(mgr
.health_checks
) == 1
506 assert "CEPHADM_CHECK_SUBSCRIPTION" in mgr
.health_checks
507 assert mgr
.health_checks
['CEPHADM_CHECK_SUBSCRIPTION']['detail'][0] == \
508 "node-5.ceph.com does not have an active subscription"
510 def test_kernel_security_inconsistent(self
, mgr
):
512 bad_node
= mgr
.cache
.facts
['node-3.ceph.com']
513 bad_node
['kernel_security'] = {
514 "SELINUX": "permissive",
515 "SELINUXTYPE": "targeted",
516 "description": "SELinux: Enabled(permissive, targeted)",
519 checker
= CephadmConfigChecks(mgr
)
520 checker
.cluster_network_list
= []
521 checker
.public_network_list
= ['10.9.64.0/24']
524 logger
.info(json
.dumps(mgr
.health_checks
))
525 assert len(mgr
.health_checks
) == 1
526 assert 'CEPHADM_CHECK_KERNEL_LSM' in mgr
.health_checks
527 assert mgr
.health_checks
['CEPHADM_CHECK_KERNEL_LSM']['detail'][0] == \
528 "node-3.ceph.com has inconsistent KSM settings compared to the majority of hosts(9) in the cluster"
530 def test_release_and_bad_mtu(self
, mgr
):
532 mgr
.version_overrides
= {
535 bad_node
= mgr
.cache
.facts
['node-1.ceph.com']
536 bad_node
['interfaces']['eth0']['mtu'] = 1500
538 checker
= CephadmConfigChecks(mgr
)
539 checker
.cluster_network_list
= []
540 checker
.public_network_list
= ['10.9.64.0/24']
543 logger
.info(json
.dumps(mgr
.health_checks
))
544 logger
.info(checker
.subnet_lookup
)
545 assert mgr
.health_checks
546 assert len(mgr
.health_checks
) == 2
547 assert "CEPHADM_CHECK_CEPH_RELEASE" in mgr
.health_checks
and \
548 "CEPHADM_CHECK_MTU" in mgr
.health_checks
550 def test_release_mtu_LSM(self
, mgr
):
552 mgr
.version_overrides
= {
555 bad_node1
= mgr
.cache
.facts
['node-1.ceph.com']
556 bad_node1
['interfaces']['eth0']['mtu'] = 1500
557 bad_node2
= mgr
.cache
.facts
['node-3.ceph.com']
558 bad_node2
['kernel_security'] = {
559 "SELINUX": "permissive",
560 "SELINUXTYPE": "targeted",
561 "description": "SELinux: Enabled(permissive, targeted)",
564 checker
= CephadmConfigChecks(mgr
)
565 checker
.cluster_network_list
= []
566 checker
.public_network_list
= ['10.9.64.0/24']
569 logger
.info(json
.dumps(mgr
.health_checks
))
570 logger
.info(checker
.subnet_lookup
)
571 assert mgr
.health_checks
572 assert len(mgr
.health_checks
) == 3
574 "CEPHADM_CHECK_CEPH_RELEASE" in mgr
.health_checks
and \
575 "CEPHADM_CHECK_MTU" in mgr
.health_checks
and \
576 "CEPHADM_CHECK_KERNEL_LSM" in mgr
.health_checks
578 def test_release_mtu_LSM_subscription(self
, mgr
):
580 mgr
.version_overrides
= {
583 bad_node1
= mgr
.cache
.facts
['node-1.ceph.com']
584 bad_node1
['interfaces']['eth0']['mtu'] = 1500
585 bad_node1
['subscribed'] = "no"
586 bad_node2
= mgr
.cache
.facts
['node-3.ceph.com']
587 bad_node2
['kernel_security'] = {
588 "SELINUX": "permissive",
589 "SELINUXTYPE": "targeted",
590 "description": "SELinux: Enabled(permissive, targeted)",
593 checker
= CephadmConfigChecks(mgr
)
594 checker
.cluster_network_list
= []
595 checker
.public_network_list
= ['10.9.64.0/24']
598 logger
.info(json
.dumps(mgr
.health_checks
))
599 logger
.info(checker
.subnet_lookup
)
600 assert mgr
.health_checks
601 assert len(mgr
.health_checks
) == 4
603 "CEPHADM_CHECK_CEPH_RELEASE" in mgr
.health_checks
and \
604 "CEPHADM_CHECK_MTU" in mgr
.health_checks
and \
605 "CEPHADM_CHECK_KERNEL_LSM" in mgr
.health_checks
and \
606 "CEPHADM_CHECK_SUBSCRIPTION" in mgr
.health_checks
608 def test_skip_release_during_upgrade(self
, mgr
):
609 mgr
.upgrade
.upgrade_state
= UpgradeState
.from_json({
610 'target_name': 'wah',
611 'progress_id': str(uuid
.uuid4()),
616 checker
= CephadmConfigChecks(mgr
)
617 checker
.cluster_network_list
= []
618 checker
.public_network_list
= ['10.9.64.0/24']
621 logger
.info(f
"{checker.skipped_checks_count} skipped check(s): {checker.skipped_checks}")
622 assert checker
.skipped_checks_count
== 1
623 assert 'ceph_release' in checker
.skipped_checks
625 def test_skip_when_disabled(self
, mgr
):
626 mgr
.module_option
.update({
627 "config_checks_enabled": "false"
629 checker
= CephadmConfigChecks(mgr
)
630 checker
.cluster_network_list
= []
631 checker
.public_network_list
= ['10.9.64.0/24']
634 logger
.info(checker
.active_checks
)
635 logger
.info(checker
.defined_checks
)
636 assert checker
.active_checks_count
== 0
638 def test_skip_mtu_checks(self
, mgr
):
639 mgr
.datastore
.update({
640 'config_checks': '{"osd_mtu_size": "disabled"}'
643 checker
= CephadmConfigChecks(mgr
)
644 checker
.cluster_network_list
= []
645 checker
.public_network_list
= ['10.9.64.0/24']
648 logger
.info(checker
.active_checks
)
649 logger
.info(checker
.defined_checks
)
650 assert 'osd_mtu_size' not in checker
.active_checks
651 assert checker
.defined_checks
== 8 and checker
.active_checks_count
== 7
653 def test_skip_mtu_lsm_checks(self
, mgr
):
654 mgr
.datastore
.update({
655 'config_checks': '{"osd_mtu_size": "disabled", "kernel_security": "disabled"}'
658 checker
= CephadmConfigChecks(mgr
)
659 checker
.cluster_network_list
= []
660 checker
.public_network_list
= ['10.9.64.0/24']
663 logger
.info(checker
.active_checks
)
664 logger
.info(checker
.defined_checks
)
665 assert 'osd_mtu_size' not in checker
.active_checks
and \
666 'kernel_security' not in checker
.active_checks
667 assert checker
.defined_checks
== 8 and checker
.active_checks_count
== 6
668 assert not mgr
.health_checks