import asyncio.subprocess
from logging.config import dictConfig
from socketserver import ThreadingMixIn
from http.server import BaseHTTPRequestHandler, HTTPServer
from contextlib import redirect_stdout
from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, \
    Callable, IO, Sequence, TypeVar, cast, Set
from configparser import ConfigParser
from functools import wraps
from io import StringIO
from threading import Thread, RLock
from urllib.error import HTTPError
from urllib.request import urlopen
from pathlib import Path

FuncT = TypeVar('FuncT', bound=Callable)
# Default container images -----------------------------------------------------
DEFAULT_IMAGE = 'quay.io/ceph/ceph:v16'
DEFAULT_IMAGE_IS_MASTER = False
DEFAULT_IMAGE_RELEASE = 'pacific'
DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.18.1'
DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v0.18.1'
DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.20.0'
DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/ceph-grafana:6.7.4'
DEFAULT_HAPROXY_IMAGE = 'docker.io/library/haproxy:2.3'
DEFAULT_KEEPALIVED_IMAGE = 'docker.io/arcts/keepalived'
DEFAULT_REGISTRY = 'docker.io'  # normalize unqualified digests to this
# ------------------------------------------------------------------------------

LATEST_STABLE_RELEASE = 'pacific'
DATA_DIR = '/var/lib/ceph'
LOG_DIR = '/var/log/ceph'
LOCK_DIR = '/run/cephadm'
LOGROTATE_DIR = '/etc/logrotate.d'
SYSCTL_DIR = '/usr/lib/sysctl.d'
UNIT_DIR = '/etc/systemd/system'
MIN_PODMAN_VERSION = (2, 0, 2)
CGROUPS_SPLIT_PODMAN_VERSION = (2, 1, 0)
CUSTOM_PS1 = r'[ceph: \u@\h \W]\$ '
DEFAULT_TIMEOUT = None  # in seconds
SHELL_DEFAULT_CONF = '/etc/ceph/ceph.conf'
SHELL_DEFAULT_KEYRING = '/etc/ceph/ceph.client.admin.keyring'
DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ'

logger: logging.Logger = None  # type: ignore
You can invoke cephadm in two ways:

1. The normal way, at the command line.

2. By piping the script to the python3 binary. In this latter case, you should
   prepend one or more lines to the beginning of the script.

       injected_argv = ['ls']

   For reading stdin from the '--config-json -' argument,

       injected_stdin = '...'

##################################
class BaseConfig:

    def __init__(self) -> None:
        self.docker: bool = False
        self.data_dir: str = DATA_DIR
        self.log_dir: str = LOG_DIR
        self.logrotate_dir: str = LOGROTATE_DIR
        self.sysctl_dir: str = SYSCTL_DIR
        self.unit_dir: str = UNIT_DIR
        self.verbose: bool = False
        self.timeout: Optional[int] = DEFAULT_TIMEOUT
        self.retry: int = DEFAULT_RETRY
        self.env: List[str] = []
        self.memory_request: Optional[int] = None
        self.memory_limit: Optional[int] = None

        self.container_init: bool = CONTAINER_INIT
        self.container_engine: Optional[ContainerEngine] = None

    def set_from_args(self, args: argparse.Namespace) -> None:
        argdict: Dict[str, Any] = vars(args)
        for k, v in argdict.items():
class CephadmContext:

    def __init__(self) -> None:
        self.__dict__['_args'] = None
        self.__dict__['_conf'] = BaseConfig()

    def set_args(self, args: argparse.Namespace) -> None:
        self._conf.set_from_args(args)

    def has_function(self) -> bool:
        return 'func' in self._args

    def __contains__(self, name: str) -> bool:
        return hasattr(self, name)

    def __getattr__(self, name: str) -> Any:
        if '_conf' in self.__dict__ and hasattr(self._conf, name):
            return getattr(self._conf, name)
        elif '_args' in self.__dict__ and hasattr(self._args, name):
            return getattr(self._args, name)
        else:
            return super().__getattribute__(name)

    def __setattr__(self, name: str, value: Any) -> None:
        if hasattr(self._conf, name):
            setattr(self._conf, name, value)
        elif hasattr(self._args, name):
            setattr(self._args, name, value)
        else:
            super().__setattr__(name, value)
class ContainerEngine:
    def __init__(self) -> None:
        self.path = find_program(self.EXE)

    @property
    def EXE(self) -> str:
        raise NotImplementedError()


class Podman(ContainerEngine):
    EXE = 'podman'

    def __init__(self) -> None:
        super().__init__()
        self._version: Optional[Tuple[int, ...]] = None

    @property
    def version(self) -> Tuple[int, ...]:
        if self._version is None:
            raise RuntimeError('Please call `get_version` first')
        return self._version

    def get_version(self, ctx: CephadmContext) -> None:
        out, _, _ = call_throws(ctx, [self.path, 'version', '--format', '{{.Client.Version}}'])
        self._version = _parse_podman_version(out)


class Docker(ContainerEngine):
    EXE = 'docker'


CONTAINER_PREFERENCE = (Podman, Docker)  # prefer podman to docker
# Log and console output config
logging_config = {
    'version': 1,
    'disable_existing_loggers': True,
    'formatters': {
        'cephadm': {
            'format': '%(asctime)s %(levelname)s %(message)s'
        },
    },
    'handlers': {
        'console': {
            'class': 'logging.StreamHandler',
        },
        'log_file': {
            'class': 'logging.handlers.WatchedFileHandler',
            'formatter': 'cephadm',
            'filename': '%s/cephadm.log' % LOG_DIR,
        },
    },
    'loggers': {
        '': {
            'handlers': ['console', 'log_file'],
        },
    },
}
class Error(Exception):
    pass


class TimeoutExpired(Error):
    pass

##################################
class Ceph(object):
    daemons = ('mon', 'mgr', 'mds', 'osd', 'rgw', 'rbd-mirror',
               'crash', 'cephfs-mirror')

##################################


def get_sysctl_settings() -> List[str]:
    return [
        '# allow a large number of OSDs',
        'fs.aio-max-nr = 1048576',
        'kernel.pid_max = 4194304',
    ]

##################################
class Monitoring(object):
    """Define the configs for the monitoring containers"""

    port_map = {
        'prometheus': [9095],  # Avoid default 9090, due to conflict with cockpit UI
        'node-exporter': [9100],
        'alertmanager': [9093, 9094],
    }

    components = {
        'prometheus': {
            'image': DEFAULT_PROMETHEUS_IMAGE,
            'args': [
                '--config.file=/etc/prometheus/prometheus.yml',
                '--storage.tsdb.path=/prometheus',
            ],
            'config-json-files': [
            ],
        },
        'node-exporter': {
            'image': DEFAULT_NODE_EXPORTER_IMAGE,
            'args': [
                '--no-collector.timex',
            ],
        },
        'grafana': {
            'image': DEFAULT_GRAFANA_IMAGE,
            'config-json-files': [
                'provisioning/datasources/ceph-dashboard.yml',
            ],
        },
        'alertmanager': {
            'image': DEFAULT_ALERT_MANAGER_IMAGE,
            'args': [
                '--cluster.listen-address=:{}'.format(port_map['alertmanager'][1]),
            ],
            'config-json-files': [
            ],
            'config-json-args': [
            ],
        },
    }  # type: ignore
    @staticmethod
    def get_version(ctx, container_id, daemon_type):
        # type: (CephadmContext, str, str) -> str
        """
        :param: daemon_type Either "prometheus", "alertmanager" or "node-exporter"
        """
        assert daemon_type in ('prometheus', 'alertmanager', 'node-exporter')
        cmd = daemon_type.replace('-', '_')
        if daemon_type == 'alertmanager':
            for cmd in ['alertmanager', 'prometheus-alertmanager']:
                _, err, code = call(ctx, [
                    ctx.container_engine.path, 'exec', container_id, cmd,
                ], verbosity=CallVerbosity.DEBUG)
            cmd = 'alertmanager'  # reset cmd for version extraction

        _, err, code = call(ctx, [
            ctx.container_engine.path, 'exec', container_id, cmd, '--version'
        ], verbosity=CallVerbosity.DEBUG)
        if code == 0 and \
                err.startswith('%s, version ' % cmd):
            version = err.split(' ')[2]
        return version
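    # A short illustration (hypothetical banner, not part of the original
    # script): these daemons print their version banner on stderr, e.g.
    # 'alertmanager, version 0.20.0 (branch: HEAD)', and the code above pulls
    # the third whitespace-separated token out of that banner.
    #
    #   err = 'alertmanager, version 0.20.0 (branch: HEAD)'
    #   assert err.split(' ')[2] == '0.20.0'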
##################################
def populate_files(config_dir, config_files, uid, gid):
    # type: (str, Dict, int, int) -> None
    """create config files for different services"""
    for fname in config_files:
        config_file = os.path.join(config_dir, fname)
        config_content = dict_get_join(config_files, fname)
        logger.info('Write file: %s' % (config_file))
        with open(config_file, 'w', encoding='utf-8') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config_content)
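# A minimal usage sketch (hypothetical fsid/paths and file content, not part of
# the original script): given a config-json 'files' mapping, each entry is
# written under the daemon's config directory, owned by uid/gid and chmod 0600.
#
#   files = {'ganesha.conf': 'NFS_CORE_PARAM { Protocols = 4; }'}
#   populate_files('/var/lib/ceph/<fsid>/nfs.foo/etc/ganesha', files, uid, gid)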
class NFSGanesha(object):
    """Defines a NFS-Ganesha container"""

    entrypoint = '/usr/bin/ganesha.nfsd'
    daemon_args = ['-F', '-L', 'STDERR']

    required_files = ['ganesha.conf']

    def __init__(self,
                 ctx,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
        self.daemon_id = daemon_id

        # config-json options
        self.pool = dict_get(config_json, 'pool', require=True)
        self.namespace = dict_get(config_json, 'namespace')
        self.userid = dict_get(config_json, 'userid')
        self.extra_args = dict_get(config_json, 'extra_args', [])
        self.files = dict_get(config_json, 'files', {})
        self.rgw = dict_get(config_json, 'rgw', {})

        # validate the supplied args

    @classmethod
    def init(cls, ctx, fsid, daemon_id):
        # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha
        return cls(ctx, fsid, daemon_id, get_parm(ctx.config_json), ctx.image)

    def get_container_mounts(self, data_dir):
        # type: (str) -> Dict[str, str]
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z'
        if self.rgw:
            cluster = self.rgw.get('cluster', 'ceph')
            rgw_user = self.rgw.get('user', 'admin')
            mounts[os.path.join(data_dir, 'keyring.rgw')] = \
                '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user)
        return mounts

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        envs = [
            'CEPH_CONF=%s' % ('/etc/ceph/ceph.conf')
        ]
        return envs

    @staticmethod
    def get_version(ctx, container_id):
        # type: (CephadmContext, str) -> Optional[str]
        version = None
        out, err, code = call(ctx,
                              [ctx.container_engine.path, 'exec', container_id,
                               NFSGanesha.entrypoint, '-v'],
                              verbosity=CallVerbosity.DEBUG)
        if code == 0:
            match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
            if match:
                version = match.group(1)
        return version

    def validate(self):
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

        # check for an RGW config
        if self.rgw:
            if not self.rgw.get('keyring'):
                raise Error('RGW keyring is missing')
            if not self.rgw.get('user'):
                raise Error('RGW user is missing')

    def get_daemon_name(self):
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def get_daemon_args(self):
        # type: () -> List[str]
        return self.daemon_args + self.extra_args

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ganesha config...')

        # create the ganesha conf dir
        config_dir = os.path.join(data_dir, 'etc/ganesha')
        makedirs(config_dir, uid, gid, 0o755)

        # populate files from the config-json
        populate_files(config_dir, self.files, uid, gid)

        # write the RGW keyring
        if self.rgw:
            keyring_path = os.path.join(data_dir, 'keyring.rgw')
            with open(keyring_path, 'w') as f:
                os.fchmod(f.fileno(), 0o600)
                os.fchown(f.fileno(), uid, gid)
                f.write(self.rgw.get('keyring', ''))

##################################
class CephIscsi(object):
    """Defines a Ceph-Iscsi container"""

    daemon_type = 'iscsi'
    entrypoint = '/usr/bin/rbd-target-api'

    required_files = ['iscsi-gateway.cfg']

    def __init__(self,
                 ctx,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
        self.daemon_id = daemon_id

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        # validate the supplied args

    @classmethod
    def init(cls, ctx, fsid, daemon_id):
        # type: (CephadmContext, str, Union[int, str]) -> CephIscsi
        return cls(ctx, fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    @staticmethod
    def get_container_mounts(data_dir, log_dir):
        # type: (str, str) -> Dict[str, str]
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
        mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
        mounts[log_dir] = '/var/log/rbd-target-api:z'
        mounts['/dev'] = '/dev'
        return mounts

    @staticmethod
    def get_container_binds():
        # type: () -> List[List[str]]
        binds = []
        lib_modules = ['type=bind',
                       'source=/lib/modules',
                       'destination=/lib/modules']
        binds.append(lib_modules)
        return binds

    @staticmethod
    def get_version(ctx, container_id):
        # type: (CephadmContext, str) -> Optional[str]
        version = None
        out, err, code = call(ctx,
                              [ctx.container_engine.path, 'exec', container_id,
                               '/usr/bin/python3', '-c',
                               "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"],
                              verbosity=CallVerbosity.DEBUG)
        if code == 0:
            version = out.strip()
        return version

    def validate(self):
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ceph-iscsi config...')
        configfs_dir = os.path.join(data_dir, 'configfs')
        makedirs(configfs_dir, uid, gid, 0o755)

        # populate files from the config-json
        populate_files(data_dir, self.files, uid, gid)

    @staticmethod
    def configfs_mount_umount(data_dir, mount=True):
        # type: (str, bool) -> List[str]
        mount_path = os.path.join(data_dir, 'configfs')
        if mount:
            cmd = 'if ! grep -qs {0} /proc/mounts; then ' \
                  'mount -t configfs none {0}; fi'.format(mount_path)
        else:
            cmd = 'if grep -qs {0} /proc/mounts; then ' \
                  'umount {0}; fi'.format(mount_path)

    def get_tcmu_runner_container(self):
        # type: () -> CephContainer
        tcmu_container = get_container(self.ctx, self.fsid, self.daemon_type, self.daemon_id)
        tcmu_container.entrypoint = '/usr/bin/tcmu-runner'
        tcmu_container.cname = self.get_container_name(desc='tcmu')
        # remove extra container args for tcmu container.
        # extra args could cause issue with forking service type
        tcmu_container.container_args = []
        return tcmu_container

##################################
class HAproxy(object):
    """Defines an HAproxy container"""
    daemon_type = 'haproxy'
    required_files = ['haproxy.cfg']
    default_image = DEFAULT_HAPROXY_IMAGE

    def __init__(self, ctx: CephadmContext,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.daemon_id = daemon_id

        # config-json options
        self.files = dict_get(config_json, 'files', {})

    @classmethod
    def init(cls, ctx: CephadmContext,
             fsid: str, daemon_id: Union[int, str]) -> 'HAproxy':
        return cls(ctx, fsid, daemon_id, get_parm(ctx.config_json),
                   ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        # create additional directories in data dir for HAproxy to use
        if not os.path.isdir(os.path.join(data_dir, 'haproxy')):
            makedirs(os.path.join(data_dir, 'haproxy'), uid, gid, DATA_DIR_MODE)

        data_dir = os.path.join(data_dir, 'haproxy')
        populate_files(data_dir, self.files, uid, gid)

    def get_daemon_args(self) -> List[str]:
        return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg']

    def validate(self):
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def extract_uid_gid_haproxy(self) -> Tuple[int, int]:
        # better directory for this?
        return extract_uid_gid(self.ctx, file_path='/var/lib')

    @staticmethod
    def get_container_mounts(data_dir: str) -> Dict[str, str]:
        mounts = dict()
        mounts[os.path.join(data_dir, 'haproxy')] = '/var/lib/haproxy'
        return mounts

    @staticmethod
    def get_sysctl_settings() -> List[str]:
        return [
            'net.ipv4.ip_forward = 1',
        ]

##################################
class Keepalived(object):
    """Defines a Keepalived container"""
    daemon_type = 'keepalived'
    required_files = ['keepalived.conf']
    default_image = DEFAULT_KEEPALIVED_IMAGE

    def __init__(self, ctx: CephadmContext,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.daemon_id = daemon_id

        # config-json options
        self.files = dict_get(config_json, 'files', {})

    @classmethod
    def init(cls, ctx: CephadmContext, fsid: str,
             daemon_id: Union[int, str]) -> 'Keepalived':
        return cls(ctx, fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        # create additional directories in data dir for keepalived to use
        if not os.path.isdir(os.path.join(data_dir, 'keepalived')):
            makedirs(os.path.join(data_dir, 'keepalived'), uid, gid, DATA_DIR_MODE)

        # populate files from the config-json
        populate_files(data_dir, self.files, uid, gid)

    def validate(self):
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        envs = [
            'KEEPALIVED_AUTOCONF=false',
            'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf',
            'KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf',
            'KEEPALIVED_DEBUG=false'
        ]
        return envs

    @staticmethod
    def get_sysctl_settings() -> List[str]:
        return [
            '# IP forwarding and non-local bind',
            'net.ipv4.ip_forward = 1',
            'net.ipv4.ip_nonlocal_bind = 1',
        ]

    def extract_uid_gid_keepalived(self) -> Tuple[int, int]:
        # better directory for this?
        return extract_uid_gid(self.ctx, file_path='/var/lib')

    @staticmethod
    def get_container_mounts(data_dir: str) -> Dict[str, str]:
        mounts = dict()
        mounts[os.path.join(data_dir, 'keepalived.conf')] = '/etc/keepalived/keepalived.conf'
        return mounts

##################################
class CustomContainer(object):
    """Defines a custom container"""
    daemon_type = 'container'

    def __init__(self,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.daemon_id = daemon_id

        # config-json options
        self.entrypoint = dict_get(config_json, 'entrypoint')
        self.uid = dict_get(config_json, 'uid', 65534)  # nobody
        self.gid = dict_get(config_json, 'gid', 65534)  # nobody
        self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
        self.args = dict_get(config_json, 'args', [])
        self.envs = dict_get(config_json, 'envs', [])
        self.privileged = dict_get(config_json, 'privileged', False)
        self.bind_mounts = dict_get(config_json, 'bind_mounts', [])
        self.ports = dict_get(config_json, 'ports', [])
        self.dirs = dict_get(config_json, 'dirs', [])
        self.files = dict_get(config_json, 'files', {})

    @classmethod
    def init(cls, ctx: CephadmContext,
             fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer':
        return cls(fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """
        Create dirs/files below the container data directory.
        """
        logger.info('Creating custom container configuration '
                    'dirs/files in {} ...'.format(data_dir))

        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % data_dir)

        for dir_path in self.dirs:
            logger.info('Creating directory: {}'.format(dir_path))
            dir_path = os.path.join(data_dir, dir_path.strip('/'))
            makedirs(dir_path, uid, gid, 0o755)

        for file_path in self.files:
            logger.info('Creating file: {}'.format(file_path))
            content = dict_get_join(self.files, file_path)
            file_path = os.path.join(data_dir, file_path.strip('/'))
            with open(file_path, 'w', encoding='utf-8') as f:
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)

    def get_daemon_args(self) -> List[str]:

    def get_container_args(self) -> List[str]:

    def get_container_envs(self) -> List[str]:

    def get_container_mounts(self, data_dir: str) -> Dict[str, str]:
        """
        Get the volume mounts. Relative source paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

            /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
        """
        mounts = {}
        for source, destination in self.volume_mounts.items():
            source = os.path.join(data_dir, source)
            mounts[source] = destination
        return mounts

    def get_container_binds(self, data_dir: str) -> List[List[str]]:
        """
        Get the bind mounts. Relative `source=...` paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

            'source=lib/modules',
            'destination=/lib/modules',

            'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
        """
        binds = self.bind_mounts.copy()
        for bind in binds:
            for index, value in enumerate(bind):
                match = re.match(r'^source=(.+)$', value)
                if match:
                    bind[index] = 'source={}'.format(os.path.join(
                        data_dir, match.group(1)))
        return binds

##################################
def touch(file_path: str, uid: Optional[int] = None, gid: Optional[int] = None) -> None:
    Path(file_path).touch()
    if uid and gid:
        os.chown(file_path, uid, gid)

##################################
def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
    """
    Helper function to get a key from a dictionary.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :param default: The default value in case the key does not
        exist. Default is `None`.
    :param require: Set to `True` if the key is required. An
        exception will be raised if the key does not exist in
        the given dictionary.
    :return: Returns the value of the given key.
    :raises: :exc:`self.Error` if the given key does not exist
        and `require` is set to `True`.
    """
    if require and key not in d.keys():
        raise Error('{} missing from dict'.format(key))
    return d.get(key, default)  # type: ignore
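# A short usage sketch (hypothetical data, not from the original script):
#
#   cfg = {'pool': 'nfs-ganesha'}
#   dict_get(cfg, 'pool', require=True)    # -> 'nfs-ganesha'
#   dict_get(cfg, 'namespace')             # -> None (missing, not required)
#   dict_get(cfg, 'userid', require=True)  # raises Error('userid missing from dict')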
##################################
def dict_get_join(d: Dict, key: str) -> Any:
    """
    Helper function to get the value of a given key from a dictionary.
    `List` values will be converted to a string by joining them with a
    line break.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :return: Returns the value of the given key. If it was a `list`, it
        will be joined with a line break.
    """
    value = d.get(key)
    if isinstance(value, list):
        value = '\n'.join(map(str, value))
    return value
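# A short usage sketch (hypothetical data, not from the original script):
#
#   d = {'args': ['--foo', '--bar'], 'name': 'x'}
#   dict_get_join(d, 'args')   # -> '--foo\n--bar'
#   dict_get_join(d, 'name')   # -> 'x' (non-list values pass through unchanged)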
##################################
def get_supported_daemons():
    # type: () -> List[str]
    supported_daemons = list(Ceph.daemons)
    supported_daemons.extend(Monitoring.components)
    supported_daemons.append(NFSGanesha.daemon_type)
    supported_daemons.append(CephIscsi.daemon_type)
    supported_daemons.append(CustomContainer.daemon_type)
    supported_daemons.append(CephadmDaemon.daemon_type)
    supported_daemons.append(HAproxy.daemon_type)
    supported_daemons.append(Keepalived.daemon_type)
    assert len(supported_daemons) == len(set(supported_daemons))
    return supported_daemons

##################################
class PortOccupiedError(Error):
    pass


def attempt_bind(ctx, s, address, port):
    # type: (CephadmContext, socket.socket, str, int) -> None
    try:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((address, port))
    except OSError as e:
        if e.errno == errno.EADDRINUSE:
            msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
            raise PortOccupiedError(msg)
    except Exception as e:


def port_in_use(ctx, port_num):
    # type: (CephadmContext, int) -> bool
    """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
    logger.info('Verifying port %d ...' % port_num)

    def _port_in_use(af: socket.AddressFamily, address: str) -> bool:
        try:
            s = socket.socket(af, socket.SOCK_STREAM)
            attempt_bind(ctx, s, address, port_num)
        except PortOccupiedError:
            return True
        except OSError as e:
            if e.errno in (errno.EAFNOSUPPORT, errno.EADDRNOTAVAIL):
                # Ignore EAFNOSUPPORT and EADDRNOTAVAIL as two interfaces are
                # being tested here and one might be intentionally be disabled.
                # In that case no error should be raised.
                return False

    return any(_port_in_use(af, address) for af, address in (
        (socket.AF_INET, '0.0.0.0'),
        (socket.AF_INET6, '::')
    ))


def check_ip_port(ctx, ip, port):
    # type: (CephadmContext, str, int) -> None
    if not ctx.skip_ping_check:
        logger.info('Verifying IP %s port %d ...' % (ip, port))
        if is_ipv6(ip):
            s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
            ip = unwrap_ipv6(ip)
        else:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        attempt_bind(ctx, s, ip, port)
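# A minimal usage sketch (hypothetical port, not from the original script):
# port_in_use() tries to bind the port on both the IPv4 and IPv6 wildcard
# addresses and reports it as occupied when either bind raises
# PortOccupiedError (EADDRINUSE).
#
#   if port_in_use(ctx, 9100):
#       raise Error('node-exporter port 9100 is already in use')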
##################################
# this is an abbreviated version of
# https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py
# that drops all of the compatibility (this is Unix/Linux only).


class Timeout(TimeoutError):
    """
    Raised when the lock could not be acquired in *timeout* seconds.
    """

    def __init__(self, lock_file: str) -> None:
        #: The path of the file lock.
        self.lock_file = lock_file

    def __str__(self) -> str:
        temp = "The file lock '{}' could not be acquired."\
               .format(self.lock_file)
        return temp


class _Acquire_ReturnProxy(object):
    def __init__(self, lock: 'FileLock') -> None:
        self.lock = lock

    def __enter__(self) -> 'FileLock':
        return self.lock

    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
        self.lock.release()


class FileLock(object):
    def __init__(self, ctx: CephadmContext, name: str, timeout: int = -1) -> None:
        if not os.path.exists(LOCK_DIR):
            os.mkdir(LOCK_DIR, 0o700)
        self._lock_file = os.path.join(LOCK_DIR, name + '.lock')

        # The file descriptor for the *_lock_file* as it is returned by the
        # os.open() function.
        # This file lock is only NOT None, if the object currently holds the
        # lock.
        self._lock_file_fd: Optional[int] = None
        self.timeout = timeout
        # The lock counter is used for implementing the nested locking
        # mechanism. Whenever the lock is acquired, the counter is increased and
        # the lock is only released, when this value is 0 again.
        self._lock_counter = 0

    @property
    def is_locked(self) -> bool:
        return self._lock_file_fd is not None

    def acquire(self, timeout: Optional[int] = None, poll_intervall: float = 0.05) -> _Acquire_ReturnProxy:
        """
        Acquires the file lock or fails with a :exc:`Timeout` error.
        .. code-block:: python
            # You can use this method in the context manager (recommended)
            with lock.acquire():
                pass
            # Or use an equivalent try-finally construct:
        :arg float timeout:
            The maximum time waited for the file lock.
            If ``timeout < 0``, there is no timeout and this method will
            block until the lock could be acquired.
            If ``timeout`` is None, the default :attr:`~timeout` is used.
        :arg float poll_intervall:
            We check once in *poll_intervall* seconds if we can acquire the
            file lock.
        :raises Timeout:
            if the lock could not be acquired in *timeout* seconds.
        .. versionchanged:: 2.0.0
            This method returns now a *proxy* object instead of *self*,
            so that it can be used in a with statement without side effects.
        """
        # Use the default timeout, if no timeout is provided.
        if timeout is None:
            timeout = self.timeout

        # Increment the number right at the beginning.
        # We can still undo it, if something fails.
        self._lock_counter += 1

        lock_id = id(self)
        lock_filename = self._lock_file
        start_time = time.time()
        try:
            while True:
                if not self.is_locked:
                    logger.debug('Acquiring lock %s on %s', lock_id,
                                 lock_filename)
                    self._acquire()

                if self.is_locked:
                    logger.debug('Lock %s acquired on %s', lock_id,
                                 lock_filename)
                    break
                elif timeout >= 0 and time.time() - start_time > timeout:
                    logger.warning('Timeout acquiring lock %s on %s', lock_id,
                                   lock_filename)
                    raise Timeout(self._lock_file)
                else:
                    logger.debug(
                        'Lock %s not acquired on %s, waiting %s seconds ...',
                        lock_id, lock_filename, poll_intervall
                    )
                    time.sleep(poll_intervall)
        except Exception:
            # Something did go wrong, so decrement the counter.
            self._lock_counter = max(0, self._lock_counter - 1)
            raise
        return _Acquire_ReturnProxy(lock=self)

    def release(self, force: bool = False) -> None:
        """
        Releases the file lock.
        Please note, that the lock is only completely released, if the lock
        counter is 0.
        Also note, that the lock file itself is not automatically deleted.
        :arg bool force:
            If true, the lock counter is ignored and the lock is released in
            every case.
        """
        if self.is_locked:
            self._lock_counter -= 1

            if self._lock_counter == 0 or force:
                # lock_id = id(self)
                # lock_filename = self._lock_file

                # Can't log in shutdown:
                #   File "/usr/lib64/python3.9/logging/__init__.py", line 1175, in _open
                #   NameError: name 'open' is not defined
                # logger.debug('Releasing lock %s on %s', lock_id, lock_filename)
                self._release()
                self._lock_counter = 0
                # logger.debug('Lock %s released on %s', lock_id, lock_filename)

    def __enter__(self) -> 'FileLock':
        self.acquire()
        return self

    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
        self.release()

    def __del__(self) -> None:
        self.release(force=True)

    def _acquire(self) -> None:
        open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
        fd = os.open(self._lock_file, open_mode)
        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except (IOError, OSError):
            os.close(fd)
        else:
            self._lock_file_fd = fd

    def _release(self) -> None:
        # Do not remove the lockfile:
        #   https://github.com/benediktschmitt/py-filelock/issues/31
        #   https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
        fd = self._lock_file_fd
        self._lock_file_fd = None
        fcntl.flock(fd, fcntl.LOCK_UN)  # type: ignore
        os.close(fd)  # type: ignore


##################################
# Popen wrappers, lifted from ceph-volume


class CallVerbosity(Enum):
    SILENT = 0
    # log stdout/stderr to logger.debug
    DEBUG = 1
    # On a non-zero exit status, it will forcefully set
    # logging ON for the terminal
    VERBOSE_ON_FAILURE = 2
    # log at info (instead of debug) level.
    VERBOSE = 3
if sys.version_info < (3, 8):
    import itertools
    import threading
    import warnings
    from asyncio import events

    class ThreadedChildWatcher(asyncio.AbstractChildWatcher):
        """Threaded child watcher implementation.
        The watcher uses a thread per process
        for waiting for the process finish.
        It doesn't require subscription on POSIX signal
        but a thread creation is not free.
        The watcher has O(1) complexity, its performance doesn't depend
        on amount of spawn processes.
        """

        def __init__(self) -> None:
            self._pid_counter = itertools.count(0)
            self._threads: Dict[Any, Any] = {}

        def is_active(self):
            return True

        def close(self):
            self._join_threads()

        def _join_threads(self):
            """Internal: Join all non-daemon threads"""
            threads = [thread for thread in list(self._threads.values())
                       if thread.is_alive() and not thread.daemon]
            for thread in threads:
                thread.join()

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            pass

        def __del__(self, _warn=warnings.warn):
            threads = [thread for thread in list(self._threads.values())
                       if thread.is_alive()]
            if threads:
                _warn(f'{self.__class__} has registered but not finished child processes',
                      ResourceWarning,
                      source=self)

        def add_child_handler(self, pid, callback, *args):
            loop = events.get_event_loop()
            thread = threading.Thread(target=self._do_waitpid,
                                      name=f'waitpid-{next(self._pid_counter)}',
                                      args=(loop, pid, callback, args),
                                      daemon=True)
            self._threads[pid] = thread
            thread.start()

        def remove_child_handler(self, pid):
            # asyncio never calls remove_child_handler() !!!
            # The method is no-op but is implemented because
            # abstract base class requires it
            return True

        def attach_loop(self, loop):
            pass

        def _do_waitpid(self, loop, expected_pid, callback, args):
            assert expected_pid > 0

            try:
                pid, status = os.waitpid(expected_pid, 0)
            except ChildProcessError:
                # The child process is already reaped
                # (may happen if waitpid() is called elsewhere).
                pid = expected_pid
                returncode = 255
                logger.warning(
                    'Unknown child process pid %d, will report returncode 255',
                    pid)
            else:
                if os.WIFEXITED(status):
                    returncode = os.WEXITSTATUS(status)
                elif os.WIFSIGNALED(status):
                    returncode = -os.WTERMSIG(status)
                else:
                    raise ValueError(f'unknown wait status {status}')
                if loop.get_debug():
                    logger.debug('process %s exited with returncode %s',
                                 expected_pid, returncode)

            if loop.is_closed():
                logger.warning('Loop %r that handles pid %r is closed', loop, pid)
            else:
                loop.call_soon_threadsafe(callback, pid, returncode, *args)

            self._threads.pop(expected_pid)

    # unlike SafeChildWatcher which handles SIGCHLD in the main thread,
    # ThreadedChildWatcher runs in a separated thread, hence allows us to
    # run create_subprocess_exec() in non-main thread, see
    # https://bugs.python.org/issue35621
    asyncio.set_child_watcher(ThreadedChildWatcher())

try:
    from asyncio import run as async_run   # type: ignore[attr-defined]
except ImportError:
    def async_run(coro):  # type: ignore
        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)
            return loop.run_until_complete(coro)
        finally:
            try:
                loop.run_until_complete(loop.shutdown_asyncgens())
            finally:
                asyncio.set_event_loop(None)
def call(ctx: CephadmContext,
         command: List[str],
         desc: Optional[str] = None,
         verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
         timeout: Optional[int] = DEFAULT_TIMEOUT,
         **kwargs: Any) -> Tuple[str, str, int]:
    """
    Wrap subprocess.Popen to

    - log stdout/stderr to a logger,
    - cleanly return out, err, returncode

    :param timeout: timeout in seconds
    """
    prefix = command[0] if desc is None else desc
    timeout = timeout or ctx.timeout

    logger.debug('Running command: %s' % ' '.join(command))

    async def tee(reader: asyncio.StreamReader) -> str:
        collected = StringIO()
        async for line in reader:
            message = line.decode('utf-8')
            collected.write(message)
            if verbosity == CallVerbosity.VERBOSE:
                logger.info(prefix + message.rstrip())
            elif verbosity != CallVerbosity.SILENT:
                logger.debug(prefix + message.rstrip())
        return collected.getvalue()

    async def run_with_timeout() -> Tuple[str, str, int]:
        process = await asyncio.create_subprocess_exec(
            *command,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=os.environ.copy())
        assert process.stdout
        assert process.stderr
        try:
            stdout, stderr = await asyncio.gather(tee(process.stdout),
                                                  tee(process.stderr))
            returncode = await asyncio.wait_for(process.wait(), timeout)
        except asyncio.TimeoutError:
            logger.info(prefix + f'timeout after {timeout} seconds')
        return stdout, stderr, returncode

    stdout, stderr, returncode = async_run(run_with_timeout())
    if returncode != 0 and verbosity == CallVerbosity.VERBOSE_ON_FAILURE:
        logger.info('Non-zero exit code %d from %s',
                    returncode, ' '.join(command))
        for line in stdout.splitlines():
            logger.info(prefix + 'stdout ' + line)
        for line in stderr.splitlines():
            logger.info(prefix + 'stderr ' + line)
    return stdout, stderr, returncode


def call_throws(
        ctx: CephadmContext,
        command: List[str],
        desc: Optional[str] = None,
        verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
        timeout: Optional[int] = DEFAULT_TIMEOUT,
        **kwargs: Any) -> Tuple[str, str, int]:
    out, err, ret = call(ctx, command, desc, verbosity, timeout, **kwargs)
    if ret:
        raise RuntimeError('Failed command: %s' % ' '.join(command))
    return out, err, ret


def call_timeout(ctx, command, timeout):
    # type: (CephadmContext, List[str], int) -> int
    logger.debug('Running command (timeout=%s): %s'
                 % (timeout, ' '.join(command)))

    def raise_timeout(command, timeout):
        # type: (List[str], int) -> NoReturn
        msg = 'Command `%s` timed out after %s seconds' % (command, timeout)
        raise TimeoutExpired(msg)

    try:
        return subprocess.call(command, timeout=timeout, env=os.environ.copy())
    except subprocess.TimeoutExpired:
        raise_timeout(command, timeout)
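# A hedged usage sketch (hypothetical command, not from the original script):
# call() runs the command through asyncio, tees stdout/stderr into the logger
# at the requested verbosity, and returns the decoded streams plus exit code.
#
#   out, err, code = call(ctx, ['podman', '--version'],
#                         verbosity=CallVerbosity.DEBUG)
#   if code != 0:
#       raise Error('podman not usable: %s' % err)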
##################################
def json_loads_retry(cli_func: Callable[[], str]) -> Any:
    for sleep_secs in [1, 4, 4]:
        try:
            return json.loads(cli_func())
        except json.JSONDecodeError:
            logger.debug('Invalid JSON. Retrying in %s seconds...' % sleep_secs)
            time.sleep(sleep_secs)
    return json.loads(cli_func())


def is_available(ctx, what, func):
    # type: (CephadmContext, str, Callable[[], bool]) -> None
    """
    Wait for a service to become available

    :param what: the name of the service
    :param func: the callable object that determines availability
    """
    retry = ctx.retry
    logger.info('Waiting for %s...' % what)
    num = 1
    while True:
        if func():
            logger.info('%s is available'
                        % what)
            break
        elif num > retry:
            raise Error('%s not available after %s tries'
                        % (what, retry))
        else:
            logger.info('%s not available, waiting (%s/%s)...'
                        % (what, num, retry))
        num += 1
        time.sleep(2)
def read_config(fn):
    # type: (Optional[str]) -> ConfigParser


def pathify(p):
    # type: (str) -> str
    p = os.path.expanduser(p)
    return os.path.abspath(p)


def get_file_timestamp(fn):
    # type: (str) -> Optional[str]
    try:
        mt = os.path.getmtime(fn)
        return datetime.datetime.fromtimestamp(
            mt, tz=datetime.timezone.utc
        ).strftime(DATEFMT)
    except Exception:
        return None
def try_convert_datetime(s):
    # type: (str) -> Optional[str]
    # This is super irritating because
    #  1) podman and docker use different formats
    #  2) python's strptime can't parse either one
    #
    #  docker 18.09.7:  2020-03-03T09:21:43.636153304Z
    #  podman 1.7.0:    2020-03-03T15:52:30.136257504-06:00
    #                   2020-03-03 15:52:30.136257504 -0600 CST
    # (In the podman case, there is a different string format for
    # 'inspect' and 'inspect --format {{.Created}}'!!)

    # In *all* cases, the 9 digit second precision is too much for
    # python's strptime. Shorten it to 6 digits.
    p = re.compile(r'(\.[\d]{6})[\d]*')
    s = p.sub(r'\1', s)

    # replace trailing Z with -0000, since (on python 3.6.8) it won't parse
    if s and s[-1] == 'Z':
        s = s[:-1] + '-0000'

    # cut off the redundant 'CST' part that strptime can't parse, if present.
    v = s.split(' ')
    if len(v) == 4:
        s = ' '.join(v[0:3])

    # try parsing with several format strings
    fmts = [
        '%Y-%m-%dT%H:%M:%S.%f%z',
        '%Y-%m-%d %H:%M:%S.%f %z',
    ]
    for f in fmts:
        try:
            # return timestamp normalized to UTC, rendered as DATEFMT.
            return datetime.datetime.strptime(s, f).astimezone(tz=datetime.timezone.utc).strftime(DATEFMT)
        except ValueError:
            pass
    return None
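# A small illustration (hypothetical input, not from the original script): a
# docker-style timestamp is first trimmed to microsecond precision and the
# trailing 'Z' rewritten to '-0000' so strptime can handle it, e.g.
#
#   try_convert_datetime('2020-03-03T09:21:43.636153304Z')
#   # -> '2020-03-03T09:21:43.636153Z' (normalized to UTC, rendered as DATEFMT)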
def _parse_podman_version(version_str):
    # type: (str) -> Tuple[int, ...]
    def to_int(val: str, org_e: Optional[Exception] = None) -> int:
        if not val and org_e:
            raise org_e
        try:
            return int(val)
        except ValueError as e:
            return to_int(val[0:-1], org_e or e)

    return tuple(map(to_int, version_str.split('.')))
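# A small illustration (hypothetical versions, not from the original script):
# non-numeric suffixes such as '-dev' are stripped one character at a time
# until int() succeeds, so
#
#   _parse_podman_version('2.0.2')      # -> (2, 0, 2)
#   _parse_podman_version('3.0.0-dev')  # -> (3, 0, 0)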
def get_hostname():
    return socket.gethostname()


def get_fqdn():
    return socket.getfqdn() or socket.gethostname()


def get_arch():
    return platform.uname().machine


def generate_service_id():
    return get_hostname() + '.' + ''.join(random.choice(string.ascii_lowercase)
                                          for _ in range(6))


def generate_password():
    return ''.join(random.choice(string.ascii_lowercase + string.digits)
                   for i in range(10))


def normalize_container_id(i):
    # type: (str) -> str
    # docker adds the sha256: prefix, but AFAICS both
    # docker (18.09.7 in bionic at least) and podman
    # both always use sha256, so leave off the prefix
    prefix = 'sha256:'
    if i.startswith(prefix):
        i = i[len(prefix):]
    return i


def make_fsid():
    return str(uuid.uuid1())


def is_fsid(s):
    # type: (str) -> bool
def validate_fsid(func: FuncT) -> FuncT:
    @wraps(func)
    def _validate_fsid(ctx: CephadmContext) -> Any:
        if 'fsid' in ctx and ctx.fsid:
            if not is_fsid(ctx.fsid):
                raise Error('not an fsid: %s' % ctx.fsid)
        return func(ctx)

    return cast(FuncT, _validate_fsid)


def infer_fsid(func: FuncT) -> FuncT:
    """
    If we only find a single fsid in /var/lib/ceph/*, use that
    """
    @wraps(func)
    def _infer_fsid(ctx: CephadmContext) -> Any:
        if 'fsid' in ctx and ctx.fsid:
            logger.debug('Using specified fsid: %s' % ctx.fsid)
            return func(ctx)

        fsids = set()

        if ctx.config:
            cp = read_config(ctx.config)
            if cp.has_option('global', 'fsid'):
                fsids.add(cp.get('global', 'fsid'))

        daemon_list = list_daemons(ctx, detail=False)
        for daemon in daemon_list:
            if not is_fsid(daemon['fsid']):
                # 'unknown' fsid
                continue
            elif 'name' not in ctx or not ctx.name:
                # ctx.name not specified
                fsids.add(daemon['fsid'])
            elif daemon['name'] == ctx.name:
                # ctx.name is a match
                fsids.add(daemon['fsid'])
        fsids = sorted(fsids)

        if not fsids:
            # some commands do not always require an fsid
            pass
        elif len(fsids) == 1:
            logger.info('Inferring fsid %s' % fsids[0])
            ctx.fsid = fsids[0]
        else:
            raise Error('Cannot infer an fsid, one must be specified: %s' % fsids)
        return func(ctx)

    return cast(FuncT, _infer_fsid)
def infer_config(func: FuncT) -> FuncT:
    """
    If we find a MON daemon, use the config from that container
    """
    @wraps(func)
    def _infer_config(ctx: CephadmContext) -> Any:
        ctx.config = ctx.config if 'config' in ctx else None
        if ctx.config:
            logger.debug('Using specified config: %s' % ctx.config)
            return func(ctx)
        if 'fsid' in ctx and ctx.fsid:
            name = ctx.name if 'name' in ctx else None
            if not name:
                daemon_list = list_daemons(ctx, detail=False)
                for daemon in daemon_list:
                    if daemon.get('name', '').startswith('mon.'):
                        name = daemon['name']
                        break
            if name:
                ctx.config = f'/var/lib/ceph/{ctx.fsid}/{name}/config'
        if ctx.config:
            logger.info('Inferring config %s' % ctx.config)
        elif os.path.exists(SHELL_DEFAULT_CONF):
            logger.debug('Using default config: %s' % SHELL_DEFAULT_CONF)
            ctx.config = SHELL_DEFAULT_CONF
        return func(ctx)

    return cast(FuncT, _infer_config)
def _get_default_image(ctx: CephadmContext) -> str:
    if DEFAULT_IMAGE_IS_MASTER:
        warn = """This is a development version of cephadm.
For information regarding the latest stable release:
    https://docs.ceph.com/docs/{}/cephadm/install
""".format(LATEST_STABLE_RELEASE)
        for line in warn.splitlines():
            logger.warning('{}{}{}'.format(termcolor.yellow, line, termcolor.end))
    return DEFAULT_IMAGE
def infer_image(func: FuncT) -> FuncT:
    """
    Use the most recent ceph image
    """
    @wraps(func)
    def _infer_image(ctx: CephadmContext) -> Any:
        if not ctx.image:
            ctx.image = os.environ.get('CEPHADM_IMAGE')
        if not ctx.image:
            ctx.image = get_last_local_ceph_image(ctx, ctx.container_engine.path)
        if not ctx.image:
            ctx.image = _get_default_image(ctx)
        return func(ctx)

    return cast(FuncT, _infer_image)


def default_image(func: FuncT) -> FuncT:
    @wraps(func)
    def _default_image(ctx: CephadmContext) -> Any:
        if not ctx.image:
            if 'name' in ctx and ctx.name:
                type_ = ctx.name.split('.', 1)[0]
                if type_ in Monitoring.components:
                    ctx.image = Monitoring.components[type_]['image']
                if type_ == 'haproxy':
                    ctx.image = HAproxy.default_image
                if type_ == 'keepalived':
                    ctx.image = Keepalived.default_image
            if not ctx.image:
                ctx.image = os.environ.get('CEPHADM_IMAGE')
            if not ctx.image:
                ctx.image = _get_default_image(ctx)

        return func(ctx)

    return cast(FuncT, _default_image)
def get_last_local_ceph_image(ctx: CephadmContext, container_path: str) -> Optional[str]:
    """
    :return: The most recent local ceph image (already pulled)
    """
    out, _, _ = call_throws(ctx,
                            [container_path, 'images',
                             '--filter', 'label=ceph=True',
                             '--filter', 'dangling=false',
                             '--format', '{{.Repository}}@{{.Digest}}'])
    return _filter_last_local_ceph_image(out)


def _filter_last_local_ceph_image(out):
    # type: (str) -> Optional[str]
    for image in out.splitlines():
        if image and not image.endswith('@'):
            logger.info('Using recent ceph image %s' % image)
            return image
    return None
def write_tmp(s, uid, gid):
    # type: (str, int, int) -> IO[str]
    tmp_f = tempfile.NamedTemporaryFile(mode='w',
                                        prefix='ceph-tmp')
    os.fchown(tmp_f.fileno(), uid, gid)
    tmp_f.write(s)
    tmp_f.flush()
    return tmp_f


def makedirs(dir, uid, gid, mode):
    # type: (str, int, int, int) -> None
    if not os.path.exists(dir):
        os.makedirs(dir, mode=mode)
    os.chown(dir, uid, gid)
    os.chmod(dir, mode)  # the above is masked by umask...
def get_data_dir(fsid, data_dir, t, n):
    # type: (str, str, str, Union[int, str]) -> str
    return os.path.join(data_dir, fsid, '%s.%s' % (t, n))


def get_log_dir(fsid, log_dir):
    # type: (str, str) -> str
    return os.path.join(log_dir, fsid)


def make_data_dir_base(fsid, data_dir, uid, gid):
    # type: (str, str, int, int) -> str
    data_dir_base = os.path.join(data_dir, fsid)
    makedirs(data_dir_base, uid, gid, DATA_DIR_MODE)
    makedirs(os.path.join(data_dir_base, 'crash'), uid, gid, DATA_DIR_MODE)
    makedirs(os.path.join(data_dir_base, 'crash', 'posted'), uid, gid,
             DATA_DIR_MODE)
    return data_dir_base
def make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=None, gid=None):
    # type: (CephadmContext, str, str, Union[int, str], Optional[int], Optional[int]) -> str
    if uid is None or gid is None:
        uid, gid = extract_uid_gid(ctx)
    make_data_dir_base(fsid, ctx.data_dir, uid, gid)
    data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    makedirs(data_dir, uid, gid, DATA_DIR_MODE)
    return data_dir


def make_log_dir(ctx, fsid, uid=None, gid=None):
    # type: (CephadmContext, str, Optional[int], Optional[int]) -> str
    if uid is None or gid is None:
        uid, gid = extract_uid_gid(ctx)
    log_dir = get_log_dir(fsid, ctx.log_dir)
    makedirs(log_dir, uid, gid, LOG_DIR_MODE)
    return log_dir


def make_var_run(ctx, fsid, uid, gid):
    # type: (CephadmContext, str, int, int) -> None
    call_throws(ctx, ['install', '-d', '-m0770', '-o', str(uid), '-g', str(gid),
                      '/var/run/ceph/%s' % fsid])
def copy_tree(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy a directory tree from src to dst
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_dir in src:
        dst_dir = dst
        if os.path.isdir(dst):
            dst_dir = os.path.join(dst, os.path.basename(src_dir))

        logger.debug('copy directory `%s` -> `%s`' % (src_dir, dst_dir))
        shutil.rmtree(dst_dir, ignore_errors=True)
        shutil.copytree(src_dir, dst_dir)  # dirs_exist_ok needs python 3.8

        for dirpath, dirnames, filenames in os.walk(dst_dir):
            logger.debug('chown %s:%s `%s`' % (uid, gid, dirpath))
            os.chown(dirpath, uid, gid)
            for filename in filenames:
                logger.debug('chown %s:%s `%s`' % (uid, gid, filename))
                os.chown(os.path.join(dirpath, filename), uid, gid)


def copy_files(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy files from src to dst
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_file in src:
        dst_file = dst
        if os.path.isdir(dst):
            dst_file = os.path.join(dst, os.path.basename(src_file))

        logger.debug('copy file `%s` -> `%s`' % (src_file, dst_file))
        shutil.copyfile(src_file, dst_file)

        logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
        os.chown(dst_file, uid, gid)


def move_files(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """
    Move files from src to dst
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_file in src:
        dst_file = dst
        if os.path.isdir(dst):
            dst_file = os.path.join(dst, os.path.basename(src_file))

        if os.path.islink(src_file):
            # shutil.move() in py2 does not handle symlinks correctly
            src_rl = os.readlink(src_file)
            logger.debug("symlink '%s' -> '%s'" % (dst_file, src_rl))
            os.symlink(src_rl, dst_file)
        else:
            logger.debug("move file '%s' -> '%s'" % (src_file, dst_file))
            shutil.move(src_file, dst_file)
            logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
            os.chown(dst_file, uid, gid)
# copied from distutils
def find_executable(executable: str, path: Optional[str] = None) -> Optional[str]:
    """Tries to find 'executable' in the directories listed in 'path'.
    A string listing directories separated by 'os.pathsep'; defaults to
    os.environ['PATH']. Returns the complete filename or None if not found.
    """
    _, ext = os.path.splitext(executable)
    if (sys.platform == 'win32') and (ext != '.exe'):
        executable = executable + '.exe'

    if os.path.isfile(executable):
        return executable

    if path is None:
        path = os.environ.get('PATH', None)
        if path is None:
            try:
                path = os.confstr('CS_PATH')
            except (AttributeError, ValueError):
                # os.confstr() or CS_PATH is not available
                path = os.defpath
        # bpo-35755: Don't use os.defpath if the PATH environment variable is
        # set to an empty string

    # PATH='' doesn't match, whereas PATH=':' looks in the current directory
    if not path:
        return None

    paths = path.split(os.pathsep)
    for p in paths:
        f = os.path.join(p, executable)
        if os.path.isfile(f):
            # the file exists, we have a shot at spawn working
            return f
    return None


def find_program(filename):
    # type: (str) -> str
    name = find_executable(filename)
    if name is None:
        raise ValueError('%s not found' % filename)
    return name
def find_container_engine(ctx: CephadmContext) -> Optional[ContainerEngine]:
    if ctx.docker:
        return Docker()
    else:
        for i in CONTAINER_PREFERENCE:
            try:
                return i()
            except Exception as e:
                logger.debug('Could not locate %s: %s' % (i.EXE, e))
    return None


def check_container_engine(ctx):
    # type: (CephadmContext) -> None
    engine = ctx.container_engine
    if not isinstance(engine, CONTAINER_PREFERENCE):
        # See https://github.com/python/mypy/issues/8993
        exes: List[str] = [i.EXE for i in CONTAINER_PREFERENCE]  # type: ignore
        raise Error('No container engine binary found ({}). Try run `apt/dnf/yum/zypper install <container engine>`'.format(' or '.join(exes)))
    elif isinstance(engine, Podman):
        engine.get_version(ctx)
        if engine.version < MIN_PODMAN_VERSION:
            raise Error('podman version %d.%d.%d or later is required' % MIN_PODMAN_VERSION)
def get_unit_name(fsid, daemon_type, daemon_id=None):
    # type: (str, str, Optional[Union[int, str]]) -> str
    # accept either name or type + id
    if daemon_type == CephadmDaemon.daemon_type and daemon_id is not None:
        return 'ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id)
    elif daemon_id is not None:
        return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
    else:
        return 'ceph-%s@%s' % (fsid, daemon_type)
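# A short illustration (hypothetical fsid, not from the original script),
# assuming 'mon'/'osd' are not the special cephadm daemon type:
#
#   get_unit_name('9b9d7609-f4d5-4aba-94c8-effa764d96c9', 'mon', 'host1')
#   # -> 'ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9@mon.host1'
#   get_unit_name('9b9d7609-f4d5-4aba-94c8-effa764d96c9', 'osd')
#   # -> 'ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9@osd'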
def get_unit_name_by_daemon_name(ctx: CephadmContext, fsid: str, name: str) -> str:
    daemon = get_daemon_description(ctx, fsid, name)
    try:
        return daemon['systemd_unit']
    except KeyError:
        raise Error('Failed to get unit name for {}'.format(daemon))
def check_unit(ctx, unit_name):
    # type: (CephadmContext, str) -> Tuple[bool, str, bool]
    # NOTE: we ignore the exit code here because systemctl outputs
    # various exit codes based on the state of the service, but the
    # string result is more explicit (and sufficient).
    enabled = False
    installed = False
    try:
        out, err, code = call(ctx, ['systemctl', 'is-enabled', unit_name],
                              verbosity=CallVerbosity.DEBUG)
        if code == 0:
            enabled = True
            installed = True
        elif 'disabled' in out:
            installed = True
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        enabled = False
        installed = False

    state = 'unknown'
    try:
        out, err, code = call(ctx, ['systemctl', 'is-active', unit_name],
                              verbosity=CallVerbosity.DEBUG)
        out = out.strip()
        if out in ['active']:
            state = 'running'
        elif out in ['inactive']:
            state = 'stopped'
        elif out in ['failed', 'auto-restart']:
            state = 'error'
        else:
            state = 'unknown'
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        state = 'unknown'

    return (enabled, state, installed)


def check_units(ctx, units, enabler=None):
    # type: (CephadmContext, List[str], Optional[Packager]) -> bool
    for u in units:
        (enabled, state, installed) = check_unit(ctx, u)
        if enabled and state == 'running':
            logger.info('Unit %s is enabled and running' % u)
            return True
        if enabler is not None:
            if installed:
                logger.info('Enabling unit %s' % u)
                enabler.enable_service(u)
    return False
def is_container_running(ctx: CephadmContext, c: 'CephContainer') -> bool:
    return bool(get_running_container_name(ctx, c))


def get_running_container_name(ctx: CephadmContext, c: 'CephContainer') -> Optional[str]:
    for name in [c.cname, c.old_cname]:
        out, err, ret = call(ctx, [
            ctx.container_engine.path, 'container', 'inspect',
            '--format', '{{.State.Status}}', name
        ])
        if out.strip() == 'running':
            return name
    return None
, legacy_dir
=None):
2103 # type: (str, Optional[str]) -> Optional[str]
2104 config_file
= '/etc/ceph/%s.conf' % cluster
2105 if legacy_dir
is not None:
2106 config_file
= os
.path
.abspath(legacy_dir
+ config_file
)
2108 if os
.path
.exists(config_file
):
2109 config
= read_config(config_file
)
2110 if config
.has_section('global') and config
.has_option('global', 'fsid'):
2111 return config
.get('global', 'fsid')
2115 def get_legacy_daemon_fsid(ctx
, cluster
,
2116 daemon_type
, daemon_id
, legacy_dir
=None):
2117 # type: (CephadmContext, str, str, Union[int, str], Optional[str]) -> Optional[str]
2119 if daemon_type
== 'osd':
2121 fsid_file
= os
.path
.join(ctx
.data_dir
,
2123 'ceph-%s' % daemon_id
,
2125 if legacy_dir
is not None:
2126 fsid_file
= os
.path
.abspath(legacy_dir
+ fsid_file
)
2127 with
open(fsid_file
, 'r') as f
:
2128 fsid
= f
.read().strip()
2132 fsid
= get_legacy_config_fsid(cluster
, legacy_dir
=legacy_dir
)
def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
    # type: (CephadmContext, str, str, Union[int, str]) -> List[str]
    r = list()  # type: List[str]

    if daemon_type in Ceph.daemons and daemon_type != 'crash':
        r += [
            '--setuser', 'ceph',
            '--setgroup', 'ceph',
            '--default-log-to-file=false',
            '--default-log-to-stderr=true',
            '--default-log-stderr-prefix=debug ',
        ]
        if daemon_type == 'mon':
            r += [
                '--default-mon-cluster-log-to-file=false',
                '--default-mon-cluster-log-to-stderr=true',
            ]
    elif daemon_type in Monitoring.components:
        metadata = Monitoring.components[daemon_type]
        r += metadata.get('args', list())
        # set ip and port to bind to for nodeexporter,alertmanager,prometheus
        if daemon_type != 'grafana':
            ip = ''
            port = Monitoring.port_map[daemon_type][0]
            if 'meta_json' in ctx and ctx.meta_json:
                meta = json.loads(ctx.meta_json) or {}
                if 'ip' in meta and meta['ip']:
                    ip = meta['ip']
                if 'ports' in meta and meta['ports']:
                    port = meta['ports'][0]
            r += [f'--web.listen-address={ip}:{port}']
        if daemon_type == 'alertmanager':
            config = get_parm(ctx.config_json)
            peers = config.get('peers', list())  # type: ignore
            for peer in peers:
                r += ['--cluster.peer={}'.format(peer)]
            # some alertmanager, by default, look elsewhere for a config
            r += ['--config.file=/etc/alertmanager/alertmanager.yml']
    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
        r += nfs_ganesha.get_daemon_args()
    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, fsid, daemon_id)
        r += haproxy.get_daemon_args()
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        r.extend(cc.get_daemon_args())

    return r
def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid,
                       config=None, keyring=None):
    # type: (CephadmContext, str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None
    data_dir = make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=uid, gid=gid)
    make_log_dir(ctx, fsid, uid=uid, gid=gid)

    if config:
        config_path = os.path.join(data_dir, 'config')
        with open(config_path, 'w') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config)

    if keyring:
        keyring_path = os.path.join(data_dir, 'keyring')
        with open(keyring_path, 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write(keyring)

    if daemon_type in Monitoring.components.keys():
        config_json: Dict[str, Any] = dict()
        if 'config_json' in ctx:
            config_json = get_parm(ctx.config_json)

        # Set up directories specific to the monitoring component
        if daemon_type == 'prometheus':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/prometheus'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'alerting'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
        elif daemon_type == 'grafana':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/grafana'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
            touch(os.path.join(data_dir_root, 'data', 'grafana.db'), uid, gid)
        elif daemon_type == 'alertmanager':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/alertmanager'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755)

        # populate the config directory for the component from the config-json
        if 'files' in config_json:
            for fname in config_json['files']:
                content = dict_get_join(config_json['files'], fname)
                if os.path.isabs(fname):
                    fpath = os.path.join(data_dir_root, fname.lstrip(os.path.sep))
                else:
                    fpath = os.path.join(data_dir_root, config_dir, fname)
                with open(fpath, 'w', encoding='utf-8') as f:
                    os.fchown(f.fileno(), uid, gid)
                    os.fchmod(f.fileno(), 0o600)
                    f.write(content)

    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
        nfs_ganesha.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CephIscsi.daemon_type:
        ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
        ceph_iscsi.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, fsid, daemon_id)
        haproxy.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == Keepalived.daemon_type:
        keepalived = Keepalived.init(ctx, fsid, daemon_id)
        keepalived.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        cc.create_daemon_dirs(data_dir, uid, gid)
def get_parm(option):
    # type: (str) -> Dict[str, str]
    global cached_stdin
    if option == '-':
        if cached_stdin is not None:
            j = cached_stdin
        else:
            j = sys.stdin.read()
            cached_stdin = j
    else:
        # inline json string
        if option[0] == '{' and option[-1] == '}':
            j = option
        # json file
        elif os.path.exists(option):
            with open(option, 'r') as f:
                j = f.read()
        else:
            raise Error('Config file {} not found'.format(option))
    try:
        js = json.loads(j)
    except ValueError as e:
        raise Error('Invalid JSON in {}: {}'.format(option, e))
    return js
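# Illustrative note (not part of the original script): get_parm() accepts the
# same three forms that the --config-json option does. A hedged sketch of the
# accepted inputs, assuming a shell session:
#
#   cephadm ... --config-json -                                        # read the JSON document from stdin
#   cephadm ... --config-json '{"config": "...", "keyring": "..."}'    # inline JSON string
#   cephadm ... --config-json /path/to/config.json                     # path to a JSON file
#
# In every case the parsed result comes back as a dict, or an Error is raised.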
def get_config_and_keyring(ctx):
    # type: (CephadmContext) -> Tuple[Optional[str], Optional[str]]
    config = None
    keyring = None

    if 'config_json' in ctx and ctx.config_json:
        d = get_parm(ctx.config_json)
        config = d.get('config')
        keyring = d.get('keyring')

    if 'config' in ctx and ctx.config:
        try:
            with open(ctx.config, 'r') as f:
                config = f.read()
        except FileNotFoundError as e:
            raise Error(e)

    if 'key' in ctx and ctx.key:
        keyring = '[%s]\n\tkey = %s\n' % (ctx.name, ctx.key)
    elif 'keyring' in ctx and ctx.keyring:
        try:
            with open(ctx.keyring, 'r') as f:
                keyring = f.read()
        except FileNotFoundError as e:
            raise Error(e)

    return config, keyring
def get_container_binds(ctx, fsid, daemon_type, daemon_id):
    # type: (CephadmContext, str, str, Union[int, str, None]) -> List[List[str]]
    binds = list()

    if daemon_type == CephIscsi.daemon_type:
        binds.extend(CephIscsi.get_container_binds())
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        binds.extend(cc.get_container_binds(data_dir))

    return binds
def get_container_mounts(ctx, fsid, daemon_type, daemon_id,
                         no_config=False):
    # type: (CephadmContext, str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str]
    mounts = dict()

    if daemon_type in Ceph.daemons:
        if fsid:
            run_path = os.path.join('/var/run/ceph', fsid)
            if os.path.exists(run_path):
                mounts[run_path] = '/var/run/ceph:z'
            log_dir = get_log_dir(fsid, ctx.log_dir)
            mounts[log_dir] = '/var/log/ceph:z'
            crash_dir = '/var/lib/ceph/%s/crash' % fsid
            if os.path.exists(crash_dir):
                mounts[crash_dir] = '/var/lib/ceph/crash:z'

    if daemon_type in Ceph.daemons and daemon_id:
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        if daemon_type == 'rgw':
            cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id)
        else:
            cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id)
        if daemon_type != 'crash':
            mounts[data_dir] = cdata_dir + ':z'
        if not no_config:
            mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
        if daemon_type in ['rbd-mirror', 'cephfs-mirror', 'crash']:
            # these do not search for their keyrings in a data directory
            mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id)

    if daemon_type in ['mon', 'osd', 'clusterless-ceph-volume']:
        mounts['/dev'] = '/dev'  # FIXME: narrow this down?
        mounts['/run/udev'] = '/run/udev'
    if daemon_type in ['osd', 'clusterless-ceph-volume']:
        mounts['/sys'] = '/sys'  # for numa.cc, pick_address, cgroups, ...
        mounts['/run/lvm'] = '/run/lvm'
        mounts['/run/lock/lvm'] = '/run/lock/lvm'
    if daemon_type == 'osd':
        # selinux-policy in the container may not match the host.
        if HostFacts(ctx).selinux_enabled:
            selinux_folder = '/var/lib/ceph/%s/selinux' % fsid
            if not os.path.exists(selinux_folder):
                os.makedirs(selinux_folder, mode=0o755)
            mounts[selinux_folder] = '/sys/fs/selinux:ro'

    try:
        if ctx.shared_ceph_folder:  # make easy manager modules/ceph-volume development
            ceph_folder = pathify(ctx.shared_ceph_folder)
            if os.path.exists(ceph_folder):
                mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
                mounts[ceph_folder + '/src/cephadm/cephadm'] = '/usr/sbin/cephadm'
                mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
                mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
                mounts[ceph_folder + '/monitoring/grafana/dashboards'] = '/etc/grafana/dashboards/ceph-dashboard'
                mounts[ceph_folder + '/monitoring/prometheus/alerts'] = '/etc/prometheus/ceph'
            else:
                logger.error('{}{}{}'.format(termcolor.red,
                                             'Ceph shared source folder does not exist.',
                                             termcolor.end))
    except AttributeError:
        pass

    if daemon_type in Monitoring.components and daemon_id:
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        if daemon_type == 'prometheus':
            mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z'
            mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z'
        elif daemon_type == 'node-exporter':
            mounts['/proc'] = '/host/proc:ro'
            mounts['/sys'] = '/host/sys:ro'
            mounts['/'] = '/rootfs:ro'
        elif daemon_type == 'grafana':
            mounts[os.path.join(data_dir, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
            mounts[os.path.join(data_dir, 'data/grafana.db')] = '/var/lib/grafana/grafana.db:Z'
        elif daemon_type == 'alertmanager':
            mounts[os.path.join(data_dir, 'etc/alertmanager')] = '/etc/alertmanager:Z'

    if daemon_type == NFSGanesha.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
        mounts.update(nfs_ganesha.get_container_mounts(data_dir))

    if daemon_type == HAproxy.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        mounts.update(HAproxy.get_container_mounts(data_dir))

    if daemon_type == CephIscsi.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        log_dir = get_log_dir(fsid, ctx.log_dir)
        mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir))

    if daemon_type == Keepalived.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        mounts.update(Keepalived.get_container_mounts(data_dir))

    if daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        mounts.update(cc.get_container_mounts(data_dir))

    return mounts
def get_container(ctx: CephadmContext,
                  fsid: str, daemon_type: str, daemon_id: Union[int, str],
                  privileged: bool = False,
                  ptrace: bool = False,
                  container_args: Optional[List[str]] = None) -> 'CephContainer':
    entrypoint: str = ''
    name: str = '%s.%s' % (daemon_type, daemon_id)
    ceph_args: List[str] = []
    envs: List[str] = []
    host_network: bool = True

    if daemon_type in Ceph.daemons:
        envs.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728')
    if container_args is None:
        container_args = []
    if daemon_type in ['mon', 'osd']:
        # mon and osd need privileged in order for libudev to query devices
        privileged = True
    if daemon_type == 'rgw':
        entrypoint = '/usr/bin/radosgw'
        name = 'client.rgw.%s' % daemon_id
    elif daemon_type == 'rbd-mirror':
        entrypoint = '/usr/bin/rbd-mirror'
        name = 'client.rbd-mirror.%s' % daemon_id
    elif daemon_type == 'cephfs-mirror':
        entrypoint = '/usr/bin/cephfs-mirror'
        name = 'client.cephfs-mirror.%s' % daemon_id
    elif daemon_type == 'crash':
        entrypoint = '/usr/bin/ceph-crash'
        name = 'client.crash.%s' % daemon_id
    elif daemon_type in ['mon', 'mgr', 'mds', 'osd']:
        entrypoint = '/usr/bin/ceph-' + daemon_type
        name = '%s.%s' % (daemon_type, daemon_id)
    elif daemon_type in Monitoring.components:
        entrypoint = ''  # monitoring containers use their image's default entrypoint
    elif daemon_type == NFSGanesha.daemon_type:
        entrypoint = NFSGanesha.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        envs.extend(NFSGanesha.get_container_envs())
    elif daemon_type == HAproxy.daemon_type:
        name = '%s.%s' % (daemon_type, daemon_id)
        container_args.extend(['--user=root'])  # haproxy 2.4 defaults to a different user
    elif daemon_type == Keepalived.daemon_type:
        name = '%s.%s' % (daemon_type, daemon_id)
        envs.extend(Keepalived.get_container_envs())
        container_args.extend(['--cap-add=NET_ADMIN', '--cap-add=NET_RAW'])
    elif daemon_type == CephIscsi.daemon_type:
        entrypoint = CephIscsi.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        # So the container can modprobe iscsi_target_mod and have write perms
        # to configfs we need to make this a privileged container.
        privileged = True
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        entrypoint = cc.entrypoint
        host_network = False
        envs.extend(cc.get_container_envs())
        container_args.extend(cc.get_container_args())

    if daemon_type in Monitoring.components:
        uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
        monitoring_args = [
            '--user',
            str(uid),
            # FIXME: disable cpu/memory limits for the time being (not supported
            # by ubuntu 18.04 kernel!)
        ]
        container_args.extend(monitoring_args)
    elif daemon_type == 'crash':
        ceph_args = ['-n', name]
    elif daemon_type in Ceph.daemons:
        ceph_args = ['-n', name, '-f']

    # if using podman, set -d, --conmon-pidfile & --cidfile flags
    # so service can have Type=Forking
    if isinstance(ctx.container_engine, Podman):
        runtime_dir = '/run'
        container_args.extend([
            '-d', '--log-driver', 'journald',
            '--conmon-pidfile',
            runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, daemon_id),
            '--cidfile',
            runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, daemon_id),
        ])
        if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION:
            container_args.append('--cgroups=split')

    return CephContainer.for_daemon(
        ctx,
        fsid=fsid,
        daemon_type=daemon_type,
        daemon_id=str(daemon_id),
        entrypoint=entrypoint,
        args=ceph_args + get_daemon_args(ctx, fsid, daemon_type, daemon_id),
        container_args=container_args,
        volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
        bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
        envs=envs,
        privileged=privileged,
        ptrace=ptrace,
        host_network=host_network,
    )
def extract_uid_gid(ctx, img='', file_path='/var/lib/ceph'):
    # type: (CephadmContext, str, Union[str, List[str]]) -> Tuple[int, int]
    if not img:
        img = ctx.image

    if isinstance(file_path, str):
        paths = [file_path]
    else:
        paths = file_path

    for fp in paths:
        try:
            out = CephContainer(
                ctx, image=img, entrypoint='stat',
                args=['-c', '%u %g', fp]
            ).run()
            uid, gid = out.split(' ')
            return int(uid), int(gid)
        except RuntimeError:
            pass
    raise RuntimeError('uid/gid not found')
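# Illustrative note (not part of the original script): extract_uid_gid() runs
# `stat -c '%u %g' <path>` inside the container image so that host-side files
# can be chowned to match the in-container ceph user. A hedged usage sketch:
#
#   uid, gid = extract_uid_gid(ctx, file_path='/var/lib/ceph')   # typically (167, 167) for ceph images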
def deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid,
                  config=None, keyring=None,
                  osd_fsid=None,
                  reconfig=False,
                  ports=None):
    # type: (CephadmContext, str, str, Union[int, str], Optional[CephContainer], int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None
    ports = ports or []
    if any([port_in_use(ctx, port) for port in ports]):
        if daemon_type == 'mgr':
            # non-fatal for mgr when we are in mgr_standby_modules=false, but we can't
            # tell whether that is the case here.
            logger.warning(
                f"ceph-mgr TCP port(s) {','.join(map(str, ports))} already in use"
            )
        else:
            raise Error("TCP Port(s) '{}' required for {} already in use".format(','.join(map(str, ports)), daemon_type))

    data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    if reconfig and not os.path.exists(data_dir):
        raise Error('cannot reconfig, data path %s does not exist' % data_dir)
    if daemon_type == 'mon' and not os.path.exists(data_dir):
        assert config
        assert keyring
        # tmp keyring file
        tmp_keyring = write_tmp(keyring, uid, gid)

        # tmp config file
        tmp_config = write_tmp(config, uid, gid)

        # --mkfs
        create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid)
        mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', daemon_id)
        log_dir = get_log_dir(fsid, ctx.log_dir)
        CephContainer(
            ctx,
            image=ctx.image,
            entrypoint='/usr/bin/ceph-mon',
            args=[
                '--mkfs',
                '-i', str(daemon_id),
                '--fsid', fsid,
                '-c', '/tmp/config',
                '--keyring', '/tmp/keyring',
            ] + get_daemon_args(ctx, fsid, 'mon', daemon_id),
            volume_mounts={
                log_dir: '/var/log/ceph:z',
                mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id),
                tmp_keyring.name: '/tmp/keyring:z',
                tmp_config.name: '/tmp/config:z',
            },
        ).run()

        # write conf
        with open(mon_dir + '/config', 'w') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config)
    else:
        # dirs, conf, keyring
        create_daemon_dirs(
            ctx,
            fsid, daemon_type, daemon_id,
            uid, gid,
            config, keyring)

    if not reconfig:
        if daemon_type == CephadmDaemon.daemon_type:
            port = next(iter(ports), None)  # get first tcp port provided or None
            if ctx.config_json == '-':
                config_js = get_parm('-')
            else:
                config_js = get_parm(ctx.config_json)
            assert isinstance(config_js, dict)
            assert isinstance(daemon_id, str)

            cephadm_exporter = CephadmDaemon(ctx, fsid, daemon_id, port)
            cephadm_exporter.deploy_daemon_unit(config_js)
        else:
            if c:
                deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id,
                                    c, osd_fsid=osd_fsid, ports=ports)
            else:
                raise RuntimeError('attempting to deploy a daemon without a container image')

    if not os.path.exists(data_dir + '/unit.created'):
        with open(data_dir + '/unit.created', 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write('mtime is time the daemon deployment was created\n')

    with open(data_dir + '/unit.configured', 'w') as f:
        f.write('mtime is time we were last configured\n')
        os.fchmod(f.fileno(), 0o600)
        os.fchown(f.fileno(), uid, gid)

    update_firewalld(ctx, daemon_type)

    # Open ports explicitly required for the daemon
    if ports:
        fw = Firewalld(ctx)
        fw.open_ports(ports)
        fw.apply_rules()

    if reconfig and daemon_type not in Ceph.daemons:
        # ceph daemons do not need a restart; others (presumably) do to pick
        # up the new config
        call_throws(ctx, ['systemctl', 'reset-failed',
                          get_unit_name(fsid, daemon_type, daemon_id)])
        call_throws(ctx, ['systemctl', 'restart',
                          get_unit_name(fsid, daemon_type, daemon_id)])
def _write_container_cmd_to_bash(ctx, file_obj, container, comment=None, background=False):
    # type: (CephadmContext, IO[str], CephContainer, Optional[str], Optional[bool]) -> None
    if comment:
        # Sometimes adding a comment, especially if there are multiple containers in one
        # unit file, makes it easier to read and grok.
        file_obj.write('# ' + comment + '\n')
    # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
    file_obj.write('! ' + ' '.join(container.rm_cmd(old_cname=True)) + ' 2> /dev/null\n')
    file_obj.write('! ' + ' '.join(container.rm_cmd()) + ' 2> /dev/null\n')
    # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
    if isinstance(ctx.container_engine, Podman):
        file_obj.write(
            '! '
            + ' '.join([shlex.quote(a) for a in container.rm_cmd(storage=True)])
            + ' 2> /dev/null\n')
        file_obj.write(
            '! '
            + ' '.join([shlex.quote(a) for a in container.rm_cmd(old_cname=True, storage=True)])
            + ' 2> /dev/null\n')

    # container run command
    file_obj.write(
        ' '.join([shlex.quote(a) for a in container.run_cmd()])
        + (' &' if background else '') + '\n')
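# Illustrative note (not part of the original script): for a mon daemon the
# generated unit.run file ends up containing, roughly, lines of the form
#
#   ! /usr/bin/podman rm -f ceph-<fsid>-mon.<id> 2> /dev/null
#   /usr/bin/podman run --rm --ipc=host ... --name ceph-<fsid>-mon-<id> <image> -n mon.<id> -f ...
#
# i.e. a best-effort cleanup of any stale container followed by the actual run command.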
def clean_cgroup(ctx: CephadmContext, fsid: str, unit_name: str) -> None:
    # systemd may fail to cleanup cgroups from previous stopped unit, which will cause next "systemctl start" to fail.
    # see https://tracker.ceph.com/issues/50998

    CGROUPV2_PATH = Path('/sys/fs/cgroup')
    if not (CGROUPV2_PATH / 'system.slice').exists():
        # Only unified cgroup is affected, skip if not the case
        return

    slice_name = 'system-ceph\\x2d{}.slice'.format(fsid.replace('-', '\\x2d'))
    cg_path = CGROUPV2_PATH / 'system.slice' / slice_name / f'{unit_name}.service'
    if not cg_path.exists():
        return

    def cg_trim(path: Path) -> None:
        for p in path.iterdir():
            if p.is_dir():
                cg_trim(p)
        path.rmdir()
    try:
        cg_trim(cg_path)
    except OSError:
        logger.warning(f'Failed to trim old cgroups {cg_path}')
def deploy_daemon_units(
    ctx: CephadmContext,
    fsid: str,
    uid: int,
    gid: int,
    daemon_type: str,
    daemon_id: Union[int, str],
    c: 'CephContainer',
    enable: bool = True,
    start: bool = True,
    osd_fsid: Optional[str] = None,
    ports: Optional[List[int]] = None,
) -> None:
    # cmd
    data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    with open(data_dir + '/unit.run.new', 'w') as f, \
            open(data_dir + '/unit.meta.new', 'w') as metaf:

        if daemon_type in Ceph.daemons:
            install_path = find_program('install')
            f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))

        # pre-start cmd(s)
        if daemon_type == 'osd':
            # osds have a pre-start step
            assert osd_fsid
            simple_fn = os.path.join('/etc/ceph/osd',
                                     '%s-%s.json.adopted-by-cephadm' % (daemon_id, osd_fsid))
            if os.path.exists(simple_fn):
                f.write('# Simple OSDs need chown on startup:\n')
                for n in ['block', 'block.db', 'block.wal']:
                    p = os.path.join(data_dir, n)
                    f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
            else:
                prestart = CephContainer(
                    ctx,
                    image=ctx.image,
                    entrypoint='/usr/sbin/ceph-volume',
                    args=[
                        'lvm', 'activate',
                        str(daemon_id), osd_fsid,
                        '--no-systemd'
                    ],
                    privileged=True,
                    volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                    bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                    cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
                    memory_request=ctx.memory_request,
                    memory_limit=ctx.memory_limit,
                )
                _write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate')
        elif daemon_type == CephIscsi.daemon_type:
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
            ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            _write_container_cmd_to_bash(ctx, f, tcmu_container, 'iscsi tcmu-runner container', background=True)

        _write_container_cmd_to_bash(ctx, f, c, '%s.%s' % (daemon_type, str(daemon_id)))

        # some metadata about the deploy
        meta: Dict[str, Any] = {}
        if 'meta_json' in ctx and ctx.meta_json:
            meta = json.loads(ctx.meta_json) or {}
        meta.update({
            'memory_request': int(ctx.memory_request) if ctx.memory_request else None,
            'memory_limit': int(ctx.memory_limit) if ctx.memory_limit else None,
        })
        if not meta.get('ports'):
            meta['ports'] = ports
        metaf.write(json.dumps(meta, indent=4) + '\n')

        os.fchmod(f.fileno(), 0o600)
        os.fchmod(metaf.fileno(), 0o600)
    os.rename(data_dir + '/unit.run.new',
              data_dir + '/unit.run')
    os.rename(data_dir + '/unit.meta.new',
              data_dir + '/unit.meta')

    # post-stop command(s)
    with open(data_dir + '/unit.poststop.new', 'w') as f:
        if daemon_type == 'osd':
            assert osd_fsid
            poststop = CephContainer(
                ctx,
                image=ctx.image,
                entrypoint='/usr/sbin/ceph-volume',
                args=[
                    'lvm', 'deactivate',
                    str(daemon_id), osd_fsid,
                ],
                privileged=True,
                volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type,
                                                    daemon_id),
            )
            _write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd')
        elif daemon_type == CephIscsi.daemon_type:
            # make sure we also stop the tcmu container
            ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            f.write('! ' + ' '.join(tcmu_container.stop_cmd()) + '\n')
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
        os.fchmod(f.fileno(), 0o600)
    os.rename(data_dir + '/unit.poststop.new',
              data_dir + '/unit.poststop')

    # stop command(s)
    with open(data_dir + '/unit.stop.new', 'w') as f:
        f.write('! ' + ' '.join(c.stop_cmd()) + '\n')
        f.write('! ' + ' '.join(c.stop_cmd(old_cname=True)) + '\n')
        os.fchmod(f.fileno(), 0o600)
    os.rename(data_dir + '/unit.stop.new',
              data_dir + '/unit.stop')

    if c:
        with open(data_dir + '/unit.image.new', 'w') as f:
            f.write(c.image + '\n')
            os.fchmod(f.fileno(), 0o600)
        os.rename(data_dir + '/unit.image.new',
                  data_dir + '/unit.image')

    # sysctl
    install_sysctl(ctx, fsid, daemon_type)

    # systemd
    install_base_units(ctx, fsid)
    unit = get_unit_file(ctx, fsid)
    unit_file = 'ceph-%s@.service' % (fsid)
    with open(ctx.unit_dir + '/' + unit_file + '.new', 'w') as f:
        f.write(unit)
    os.rename(ctx.unit_dir + '/' + unit_file + '.new',
              ctx.unit_dir + '/' + unit_file)
    call_throws(ctx, ['systemctl', 'daemon-reload'])

    unit_name = get_unit_name(fsid, daemon_type, daemon_id)
    call(ctx, ['systemctl', 'stop', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'reset-failed', unit_name],
         verbosity=CallVerbosity.DEBUG)
    if enable:
        call_throws(ctx, ['systemctl', 'enable', unit_name])
    if start:
        clean_cgroup(ctx, fsid, unit_name)
        call_throws(ctx, ['systemctl', 'start', unit_name])
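# Illustrative note (not part of the original script): deploy_daemon_units()
# leaves the following files under the daemon's data directory and then lets
# systemd drive them via the ceph-<fsid>@.service template written above:
#
#   unit.run       commands that start the daemon container(s)
#   unit.meta      JSON metadata about the deployment (ports, memory limits, ...)
#   unit.stop      commands to stop the container(s)
#   unit.poststop  cleanup commands run after the unit stops
#   unit.image     the container image the daemon was deployed with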
class Firewalld(object):
    def __init__(self, ctx):
        # type: (CephadmContext) -> None
        self.ctx = ctx
        self.available = self.check()

    def check(self):
        # type: () -> bool
        self.cmd = find_executable('firewall-cmd')
        if not self.cmd:
            logger.debug('firewalld does not appear to be present')
            return False
        (enabled, state, _) = check_unit(self.ctx, 'firewalld.service')
        if not enabled:
            logger.debug('firewalld.service is not enabled')
            return False
        if state != 'running':
            logger.debug('firewalld.service is not running')
            return False

        logger.info('firewalld ready')
        return True

    def enable_service_for(self, daemon_type):
        # type: (str) -> None
        if not self.available:
            logger.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type)
            return

        if daemon_type == 'mon':
            svc = 'ceph-mon'
        elif daemon_type in ['mgr', 'mds', 'osd']:
            svc = 'ceph'
        elif daemon_type == NFSGanesha.daemon_type:
            svc = 'nfs'
        else:
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG)
        if ret:
            logger.info('Enabling firewalld service %s in current zone...' % svc)
            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-service', svc])
            if ret:
                raise RuntimeError(
                    'unable to add service %s to current zone: %s' % (svc, err))
        else:
            logger.debug('firewalld service %s is enabled in current zone' % svc)

    def open_ports(self, fw_ports):
        # type: (List[int]) -> None
        if not self.available:
            logger.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports)
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        for port in fw_ports:
            tcp_port = str(port) + '/tcp'
            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
            if ret:
                logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
                out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-port', tcp_port])
                if ret:
                    raise RuntimeError('unable to add port %s to current zone: %s' %
                                       (tcp_port, err))
            else:
                logger.debug('firewalld port %s is enabled in current zone' % tcp_port)

    def close_ports(self, fw_ports):
        # type: (List[int]) -> None
        if not self.available:
            logger.debug('Not possible to close ports <%s>. firewalld.service is not available' % fw_ports)
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        for port in fw_ports:
            tcp_port = str(port) + '/tcp'
            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
            if not ret:
                logger.info('Disabling port %s in current zone...' % tcp_port)
                out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--remove-port', tcp_port])
                if ret:
                    raise RuntimeError('unable to remove port %s from current zone: %s' %
                                       (tcp_port, err))
                logger.info(f'Port {tcp_port} disabled')
            else:
                logger.info(f'firewalld port {tcp_port} already closed')

    def apply_rules(self):
        # type: () -> None
        if not self.available:
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        call_throws(self.ctx, [self.cmd, '--reload'])
def update_firewalld(ctx, daemon_type):
    # type: (CephadmContext, str) -> None
    firewall = Firewalld(ctx)
    firewall.enable_service_for(daemon_type)
    firewall.apply_rules()
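# Illustrative note (not part of the original script): a minimal usage sketch,
# assuming firewalld is installed, enabled and running on the host:
#
#   fw = Firewalld(ctx)
#   fw.enable_service_for('mon')   # adds the ceph-mon service to the current zone
#   fw.open_ports([9283])          # adds 9283/tcp
#   fw.apply_rules()               # firewall-cmd --reload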
def install_sysctl(ctx: CephadmContext, fsid: str, daemon_type: str) -> None:
    """
    Set up sysctl settings
    """
    def _write(conf: Path, lines: List[str]) -> None:
        lines = [
            '# created by cephadm',
            '',
        ] + lines + ['']
        with open(conf, 'w') as f:
            f.write('\n'.join(lines))

    conf = Path(ctx.sysctl_dir).joinpath(f'90-ceph-{fsid}-{daemon_type}.conf')
    lines: Optional[List] = None

    if daemon_type == 'osd':
        lines = OSD.get_sysctl_settings()
    elif daemon_type == 'haproxy':
        lines = HAproxy.get_sysctl_settings()
    elif daemon_type == 'keepalived':
        lines = Keepalived.get_sysctl_settings()

    # apply the sysctl settings
    if lines:
        Path(ctx.sysctl_dir).mkdir(mode=0o755, exist_ok=True)
        _write(conf, lines)
        call_throws(ctx, ['sysctl', '--system'])
def install_base_units(ctx, fsid):
    # type: (CephadmContext, str) -> None
    """
    Set up ceph.target and ceph-$fsid.target units.
    """
    # global unit
    existed = os.path.exists(ctx.unit_dir + '/ceph.target')
    with open(ctx.unit_dir + '/ceph.target.new', 'w') as f:
        f.write('[Unit]\n'
                'Description=All Ceph clusters and services\n'
                '\n'
                '[Install]\n'
                'WantedBy=multi-user.target\n')
    os.rename(ctx.unit_dir + '/ceph.target.new',
              ctx.unit_dir + '/ceph.target')
    if not existed:
        # we disable before enable in case a different ceph.target
        # (from the traditional package) is present; while newer
        # systemd is smart enough to disable the old
        # (/lib/systemd/...) and enable the new (/etc/systemd/...),
        # some older versions of systemd error out with EEXIST.
        call_throws(ctx, ['systemctl', 'disable', 'ceph.target'])
        call_throws(ctx, ['systemctl', 'enable', 'ceph.target'])
        call_throws(ctx, ['systemctl', 'start', 'ceph.target'])

    # cluster unit
    existed = os.path.exists(ctx.unit_dir + '/ceph-%s.target' % fsid)
    with open(ctx.unit_dir + '/ceph-%s.target.new' % fsid, 'w') as f:
        f.write(
            '[Unit]\n'
            'Description=Ceph cluster {fsid}\n'
            'PartOf=ceph.target\n'
            'Before=ceph.target\n'
            '\n'
            '[Install]\n'
            'WantedBy=multi-user.target ceph.target\n'.format(
                fsid=fsid)
        )
    os.rename(ctx.unit_dir + '/ceph-%s.target.new' % fsid,
              ctx.unit_dir + '/ceph-%s.target' % fsid)
    if not existed:
        call_throws(ctx, ['systemctl', 'enable', 'ceph-%s.target' % fsid])
        call_throws(ctx, ['systemctl', 'start', 'ceph-%s.target' % fsid])

    # logrotate for the cluster
    with open(ctx.logrotate_dir + '/ceph-%s' % fsid, 'w') as f:
        """
        This is a bit sloppy in that the killall/pkill will touch all ceph daemons
        in all containers, but I don't see an elegant way to send SIGHUP *just* to
        the daemons for this cluster.  (1) systemd kill -s will get the signal to
        podman, but podman will exit.  (2) podman kill will get the signal to the
        first child (bash), but that isn't the ceph daemon.  This is simpler and
        should be harmless.
        """
        f.write("""# created by cephadm
/var/log/ceph/%s/*.log {
    rotate 7
    daily
    compress
    sharedscripts
    postrotate
        killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror cephfs-mirror || pkill -1 -x 'ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror|cephfs-mirror' || true
    endscript
    missingok
    notifempty
    su root root
}
""" % fsid)
def get_unit_file(ctx, fsid):
    # type: (CephadmContext, str) -> str
    extra_args = ''
    if isinstance(ctx.container_engine, Podman):
        extra_args = ('ExecStartPre=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
                      'ExecStopPost=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
                      'Type=forking\n'
                      'PIDFile=%t/%n-pid\n')
        if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION:
            extra_args += 'Delegate=yes\n'

    docker = isinstance(ctx.container_engine, Docker)
    u = """# generated by cephadm
[Unit]
Description=Ceph %i for {fsid}

# According to:
#   http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
# these can be removed once ceph-mon will dynamically change network
# configuration.
After=network-online.target local-fs.target time-sync.target{docker_after}
Wants=network-online.target local-fs.target time-sync.target
{docker_requires}
PartOf=ceph-{fsid}.target
Before=ceph-{fsid}.target

[Service]
EnvironmentFile=-/etc/environment
ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
ExecStop=-/bin/bash -c '{container_path} stop ceph-{fsid}-%i ; bash {data_dir}/{fsid}/%i/unit.stop'
ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
Restart=on-failure
RestartSec=10s
StartLimitInterval=30min
StartLimitBurst=5
{extra_args}
[Install]
WantedBy=ceph-{fsid}.target
""".format(container_path=ctx.container_engine.path,
           fsid=fsid,
           data_dir=ctx.data_dir,
           extra_args=extra_args,
           # if docker, we depend on docker.service
           docker_after=' docker.service' if docker else '',
           docker_requires='Requires=docker.service\n' if docker else '')

    return u
##################################


class CephContainer:
    def __init__(self,
                 ctx: CephadmContext,
                 image: str,
                 entrypoint: str,
                 args: List[str] = [],
                 volume_mounts: Dict[str, str] = {},
                 cname: str = '',
                 container_args: List[str] = [],
                 envs: Optional[List[str]] = None,
                 privileged: bool = False,
                 ptrace: bool = False,
                 bind_mounts: Optional[List[List[str]]] = None,
                 init: Optional[bool] = None,
                 host_network: bool = True,
                 memory_request: Optional[str] = None,
                 memory_limit: Optional[str] = None,
                 ) -> None:
        self.ctx = ctx
        self.image = image
        self.entrypoint = entrypoint
        self.args = args
        self.volume_mounts = volume_mounts
        self._cname = cname
        self.container_args = container_args
        self.envs = envs
        self.privileged = privileged
        self.ptrace = ptrace
        self.bind_mounts = bind_mounts if bind_mounts else []
        self.init = init if init else ctx.container_init
        self.host_network = host_network
        self.memory_request = memory_request
        self.memory_limit = memory_limit

    @classmethod
    def for_daemon(cls,
                   ctx: CephadmContext,
                   fsid: str,
                   daemon_type: str,
                   daemon_id: str,
                   entrypoint: str,
                   args: List[str] = [],
                   volume_mounts: Dict[str, str] = {},
                   container_args: List[str] = [],
                   envs: Optional[List[str]] = None,
                   privileged: bool = False,
                   ptrace: bool = False,
                   bind_mounts: Optional[List[List[str]]] = None,
                   init: Optional[bool] = None,
                   host_network: bool = True,
                   memory_request: Optional[str] = None,
                   memory_limit: Optional[str] = None,
                   ) -> 'CephContainer':
        return cls(
            ctx,
            image=ctx.image,
            entrypoint=entrypoint,
            args=args,
            volume_mounts=volume_mounts,
            cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id),
            container_args=container_args,
            envs=envs,
            privileged=privileged,
            ptrace=ptrace,
            bind_mounts=bind_mounts,
            init=init,
            host_network=host_network,
            memory_request=memory_request,
            memory_limit=memory_limit,
        )

    @property
    def cname(self) -> str:
        """
        podman adds the current container name to the /etc/hosts
        file. Turns out, python's `socket.getfqdn()` differs from
        `hostname -f`, when we have container names containing
        dots in them:

        # podman run --name foo.bar.baz.com ceph/ceph /bin/bash
        [root@sebastians-laptop /]# cat /etc/hosts
        127.0.1.1   sebastians-laptop foo.bar.baz.com
        [root@sebastians-laptop /]# hostname -f
        sebastians-laptop
        [root@sebastians-laptop /]# python3 -c 'import socket; print(socket.getfqdn())'
        foo.bar.baz.com

        Fascinatingly, this doesn't happen when using dashes.
        """
        return self._cname.replace('.', '-')

    @cname.setter
    def cname(self, val: str) -> None:
        self._cname = val

    @property
    def old_cname(self) -> str:
        return self._cname

    def run_cmd(self) -> List[str]:
        cmd_args: List[str] = [
            str(self.ctx.container_engine.path),
            'run',
            '--rm',
            '--ipc=host',
            # some containers (ahem, haproxy) override this, but we want a fast
            # shutdown always (and, more importantly, a successful exit even if we
            # fall back to SIGKILL).
            '--stop-signal=SIGTERM',
        ]

        if isinstance(self.ctx.container_engine, Podman):
            if os.path.exists('/etc/ceph/podman-auth.json'):
                cmd_args.append('--authfile=/etc/ceph/podman-auth.json')

        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        vols: List[str] = []
        binds: List[str] = []

        if self.memory_request:
            cmd_args.extend(['-e', 'POD_MEMORY_REQUEST', str(self.memory_request)])
        if self.memory_limit:
            cmd_args.extend(['-e', 'POD_MEMORY_LIMIT', str(self.memory_limit)])
            cmd_args.extend(['--memory', str(self.memory_limit)])

        if self.host_network:
            cmd_args.append('--net=host')
        if self.entrypoint:
            cmd_args.extend(['--entrypoint', self.entrypoint])
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk'])
        if self.ptrace and not self.privileged:
            # if privileged, the SYS_PTRACE cap is already added
            # in addition, --cap-add and --privileged are mutually
            # exclusive since podman >= 2.0
            cmd_args.append('--cap-add=SYS_PTRACE')
        if self.init:
            cmd_args.append('--init')
            envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
        if self.cname:
            cmd_args.extend(['--name', self.cname])
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        binds = sum([['--mount', '{}'.format(','.join(bind))]
                     for bind in self.bind_mounts], [])

        return \
            cmd_args + self.container_args + \
            envs + vols + binds + \
            [self.image] + self.args  # type: ignore

    def shell_cmd(self, cmd: List[str]) -> List[str]:
        cmd_args: List[str] = [
            str(self.ctx.container_engine.path),
            'run',
            '--rm',
            '--ipc=host',
        ]
        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        vols: List[str] = []
        binds: List[str] = []

        if self.host_network:
            cmd_args.append('--net=host')
        if self.ctx.no_hosts:
            cmd_args.append('--no-hosts')
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk'])
        if self.init:
            cmd_args.append('--init')
            envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        binds = sum([['--mount', '{}'.format(','.join(bind))]
                     for bind in self.bind_mounts], [])

        return cmd_args + self.container_args + envs + vols + binds + [
            '--entrypoint', cmd[0],
            self.image,
        ] + cmd[1:]

    def exec_cmd(self, cmd):
        # type: (List[str]) -> List[str]
        cname = get_running_container_name(self.ctx, self)
        if not cname:
            raise Error('unable to find container "{}"'.format(self.cname))
        return [
            str(self.ctx.container_engine.path),
            'exec',
        ] + self.container_args + [
            self.cname,
        ] + cmd

    def rm_cmd(self, old_cname: bool = False, storage: bool = False) -> List[str]:
        ret = [
            str(self.ctx.container_engine.path),
            'rm', '-f',
        ]
        if storage:
            ret.append('--storage')
        if old_cname:
            ret.append(self.old_cname)
        else:
            ret.append(self.cname)
        return ret

    def stop_cmd(self, old_cname: bool = False) -> List[str]:
        ret = [
            str(self.ctx.container_engine.path),
            'stop', self.old_cname if old_cname else self.cname,
        ]
        return ret

    def run(self, timeout=DEFAULT_TIMEOUT):
        # type: (Optional[int]) -> str
        out, _, _ = call_throws(self.ctx, self.run_cmd(),
                                desc=self.entrypoint, timeout=timeout)
        return out


##################################
def command_version(ctx):
    # type: (CephadmContext) -> int
    c = CephContainer(ctx, ctx.image, 'ceph', ['--version'])
    out, err, ret = call(ctx, c.run_cmd(), desc=c.entrypoint)
    if not ret:
        print(out.strip())
    return ret
##################################


def command_pull(ctx):
    # type: (CephadmContext) -> int

    _pull_image(ctx, ctx.image)
    return command_inspect_image(ctx)
def _pull_image(ctx, image):
    # type: (CephadmContext, str) -> None
    logger.info('Pulling container image %s...' % image)

    ignorelist = [
        'error creating read-write layer with ID',
        'net/http: TLS handshake timeout',
        'Digest did not match, expected',
    ]

    cmd = [ctx.container_engine.path, 'pull', image]
    if isinstance(ctx.container_engine, Podman) and os.path.exists('/etc/ceph/podman-auth.json'):
        cmd.append('--authfile=/etc/ceph/podman-auth.json')
    cmd_str = ' '.join(cmd)

    for sleep_secs in [1, 4, 25]:
        out, err, ret = call(ctx, cmd)
        if not ret:
            return

        if not any(pattern in err for pattern in ignorelist):
            raise RuntimeError('Failed command: %s' % cmd_str)

        logger.info('`%s` failed transiently. Retrying. waiting %s seconds...' % (cmd_str, sleep_secs))
        time.sleep(sleep_secs)

    raise RuntimeError('Failed command: %s: maximum retries reached' % cmd_str)
##################################


def command_inspect_image(ctx):
    # type: (CephadmContext) -> int
    out, err, ret = call_throws(ctx, [
        ctx.container_engine.path, 'inspect',
        '--format', '{{.ID}},{{.RepoDigests}}',
        ctx.image])
    if ret:
        return errno.ENOENT

    info_from = get_image_info_from_inspect(out.strip(), ctx.image)

    ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
    info_from['ceph_version'] = ver

    print(json.dumps(info_from, indent=4, sort_keys=True))
    return 0
def normalize_image_digest(digest: str) -> str:
    # normal case:
    #   ceph/ceph -> docker.io/ceph/ceph
    # edge cases that shouldn't ever come up:
    #   ubuntu -> docker.io/ubuntu    (ubuntu alias for library/ubuntu)
    # no change:
    #   quay.ceph.io/ceph/ceph -> ceph
    #   docker.io/ubuntu -> no change
    bits = digest.split('/')
    if '.' not in bits[0] and len(bits) < 3:
        digest = DEFAULT_REGISTRY + '/' + digest
    return digest
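# Illustrative note (not part of the original script): expected behaviour,
# given DEFAULT_REGISTRY = 'docker.io':
#
#   normalize_image_digest('ceph/ceph')              -> 'docker.io/ceph/ceph'
#   normalize_image_digest('quay.io/ceph/ceph:v16')  -> unchanged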
def get_image_info_from_inspect(out, image):
    # type: (str, str) -> Dict[str, Union[str, List[str]]]
    image_id, digests = out.split(',', 1)
    if not out:
        raise Error('inspect {}: empty result'.format(image))
    r = {
        'image_id': normalize_container_id(image_id)
    }  # type: Dict[str, Union[str,List[str]]]
    if digests:
        r['repo_digests'] = list(map(normalize_image_digest, digests[1:-1].split(' ')))
    return r


##################################
def check_subnet(subnets: str) -> Tuple[int, List[int], str]:
    """Determine whether the given string is a valid subnet

    :param subnets: subnet string, a single definition or comma separated list of CIDR subnets
    :returns: return code, IP version list of the subnets and msg describing any validation errors
    """

    rc = 0
    versions = set()
    errors = []
    subnet_list = subnets.split(',')
    for subnet in subnet_list:
        # ensure the format of the string is as expected address/netmask
        if not re.search(r'\/\d+$', subnet):
            rc = 1
            errors.append(f'{subnet} is not in CIDR format (address/netmask)')
            continue
        try:
            v = ipaddress.ip_network(subnet).version
            versions.add(v)
        except ValueError as e:
            rc = 1
            errors.append(f'{subnet} invalid: {str(e)}')

    return rc, list(versions), ', '.join(errors)
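# Illustrative note (not part of the original script): examples of what
# check_subnet() returns, per the logic above:
#
#   check_subnet('192.168.1.0/24')        -> (0, [4], '')
#   check_subnet('fd00::/64,10.0.0.0/8')  -> (0, [6, 4], '')   (order of the version list may vary)
#   check_subnet('10.0.0.1')              -> (1, [], '10.0.0.1 is not in CIDR format (address/netmask)')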
def unwrap_ipv6(address):
    # type: (str) -> str
    if address.startswith('[') and address.endswith(']'):
        return address[1:-1]
    return address


def wrap_ipv6(address):
    # type: (str) -> str

    # We cannot assume it's already wrapped, or even an IPv6 address: if
    # it's already wrapped (or is a hostname) the ip_address() call below
    # will raise ValueError.
    try:
        if ipaddress.ip_address(address).version == 6:
            return f'[{address}]'
    except ValueError:
        pass

    return address


def is_ipv6(address):
    # type: (str) -> bool
    address = unwrap_ipv6(address)
    try:
        return ipaddress.ip_address(address).version == 6
    except ValueError:
        logger.warning('Address: {} is not a valid IP address'.format(address))
        return False
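# Illustrative note (not part of the original script): how the IPv6 helpers
# compose, per the definitions above:
#
#   wrap_ipv6('fe80::1')      -> '[fe80::1]'
#   wrap_ipv6('10.1.2.3')     -> '10.1.2.3'    (not IPv6, returned unchanged)
#   unwrap_ipv6('[fe80::1]')  -> 'fe80::1'
#   is_ipv6('[fe80::1]')      -> True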
def prepare_mon_addresses(
    ctx: CephadmContext
) -> Tuple[str, bool, Optional[str]]:
    r = re.compile(r':(\d+)$')
    base_ip = ''
    ipv6 = False

    if ctx.mon_ip:
        ipv6 = is_ipv6(ctx.mon_ip)
        if ipv6:
            ctx.mon_ip = wrap_ipv6(ctx.mon_ip)
        hasport = r.findall(ctx.mon_ip)
        if hasport:
            port_str = hasport[0]
            port = int(port_str)
            if port == 6789:
                addr_arg = '[v1:%s]' % ctx.mon_ip
            elif port == 3300:
                addr_arg = '[v2:%s]' % ctx.mon_ip
            else:
                logger.warning('Using msgr2 protocol for unrecognized port %d' %
                               port)
                addr_arg = '[v2:%s]' % ctx.mon_ip
            base_ip = ctx.mon_ip[0:-(len(port_str)) - 1]
            check_ip_port(ctx, base_ip, port)
        else:
            base_ip = ctx.mon_ip
            addr_arg = '[v2:%s:3300,v1:%s:6789]' % (ctx.mon_ip, ctx.mon_ip)
            check_ip_port(ctx, ctx.mon_ip, 3300)
            check_ip_port(ctx, ctx.mon_ip, 6789)
    elif ctx.mon_addrv:
        addr_arg = ctx.mon_addrv
        if addr_arg[0] != '[' or addr_arg[-1] != ']':
            raise Error('--mon-addrv value %s must use square brackets' %
                        addr_arg)
        ipv6 = addr_arg.count('[') > 1
        for addr in addr_arg[1:-1].split(','):
            hasport = r.findall(addr)
            if not hasport:
                raise Error('--mon-addrv value %s must include port number' %
                            addr_arg)
            port_str = hasport[0]
            port = int(port_str)
            # strip off v1: or v2: prefix
            addr = re.sub(r'^v\d+:', '', addr)
            base_ip = addr[0:-(len(port_str)) - 1]
            check_ip_port(ctx, base_ip, port)
    else:
        raise Error('must specify --mon-ip or --mon-addrv')
    logger.debug('Base mon IP is %s, final addrv is %s' % (base_ip, addr_arg))

    mon_network = None
    if not ctx.skip_mon_network:
        # make sure IP is configured locally, and then figure out the
        # CIDR network it sits in
        errmsg = f'Cannot infer CIDR network for mon IP `{base_ip}`'
        for net, ifaces in list_networks(ctx).items():
            ips: List[str] = []
            for iface, ls in ifaces.items():
                ips.extend(ls)
            try:
                if ipaddress.ip_address(unwrap_ipv6(base_ip)) in \
                        [ipaddress.ip_address(ip) for ip in ips]:
                    mon_network = net
                    logger.info(f'Mon IP `{base_ip}` is in CIDR network `{mon_network}`')
                    break
            except ValueError as e:
                logger.warning(f'{errmsg}: {e}')
        if not mon_network:
            raise Error(f'{errmsg}: pass --skip-mon-network to configure it later')

    return (addr_arg, ipv6, mon_network)
def prepare_cluster_network(ctx: CephadmContext) -> Tuple[str, bool]:
    cluster_network = ''
    ipv6_cluster_network = False
    # the cluster network may not exist on this node, so all we can do is
    # validate that the address given is valid ipv4 or ipv6 subnet
    if ctx.cluster_network:
        rc, versions, err_msg = check_subnet(ctx.cluster_network)
        if rc:
            raise Error(f'Invalid --cluster-network parameter: {err_msg}')
        cluster_network = ctx.cluster_network
        ipv6_cluster_network = True if 6 in versions else False
    else:
        logger.info('- internal network (--cluster-network) has not '
                    'been provided, OSD replication will default to '
                    'the public_network')

    return cluster_network, ipv6_cluster_network
def create_initial_keys(
    ctx: CephadmContext,
    uid: int, gid: int,
    mgr_id: str
) -> Tuple[str, str, str, Any, Any]:  # type: ignore

    # create some initial keys
    logger.info('Creating initial keys...')
    mon_key = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()
    admin_key = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()
    mgr_key = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()

    keyring = ('[mon.]\n'
               '\tkey = %s\n'
               '\tcaps mon = allow *\n'
               '[client.admin]\n'
               '\tkey = %s\n'
               '\tcaps mon = allow *\n'
               '\tcaps mds = allow *\n'
               '\tcaps mgr = allow *\n'
               '\tcaps osd = allow *\n'
               '[mgr.%s]\n'
               '\tkey = %s\n'
               '\tcaps mon = profile mgr\n'
               '\tcaps mds = allow *\n'
               '\tcaps osd = allow *\n'
               % (mon_key, admin_key, mgr_id, mgr_key))

    admin_keyring = write_tmp('[client.admin]\n'
                              '\tkey = ' + admin_key + '\n',
                              uid, gid)

    # tmp keyring file
    bootstrap_keyring = write_tmp(keyring, uid, gid)
    return (mon_key, mgr_key, admin_key,
            bootstrap_keyring, admin_keyring)
def create_initial_monmap(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str,
    mon_id: str, mon_addr: str
) -> Any:
    logger.info('Creating initial monmap...')
    monmap = write_tmp('', 0, 0)
    out = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/monmaptool',
        args=[
            '--create',
            '--clobber',
            '--fsid', fsid,
            '--addv', mon_id, mon_addr,
            '/tmp/monmap'
        ],
        volume_mounts={
            monmap.name: '/tmp/monmap:z',
        },
    ).run()
    logger.debug(f'monmaptool for {mon_id} {mon_addr} on {out}')

    # pass monmap file to ceph user for use by ceph-mon --mkfs below
    os.fchown(monmap.fileno(), uid, gid)
    return monmap
def prepare_create_mon(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mon_id: str,
    bootstrap_keyring_path: str,
    monmap_path: str
) -> Tuple[str, str]:
    logger.info('Creating mon...')
    create_daemon_dirs(ctx, fsid, 'mon', mon_id, uid, gid)
    mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', mon_id)
    log_dir = get_log_dir(fsid, ctx.log_dir)
    out = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/ceph-mon',
        args=[
            '--mkfs',
            '-i', mon_id,
            '--fsid', fsid,
            '-c', '/dev/null',
            '--monmap', '/tmp/monmap',
            '--keyring', '/tmp/keyring',
        ] + get_daemon_args(ctx, fsid, 'mon', mon_id),
        volume_mounts={
            log_dir: '/var/log/ceph:z',
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
            bootstrap_keyring_path: '/tmp/keyring:z',
            monmap_path: '/tmp/monmap:z',
        },
    ).run()
    logger.debug(f'create mon.{mon_id} on {out}')
    return (mon_dir, log_dir)
def create_mon(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mon_id: str
) -> None:
    mon_c = get_container(ctx, fsid, 'mon', mon_id)
    ctx.meta_json = json.dumps({'service_name': 'mon'})
    deploy_daemon(ctx, fsid, 'mon', mon_id, mon_c, uid, gid,
                  config=None, keyring=None)
def wait_for_mon(
    ctx: CephadmContext,
    mon_id: str, mon_dir: str,
    admin_keyring_path: str, config_path: str
) -> None:
    logger.info('Waiting for mon to start...')
    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/ceph',
        args=[
            'status'],
        volume_mounts={
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
            admin_keyring_path: '/etc/ceph/ceph.client.admin.keyring:z',
            config_path: '/etc/ceph/ceph.conf:z',
        },
    )

    # wait for the service to become available
    def is_mon_available():
        # type: () -> bool
        timeout = ctx.timeout if ctx.timeout else 60  # seconds
        out, err, ret = call(ctx, c.run_cmd(),
                             desc=c.entrypoint,
                             timeout=timeout)
        return ret == 0

    is_available(ctx, 'mon', is_mon_available)
def create_mgr(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mgr_id: str, mgr_key: str,
    config: str, clifunc: Callable
) -> None:
    logger.info('Creating mgr...')
    mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key)
    mgr_c = get_container(ctx, fsid, 'mgr', mgr_id)
    # Note: the default port used by the Prometheus node exporter is opened in fw
    ctx.meta_json = json.dumps({'service_name': 'mgr'})
    deploy_daemon(ctx, fsid, 'mgr', mgr_id, mgr_c, uid, gid,
                  config=config, keyring=mgr_keyring, ports=[9283])

    # wait for the service to become available
    logger.info('Waiting for mgr to start...')

    def is_mgr_available():
        # type: () -> bool
        timeout = ctx.timeout if ctx.timeout else 60  # seconds
        try:
            out = clifunc(['status', '-f', 'json-pretty'], timeout=timeout)
            j = json.loads(out)
            return j.get('mgrmap', {}).get('available', False)
        except Exception as e:
            logger.debug('status failed: %s' % e)
            return False
    is_available(ctx, 'mgr', is_mgr_available)
def prepare_ssh(
    ctx: CephadmContext,
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:

    cli(['cephadm', 'set-user', ctx.ssh_user])

    if ctx.ssh_config:
        logger.info('Using provided ssh config...')
        mounts = {
            pathify(ctx.ssh_config.name): '/tmp/cephadm-ssh-config:z',
        }
        cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts=mounts)

    if ctx.ssh_private_key and ctx.ssh_public_key:
        logger.info('Using provided ssh keys...')
        mounts = {
            pathify(ctx.ssh_private_key.name): '/tmp/cephadm-ssh-key:z',
            pathify(ctx.ssh_public_key.name): '/tmp/cephadm-ssh-key.pub:z'
        }
        cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts)
        cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts=mounts)
    else:
        logger.info('Generating ssh key...')
        cli(['cephadm', 'generate-key'])
        ssh_pub = cli(['cephadm', 'get-pub-key'])

        with open(ctx.output_pub_ssh_key, 'w') as f:
            f.write(ssh_pub)
        logger.info('Wrote public SSH key to %s' % ctx.output_pub_ssh_key)

        logger.info('Adding key to %s@localhost authorized_keys...' % ctx.ssh_user)
        try:
            s_pwd = pwd.getpwnam(ctx.ssh_user)
        except KeyError:
            raise Error('Cannot find uid/gid for ssh-user: %s' % (ctx.ssh_user))
        ssh_uid = s_pwd.pw_uid
        ssh_gid = s_pwd.pw_gid
        ssh_dir = os.path.join(s_pwd.pw_dir, '.ssh')

        if not os.path.exists(ssh_dir):
            makedirs(ssh_dir, ssh_uid, ssh_gid, 0o700)

        auth_keys_file = '%s/authorized_keys' % ssh_dir
        add_newline = False

        if os.path.exists(auth_keys_file):
            with open(auth_keys_file, 'r') as f:
                f.seek(0, os.SEEK_END)
                if f.tell() > 0:
                    f.seek(f.tell() - 1, os.SEEK_SET)  # go to last char
                    if f.read() != '\n':
                        add_newline = True

        with open(auth_keys_file, 'a') as f:
            os.fchown(f.fileno(), ssh_uid, ssh_gid)  # just in case we created it
            os.fchmod(f.fileno(), 0o600)  # just in case we created it
            if add_newline:
                f.write('\n')
            f.write(ssh_pub.strip() + '\n')

    host = get_hostname()
    logger.info('Adding host %s...' % host)
    try:
        args = ['orch', 'host', 'add', host]
        if ctx.mon_ip:
            args.append(unwrap_ipv6(ctx.mon_ip))
        cli(args)
    except RuntimeError as e:
        raise Error('Failed to add host <%s>: %s' % (host, e))

    for t in ['mon', 'mgr']:
        if not ctx.orphan_initial_daemons:
            logger.info('Deploying %s service with default placement...' % t)
            cli(['orch', 'apply', t])
        else:
            logger.info('Deploying unmanaged %s service...' % t)
            cli(['orch', 'apply', t, '--unmanaged'])

    if not ctx.orphan_initial_daemons:
        logger.info('Deploying crash service with default placement...')
        cli(['orch', 'apply', 'crash'])

    if not ctx.skip_monitoring_stack:
        for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
            logger.info('Deploying %s service with default placement...' % t)
            cli(['orch', 'apply', t])
def enable_cephadm_mgr_module(
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:

    logger.info('Enabling cephadm module...')
    cli(['mgr', 'module', 'enable', 'cephadm'])
    wait_for_mgr_restart()
    logger.info('Setting orchestrator backend to cephadm...')
    cli(['orch', 'set', 'backend', 'cephadm'])
def prepare_dashboard(
    ctx: CephadmContext,
    uid: int, gid: int,
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:

    # Configure SSL port (cephadm only allows to configure dashboard SSL port)
    # if the user does not want to use SSL he can change this setting once the cluster is up
    cli(['config', 'set', 'mgr', 'mgr/dashboard/ssl_server_port', str(ctx.ssl_dashboard_port)])

    # configuring dashboard parameters
    logger.info('Enabling the dashboard module...')
    cli(['mgr', 'module', 'enable', 'dashboard'])
    wait_for_mgr_restart()

    # dashboard crt and key
    if ctx.dashboard_key and ctx.dashboard_crt:
        logger.info('Using provided dashboard certificate...')
        mounts = {
            pathify(ctx.dashboard_crt.name): '/tmp/dashboard.crt:z',
            pathify(ctx.dashboard_key.name): '/tmp/dashboard.key:z'
        }
        cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts)
        cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts)
    else:
        logger.info('Generating a dashboard self-signed certificate...')
        cli(['dashboard', 'create-self-signed-cert'])

    logger.info('Creating initial admin user...')
    password = ctx.initial_dashboard_password or generate_password()
    tmp_password_file = write_tmp(password, uid, gid)
    cmd = ['dashboard', 'ac-user-create', ctx.initial_dashboard_user, '-i', '/tmp/dashboard.pw', 'administrator', '--force-password']
    if not ctx.dashboard_password_noupdate:
        cmd.append('--pwd-update-required')
    cli(cmd, extra_mounts={pathify(tmp_password_file.name): '/tmp/dashboard.pw:z'})
    logger.info('Fetching dashboard port number...')
    out = cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
    port = int(out)

    # Open dashboard port
    fw = Firewalld(ctx)
    fw.open_ports([port])
    fw.apply_rules()

    logger.info('Ceph Dashboard is now available at:\n\n'
                '\t     URL: https://%s:%s/\n'
                '\t    User: %s\n'
                '\tPassword: %s\n' % (
                    get_fqdn(), port,
                    ctx.initial_dashboard_user,
                    password))
def prepare_bootstrap_config(
    ctx: CephadmContext,
    fsid: str, mon_addr: str, image: str
) -> str:

    cp = read_config(ctx.config)
    if not cp.has_section('global'):
        cp.add_section('global')
    cp.set('global', 'fsid', fsid)
    cp.set('global', 'mon_host', mon_addr)
    cp.set('global', 'container_image', image)

    if not cp.has_section('mon'):
        cp.add_section('mon')
    if (
            not cp.has_option('mon', 'auth_allow_insecure_global_id_reclaim')
            and not cp.has_option('mon', 'auth allow insecure global id reclaim')
    ):
        cp.set('mon', 'auth_allow_insecure_global_id_reclaim', 'false')

    if ctx.single_host_defaults:
        logger.info('Adjusting default settings to suit single-host cluster...')
        # replicate across osds, not hosts
        if (
                not cp.has_option('global', 'osd_crush_choose_leaf_type')
                and not cp.has_option('global', 'osd crush choose leaf type')
        ):
            cp.set('global', 'osd_crush_choose_leaf_type', '0')
        if (
                not cp.has_option('global', 'osd_pool_default_size')
                and not cp.has_option('global', 'osd pool default size')
        ):
            cp.set('global', 'osd_pool_default_size', '2')
        # disable mgr standby modules (so we can colocate multiple mgrs on one host)
        if not cp.has_section('mgr'):
            cp.add_section('mgr')
        if (
                not cp.has_option('mgr', 'mgr_standby_modules')
                and not cp.has_option('mgr', 'mgr standby modules')
        ):
            cp.set('mgr', 'mgr_standby_modules', 'false')

    if ctx.log_to_file:
        cp.set('global', 'log_to_file', 'true')
        cp.set('global', 'log_to_stderr', 'false')
        cp.set('global', 'log_to_journald', 'false')
        cp.set('global', 'mon_cluster_log_to_file', 'true')
        cp.set('global', 'mon_cluster_log_to_stderr', 'false')
        cp.set('global', 'mon_cluster_log_to_journald', 'false')

    cpf = StringIO()
    cp.write(cpf)
    config = cpf.getvalue()

    if ctx.registry_json or ctx.registry_url:
        command_registry_login(ctx)

    return config


def finish_bootstrap_config(
        ctx: CephadmContext,
        fsid: str,
        config: str,
        mon_id: str, mon_dir: str,
        mon_network: Optional[str], ipv6: bool,
        cli: Callable,
        cluster_network: Optional[str], ipv6_cluster_network: bool) -> None:
    if not ctx.no_minimize_config:
        logger.info('Assimilating anything we can from ceph.conf...')
        cli([
            'config', 'assimilate-conf',
            '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
        ], {
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
        })
        logger.info('Generating new minimal ceph.conf...')
        cli([
            'config', 'generate-minimal-conf',
            '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
        ], {
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
        })
        # re-read our minimized config
        with open(mon_dir + '/config', 'r') as f:
            config = f.read()
        logger.info('Restarting the monitor...')
        call_throws(ctx, [
            'systemctl',
            'restart',
            get_unit_name(fsid, 'mon', mon_id)
        ])

    if mon_network:
        logger.info(f'Setting mon public_network to {mon_network}')
        cli(['config', 'set', 'mon', 'public_network', mon_network])

    if cluster_network:
        logger.info(f'Setting cluster_network to {cluster_network}')
        cli(['config', 'set', 'global', 'cluster_network', cluster_network])

    if ipv6 or ipv6_cluster_network:
        logger.info('Enabling IPv6 (ms_bind_ipv6) binding')
        cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])

    with open(ctx.output_config, 'w') as f:
        f.write(config)
    logger.info('Wrote config to %s' % ctx.output_config)


def command_bootstrap(ctx):
    # type: (CephadmContext) -> int

    if not ctx.output_config:
        ctx.output_config = os.path.join(ctx.output_dir, 'ceph.conf')
    if not ctx.output_keyring:
        ctx.output_keyring = os.path.join(ctx.output_dir,
                                          'ceph.client.admin.keyring')
    if not ctx.output_pub_ssh_key:
        ctx.output_pub_ssh_key = os.path.join(ctx.output_dir, 'ceph.pub')

    # verify output files
    for f in [ctx.output_config, ctx.output_keyring,
              ctx.output_pub_ssh_key]:
        if not ctx.allow_overwrite:
            if os.path.exists(f):
                raise Error('%s already exists; delete or pass '
                            '--allow-overwrite to overwrite' % f)
        dirname = os.path.dirname(f)
        if dirname and not os.path.exists(dirname):
            fname = os.path.basename(f)
            logger.info(f'Creating directory {dirname} for {fname}')
            try:
                # use makedirs to create intermediate missing dirs
                os.makedirs(dirname, 0o755)
            except PermissionError:
                raise Error(f'Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.')

    (user_conf, _) = get_config_and_keyring(ctx)

    if not ctx.skip_prepare_host:
        command_prepare_host(ctx)
    else:
        logger.info('Skip prepare_host')

    fsid = ctx.fsid or make_fsid()
    if not is_fsid(fsid):
        raise Error('not an fsid: %s' % fsid)
    logger.info('Cluster fsid: %s' % fsid)

    hostname = get_hostname()
    if '.' in hostname and not ctx.allow_fqdn_hostname:
        raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0]))
    mon_id = ctx.mon_id or hostname
    mgr_id = ctx.mgr_id or generate_service_id()

    lock = FileLock(ctx, fsid)
    lock.acquire()

    (addr_arg, ipv6, mon_network) = prepare_mon_addresses(ctx)
    cluster_network, ipv6_cluster_network = prepare_cluster_network(ctx)

    config = prepare_bootstrap_config(ctx, fsid, addr_arg, ctx.image)

    if not ctx.skip_pull:
        _pull_image(ctx, ctx.image)

    image_ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
    logger.info(f'Ceph version: {image_ver}')

    if not ctx.allow_mismatched_release:
        image_release = image_ver.split()[4]
        if image_release not in \
                [DEFAULT_IMAGE_RELEASE, LATEST_STABLE_RELEASE]:
            raise Error(
                f'Container release {image_release} != cephadm release {DEFAULT_IMAGE_RELEASE};'
                ' please use matching version of cephadm (pass --allow-mismatched-release to continue anyway)'
            )

    logger.info('Extracting ceph user uid/gid from container image...')
    (uid, gid) = extract_uid_gid(ctx)

    # create some initial keys
    (mon_key, mgr_key, admin_key, bootstrap_keyring, admin_keyring) = \
        create_initial_keys(ctx, uid, gid, mgr_id)

    monmap = create_initial_monmap(ctx, uid, gid, fsid, mon_id, addr_arg)
    (mon_dir, log_dir) = \
        prepare_create_mon(ctx, uid, gid, fsid, mon_id,
                           bootstrap_keyring.name, monmap.name)

    with open(mon_dir + '/config', 'w') as f:
        os.fchown(f.fileno(), uid, gid)
        os.fchmod(f.fileno(), 0o600)
        f.write(config)

    make_var_run(ctx, fsid, uid, gid)
    create_mon(ctx, uid, gid, fsid, mon_id)

    # config to issue various CLI commands
    tmp_config = write_tmp(config, uid, gid)

    # a CLI helper to reduce our typing
    def cli(cmd, extra_mounts={}, timeout=DEFAULT_TIMEOUT):
        # type: (List[str], Dict[str, str], Optional[int]) -> str
        mounts = {
            log_dir: '/var/log/ceph:z',
            admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
            tmp_config.name: '/etc/ceph/ceph.conf:z',
        }
        for k, v in extra_mounts.items():
            mounts[k] = v
        timeout = timeout or ctx.timeout
        return CephContainer(
            ctx,
            image=ctx.image,
            entrypoint='/usr/bin/ceph',
            args=cmd,
            volume_mounts=mounts,
        ).run(timeout=timeout)

    wait_for_mon(ctx, mon_id, mon_dir, admin_keyring.name, tmp_config.name)

    finish_bootstrap_config(ctx, fsid, config, mon_id, mon_dir,
                            mon_network, ipv6, cli,
                            cluster_network, ipv6_cluster_network)

    # output files
    with open(ctx.output_keyring, 'w') as f:
        os.fchmod(f.fileno(), 0o600)
        f.write('[client.admin]\n'
                '\tkey = ' + admin_key + '\n')
    logger.info('Wrote keyring to %s' % ctx.output_keyring)

    # create mgr
    create_mgr(ctx, uid, gid, fsid, mgr_id, mgr_key, config, cli)

    if user_conf:
        # user given config settings were already assimilated earlier
        # but if the given settings contained any attributes in
        # the mgr (e.g. mgr/cephadm/container_image_prometheus)
        # they don't seem to be stored if there isn't a mgr yet.
        # Since re-assimilating the same conf settings should be
        # idempotent we can just do it again here.
        with tempfile.NamedTemporaryFile(buffering=0) as tmp:
            tmp.write(user_conf.encode('utf-8'))
            cli(['config', 'assimilate-conf',
                 '-i', '/var/lib/ceph/user.conf'],
                {tmp.name: '/var/lib/ceph/user.conf:z'})

    # wait for mgr to restart (after enabling a module)
    def wait_for_mgr_restart() -> None:
        # first get latest mgrmap epoch from the mon.  try newer 'mgr
        # stat' command first, then fall back to 'mgr dump' if
        # necessary
        try:
            j = json_loads_retry(lambda: cli(['mgr', 'stat']))
        except Exception:
            j = json_loads_retry(lambda: cli(['mgr', 'dump']))
        epoch = j['epoch']

        # wait for mgr to have it
        logger.info('Waiting for the mgr to restart...')

        def mgr_has_latest_epoch():
            # type: () -> bool
            try:
                out = cli(['tell', 'mgr', 'mgr_status'])
                j = json.loads(out)
                return j['mgrmap_epoch'] >= epoch
            except Exception as e:
                logger.debug('tell mgr mgr_status failed: %s' % e)
                return False
        is_available(ctx, 'mgr epoch %d' % epoch, mgr_has_latest_epoch)

    enable_cephadm_mgr_module(cli, wait_for_mgr_restart)

    # ssh
    if not ctx.skip_ssh:
        prepare_ssh(ctx, cli, wait_for_mgr_restart)

    if ctx.registry_url and ctx.registry_username and ctx.registry_password:
        cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_url', ctx.registry_url, '--force'])
        cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_username', ctx.registry_username, '--force'])
        cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_password', ctx.registry_password, '--force'])

    cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(ctx.container_init), '--force'])

    if ctx.with_exporter:
        cli(['config-key', 'set', 'mgr/cephadm/exporter_enabled', 'true'])
        if ctx.exporter_config:
            logger.info('Applying custom cephadm exporter settings')
            # validated within the parser, so we can just apply to the store
            with tempfile.NamedTemporaryFile(buffering=0) as tmp:
                tmp.write(json.dumps(ctx.exporter_config).encode('utf-8'))
                mounts = {
                    tmp.name: '/tmp/exporter-config.json:z'
                }
                cli(['cephadm', 'set-exporter-config', '-i', '/tmp/exporter-config.json'], extra_mounts=mounts)
            logger.info('-> Use ceph orch apply cephadm-exporter to deploy')
        else:
            # generate a default SSL configuration for the exporter(s)
            logger.info('Generating a default cephadm exporter configuration (self-signed)')
            cli(['cephadm', 'generate-exporter-config'])

        # deploy the service (commented out until the cephadm changes are in the ceph container build)
        logger.info('Deploying cephadm exporter service with default placement...')
        cli(['orch', 'apply', 'cephadm-exporter'])

    if not ctx.skip_dashboard:
        prepare_dashboard(ctx, uid, gid, cli, wait_for_mgr_restart)

    if ctx.output_config == '/etc/ceph/ceph.conf' and not ctx.skip_admin_label:
        logger.info('Enabling client.admin keyring and conf on hosts with "admin" label')
        try:
            cli(['orch', 'client-keyring', 'set', 'client.admin', 'label:_admin'])
            cli(['orch', 'host', 'label', 'add', get_hostname(), '_admin'])
        except Exception:
            logger.info('Unable to set up "admin" label; assuming older version of Ceph')

    if ctx.apply_spec:
        logger.info('Applying %s to cluster' % ctx.apply_spec)

        with open(ctx.apply_spec) as f:
            for line in f:
                if 'hostname:' in line:
                    line = line.replace('\n', '')
                    split = line.split(': ')
                    if split[1] != hostname:
                        logger.info('Adding ssh key to %s' % split[1])

                        ssh_key = '/etc/ceph/ceph.pub'
                        if ctx.ssh_public_key:
                            ssh_key = ctx.ssh_public_key.name
                        out, err, code = call_throws(ctx, ['sudo', '-u', ctx.ssh_user, 'ssh-copy-id', '-f', '-i', ssh_key, '-o StrictHostKeyChecking=no', '%s@%s' % (ctx.ssh_user, split[1])])

        mounts = {}
        mounts[pathify(ctx.apply_spec)] = '/tmp/spec.yml:z'

        out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)

    logger.info('You can access the Ceph CLI with:\n\n'
                '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
                    sys.argv[0],
                    fsid,
                    ctx.output_config,
                    ctx.output_keyring))
    logger.info('Please consider enabling telemetry to help improve Ceph:\n\n'
                '\tceph telemetry on\n\n'
                'For more information see:\n\n'
                '\thttps://docs.ceph.com/docs/pacific/mgr/telemetry/\n')
    logger.info('Bootstrap complete.')
    return 0

##################################


def command_registry_login(ctx: CephadmContext) -> int:
    if ctx.registry_json:
        logger.info('Pulling custom registry login info from %s.' % ctx.registry_json)
        d = get_parm(ctx.registry_json)
        if d.get('url') and d.get('username') and d.get('password'):
            ctx.registry_url = d.get('url')
            ctx.registry_username = d.get('username')
            ctx.registry_password = d.get('password')
            registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
        else:
            raise Error('json provided for custom registry login did not include all necessary fields. '
                        'Please setup json file as\n'
                        '{\n'
                        ' "url": "REGISTRY_URL",\n'
                        ' "username": "REGISTRY_USERNAME",\n'
                        ' "password": "REGISTRY_PASSWORD"\n'
                        '}\n')
    elif ctx.registry_url and ctx.registry_username and ctx.registry_password:
        registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
    else:
        raise Error('Invalid custom registry arguments received. To login to a custom registry include '
                    '--registry-url, --registry-username and --registry-password '
                    'options or --registry-json option')
    return 0
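
# Illustrative example (not from the original source; hostname and credentials
# are placeholders): a registry JSON file with the fields checked above could
# look like the following and be passed via `--registry-json <file>`:
#
#   {
#       "url": "registry.example.com",
#       "username": "myuser",
#       "password": "mypass"
#   }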


def registry_login(ctx: CephadmContext, url: Optional[str], username: Optional[str], password: Optional[str]) -> None:
    logger.info('Logging into custom registry.')
    try:
        engine = ctx.container_engine
        cmd = [engine.path, 'login',
               '-u', username, '-p', password,
               url]
        if isinstance(engine, Podman):
            cmd.append('--authfile=/etc/ceph/podman-auth.json')
        out, _, _ = call_throws(ctx, cmd)
        if isinstance(engine, Podman):
            os.chmod('/etc/ceph/podman-auth.json', 0o600)
    except Exception:
        raise Error('Failed to login to custom registry @ %s as %s with given password' % (ctx.registry_url, ctx.registry_username))

##################################


def extract_uid_gid_monitoring(ctx, daemon_type):
    # type: (CephadmContext, str) -> Tuple[int, int]

    if daemon_type == 'prometheus':
        uid, gid = extract_uid_gid(ctx, file_path='/etc/prometheus')
    elif daemon_type == 'node-exporter':
        uid, gid = 65534, 65534
    elif daemon_type == 'grafana':
        uid, gid = extract_uid_gid(ctx, file_path='/var/lib/grafana')
    elif daemon_type == 'alertmanager':
        uid, gid = extract_uid_gid(ctx, file_path=['/etc/alertmanager', '/etc/prometheus'])
    else:
        raise Error('{} not implemented yet'.format(daemon_type))
    return uid, gid


def command_deploy(ctx):
    # type: (CephadmContext) -> None
    daemon_type, daemon_id = ctx.name.split('.', 1)

    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    if daemon_type not in get_supported_daemons():
        raise Error('daemon type %s not recognized' % daemon_type)

    redeploy = False
    unit_name = get_unit_name(ctx.fsid, daemon_type, daemon_id)
    (_, state, _) = check_unit(ctx, unit_name)
    if state == 'running' or is_container_running(ctx, CephContainer.for_daemon(ctx, ctx.fsid, daemon_type, daemon_id, 'bash')):
        redeploy = True

    if ctx.reconfig:
        logger.info('%s daemon %s ...' % ('Reconfig', ctx.name))
    elif redeploy:
        logger.info('%s daemon %s ...' % ('Redeploy', ctx.name))
    else:
        logger.info('%s daemon %s ...' % ('Deploy', ctx.name))

    # Get and check ports explicitly required to be opened
    daemon_ports = []  # type: List[int]

    # only check port in use if not reconfig or redeploy since service
    # we are redeploying/reconfiguring will already be using the port
    if not ctx.reconfig and not redeploy:
        if ctx.tcp_ports:
            daemon_ports = list(map(int, ctx.tcp_ports.split()))

    if daemon_type in Ceph.daemons:
        config, keyring = get_config_and_keyring(ctx)
        uid, gid = extract_uid_gid(ctx)
        make_var_run(ctx, ctx.fsid, uid, gid)

        c = get_container(ctx, ctx.fsid, daemon_type, daemon_id,
                          ptrace=ctx.allow_ptrace)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      osd_fsid=ctx.osd_fsid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type in Monitoring.components:
        # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
        # make sure provided config-json is sufficient
        config = get_parm(ctx.config_json)  # type: ignore
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())
        required_args = Monitoring.components[daemon_type].get('config-json-args', list())
        if required_files:
            if not config or not all(c in config.get('files', {}).keys() for c in required_files):  # type: ignore
                raise Error('{} deployment requires config-json which must '
                            'contain file content for {}'.format(daemon_type.capitalize(), ', '.join(required_files)))
        if required_args:
            if not config or not all(c in config.keys() for c in required_args):  # type: ignore
                raise Error('{} deployment requires config-json which must '
                            'contain arg for {}'.format(daemon_type.capitalize(), ', '.join(required_args)))

        uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
        c = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == NFSGanesha.daemon_type:
        if not ctx.reconfig and not redeploy and not daemon_ports:
            daemon_ports = list(NFSGanesha.port_map.values())

        config, keyring = get_config_and_keyring(ctx)
        # TODO: extract ganesha uid/gid (997, 994) ?
        uid, gid = extract_uid_gid(ctx)
        c = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CephIscsi.daemon_type:
        config, keyring = get_config_and_keyring(ctx)
        uid, gid = extract_uid_gid(ctx)
        c = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, ctx.fsid, daemon_id)
        uid, gid = haproxy.extract_uid_gid_haproxy()
        c = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == Keepalived.daemon_type:
        keepalived = Keepalived.init(ctx, ctx.fsid, daemon_id)
        uid, gid = keepalived.extract_uid_gid_keepalived()
        c = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, ctx.fsid, daemon_id)
        if not ctx.reconfig and not redeploy:
            daemon_ports.extend(cc.ports)
        c = get_container(ctx, ctx.fsid, daemon_type, daemon_id,
                          privileged=cc.privileged,
                          ptrace=ctx.allow_ptrace)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c,
                      uid=cc.uid, gid=cc.gid, config=None,
                      keyring=None, reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CephadmDaemon.daemon_type:
        # get current user gid and uid
        uid = os.getuid()
        gid = os.getgid()
        config_js = get_parm(ctx.config_json)  # type: Dict[str, str]
        if not daemon_ports:
            logger.info('cephadm-exporter will use default port ({})'.format(CephadmDaemon.default_port))
            daemon_ports = [CephadmDaemon.default_port]

        CephadmDaemon.validate_config(config_js)

        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, None,
                      uid, gid, ports=daemon_ports)

    else:
        raise Error('daemon type {} not implemented in command_deploy function'
                    .format(daemon_type))

##################################


def command_run(ctx):
    # type: (CephadmContext) -> int
    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    c = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
    command = c.run_cmd()
    return call_timeout(ctx, command, ctx.timeout)

##################################


def command_shell(ctx):
    # type: (CephadmContext) -> int
    cp = read_config(ctx.config)
    if cp.has_option('global', 'fsid') and \
            cp.get('global', 'fsid') != ctx.fsid:
        raise Error('fsid does not match ceph.conf')

    if ctx.fsid:
        make_log_dir(ctx, ctx.fsid)

    if ctx.name:
        if '.' in ctx.name:
            (daemon_type, daemon_id) = ctx.name.split('.', 1)
        else:
            daemon_type = ctx.name
            daemon_id = None
    else:
        daemon_type = 'osd'  # get the most mounts
        daemon_id = None

    if daemon_id and not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    # use /etc/ceph files by default, if present.  we do this instead of
    # making these defaults in the arg parser because we don't want an error
    # if they don't exist.
    if not ctx.keyring and os.path.exists(SHELL_DEFAULT_KEYRING):
        ctx.keyring = SHELL_DEFAULT_KEYRING

    container_args: List[str] = ['-i']
    mounts = get_container_mounts(ctx, ctx.fsid, daemon_type, daemon_id,
                                  no_config=True if ctx.config else False)
    binds = get_container_binds(ctx, ctx.fsid, daemon_type, daemon_id)
    if ctx.config:
        mounts[pathify(ctx.config)] = '/etc/ceph/ceph.conf:z'
    if ctx.keyring:
        mounts[pathify(ctx.keyring)] = '/etc/ceph/ceph.keyring:z'
    if ctx.mount:
        for _mount in ctx.mount:
            split_src_dst = _mount.split(':')
            mount = pathify(split_src_dst[0])
            filename = os.path.basename(split_src_dst[0])
            if len(split_src_dst) > 1:
                dst = split_src_dst[1] + ':z' if len(split_src_dst) == 3 else split_src_dst[1]
                mounts[mount] = dst
            else:
                mounts[mount] = '/mnt/{}:z'.format(filename)
    if ctx.command:
        command = ctx.command
    else:
        command = ['bash']
        container_args += [
            '-it',
            '-e', 'LANG=C',
            '-e', 'PS1=%s' % CUSTOM_PS1,
        ]
        if ctx.fsid:
            home = os.path.join(ctx.data_dir, ctx.fsid, 'home')
            if not os.path.exists(home):
                logger.debug('Creating root home at %s' % home)
                makedirs(home, 0, 0, 0o660)
                if os.path.exists('/etc/skel'):
                    for f in os.listdir('/etc/skel'):
                        if f.startswith('.bash'):
                            shutil.copyfile(os.path.join('/etc/skel', f),
                                            os.path.join(home, f))
            mounts[home] = '/root'

    for i in ctx.volume:
        a, b = i.split(':', 1)
        mounts[a] = b

    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='doesnotmatter',
        args=[],
        container_args=container_args,
        volume_mounts=mounts,
        bind_mounts=binds,
        envs=ctx.env,
        privileged=True)
    command = c.shell_cmd(command)

    return call_timeout(ctx, command, ctx.timeout)

##################################


def command_enter(ctx):
    # type: (CephadmContext) -> int
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')
    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    container_args = ['-i']  # type: List[str]
    if ctx.command:
        command = ctx.command
    else:
        command = ['sh']
        container_args += [
            '-it',
            '-e', 'LANG=C',
            '-e', 'PS1=%s' % CUSTOM_PS1,
        ]
    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='doesnotmatter',
        container_args=container_args,
        cname='ceph-%s-%s.%s' % (ctx.fsid, daemon_type, daemon_id),
    )
    command = c.exec_cmd(command)
    return call_timeout(ctx, command, ctx.timeout)

##################################


def command_ceph_volume(ctx):
    # type: (CephadmContext) -> None
    cp = read_config(ctx.config)
    if cp.has_option('global', 'fsid') and \
            cp.get('global', 'fsid') != ctx.fsid:
        raise Error('fsid does not match ceph.conf')

    if ctx.fsid:
        make_log_dir(ctx, ctx.fsid)

        lock = FileLock(ctx, ctx.fsid)
        lock.acquire()

    (uid, gid) = (0, 0)  # ceph-volume runs as root
    mounts = get_container_mounts(ctx, ctx.fsid, 'osd', None)

    (config, keyring) = get_config_and_keyring(ctx)

    if config:
        # tmp config file
        tmp_config = write_tmp(config, uid, gid)
        mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'
    if keyring:
        # tmp keyring file
        tmp_keyring = write_tmp(keyring, uid, gid)
        mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'

    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/sbin/ceph-volume',
        args=ctx.command,
        privileged=True,
        volume_mounts=mounts,
    )

    out, err, code = call_throws(ctx, c.run_cmd())
    if not code:
        print(out)

##################################


def command_unit(ctx):
    # type: (CephadmContext) -> None
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    call_throws(ctx, [
        'systemctl',
        ctx.command,
        unit_name],
        verbosity=CallVerbosity.VERBOSE,
        desc='')

##################################


def command_logs(ctx):
    # type: (CephadmContext) -> None
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    cmd = [find_program('journalctl')]
    cmd.extend(['-u', unit_name])
    if ctx.command:
        cmd.extend(ctx.command)

    # call this directly, without our wrapper, so that we get an unmolested
    # stdout with logger prefixing.
    logger.debug('Running command: %s' % ' '.join(cmd))
    subprocess.call(cmd, env=os.environ.copy())  # type: ignore

##################################


def list_networks(ctx):
    # type: (CephadmContext) -> Dict[str,Dict[str, Set[str]]]

    # sadly, 18.04's iproute2 4.15.0-2ubun doesn't support the -j flag,
    # so we'll need to use a regex to parse 'ip' command output.
    #
    # out, _, _ = call_throws(['ip', '-j', 'route', 'ls'])
    # j = json.loads(out)

    res = _list_ipv4_networks(ctx)
    res.update(_list_ipv6_networks(ctx))
    return res


def _list_ipv4_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]:
    execstr: Optional[str] = find_executable('ip')
    if not execstr:
        raise FileNotFoundError("unable to find 'ip' command")
    out, _, _ = call_throws(ctx, [execstr, 'route', 'ls'])
    return _parse_ipv4_route(out)


def _parse_ipv4_route(out: str) -> Dict[str, Dict[str, Set[str]]]:
    r = {}  # type: Dict[str, Dict[str, Set[str]]]
    p = re.compile(r'^(\S+) dev (\S+) (.*)scope link (.*)src (\S+)')
    for line in out.splitlines():
        m = p.findall(line)
        if not m:
            continue
        net = m[0][0]
        iface = m[0][1]
        ip = m[0][4]
        if net not in r:
            r[net] = {}
        if iface not in r[net]:
            r[net][iface] = set()
        r[net][iface].add(ip)
    return r
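
# Illustrative example (assumed input, not from the original source): given an
# `ip route ls` line such as
#   10.1.2.0/24 dev eth0 proto kernel scope link src 10.1.2.3
# the regex above yields net='10.1.2.0/24', iface='eth0', ip='10.1.2.3', so the
# returned mapping is {'10.1.2.0/24': {'eth0': {'10.1.2.3'}}}.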


def _list_ipv6_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]:
    execstr: Optional[str] = find_executable('ip')
    if not execstr:
        raise FileNotFoundError("unable to find 'ip' command")
    routes, _, _ = call_throws(ctx, [execstr, '-6', 'route', 'ls'])
    ips, _, _ = call_throws(ctx, [execstr, '-6', 'addr', 'ls'])
    return _parse_ipv6_route(routes, ips)


def _parse_ipv6_route(routes: str, ips: str) -> Dict[str, Dict[str, Set[str]]]:
    r = {}  # type: Dict[str, Dict[str, Set[str]]]
    route_p = re.compile(r'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$')
    ip_p = re.compile(r'^\s+inet6 (\S+)/(.*)scope (.*)$')
    iface_p = re.compile(r'^(\d+): (\S+): (.*)$')
    for line in routes.splitlines():
        m = route_p.findall(line)
        if not m or m[0][0].lower() == 'default':
            continue
        net = m[0][0]
        if '/' not in net:  # only consider networks with a mask
            continue
        iface = m[0][1]
        if net not in r:
            r[net] = {}
        if iface not in r[net]:
            r[net][iface] = set()

    iface = None
    for line in ips.splitlines():
        m = ip_p.findall(line)
        if not m:
            m = iface_p.findall(line)
            if m:
                # drop @... suffix, if present
                iface = m[0][1].split('@')[0]
            continue
        ip = m[0][0]
        # find the network it belongs to
        net = [n for n in r.keys()
               if ipaddress.ip_address(ip) in ipaddress.ip_network(n)]
        if net and iface in r[net[0]]:
            r[net[0]][iface].add(ip)

    return r


def command_list_networks(ctx):
    # type: (CephadmContext) -> None
    r = list_networks(ctx)

    def serialize_sets(obj: Any) -> Any:
        return list(obj) if isinstance(obj, set) else obj

    print(json.dumps(r, indent=4, default=serialize_sets))

##################################


def command_ls(ctx):
    # type: (CephadmContext) -> None
    ls = list_daemons(ctx, detail=not ctx.no_detail,
                      legacy_dir=ctx.legacy_dir)
    print(json.dumps(ls, indent=4))
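
# Example (illustrative): `cephadm ls` prints a JSON array of the dicts built by
# list_daemons() below, i.e. entries carrying 'style', 'name', 'fsid',
# 'systemd_unit' and, with detail enabled, 'enabled'/'state'/version fields.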


def with_units_to_int(v: str) -> int:
    if v.endswith('iB'):
        v = v[:-2]
    elif v.endswith('B'):
        v = v[:-1]
    mult = 1
    if v[-1].upper() == 'K':
        mult = 1024
        v = v[:-1]
    elif v[-1].upper() == 'M':
        mult = 1024 * 1024
        v = v[:-1]
    elif v[-1].upper() == 'G':
        mult = 1024 * 1024 * 1024
        v = v[:-1]
    elif v[-1].upper() == 'T':
        mult = 1024 * 1024 * 1024 * 1024
        v = v[:-1]
    return int(float(v) * mult)
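
# Worked examples (illustrative) of the conversion above:
#   with_units_to_int('2KiB')   -> 2 * 1024           == 2048
#   with_units_to_int('339MiB') -> 339 * 1024 * 1024  == 355467264
#   with_units_to_int('1.5GiB') -> int(1.5 * 1024**3) == 1610612736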


def list_daemons(ctx, detail=True, legacy_dir=None):
    # type: (CephadmContext, bool, Optional[str]) -> List[Dict[str, str]]
    host_version: Optional[str] = None
    ls = []
    container_path = ctx.container_engine.path

    data_dir = ctx.data_dir
    if legacy_dir is not None:
        data_dir = os.path.abspath(legacy_dir + data_dir)

    # keep track of ceph versions we see
    seen_versions = {}  # type: Dict[str, Optional[str]]

    # keep track of image digests
    seen_digests = {}  # type: Dict[str, List[str]]

    # keep track of memory usage we've seen
    seen_memusage = {}  # type: Dict[str, int]
    out, err, code = call(
        ctx,
        [container_path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
        verbosity=CallVerbosity.DEBUG
    )
    seen_memusage_cid_len, seen_memusage = _parse_mem_usage(code, out)

    # /var/lib/ceph
    if os.path.exists(data_dir):
        for i in os.listdir(data_dir):
            if i in ['mon', 'osd', 'mds', 'mgr']:
                daemon_type = i
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '-' not in j:
                        continue
                    (cluster, daemon_id) = j.split('-', 1)
                    fsid = get_legacy_daemon_fsid(ctx,
                                                  cluster, daemon_type, daemon_id,
                                                  legacy_dir=legacy_dir)
                    legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
                    val: Dict[str, Any] = {
                        'style': 'legacy',
                        'name': '%s.%s' % (daemon_type, daemon_id),
                        'fsid': fsid if fsid is not None else 'unknown',
                        'systemd_unit': legacy_unit_name,
                    }
                    if detail:
                        (val['enabled'], val['state'], _) = \
                            check_unit(ctx, legacy_unit_name)
                        if not host_version:
                            try:
                                out, err, code = call(ctx,
                                                      ['ceph', '-v'],
                                                      verbosity=CallVerbosity.DEBUG)
                                if not code and out.startswith('ceph version '):
                                    host_version = out.split(' ')[2]
                            except Exception:
                                pass
                        val['host_version'] = host_version
                    ls.append(val)
            elif is_fsid(i):
                fsid = str(i)  # convince mypy that fsid is a str here
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)):
                        name = j
                        (daemon_type, daemon_id) = j.split('.', 1)
                        unit_name = get_unit_name(fsid,
                                                  daemon_type,
                                                  daemon_id)
                        val = {
                            'style': 'cephadm:v1',
                            'name': name,
                            'fsid': fsid,
                            'systemd_unit': unit_name,
                        }
                        if detail:
                            # get container id
                            (val['enabled'], val['state'], _) = \
                                check_unit(ctx, unit_name)
                            container_id = None
                            image_name = None
                            image_id = None
                            image_digests = None
                            version = None
                            start_stamp = None

                            out, err, code = get_container_stats(ctx, container_path, fsid, daemon_type, daemon_id)
                            if not code:
                                (container_id, image_name, image_id, start,
                                 version) = out.strip().split(',')
                                image_id = normalize_container_id(image_id)
                                daemon_type = name.split('.', 1)[0]
                                start_stamp = try_convert_datetime(start)

                                # collect digests for this image id
                                image_digests = seen_digests.get(image_id)
                                if not image_digests:
                                    out, err, code = call(
                                        ctx,
                                        [
                                            container_path, 'image', 'inspect', image_id,
                                            '--format', '{{.RepoDigests}}',
                                        ],
                                        verbosity=CallVerbosity.DEBUG)
                                    if not code:
                                        image_digests = list(set(map(
                                            normalize_image_digest,
                                            out.strip()[1:-1].split(' '))))
                                        seen_digests[image_id] = image_digests

                                # identify software version inside the container (if we can)
                                if not version or '.' not in version:
                                    version = seen_versions.get(image_id, None)
                                if daemon_type == NFSGanesha.daemon_type:
                                    version = NFSGanesha.get_version(ctx, container_id)
                                if daemon_type == CephIscsi.daemon_type:
                                    version = CephIscsi.get_version(ctx, container_id)
                                if not version:
                                    if daemon_type in Ceph.daemons:
                                        out, err, code = call(ctx,
                                                              [container_path, 'exec', container_id,
                                                               'ceph', '-v'],
                                                              verbosity=CallVerbosity.DEBUG)
                                        if not code and \
                                           out.startswith('ceph version '):
                                            version = out.split(' ')[2]
                                            seen_versions[image_id] = version
                                    elif daemon_type == 'grafana':
                                        out, err, code = call(ctx,
                                                              [container_path, 'exec', container_id,
                                                               'grafana-server', '-v'],
                                                              verbosity=CallVerbosity.DEBUG)
                                        if not code and \
                                           out.startswith('Version '):
                                            version = out.split(' ')[1]
                                            seen_versions[image_id] = version
                                    elif daemon_type in ['prometheus',
                                                         'alertmanager',
                                                         'node-exporter']:
                                        version = Monitoring.get_version(ctx, container_id, daemon_type)
                                        seen_versions[image_id] = version
                                    elif daemon_type == 'haproxy':
                                        out, err, code = call(ctx,
                                                              [container_path, 'exec', container_id,
                                                               'haproxy', '-v'],
                                                              verbosity=CallVerbosity.DEBUG)
                                        if not code and \
                                           out.startswith('HA-Proxy version '):
                                            version = out.split(' ')[2]
                                            seen_versions[image_id] = version
                                    elif daemon_type == 'keepalived':
                                        out, err, code = call(ctx,
                                                              [container_path, 'exec', container_id,
                                                               'keepalived', '--version'],
                                                              verbosity=CallVerbosity.DEBUG)
                                        if not code and \
                                           err.startswith('Keepalived '):
                                            version = err.split(' ')[1]
                                            if version[0] == 'v':
                                                version = version[1:]
                                            seen_versions[image_id] = version
                                    elif daemon_type == CustomContainer.daemon_type:
                                        # Because a custom container can contain
                                        # everything, we do not know which command
                                        # to execute to get the version.
                                        pass
                                    else:
                                        logger.warning('version for unknown daemon type %s' % daemon_type)
                            else:
                                vfile = os.path.join(data_dir, fsid, j, 'unit.image')  # type: ignore
                                try:
                                    with open(vfile, 'r') as f:
                                        image_name = f.read().strip() or None
                                except IOError:
                                    pass

                            mfile = os.path.join(data_dir, fsid, j, 'unit.meta')  # type: ignore
                            try:
                                with open(mfile, 'r') as f:
                                    meta = json.loads(f.read())
                                    val.update(meta)
                            except IOError:
                                pass

                            val['container_id'] = container_id
                            val['container_image_name'] = image_name
                            val['container_image_id'] = image_id
                            val['container_image_digests'] = image_digests
                            if container_id:
                                val['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len])
                            val['version'] = version
                            val['started'] = start_stamp
                            val['created'] = get_file_timestamp(
                                os.path.join(data_dir, fsid, j, 'unit.created')
                            )
                            val['deployed'] = get_file_timestamp(
                                os.path.join(data_dir, fsid, j, 'unit.image'))
                            val['configured'] = get_file_timestamp(
                                os.path.join(data_dir, fsid, j, 'unit.configured'))

                        ls.append(val)

    return ls


def _parse_mem_usage(code: int, out: str) -> Tuple[int, Dict[str, int]]:
    # keep track of memory usage we've seen
    seen_memusage = {}  # type: Dict[str, int]
    seen_memusage_cid_len = 0
    if not code:
        for line in out.splitlines():
            (cid, usage) = line.split(',')
            (used, limit) = usage.split(' / ')
            try:
                seen_memusage[cid] = with_units_to_int(used)
                if not seen_memusage_cid_len:
                    seen_memusage_cid_len = len(cid)
            except ValueError:
                logger.info('unable to parse memory usage line\n>{}'.format(line))
                pass
    return seen_memusage_cid_len, seen_memusage
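
# Illustrative example (assumed input): a line produced by the container
# engine's `stats --format '{{.ID}},{{.MemUsage}}' --no-stream` call above,
# such as
#   d6a575e9aa62,339MiB / 7.6GiB
# parses to seen_memusage['d6a575e9aa62'] == with_units_to_int('339MiB') and
# seen_memusage_cid_len == 12.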


def get_daemon_description(ctx, fsid, name, detail=False, legacy_dir=None):
    # type: (CephadmContext, str, str, bool, Optional[str]) -> Dict[str, str]

    for d in list_daemons(ctx, detail=detail, legacy_dir=legacy_dir):
        if d['fsid'] != fsid:
            continue
        if d['name'] != name:
            continue
        return d
    raise Error('Daemon not found: {}. See `cephadm ls`'.format(name))


def get_container_stats(ctx: CephadmContext, container_path: str, fsid: str, daemon_type: str, daemon_id: str) -> Tuple[str, str, int]:
    c = CephContainer.for_daemon(ctx, fsid, daemon_type, daemon_id, 'bash')
    out, err, code = '', '', -1
    for name in (c.cname, c.old_cname):
        cmd = [
            container_path, 'inspect',
            '--format', '{{.Id}},{{.Config.Image}},{{.Image}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}',
            name
        ]
        out, err, code = call(ctx, cmd, verbosity=CallVerbosity.DEBUG)
        if not code:
            break
    return out, err, code

##################################


def command_adopt(ctx):
    # type: (CephadmContext) -> None

    if not ctx.skip_pull:
        _pull_image(ctx, ctx.image)

    (daemon_type, daemon_id) = ctx.name.split('.', 1)

    # legacy check
    if ctx.style != 'legacy':
        raise Error('adoption of style %s not implemented' % ctx.style)

    # lock
    fsid = get_legacy_daemon_fsid(ctx,
                                  ctx.cluster,
                                  daemon_type,
                                  daemon_id,
                                  legacy_dir=ctx.legacy_dir)
    if not fsid:
        raise Error('could not detect legacy fsid; set fsid in ceph.conf')
    lock = FileLock(ctx, fsid)
    lock.acquire()

    # call correct adoption
    if daemon_type in Ceph.daemons:
        command_adopt_ceph(ctx, daemon_type, daemon_id, fsid)
    elif daemon_type == 'prometheus':
        command_adopt_prometheus(ctx, daemon_id, fsid)
    elif daemon_type == 'grafana':
        command_adopt_grafana(ctx, daemon_id, fsid)
    elif daemon_type == 'node-exporter':
        raise Error('adoption of node-exporter not implemented')
    elif daemon_type == 'alertmanager':
        command_adopt_alertmanager(ctx, daemon_id, fsid)
    else:
        raise Error('daemon type %s not recognized' % daemon_type)


class AdoptOsd(object):
    def __init__(self, ctx, osd_data_dir, osd_id):
        # type: (CephadmContext, str, str) -> None
        self.ctx = ctx
        self.osd_data_dir = osd_data_dir
        self.osd_id = osd_id

    def check_online_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]

        osd_fsid, osd_type = None, None

        path = os.path.join(self.osd_data_dir, 'fsid')
        try:
            with open(path, 'r') as f:
                osd_fsid = f.read().strip()
            logger.info('Found online OSD at %s' % path)
        except IOError:
            logger.info('Unable to read OSD fsid from %s' % path)
        if os.path.exists(os.path.join(self.osd_data_dir, 'type')):
            with open(os.path.join(self.osd_data_dir, 'type')) as f:
                osd_type = f.read().strip()
        else:
            logger.info('"type" file missing for OSD data dir')

        return osd_fsid, osd_type

    def check_offline_lvm_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        osd_fsid, osd_type = None, None

        c = CephContainer(
            self.ctx,
            image=self.ctx.image,
            entrypoint='/usr/sbin/ceph-volume',
            args=['lvm', 'list', '--format=json'],
            privileged=True
        )
        out, err, code = call_throws(self.ctx, c.run_cmd())
        if not code:
            try:
                js = json.loads(out)
                if self.osd_id in js:
                    logger.info('Found offline LVM OSD {}'.format(self.osd_id))
                    osd_fsid = js[self.osd_id][0]['tags']['ceph.osd_fsid']
                    for device in js[self.osd_id]:
                        if device['tags']['ceph.type'] == 'block':
                            osd_type = 'bluestore'
                            break
                        if device['tags']['ceph.type'] == 'data':
                            osd_type = 'filestore'
                            break
            except ValueError as e:
                logger.info('Invalid JSON in ceph-volume lvm list: {}'.format(e))

        return osd_fsid, osd_type

    def check_offline_simple_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        osd_fsid, osd_type = None, None

        osd_file = glob('/etc/ceph/osd/{}-[a-f0-9-]*.json'.format(self.osd_id))
        if len(osd_file) == 1:
            with open(osd_file[0], 'r') as f:
                try:
                    js = json.loads(f.read())
                    logger.info('Found offline simple OSD {}'.format(self.osd_id))
                    osd_fsid = js['fsid']
                    osd_type = js['type']
                    if osd_type != 'filestore':
                        # need this to be mounted for the adopt to work, as it
                        # needs to move files from this directory
                        call_throws(self.ctx, ['mount', js['data']['path'], self.osd_data_dir])
                except ValueError as e:
                    logger.info('Invalid JSON in {}: {}'.format(osd_file, e))

        return osd_fsid, osd_type


def command_adopt_ceph(ctx, daemon_type, daemon_id, fsid):
    # type: (CephadmContext, str, str, str) -> None

    (uid, gid) = extract_uid_gid(ctx)

    data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
                    (daemon_type, ctx.cluster, daemon_id))
    data_dir_src = os.path.abspath(ctx.legacy_dir + data_dir_src)

    if not os.path.exists(data_dir_src):
        raise Error("{}.{} data directory '{}' does not exist. "
                    'Incorrect ID specified, or daemon already adopted?'.format(
                        daemon_type, daemon_id, data_dir_src))

    osd_fsid = None
    if daemon_type == 'osd':
        adopt_osd = AdoptOsd(ctx, data_dir_src, daemon_id)
        osd_fsid, osd_type = adopt_osd.check_online_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
        if not osd_fsid:
            raise Error('Unable to find OSD {}'.format(daemon_id))
        logger.info('objectstore_type is %s' % osd_type)
        if osd_type == 'filestore':
            raise Error('FileStore is not supported by cephadm')

    # NOTE: implicit assumption here that the units correspond to the
    # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph

    unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
    (enabled, state, _) = check_unit(ctx, unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'disable', unit_name])

    # data
    logger.info('Moving data...')
    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)
    move_files(ctx, glob(os.path.join(data_dir_src, '*')),
               data_dir_dst,
               uid=uid, gid=gid)
    logger.debug('Remove dir `%s`' % (data_dir_src))
    if os.path.ismount(data_dir_src):
        call_throws(ctx, ['umount', data_dir_src])
    os.rmdir(data_dir_src)

    logger.info('Chowning content...')
    call_throws(ctx, ['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])

    if daemon_type == 'mon':
        # rename *.ldb -> *.sst, in case they are coming from ubuntu
        store = os.path.join(data_dir_dst, 'store.db')
        num_renamed = 0
        if os.path.exists(store):
            for oldf in os.listdir(store):
                if oldf.endswith('.ldb'):
                    newf = oldf.replace('.ldb', '.sst')
                    oldp = os.path.join(store, oldf)
                    newp = os.path.join(store, newf)
                    logger.debug('Renaming %s -> %s' % (oldp, newp))
                    os.rename(oldp, newp)
                    num_renamed += 1
        if num_renamed:
            logger.info('Renamed %d leveldb *.ldb files to *.sst',
                        num_renamed)
    if daemon_type == 'osd':
        for n in ['block', 'block.db', 'block.wal']:
            p = os.path.join(data_dir_dst, n)
            if os.path.exists(p):
                logger.info('Chowning %s...' % p)
                os.chown(p, uid, gid)
        # disable the ceph-volume 'simple' mode files on the host
        simple_fn = os.path.join('/etc/ceph/osd',
                                 '%s-%s.json' % (daemon_id, osd_fsid))
        if os.path.exists(simple_fn):
            new_fn = simple_fn + '.adopted-by-cephadm'
            logger.info('Renaming %s -> %s', simple_fn, new_fn)
            os.rename(simple_fn, new_fn)
            logger.info('Disabling host unit ceph-volume@ simple unit...')
            call(ctx, ['systemctl', 'disable',
                       'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
        else:
            # assume this is an 'lvm' c-v for now, but don't error
            # out if it's not.
            logger.info('Disabling host unit ceph-volume@ lvm unit...')
            call(ctx, ['systemctl', 'disable',
                       'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])

    # config
    config_src = '/etc/ceph/%s.conf' % (ctx.cluster)
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'config')
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # logs
    logger.info('Moving logs...')
    log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
                   (ctx.cluster, daemon_type, daemon_id))
    log_dir_src = os.path.abspath(ctx.legacy_dir + log_dir_src)
    log_dir_dst = make_log_dir(ctx, fsid, uid=uid, gid=gid)
    move_files(ctx, glob(log_dir_src),
               log_dir_dst,
               uid=uid, gid=gid)

    logger.info('Creating new units...')
    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True,  # unconditionally enable the new unit
                        start=(state == 'running' or ctx.force_start),
                        osd_fsid=osd_fsid)
    update_firewalld(ctx, daemon_type)


def command_adopt_prometheus(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    daemon_type = 'prometheus'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)

    _stop_and_disable(ctx, 'prometheus')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = '/etc/prometheus/prometheus.yml'
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/prometheus')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # data
    data_src = '/var/lib/prometheus/metrics/'
    data_src = os.path.abspath(ctx.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)


def command_adopt_grafana(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None

    daemon_type = 'grafana'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)

    _stop_and_disable(ctx, 'grafana-server')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = '/etc/grafana/grafana.ini'
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/grafana')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    prov_src = '/etc/grafana/provisioning/'
    prov_src = os.path.abspath(ctx.legacy_dir + prov_src)
    prov_dst = os.path.join(data_dir_dst, 'etc/grafana')
    copy_tree(ctx, [prov_src], prov_dst, uid=uid, gid=gid)

    # cert
    cert = '/etc/grafana/grafana.crt'
    key = '/etc/grafana/grafana.key'
    if os.path.exists(cert) and os.path.exists(key):
        cert_src = '/etc/grafana/grafana.crt'
        cert_src = os.path.abspath(ctx.legacy_dir + cert_src)
        makedirs(os.path.join(data_dir_dst, 'etc/grafana/certs'), uid, gid, 0o755)
        cert_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_file')
        copy_files(ctx, [cert_src], cert_dst, uid=uid, gid=gid)

        key_src = '/etc/grafana/grafana.key'
        key_src = os.path.abspath(ctx.legacy_dir + key_src)
        key_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_key')
        copy_files(ctx, [key_src], key_dst, uid=uid, gid=gid)

        _adjust_grafana_ini(os.path.join(config_dst, 'grafana.ini'))
    else:
        logger.debug('Skipping ssl, missing cert {} or key {}'.format(cert, key))

    # data - possible custom dashboards/plugins
    data_src = '/var/lib/grafana/'
    data_src = os.path.abspath(ctx.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)


def command_adopt_alertmanager(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None

    daemon_type = 'alertmanager'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)

    _stop_and_disable(ctx, 'prometheus-alertmanager')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = '/etc/prometheus/alertmanager.yml'
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/alertmanager')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # data
    data_src = '/var/lib/prometheus/alertmanager/'
    data_src = os.path.abspath(ctx.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'etc/alertmanager/data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)


def _adjust_grafana_ini(filename):
    # type: (str) -> None

    # Update cert_file, cert_key pathnames in server section
    # ConfigParser does not preserve comments
    try:
        with open(filename, 'r') as grafana_ini:
            lines = grafana_ini.readlines()
        with open('{}.new'.format(filename), 'w') as grafana_ini:
            server_section = False
            for line in lines:
                if line.startswith('['):
                    server_section = False
                if line.startswith('[server]'):
                    server_section = True
                if server_section:
                    line = re.sub(r'^cert_file.*',
                                  'cert_file = /etc/grafana/certs/cert_file', line)
                    line = re.sub(r'^cert_key.*',
                                  'cert_key = /etc/grafana/certs/cert_key', line)
                grafana_ini.write(line)
        os.rename('{}.new'.format(filename), filename)
    except OSError as err:
        raise Error('Cannot update {}: {}'.format(filename, err))
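
# Illustrative example (assumed input): within a [server] section, lines like
#   cert_file = /etc/grafana/grafana.crt
#   cert_key = /etc/grafana/grafana.key
# are rewritten by the substitutions above to
#   cert_file = /etc/grafana/certs/cert_file
#   cert_key = /etc/grafana/certs/cert_key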


def _stop_and_disable(ctx, unit_name):
    # type: (CephadmContext, str) -> None

    (enabled, state, _) = check_unit(ctx, unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'disable', unit_name])

##################################


def command_rm_daemon(ctx):
    # type: (CephadmContext) -> None
    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    if daemon_type in ['mon', 'osd'] and not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    call(ctx, ['systemctl', 'stop', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'reset-failed', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'disable', unit_name],
         verbosity=CallVerbosity.DEBUG)
    data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_id)
    if daemon_type in ['mon', 'osd', 'prometheus'] and \
       not ctx.force_delete_data:
        # rename it out of the way -- do not delete
        backup_dir = os.path.join(ctx.data_dir, ctx.fsid, 'removed')
        if not os.path.exists(backup_dir):
            makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
        dirname = '%s.%s_%s' % (daemon_type, daemon_id,
                                datetime.datetime.utcnow().strftime(DATEFMT))
        os.rename(data_dir,
                  os.path.join(backup_dir, dirname))
    else:
        if daemon_type == CephadmDaemon.daemon_type:
            CephadmDaemon.uninstall(ctx, ctx.fsid, daemon_type, daemon_id)
        call_throws(ctx, ['rm', '-rf', data_dir])

##################################


def _zap(ctx: CephadmContext, what: str) -> None:
    mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/sbin/ceph-volume',
        args=['lvm', 'zap', '--destroy', what],
        privileged=True,
        volume_mounts=mounts,
    )
    logger.info(f'Zapping {what}...')
    out, err, code = call_throws(ctx, c.run_cmd())


def _zap_osds(ctx: CephadmContext) -> None:
    # assume fsid lock already held

    # list
    mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/sbin/ceph-volume',
        args=['inventory', '--format', 'json'],
        privileged=True,
        volume_mounts=mounts,
    )
    out, err, code = call_throws(ctx, c.run_cmd())
    if code:
        raise Error('failed to list osd inventory')
    try:
        ls = json.loads(out)
    except ValueError as e:
        raise Error(f'Invalid JSON in ceph-volume inventory: {e}')

    for i in ls:
        matches = [lv.get('cluster_fsid') == ctx.fsid for lv in i.get('lvs', [])]
        if any(matches) and all(matches):
            _zap(ctx, i.get('path'))
        elif any(matches):
            lv_names = [lv['name'] for lv in i.get('lvs', [])]
            # TODO: we need to map the lv_names back to device paths (the vg
            # id isn't part of the output here!)
            logger.warning(f'Not zapping LVs (not implemented): {lv_names}')


def command_zap_osds(ctx: CephadmContext) -> None:
    if not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    _zap_osds(ctx)

##################################


def command_rm_cluster(ctx):
    # type: (CephadmContext) -> None
    if not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    # stop + disable individual daemon units
    for d in list_daemons(ctx, detail=False):
        if d['fsid'] != ctx.fsid:
            continue
        if d['style'] != 'cephadm:v1':
            continue
        unit_name = get_unit_name(ctx.fsid, d['name'])
        call(ctx, ['systemctl', 'stop', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'reset-failed', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'disable', unit_name],
             verbosity=CallVerbosity.DEBUG)

    # cluster units
    for unit_name in ['ceph-%s.target' % ctx.fsid]:
        call(ctx, ['systemctl', 'stop', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'reset-failed', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'disable', unit_name],
             verbosity=CallVerbosity.DEBUG)

    slice_name = 'system-ceph\\x2d{}.slice'.format(ctx.fsid.replace('-', '\\x2d'))
    call(ctx, ['systemctl', 'stop', slice_name],
         verbosity=CallVerbosity.DEBUG)

    # rm units
    call_throws(ctx, ['rm', '-f', ctx.unit_dir
                      + '/ceph-%s@.service' % ctx.fsid])
    call_throws(ctx, ['rm', '-f', ctx.unit_dir
                      + '/ceph-%s.target' % ctx.fsid])
    call_throws(ctx, ['rm', '-rf',
                      ctx.unit_dir + '/ceph-%s.target.wants' % ctx.fsid])
    # rm data
    call_throws(ctx, ['rm', '-rf', ctx.data_dir + '/' + ctx.fsid])

    if not ctx.keep_logs:
        # rm logs
        call_throws(ctx, ['rm', '-rf', ctx.log_dir + '/' + ctx.fsid])
        call_throws(ctx, ['rm', '-rf', ctx.log_dir
                          + '/*.wants/ceph-%s@*' % ctx.fsid])

    # rm logrotate config
    call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/ceph-%s' % ctx.fsid])

    # rm cephadm logrotate config if last cluster on host
    if not os.listdir(ctx.data_dir):
        call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/cephadm'])

    # rm sysctl settings
    sysctl_dir = Path(ctx.sysctl_dir)
    for p in sysctl_dir.glob(f'90-ceph-{ctx.fsid}-*.conf'):
        p.unlink()

    # clean up config, keyring, and pub key files
    files = ['/etc/ceph/ceph.conf', '/etc/ceph/ceph.pub', '/etc/ceph/ceph.client.admin.keyring']

    if os.path.exists(files[0]):
        valid_fsid = False
        with open(files[0]) as f:
            if ctx.fsid in f.read():
                valid_fsid = True
        if valid_fsid:
            for n in range(0, len(files)):
                if os.path.exists(files[n]):
                    os.remove(files[n])

##################################


def check_time_sync(ctx, enabler=None):
    # type: (CephadmContext, Optional[Packager]) -> bool
    units = [
        'chrony.service',  # 18.04 (at least)
        'chronyd.service',  # el / opensuse
        'systemd-timesyncd.service',
        'ntpd.service',  # el7 (at least)
        'ntp.service',  # 18.04 (at least)
        'ntpsec.service',  # 20.04 (at least) / buster
        'openntpd.service',  # ubuntu / debian
    ]
    if not check_units(ctx, units, enabler):
        logger.warning('No time sync service is running; checked for %s' % units)
        return False
    return True


def command_check_host(ctx: CephadmContext) -> None:
    container_path = ctx.container_engine.path

    errors = []
    commands = ['systemctl', 'lvcreate']

    try:
        check_container_engine(ctx)
        logger.info('podman|docker (%s) is present' % container_path)
    except Error as e:
        errors.append(str(e))

    for command in commands:
        try:
            find_program(command)
            logger.info('%s is present' % command)
        except ValueError:
            errors.append('%s binary does not appear to be installed' % command)

    # check for configured+running chronyd or ntp
    if not check_time_sync(ctx):
        errors.append('No time synchronization is active')

    if 'expect_hostname' in ctx and ctx.expect_hostname:
        if get_hostname().lower() != ctx.expect_hostname.lower():
            errors.append('hostname "%s" does not match expected hostname "%s"' % (
                get_hostname(), ctx.expect_hostname))
        logger.info('Hostname "%s" matches what is expected.',
                    ctx.expect_hostname)

    if errors:
        raise Error('\nERROR: '.join(errors))

    logger.info('Host looks OK')

##################################


def command_prepare_host(ctx: CephadmContext) -> None:
    logger.info('Verifying podman|docker is present...')
    pkg = None
    try:
        check_container_engine(ctx)
    except Error as e:
        logger.warning(str(e))
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install_podman()

    logger.info('Verifying lvm2 is present...')
    if not find_executable('lvcreate'):
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install(['lvm2'])

    logger.info('Verifying time synchronization is in place...')
    if not check_time_sync(ctx):
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install(['chrony'])
        # check again, and this time try to enable
        # the service
        check_time_sync(ctx, enabler=pkg)

    if 'expect_hostname' in ctx and ctx.expect_hostname and ctx.expect_hostname != get_hostname():
        logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), ctx.expect_hostname))
        call_throws(ctx, ['hostname', ctx.expect_hostname])
        with open('/etc/hostname', 'w') as f:
            f.write(ctx.expect_hostname + '\n')

    logger.info('Repeating the final host check...')
    command_check_host(ctx)

##################################


class CustomValidation(argparse.Action):

    def _check_name(self, values: str) -> None:
        try:
            (daemon_type, daemon_id) = values.split('.', 1)
        except ValueError:
            raise argparse.ArgumentError(self,
                                         'must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com')

        daemons = get_supported_daemons()
        if daemon_type not in daemons:
            raise argparse.ArgumentError(self,
                                         'name must declare the type of daemon e.g. '
                                         '{}'.format(', '.join(daemons)))

    def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace, values: Union[str, Sequence[Any], None],
                 option_string: Optional[str] = None) -> None:
        assert isinstance(values, str)
        if self.dest == 'name':
            self._check_name(values)
            setattr(namespace, self.dest, values)
        elif self.dest == 'exporter_config':
            cfg = get_parm(values)
            # run the class' validate method, and convert to an argparse error
            # if problems are found
            try:
                CephadmDaemon.validate_config(cfg)
            except Exception as e:
                raise argparse.ArgumentError(self,
                                             str(e))
            setattr(namespace, self.dest, cfg)

##################################
def get_distro():
    # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
    distro = None
    distro_version = None
    distro_codename = None
    with open('/etc/os-release', 'r') as f:
        for line in f.readlines():
            line = line.strip()
            if '=' not in line or line.startswith('#'):
                continue
            (var, val) = line.split('=', 1)
            if val[0] == '"' and val[-1] == '"':
                val = val[1:-1]
            if var == 'ID':
                distro = val.lower()
            elif var == 'VERSION_ID':
                distro_version = val.lower()
            elif var == 'VERSION_CODENAME':
                distro_codename = val.lower()
    return distro, distro_version, distro_codename

class Packager(object):
    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str] = None, version: Optional[str] = None,
                 branch: Optional[str] = None, commit: Optional[str] = None):
        assert \
            (stable and not version and not branch and not commit) or \
            (not stable and version and not branch and not commit) or \
            (not stable and not version and branch) or \
            (not stable and not version and not branch and not commit)
        self.ctx = ctx
        self.stable = stable
        self.version = version
        self.branch = branch
        self.commit = commit

    def add_repo(self) -> None:
        raise NotImplementedError

    def rm_repo(self) -> None:
        raise NotImplementedError

    def install(self, ls: List[str]) -> None:
        raise NotImplementedError

    def install_podman(self) -> None:
        raise NotImplementedError
5932 def query_shaman(self
, distro
: str, distro_version
: Any
, branch
: Optional
[str], commit
: Optional
[str]) -> str:
5934 logger
.info('Fetching repo metadata from shaman and chacra...')
5935 shaman_url
= 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
5937 distro_version
=distro_version
,
5939 sha1
=commit
or 'latest',
5943 shaman_response
= urlopen(shaman_url
)
5944 except HTTPError
as err
:
5945 logger
.error('repository not found in shaman (might not be available yet)')
5946 raise Error('%s, failed to fetch %s' % (err
, shaman_url
))
5949 chacra_url
= shaman_response
.geturl()
5950 chacra_response
= urlopen(chacra_url
)
5951 except HTTPError
as err
:
5952 logger
.error('repository not found in chacra (might not be available yet)')
5953 raise Error('%s, failed to fetch %s' % (err
, chacra_url
))
5954 return chacra_response
.read().decode('utf-8')
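    # Illustrative example of the query above (the concrete values are
    # placeholders, not taken from this script): for branch='master',
    # commit=None, distro='ubuntu', distro_version='focal' the shaman URL
    # becomes
    #   https://shaman.ceph.com/api/repos/ceph/master/latest/ubuntu/focal/repo/?arch=<arch>
    # and the body returned via chacra is the ready-to-write repo file content.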
    def repo_gpgkey(self) -> Tuple[str, str]:
        if self.ctx.gpg_url:
            return self.ctx.gpg_url
        if self.stable or self.version:
            return 'https://download.ceph.com/keys/release.gpg', 'release'
        else:
            return 'https://download.ceph.com/keys/autobuild.gpg', 'autobuild'

    def enable_service(self, service: str) -> None:
        """
        Start and enable the service (typically using systemd).
        """
        call_throws(self.ctx, ['systemctl', 'enable', '--now', service])

5971 class Apt(Packager
):
5977 def __init__(self
, ctx
: CephadmContext
,
5978 stable
: Optional
[str], version
: Optional
[str], branch
: Optional
[str], commit
: Optional
[str],
5979 distro
: Optional
[str], distro_version
: Optional
[str], distro_codename
: Optional
[str]) -> None:
5980 super(Apt
, self
).__init
__(ctx
, stable
=stable
, version
=version
,
5981 branch
=branch
, commit
=commit
)
5984 self
.distro
= self
.DISTRO_NAMES
[distro
]
5985 self
.distro_codename
= distro_codename
5986 self
.distro_version
= distro_version
5988 def repo_path(self
) -> str:
5989 return '/etc/apt/sources.list.d/ceph.list'
5991 def add_repo(self
) -> None:
5993 url
, name
= self
.repo_gpgkey()
5994 logger
.info('Installing repo GPG key from %s...' % url
)
5996 response
= urlopen(url
)
5997 except HTTPError
as err
:
5998 logger
.error('failed to fetch GPG repo key from %s: %s' % (
6000 raise Error('failed to fetch GPG key')
6001 key
= response
.read()
6002 with
open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
, 'wb') as f
:
6006 content
= 'deb %s/debian-%s/ %s main\n' % (
6007 self
.ctx
.repo_url
, self
.version
, self
.distro_codename
)
6009 content
= 'deb %s/debian-%s/ %s main\n' % (
6010 self
.ctx
.repo_url
, self
.stable
, self
.distro_codename
)
6012 content
= self
.query_shaman(self
.distro
, self
.distro_codename
, self
.branch
,
6015 logger
.info('Installing repo file at %s...' % self
.repo_path())
6016 with
open(self
.repo_path(), 'w') as f
:
6021 def rm_repo(self
) -> None:
6022 for name
in ['autobuild', 'release']:
6023 p
= '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
6024 if os
.path
.exists(p
):
6025 logger
.info('Removing repo GPG key %s...' % p
)
6027 if os
.path
.exists(self
.repo_path()):
6028 logger
.info('Removing repo at %s...' % self
.repo_path())
6029 os
.unlink(self
.repo_path())
6031 if self
.distro
== 'ubuntu':
6032 self
.rm_kubic_repo()
6034 def install(self
, ls
: List
[str]) -> None:
6035 logger
.info('Installing packages %s...' % ls
)
6036 call_throws(self
.ctx
, ['apt-get', 'install', '-y'] + ls
)
6038 def update(self
) -> None:
6039 logger
.info('Updating package list...')
6040 call_throws(self
.ctx
, ['apt-get', 'update'])
6042 def install_podman(self
) -> None:
6043 if self
.distro
== 'ubuntu':
6044 logger
.info('Setting up repo for podman...')
6045 self
.add_kubic_repo()
6048 logger
.info('Attempting podman install...')
6050 self
.install(['podman'])
6052 logger
.info('Podman did not work. Falling back to docker...')
6053 self
.install(['docker.io'])
6055 def kubic_repo_url(self
) -> str:
6056 return 'https://download.opensuse.org/repositories/devel:/kubic:/' \
6057 'libcontainers:/stable/xUbuntu_%s/' % self
.distro_version
6059 def kubic_repo_path(self
) -> str:
6060 return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list'
6062 def kubric_repo_gpgkey_url(self
) -> str:
6063 return '%s/Release.key' % self
.kubic_repo_url()
6065 def kubric_repo_gpgkey_path(self
) -> str:
6066 return '/etc/apt/trusted.gpg.d/kubic.release.gpg'
6068 def add_kubic_repo(self
) -> None:
6069 url
= self
.kubric_repo_gpgkey_url()
6070 logger
.info('Installing repo GPG key from %s...' % url
)
6072 response
= urlopen(url
)
6073 except HTTPError
as err
:
6074 logger
.error('failed to fetch GPG repo key from %s: %s' % (
6076 raise Error('failed to fetch GPG key')
6077 key
= response
.read().decode('utf-8')
6078 tmp_key
= write_tmp(key
, 0, 0)
6079 keyring
= self
.kubric_repo_gpgkey_path()
6080 call_throws(self
.ctx
, ['apt-key', '--keyring', keyring
, 'add', tmp_key
.name
])
6082 logger
.info('Installing repo file at %s...' % self
.kubic_repo_path())
6083 content
= 'deb %s /\n' % self
.kubic_repo_url()
6084 with
open(self
.kubic_repo_path(), 'w') as f
:
6087 def rm_kubic_repo(self
) -> None:
6088 keyring
= self
.kubric_repo_gpgkey_path()
6089 if os
.path
.exists(keyring
):
6090 logger
.info('Removing repo GPG key %s...' % keyring
)
6093 p
= self
.kubic_repo_path()
6094 if os
.path
.exists(p
):
6095 logger
.info('Removing repo at %s...' % p
)
6099 class YumDnf(Packager
):
6101 'centos': ('centos', 'el'),
6102 'rhel': ('centos', 'el'),
6103 'scientific': ('centos', 'el'),
6104 'rocky': ('centos', 'el'),
6105 'almalinux': ('centos', 'el'),
6106 'fedora': ('fedora', 'fc'),
6109 def __init__(self
, ctx
: CephadmContext
,
6110 stable
: Optional
[str], version
: Optional
[str], branch
: Optional
[str], commit
: Optional
[str],
6111 distro
: Optional
[str], distro_version
: Optional
[str]) -> None:
6112 super(YumDnf
, self
).__init
__(ctx
, stable
=stable
, version
=version
,
6113 branch
=branch
, commit
=commit
)
6115 assert distro_version
6117 self
.major
= int(distro_version
.split('.')[0])
6118 self
.distro_normalized
= self
.DISTRO_NAMES
[distro
][0]
6119 self
.distro_code
= self
.DISTRO_NAMES
[distro
][1] + str(self
.major
)
6120 if (self
.distro_code
== 'fc' and self
.major
>= 30) or \
6121 (self
.distro_code
== 'el' and self
.major
>= 8):
6126 def custom_repo(self
, **kw
: Any
) -> str:
6128 Repo files need special care in that a whole line should not be present
6129 if there is no value for it. Because we were using `format()` we could
6130 not conditionally add a line for a repo file. So the end result would
6131 contain a key with a missing value (say if we were passing `None`).
6133 For example, it could look like::
6140 Which breaks. This function allows us to conditionally add lines,
6141 preserving an order and be more careful.
6143 Previously, and for historical purposes, this is how the template used
        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )
        lines = []

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)
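    # Illustrative example of the template above (URLs and values are examples
    # only): calling
    #   self.custom_repo(reponame='Ceph', name='Ceph packages for x86_64',
    #                    baseurl='https://download.ceph.com/rpm-pacific/el8/x86_64',
    #                    enabled=1, gpgcheck=1, gpgkey=self.repo_gpgkey()[0])
    # would render (keys that are None or '' are simply omitted):
    #   [Ceph]
    #   name=Ceph packages for x86_64
    #   baseurl=https://download.ceph.com/rpm-pacific/el8/x86_64
    #   enabled=1
    #   gpgcheck=1
    #   gpgkey=https://download.ceph.com/keys/release.gpg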
6182 def repo_path(self
) -> str:
6183 return '/etc/yum.repos.d/ceph.repo'
6185 def repo_baseurl(self
) -> str:
6186 assert self
.stable
or self
.version
6188 return '%s/rpm-%s/%s' % (self
.ctx
.repo_url
, self
.version
,
6191 return '%s/rpm-%s/%s' % (self
.ctx
.repo_url
, self
.stable
,
6194 def add_repo(self
) -> None:
6195 if self
.distro_code
.startswith('fc'):
6196 raise Error('Ceph team does not build Fedora specific packages and therefore cannot add repos for this distro')
6197 if self
.distro_code
== 'el7':
6198 if self
.stable
and self
.stable
>= 'pacific':
6199 raise Error('Ceph does not support pacific or later for this version of this linux distro and therefore cannot add a repo for it')
6200 if self
.version
and self
.version
.split('.')[0] >= '16':
6201 raise Error('Ceph does not support 16.y.z or later for this version of this linux distro and therefore cannot add a repo for it')
6202 if self
.stable
or self
.version
:
6205 'Ceph': '$basearch',
6206 'Ceph-noarch': 'noarch',
6207 'Ceph-source': 'SRPMS'}.items():
6208 content
+= '[%s]\n' % (n
)
6209 content
+= self
.custom_repo(
6211 baseurl
=self
.repo_baseurl() + '/' + t
,
6214 gpgkey
=self
.repo_gpgkey()[0],
6218 content
= self
.query_shaman(self
.distro_normalized
, self
.major
,
6222 logger
.info('Writing repo to %s...' % self
.repo_path())
6223 with
open(self
.repo_path(), 'w') as f
:
6226 if self
.distro_code
.startswith('el'):
6227 logger
.info('Enabling EPEL...')
6228 call_throws(self
.ctx
, [self
.tool
, 'install', '-y', 'epel-release'])
6230 def rm_repo(self
) -> None:
6231 if os
.path
.exists(self
.repo_path()):
6232 os
.unlink(self
.repo_path())
6234 def install(self
, ls
: List
[str]) -> None:
6235 logger
.info('Installing packages %s...' % ls
)
6236 call_throws(self
.ctx
, [self
.tool
, 'install', '-y'] + ls
)
6238 def install_podman(self
) -> None:
6239 self
.install(['podman'])
6242 class Zypper(Packager
):
6245 'opensuse-tumbleweed',
6249 def __init__(self
, ctx
: CephadmContext
,
6250 stable
: Optional
[str], version
: Optional
[str], branch
: Optional
[str], commit
: Optional
[str],
6251 distro
: Optional
[str], distro_version
: Optional
[str]) -> None:
6252 super(Zypper
, self
).__init
__(ctx
, stable
=stable
, version
=version
,
6253 branch
=branch
, commit
=commit
)
6254 assert distro
is not None
6256 self
.tool
= 'zypper'
6257 self
.distro
= 'opensuse'
6258 self
.distro_version
= '15.1'
6259 if 'tumbleweed' not in distro
and distro_version
is not None:
6260 self
.distro_version
= distro_version
    def custom_repo(self, **kw: Any) -> str:
        """
        See YumDnf for format explanation.
        """
        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )
        lines = []

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)
6291 def repo_path(self
) -> str:
6292 return '/etc/zypp/repos.d/ceph.repo'
    def repo_baseurl(self) -> str:
        assert self.stable or self.version
        if self.version:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url,
                                     self.version, self.distro)
        else:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url,
                                     self.stable, self.distro)
6303 def add_repo(self
) -> None:
6304 if self
.stable
or self
.version
:
6307 'Ceph': '$basearch',
6308 'Ceph-noarch': 'noarch',
6309 'Ceph-source': 'SRPMS'}.items():
6310 content
+= '[%s]\n' % (n
)
6311 content
+= self
.custom_repo(
6313 baseurl
=self
.repo_baseurl() + '/' + t
,
6316 gpgkey
=self
.repo_gpgkey()[0],
6320 content
= self
.query_shaman(self
.distro
, self
.distro_version
,
6324 logger
.info('Writing repo to %s...' % self
.repo_path())
6325 with
open(self
.repo_path(), 'w') as f
:
6328 def rm_repo(self
) -> None:
6329 if os
.path
.exists(self
.repo_path()):
6330 os
.unlink(self
.repo_path())
6332 def install(self
, ls
: List
[str]) -> None:
6333 logger
.info('Installing packages %s...' % ls
)
6334 call_throws(self
.ctx
, [self
.tool
, 'in', '-y'] + ls
)
6336 def install_podman(self
) -> None:
6337 self
.install(['podman'])
def create_packager(ctx: CephadmContext,
                    stable: Optional[str] = None, version: Optional[str] = None,
                    branch: Optional[str] = None, commit: Optional[str] = None) -> Packager:
    distro, distro_version, distro_codename = get_distro()
    if distro in YumDnf.DISTRO_NAMES:
        return YumDnf(ctx, stable=stable, version=version,
                      branch=branch, commit=commit,
                      distro=distro, distro_version=distro_version)
    elif distro in Apt.DISTRO_NAMES:
        return Apt(ctx, stable=stable, version=version,
                   branch=branch, commit=commit,
                   distro=distro, distro_version=distro_version,
                   distro_codename=distro_codename)
    elif distro in Zypper.DISTRO_NAMES:
        return Zypper(ctx, stable=stable, version=version,
                      branch=branch, commit=commit,
                      distro=distro, distro_version=distro_version)
    raise Error('Distro %s version %s not supported' % (distro, distro_version))

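# Illustrative use of create_packager() (the release name is an example only):
#   pkg = create_packager(ctx, stable='pacific')
#   pkg.add_repo()
#   pkg.install(['podman'])
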
def command_add_repo(ctx: CephadmContext) -> None:
    if ctx.version and ctx.release:
        raise Error('you can specify either --release or --version but not both')
    if not ctx.version and not ctx.release and not ctx.dev and not ctx.dev_commit:
        raise Error('please supply a --release, --version, --dev or --dev-commit argument')
    if ctx.version:
        try:
            (x, y, z) = ctx.version.split('.')
        except Exception:
            raise Error('version must be in the form x.y.z (e.g., 15.2.0)')
    if ctx.release:
        # Pacific =/= pacific in this case, set to lowercase to avoid confusion
        ctx.release = ctx.release.lower()

    pkg = create_packager(ctx, stable=ctx.release,
                          version=ctx.version,
                          branch=ctx.dev,
                          commit=ctx.dev_commit)
    pkg.add_repo()
    logger.info('Completed adding repo.')
def command_rm_repo(ctx: CephadmContext) -> None:
    pkg = create_packager(ctx)
    pkg.rm_repo()


def command_install(ctx: CephadmContext) -> None:
    pkg = create_packager(ctx)
    pkg.install(ctx.packages)

##################################
def get_ipv4_address(ifname):
    # type: (str) -> str
    def _extract(sock: socket.socket, offset: int) -> str:
        return socket.inet_ntop(
            socket.AF_INET,
            fcntl.ioctl(
                sock.fileno(),
                offset,
                struct.pack('256s', bytes(ifname[:15], 'utf-8'))
            )[20:24])

    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        addr = _extract(s, 35093)  # '0x8915' = SIOCGIFADDR
        dq_mask = _extract(s, 35099)  # 0x891b = SIOCGIFNETMASK
    except OSError:
        # interface does not have an ipv4 address
        return ''

    dec_mask = sum([bin(int(i)).count('1')
                    for i in dq_mask.split('.')])
    return '{}/{}'.format(addr, dec_mask)

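# Illustrative example of the prefix calculation above (addresses are made up):
# with addr = '10.1.2.3' and dq_mask = '255.255.240.0', the per-octet popcounts
# are 8 + 8 + 4 + 0, so dec_mask = 20 and the function returns '10.1.2.3/20'.
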
def get_ipv6_address(ifname):
    # type: (str) -> str
    if not os.path.exists('/proc/net/if_inet6'):
        return ''

    raw = read_file(['/proc/net/if_inet6'])
    data = raw.splitlines()
    # based on docs @ https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/ch11s04.html
    # field 0 is ipv6, field 2 is scope
    for iface_setting in data:
        field = iface_setting.split()
        if field[-1] == ifname:
            ipv6_raw = field[0]
            ipv6_fmtd = ':'.join([ipv6_raw[_p:_p + 4] for _p in range(0, len(field[0]), 4)])
            # apply naming rules using ipaddress module
            ipv6 = ipaddress.ip_address(ipv6_fmtd)
            return '{}/{}'.format(str(ipv6), int('0x{}'.format(field[2]), 16))
    return ''

def bytes_to_human(num, mode='decimal'):
    # type: (float, str) -> str
    """Convert a bytes value into its human-readable form.

    :param num: number, in bytes, to convert
    :param mode: Either decimal (default) or binary to determine divisor
    :returns: string representing the bytes value in a more readable format
    """
    unit_list = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
    divisor = 1000.0
    yotta = 'YB'

    if mode == 'binary':
        unit_list = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
        divisor = 1024.0
        yotta = 'YiB'

    for unit in unit_list:
        if abs(num) < divisor:
            return '%3.1f%s' % (num, unit)
        num /= divisor

    return '%.1f%s' % (num, yotta)

def read_file(path_list, file_name=''):
    # type: (List[str], str) -> str
    """Returns the content of the first file found within the `path_list`

    :param path_list: list of file paths to search
    :param file_name: optional file_name to be applied to a file path
    :returns: content of the file or 'Unknown'
    """
    for path in path_list:
        if file_name:
            file_path = os.path.join(path, file_name)
        else:
            file_path = path
        if os.path.exists(file_path):
            with open(file_path, 'r') as f:
                try:
                    content = f.read().strip()
                except OSError:
                    # sysfs may populate the file, but for devices like
                    # virtio reads can fail
                    return 'Unknown'
                else:
                    return content
    return 'Unknown'

##################################
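# Illustrative read_file() calls as used by HostFacts below (the return values
# here are examples only): read_file(['/proc/loadavg']) yields the raw loadavg
# line, and read_file(['/sys/class/dmi/id'], 'sys_vendor') yields the vendor
# string, falling back to the literal 'Unknown' when the path is missing or
# unreadable.
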
class HostFacts():
    _dmi_path_list = ['/sys/class/dmi/id']
    _nic_path_list = ['/sys/class/net']
    _apparmor_path_list = ['/etc/apparmor']
    _disk_vendor_workarounds = {
        '0x1af4': 'Virtio Block Device'
    }
6498 def __init__(self
, ctx
: CephadmContext
):
6499 self
.ctx
: CephadmContext
= ctx
6500 self
.cpu_model
: str = 'Unknown'
6501 self
.cpu_count
: int = 0
6502 self
.cpu_cores
: int = 0
6503 self
.cpu_threads
: int = 0
6504 self
.interfaces
: Dict
[str, Any
] = {}
6506 self
._meminfo
: List
[str] = read_file(['/proc/meminfo']).splitlines()
6508 self
._process
_nics
()
6509 self
.arch
: str = platform
.processor()
6510 self
.kernel
: str = platform
.release()
6512 def _get_cpuinfo(self
):
6514 """Determine cpu information via /proc/cpuinfo"""
6515 raw
= read_file(['/proc/cpuinfo'])
6516 output
= raw
.splitlines()
6520 field
= [f
.strip() for f
in line
.split(':')]
6521 if 'model name' in line
:
6522 self
.cpu_model
= field
[1]
6523 if 'physical id' in line
:
6524 cpu_set
.add(field
[1])
6525 if 'siblings' in line
:
6526 self
.cpu_threads
= int(field
[1].strip())
6527 if 'cpu cores' in line
:
6528 self
.cpu_cores
= int(field
[1].strip())
6530 self
.cpu_count
= len(cpu_set
)
6532 def _get_block_devs(self
):
6533 # type: () -> List[str]
6534 """Determine the list of block devices by looking at /sys/block"""
6535 return [dev
for dev
in os
.listdir('/sys/block')
6536 if not dev
.startswith('dm')]
6538 def _get_devs_by_type(self
, rota
='0'):
6539 # type: (str) -> List[str]
6540 """Filter block devices by a given rotational attribute (0=flash, 1=spinner)"""
6542 for blk_dev
in self
._get
_block
_devs
():
6543 rot_path
= '/sys/block/{}/queue/rotational'.format(blk_dev
)
6544 rot_value
= read_file([rot_path
])
6545 if rot_value
== rota
:
6546 devs
.append(blk_dev
)
6550 def operating_system(self
):
6552 """Determine OS version"""
6553 raw_info
= read_file(['/etc/os-release'])
6554 os_release
= raw_info
.splitlines()
6558 for line
in os_release
:
6560 var_name
, var_value
= line
.split('=')
6561 rel_dict
[var_name
] = var_value
.strip('"')
6563 # Would normally use PRETTY_NAME, but NAME and VERSION are more
6565 if all(_v
in rel_dict
for _v
in ['NAME', 'VERSION']):
6566 rel_str
= '{} {}'.format(rel_dict
['NAME'], rel_dict
['VERSION'])
6572 """Return the hostname"""
6573 return platform
.node()
6576 def subscribed(self
):
6578 """Highlevel check to see if the host is subscribed to receive updates/support"""
6582 entitlements_dir
= '/etc/pki/entitlement'
6583 if os
.path
.exists(entitlements_dir
):
6584 pems
= glob('{}/*.pem'.format(entitlements_dir
))
6590 os_name
= self
.operating_system
6591 if os_name
.upper().startswith('RED HAT'):
6597 def hdd_count(self
):
6599 """Return a count of HDDs (spinners)"""
6600 return len(self
._get
_devs
_by
_type
(rota
='1'))
6602 def _get_capacity(self
, dev
):
6603 # type: (str) -> int
6604 """Determine the size of a given device"""
6605 size_path
= os
.path
.join('/sys/block', dev
, 'size')
6606 size_blocks
= int(read_file([size_path
]))
6607 blk_path
= os
.path
.join('/sys/block', dev
, 'queue', 'logical_block_size')
6608 blk_count
= int(read_file([blk_path
]))
6609 return size_blocks
* blk_count
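    # Illustrative capacity calculation for the helper above: a device whose
    # sysfs reports size=62500000 and logical_block_size=512 is sized at
    # 62500000 * 512 = 32000000000 bytes ('32.0GB' once passed to bytes_to_human).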
6611 def _get_capacity_by_type(self
, rota
='0'):
6612 # type: (str) -> int
6613 """Return the total capacity of a category of device (flash or hdd)"""
6614 devs
= self
._get
_devs
_by
_type
(rota
=rota
)
6617 capacity
+= self
._get
_capacity
(dev
)
6620 def _dev_list(self
, dev_list
):
6621 # type: (List[str]) -> List[Dict[str, object]]
6622 """Return a 'pretty' name list for each device in the `dev_list`"""
6625 for dev
in dev_list
:
6626 disk_model
= read_file(['/sys/block/{}/device/model'.format(dev
)]).strip()
6627 disk_rev
= read_file(['/sys/block/{}/device/rev'.format(dev
)]).strip()
6628 disk_wwid
= read_file(['/sys/block/{}/device/wwid'.format(dev
)]).strip()
6629 vendor
= read_file(['/sys/block/{}/device/vendor'.format(dev
)]).strip()
6630 disk_vendor
= HostFacts
._disk
_vendor
_workarounds
.get(vendor
, vendor
)
6631 disk_size_bytes
= self
._get
_capacity
(dev
)
6633 'description': '{} {} ({})'.format(disk_vendor
, disk_model
, bytes_to_human(disk_size_bytes
)),
6634 'vendor': disk_vendor
,
6635 'model': disk_model
,
6639 'disk_size_bytes': disk_size_bytes
,
6645 # type: () -> List[Dict[str, object]]
6646 """Return a list of devices that are HDDs (spinners)"""
6647 devs
= self
._get
_devs
_by
_type
(rota
='1')
6648 return self
._dev
_list
(devs
)
6651 def flash_list(self
):
6652 # type: () -> List[Dict[str, object]]
6653 """Return a list of devices that are flash based (SSD, NVMe)"""
6654 devs
= self
._get
_devs
_by
_type
(rota
='0')
6655 return self
._dev
_list
(devs
)
6658 def hdd_capacity_bytes(self
):
6660 """Return the total capacity for all HDD devices (bytes)"""
6661 return self
._get
_capacity
_by
_type
(rota
='1')
6664 def hdd_capacity(self
):
6666 """Return the total capacity for all HDD devices (human readable format)"""
6667 return bytes_to_human(self
.hdd_capacity_bytes
)
6671 # type: () -> Dict[str, float]
6672 """Return the cpu load average data for the host"""
6673 raw
= read_file(['/proc/loadavg']).strip()
6676 '1min': float(data
[0]),
6677 '5min': float(data
[1]),
6678 '15min': float(data
[2]),
6682 def flash_count(self
):
6684 """Return the number of flash devices in the system (SSD, NVMe)"""
6685 return len(self
._get
_devs
_by
_type
(rota
='0'))
6688 def flash_capacity_bytes(self
):
6690 """Return the total capacity for all flash devices (bytes)"""
6691 return self
._get
_capacity
_by
_type
(rota
='0')
6694 def flash_capacity(self
):
6696 """Return the total capacity for all Flash devices (human readable format)"""
6697 return bytes_to_human(self
.flash_capacity_bytes
)
6699 def _process_nics(self
):
6701 """Look at the NIC devices and extract network related metadata"""
6702 # from https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
6709 for nic_path
in HostFacts
._nic
_path
_list
:
6710 if not os
.path
.exists(nic_path
):
6712 for iface
in os
.listdir(nic_path
):
6714 lower_devs_list
= [os
.path
.basename(link
.replace('lower_', '')) for link
in glob(os
.path
.join(nic_path
, iface
, 'lower_*'))]
6715 upper_devs_list
= [os
.path
.basename(link
.replace('upper_', '')) for link
in glob(os
.path
.join(nic_path
, iface
, 'upper_*'))]
6718 mtu
= int(read_file([os
.path
.join(nic_path
, iface
, 'mtu')]))
6722 operstate
= read_file([os
.path
.join(nic_path
, iface
, 'operstate')])
6724 speed
= int(read_file([os
.path
.join(nic_path
, iface
, 'speed')]))
6725 except (OSError, ValueError):
6726 # OSError : device doesn't support the ethtool get_link_ksettings
6727 # ValueError : raised when the read fails, and returns Unknown
6729 # Either way, we show a -1 when speed isn't available
6732 if os
.path
.exists(os
.path
.join(nic_path
, iface
, 'bridge')):
6734 elif os
.path
.exists(os
.path
.join(nic_path
, iface
, 'bonding')):
6735 nic_type
= 'bonding'
6737 nic_type
= hw_lookup
.get(read_file([os
.path
.join(nic_path
, iface
, 'type')]), 'Unknown')
6739 dev_link
= os
.path
.join(nic_path
, iface
, 'device')
6740 if os
.path
.exists(dev_link
):
6742 driver_path
= os
.path
.join(dev_link
, 'driver')
6743 if os
.path
.exists(driver_path
):
6744 driver
= os
.path
.basename(os
.path
.realpath(driver_path
))
6752 self
.interfaces
[iface
] = {
6754 'upper_devs_list': upper_devs_list
,
6755 'lower_devs_list': lower_devs_list
,
6756 'operstate': operstate
,
6758 'nic_type': nic_type
,
6761 'ipv4_address': get_ipv4_address(iface
),
6762 'ipv6_address': get_ipv6_address(iface
),
6766 def nic_count(self
):
6768 """Return a total count of all physical NICs detected in the host"""
6770 for iface
in self
.interfaces
:
6771 if self
.interfaces
[iface
]['iftype'] == 'physical':
6772 phys_devs
.append(iface
)
6773 return len(phys_devs
)
6775 def _get_mem_data(self
, field_name
):
6776 # type: (str) -> int
6777 for line
in self
._meminfo
:
6778 if line
.startswith(field_name
):
6784 def memory_total_kb(self
):
6786 """Determine the memory installed (kb)"""
6787 return self
._get
_mem
_data
('MemTotal')
6790 def memory_free_kb(self
):
6792 """Determine the memory free (not cache, immediately usable)"""
6793 return self
._get
_mem
_data
('MemFree')
6796 def memory_available_kb(self
):
6798 """Determine the memory available to new applications without swapping"""
6799 return self
._get
_mem
_data
('MemAvailable')
6804 """Determine server vendor from DMI data in sysfs"""
6805 return read_file(HostFacts
._dmi
_path
_list
, 'sys_vendor')
6810 """Determine server model information from DMI data in sysfs"""
6811 family
= read_file(HostFacts
._dmi
_path
_list
, 'product_family')
6812 product
= read_file(HostFacts
._dmi
_path
_list
, 'product_name')
6813 if family
== 'Unknown' and product
:
6814 return '{}'.format(product
)
6816 return '{} ({})'.format(family
, product
)
6819 def bios_version(self
):
6821 """Determine server BIOS version from DMI data in sysfs"""
6822 return read_file(HostFacts
._dmi
_path
_list
, 'bios_version')
6825 def bios_date(self
):
6827 """Determine server BIOS date from DMI data in sysfs"""
6828 return read_file(HostFacts
._dmi
_path
_list
, 'bios_date')
6831 def timestamp(self
):
6833 """Return the current time as Epoch seconds"""
6837 def system_uptime(self
):
6839 """Return the system uptime (in secs)"""
6840 raw_time
= read_file(['/proc/uptime'])
6841 up_secs
, _
= raw_time
.split()
6842 return float(up_secs
)
6845 def kernel_security(self
):
6846 # type: () -> Dict[str, str]
6847 """Determine the security features enabled in the kernel - SELinux, AppArmor"""
6848 def _fetch_selinux() -> Dict
[str, str]:
6849 """Get the selinux status"""
6852 out
, err
, code
= call(self
.ctx
, ['sestatus'],
6853 verbosity
=CallVerbosity
.DEBUG
)
6854 security
['type'] = 'SELinux'
6855 status
, mode
, policy
= '', '', ''
6856 for line
in out
.split('\n'):
6857 if line
.startswith('SELinux status:'):
6858 k
, v
= line
.split(':')
6860 elif line
.startswith('Current mode:'):
6861 k
, v
= line
.split(':')
6863 elif line
.startswith('Loaded policy name:'):
6864 k
, v
= line
.split(':')
6866 if status
== 'disabled':
6867 security
['description'] = 'SELinux: Disabled'
6869 security
['description'] = 'SELinux: Enabled({}, {})'.format(mode
, policy
)
6870 except Exception as e
:
6871 logger
.info('unable to get selinux status: %s' % e
)
6874 def _fetch_apparmor() -> Dict
[str, str]:
6875 """Read the apparmor profiles directly, returning an overview of AppArmor status"""
6877 for apparmor_path
in HostFacts
._apparmor
_path
_list
:
6878 if os
.path
.exists(apparmor_path
):
6879 security
['type'] = 'AppArmor'
6880 security
['description'] = 'AppArmor: Enabled'
6882 profiles
= read_file(['/sys/kernel/security/apparmor/profiles'])
6883 if len(profiles
) == 0:
6888 summary
= {} # type: Dict[str, int]
6889 for line
in profiles
.split('\n'):
6890 item
, mode
= line
.split(' ')
6891 mode
= mode
.strip('()')
6896 summary_str
= ','.join(['{} {}'.format(v
, k
) for k
, v
in summary
.items()])
6897 security
= {**security
, **summary
} # type: ignore
6898 security
['description'] += '({})'.format(summary_str
)
6904 if os
.path
.exists('/sys/kernel/security/lsm'):
6905 lsm
= read_file(['/sys/kernel/security/lsm']).strip()
6906 if 'selinux' in lsm
:
6907 ret
= _fetch_selinux()
6908 elif 'apparmor' in lsm
:
6909 ret
= _fetch_apparmor()
6913 'description': 'Linux Security Module framework is active, but is not using SELinux or AppArmor'
6921 'description': 'Linux Security Module framework is not available'
6925 def selinux_enabled(self
) -> bool:
6926 return (self
.kernel_security
['type'] == 'SELinux') and \
6927 (self
.kernel_security
['description'] != 'SELinux: Disabled')
6930 def kernel_parameters(self
):
6931 # type: () -> Dict[str, str]
6932 """Get kernel parameters required/used in Ceph clusters"""
6935 out
, _
, _
= call_throws(self
.ctx
, ['sysctl', '-a'], verbosity
=CallVerbosity
.SILENT
)
6937 param_list
= out
.split('\n')
6938 param_dict
= {param
.split(' = ')[0]: param
.split(' = ')[-1] for param
in param_list
}
6940 # return only desired parameters
6941 if 'net.ipv4.ip_nonlocal_bind' in param_dict
:
6942 k_param
['net.ipv4.ip_nonlocal_bind'] = param_dict
['net.ipv4.ip_nonlocal_bind']
    @staticmethod
    def _process_net_data(tcp_file: str, protocol: str = 'tcp') -> List[int]:
        listening_ports = []
        # Connections state documentation
        # tcp - https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/net/tcp_states.h
        # udp - uses 07 (TCP_CLOSE or UNCONN, since udp is stateless. test with netcat -ul <port>)
        listening_state = {
            'tcp': '0A',
            'udp': '07'
        }

        if protocol not in listening_state.keys():
            return []

        if os.path.exists(tcp_file):
            with open(tcp_file) as f:
                tcp_data = f.readlines()[1:]

            for con in tcp_data:
                con_info = con.strip().split()
                if con_info[3] == listening_state[protocol]:
                    local_port = int(con_info[1].split(':')[1], 16)
                    listening_ports.append(local_port)

        return listening_ports
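    # Illustrative /proc/net/tcp row as parsed above (header already skipped):
    #   '0: 00000000:1A85 00000000:0000 0A 00000000:00000000 ...'
    # con_info[3] == '0A' marks a listening socket, and the local port is the
    # hex field after the ':' in con_info[1], so int('1A85', 16) == 6789 is the
    # value that gets recorded.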
6973 def tcp_ports_used(self
) -> List
[int]:
6974 return HostFacts
._process
_net
_data
('/proc/net/tcp')
6977 def tcp6_ports_used(self
) -> List
[int]:
6978 return HostFacts
._process
_net
_data
('/proc/net/tcp6')
6981 def udp_ports_used(self
) -> List
[int]:
6982 return HostFacts
._process
_net
_data
('/proc/net/udp', 'udp')
6985 def udp6_ports_used(self
) -> List
[int]:
6986 return HostFacts
._process
_net
_data
('/proc/net/udp6', 'udp')
6990 """Return the attributes of this HostFacts object as json"""
6992 k
: getattr(self
, k
) for k
in dir(self
)
6993 if not k
.startswith('_')
6994 and isinstance(getattr(self
, k
), (float, int, str, list, dict, tuple))
6996 return json
.dumps(data
, indent
=2, sort_keys
=True)
6998 ##################################
def command_gather_facts(ctx: CephadmContext) -> None:
    """gather_facts is intended to provide host related metadata to the caller"""
    host = HostFacts(ctx)
    print(host.dump())

##################################
7011 task_types
= ['disks', 'daemons', 'host', 'http_server']
7013 def __init__(self
) -> None:
7014 self
.started_epoch_secs
= time
.time()
7016 'daemons': 'inactive',
7017 'disks': 'inactive',
7019 'http_server': 'inactive',
7021 self
.errors
: list = []
7022 self
.disks
: dict = {}
7023 self
.daemons
: dict = {}
7024 self
.host
: dict = {}
7028 def health(self
) -> dict:
7030 'started_epoch_secs': self
.started_epoch_secs
,
7031 'tasks': self
.tasks
,
7032 'errors': self
.errors
,
7035 def to_json(self
) -> dict:
7037 'health': self
.health
,
7039 'daemons': self
.daemons
,
7040 'disks': self
.disks
,
7043 def update_health(self
, task_type
: str, task_status
: str, error_msg
: Optional
[str] = None) -> None:
7044 assert task_type
in CephadmCache
.task_types
7046 self
.tasks
[task_type
] = task_status
7048 self
.errors
.append(error_msg
)
7050 def update_task(self
, task_type
: str, content
: dict) -> None:
7051 assert task_type
in CephadmCache
.task_types
7052 assert isinstance(content
, dict)
7054 current
= getattr(self
, task_type
)
7056 current
[k
] = content
[k
]
7058 setattr(self
, task_type
, current
)
7061 class CephadmHTTPServer(ThreadingMixIn
, HTTPServer
):
7062 allow_reuse_address
= True
7063 daemon_threads
= True
7064 cephadm_cache
: CephadmCache
7068 class CephadmDaemonHandler(BaseHTTPRequestHandler
):
7069 server
: CephadmHTTPServer
7072 f
'/{api_version}/metadata',
7073 f
'/{api_version}/metadata/health',
7074 f
'/{api_version}/metadata/disks',
7075 f
'/{api_version}/metadata/daemons',
7076 f
'/{api_version}/metadata/host',
7081 def authorize(cls
, f
: Any
) -> Any
:
7082 """Implement a basic token check.
7084 The token is installed at deployment time and must be provided to
7085 ensure we only respond to callers who know our token i.e. mgr
7088 def wrapper(self
: Any
, *args
: Any
, **kwargs
: Any
) -> Any
:
7089 auth
= self
.headers
.get('Authorization', None)
7090 if auth
!= 'Bearer ' + self
.server
.token
:
7091 self
.send_error(401)
7093 f(self
, *args
, **kwargs
)
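        # Illustrative client call (an assumption, not part of this script):
        # callers must present the deployed token as a Bearer header, e.g.
        # with urllib.request:
        #   req = Request('https://<host>:<port>/<api_version>/metadata',
        #                 headers={'Authorization': 'Bearer <token>'})
        #   urlopen(req)
        # Anything else receives a 401 from the check above.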
7097 def _help_page(self
) -> str:
7098 return """<!DOCTYPE html>
7100 <head><title>cephadm metadata exporter</title></head>
7103 font-family: sans-serif;
7108 border-spacing: 0px;
7112 background: PowderBlue;
7119 <h1>cephadm metadata exporter {api_version}</h1>
7122 <tr><th>Endpoint</th><th>Methods</th><th>Response</th><th>Description</th></tr>
7124 <tr><td><a href='{api_version}/metadata'>{api_version}/metadata</a></td><td>GET</td><td>JSON</td><td>Return <b>all</b> metadata for the host</td></tr>
7125 <tr><td><a href='{api_version}/metadata/daemons'>{api_version}/metadata/daemons</a></td><td>GET</td><td>JSON</td><td>Return daemon and systemd states for ceph daemons (ls)</td></tr>
7126 <tr><td><a href='{api_version}/metadata/disks'>{api_version}/metadata/disks</a></td><td>GET</td><td>JSON</td><td>show disk inventory (ceph-volume)</td></tr>
7127 <tr><td><a href='{api_version}/metadata/health'>{api_version}/metadata/health</a></td><td>GET</td><td>JSON</td><td>Show current health of the exporter sub-tasks</td></tr>
7128 <tr><td><a href='{api_version}/metadata/host'>{api_version}/metadata/host</a></td><td>GET</td><td>JSON</td><td>Show host metadata (gather-facts)</td></tr>
7131 </html>""".format(api_version
=CephadmDaemonHandler
.api_version
)
7133 def _fetch_root(self
) -> None:
7134 self
.send_response(200)
7135 self
.send_header('Content-type', 'text/html; charset=utf-8')
7137 self
.wfile
.write(self
._help
_page
().encode('utf-8'))
7139 @Decorators.authorize
7140 def do_GET(self
) -> None:
7141 """Handle *all* GET requests"""
7143 if self
.path
== '/':
7144 # provide a html response if someone hits the root url, to document the
7145 # available api endpoints
7146 return self
._fetch
_root
()
7147 elif self
.path
in CephadmDaemonHandler
.valid_routes
:
7148 u
= self
.path
.split('/')[-1]
7149 data
= json
.dumps({})
7152 tasks
= self
.server
.cephadm_cache
.health
.get('tasks', {})
7155 # We're using the http status code to help indicate thread health
7156 # - 200 (OK): request successful
7157 # - 204 (No Content): access to a cache relating to a dead thread
7158 # - 206 (Partial content): one or more threads are inactive
7159 # - 500 (Server Error): all threads inactive
7161 data
= json
.dumps(self
.server
.cephadm_cache
.to_json())
7162 if all([tasks
[task_name
] == 'inactive' for task_name
in tasks
if task_name
!= 'http_server']):
7163 # All the subtasks are dead!
7165 elif any([tasks
[task_name
] == 'inactive' for task_name
in tasks
if task_name
!= 'http_server']):
7168 # Individual GETs against the a tasks endpoint will also return a 503 if the corresponding thread is inactive
7169 elif u
== 'daemons':
7170 data
= json
.dumps(self
.server
.cephadm_cache
.daemons
)
7171 if tasks
['daemons'] == 'inactive':
7174 data
= json
.dumps(self
.server
.cephadm_cache
.disks
)
7175 if tasks
['disks'] == 'inactive':
7178 data
= json
.dumps(self
.server
.cephadm_cache
.host
)
7179 if tasks
['host'] == 'inactive':
7182 # a GET against health will always return a 200, since the op is always successful
7184 data
= json
.dumps(self
.server
.cephadm_cache
.health
)
7186 self
.send_response(status_code
)
7187 self
.send_header('Content-type', 'application/json')
7189 self
.wfile
.write(data
.encode('utf-8'))
7192 bad_request_msg
= 'Valid URLs are: {}'.format(', '.join(CephadmDaemonHandler
.valid_routes
))
7193 self
.send_response(404, message
=bad_request_msg
) # reason
7194 self
.send_header('Content-type', 'application/json')
7196 self
.wfile
.write(json
.dumps({'message': bad_request_msg
}).encode('utf-8'))
7198 def log_message(self
, format
: str, *args
: Any
) -> None:
7199 rqst
= ' '.join(str(a
) for a
in args
)
7200 logger
.info(f
'client:{self.address_string()} [{self.log_date_time_string()}] {rqst}')
7203 class CephadmDaemon():
7205 daemon_type
= 'cephadm-exporter'
7209 token_name
= 'token'
7210 config_requirements
= [
7216 thread_check_interval
= 5
7218 def __init__(self
, ctx
: CephadmContext
, fsid
: str, daemon_id
: Optional
[str] = None, port
: Optional
[int] = None) -> None:
7221 self
.daemon_id
= daemon_id
7223 self
.port
= CephadmDaemon
.default_port
7226 self
.workers
: List
[Thread
] = []
7227 self
.http_server
: CephadmHTTPServer
7229 self
.cephadm_cache
= CephadmCache()
7230 self
.errors
: List
[str] = []
7231 self
.token
= read_file([os
.path
.join(self
.daemon_path
, CephadmDaemon
.token_name
)])
7234 def validate_config(cls
, config
: dict) -> None:
7235 reqs
= ', '.join(CephadmDaemon
.config_requirements
)
7238 if not config
or not all([k_name
in config
for k_name
in CephadmDaemon
.config_requirements
]):
7239 raise Error(f
'config must contain the following fields : {reqs}')
7241 if not all([isinstance(config
[k_name
], str) for k_name
in CephadmDaemon
.config_requirements
]):
7242 errors
.append(f
'the following fields must be strings: {reqs}')
7244 crt
= config
[CephadmDaemon
.crt_name
]
7245 key
= config
[CephadmDaemon
.key_name
]
7246 token
= config
[CephadmDaemon
.token_name
]
7248 if not crt
.startswith('-----BEGIN CERTIFICATE-----') or not crt
.endswith('-----END CERTIFICATE-----\n'):
7249 errors
.append('crt field is not a valid SSL certificate')
7250 if not key
.startswith('-----BEGIN PRIVATE KEY-----') or not key
.endswith('-----END PRIVATE KEY-----\n'):
7251 errors
.append('key is not a valid SSL private key')
7253 errors
.append("'token' must be more than 8 characters long")
7255 if 'port' in config
:
7257 p
= int(config
['port'])
7260 except (TypeError, ValueError):
7261 errors
.append('port must be an integer > 1024')
7264 raise Error('Parameter errors : {}'.format(', '.join(errors
)))
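    # Illustrative '--config-json' payload for this daemon (the key names 'crt'
    # and 'key' are assumptions based on the checks above; only 'token' is
    # shown verbatim elsewhere in this class):
    #   {
    #       "crt":   "-----BEGIN CERTIFICATE-----\n...\n-----END CERTIFICATE-----\n",
    #       "key":   "-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n",
    #       "token": "a-secret-longer-than-8-chars",
    #       "port":  "9999"   # optional, must parse as an integer > 1024
    #   }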
7267 def port_active(self
) -> bool:
7268 return port_in_use(self
.ctx
, self
.port
)
7271 def can_run(self
) -> bool:
7273 if self
.port_active
:
7274 self
.errors
.append(f
'TCP port {self.port} already in use, unable to bind')
7275 if not os
.path
.exists(os
.path
.join(self
.daemon_path
, CephadmDaemon
.key_name
)):
7276 self
.errors
.append(f
"Key file '{CephadmDaemon.key_name}' is missing from {self.daemon_path}")
7277 if not os
.path
.exists(os
.path
.join(self
.daemon_path
, CephadmDaemon
.crt_name
)):
7278 self
.errors
.append(f
"Certificate file '{CephadmDaemon.crt_name}' is missing from {self.daemon_path}")
7279 if self
.token
== 'Unknown':
7280 self
.errors
.append(f
"Authentication token '{CephadmDaemon.token_name}' is missing from {self.daemon_path}")
7281 return len(self
.errors
) == 0
7284 def _unit_name(fsid
: str, daemon_id
: str) -> str:
7285 return '{}.service'.format(get_unit_name(fsid
, CephadmDaemon
.daemon_type
, daemon_id
))
7288 def unit_name(self
) -> str:
7289 assert self
.daemon_id
is not None
7290 return CephadmDaemon
._unit
_name
(self
.fsid
, self
.daemon_id
)
7293 def daemon_path(self
) -> str:
7294 return os
.path
.join(
7297 f
'{self.daemon_type}.{self.daemon_id}'
7301 def binary_path(self
) -> str:
7302 path
= os
.path
.realpath(__file__
)
7303 assert os
.path
.isfile(path
)
7306 def _handle_thread_exception(self
, exc
: Exception, thread_type
: str) -> None:
7307 e_msg
= f
'{exc.__class__.__name__} exception: {str(exc)}'
7308 thread_info
= getattr(self
.cephadm_cache
, thread_type
)
7309 errors
= thread_info
.get('scrape_errors', [])
7310 errors
.append(e_msg
)
7312 logger
.exception(exc
)
7313 self
.cephadm_cache
.update_task(
7316 'scrape_errors': errors
,
7321 def _scrape_host_facts(self
, refresh_interval
: int = 10) -> None:
7323 exception_encountered
= False
7327 if self
.stop
or exception_encountered
:
7330 if ctr
>= refresh_interval
:
7332 logger
.debug('executing host-facts scrape')
7334 s_time
= time
.time()
7337 facts
= HostFacts(self
.ctx
)
7338 except Exception as e
:
7339 self
._handle
_thread
_exception
(e
, 'host')
7340 exception_encountered
= True
7342 elapsed
= time
.time() - s_time
7344 data
= json
.loads(facts
.dump())
7345 except json
.decoder
.JSONDecodeError
:
7346 errors
.append('host-facts provided invalid JSON')
7347 logger
.warning(errors
[-1])
7349 self
.cephadm_cache
.update_task(
7352 'scrape_timestamp': s_time
,
7353 'scrape_duration_secs': elapsed
,
7354 'scrape_errors': errors
,
7358 logger
.debug(f
'completed host-facts scrape - {elapsed}s')
7360 time
.sleep(CephadmDaemon
.loop_delay
)
7361 ctr
+= CephadmDaemon
.loop_delay
7362 logger
.info('host-facts thread stopped')
7364 def _scrape_ceph_volume(self
, refresh_interval
: int = 15) -> None:
7365 # we're invoking the ceph_volume command, so we need to set the args that it
7367 self
.ctx
.command
= 'inventory --format=json'.split()
7368 self
.ctx
.fsid
= self
.fsid
7371 exception_encountered
= False
7374 if self
.stop
or exception_encountered
:
7377 if ctr
>= refresh_interval
:
7379 logger
.debug('executing ceph-volume scrape')
7381 s_time
= time
.time()
7382 stream
= io
.StringIO()
7384 with
redirect_stdout(stream
):
7385 command_ceph_volume(self
.ctx
)
7386 except Exception as e
:
7387 self
._handle
_thread
_exception
(e
, 'disks')
7388 exception_encountered
= True
7390 elapsed
= time
.time() - s_time
7392 # if the call to ceph-volume returns junk with the
7393 # json, it won't parse
7394 stdout
= stream
.getvalue()
7399 data
= json
.loads(stdout
)
7400 except json
.decoder
.JSONDecodeError
:
7401 errors
.append('ceph-volume thread provided bad json data')
7402 logger
.warning(errors
[-1])
7404 errors
.append('ceph-volume did not return any data')
7405 logger
.warning(errors
[-1])
7407 self
.cephadm_cache
.update_task(
7410 'scrape_timestamp': s_time
,
7411 'scrape_duration_secs': elapsed
,
7412 'scrape_errors': errors
,
7417 logger
.debug(f
'completed ceph-volume scrape - {elapsed}s')
7418 time
.sleep(CephadmDaemon
.loop_delay
)
7419 ctr
+= CephadmDaemon
.loop_delay
7421 logger
.info('ceph-volume thread stopped')
7423 def _scrape_list_daemons(self
, refresh_interval
: int = 20) -> None:
7425 exception_encountered
= False
7427 if self
.stop
or exception_encountered
:
7430 if ctr
>= refresh_interval
:
7432 logger
.debug('executing list-daemons scrape')
7434 s_time
= time
.time()
7437 # list daemons should ideally be invoked with a fsid
7438 data
= list_daemons(self
.ctx
)
7439 except Exception as e
:
7440 self
._handle
_thread
_exception
(e
, 'daemons')
7441 exception_encountered
= True
7443 if not isinstance(data
, list):
7444 errors
.append('list-daemons did not supply a list?')
7445 logger
.warning(errors
[-1])
7447 elapsed
= time
.time() - s_time
7448 self
.cephadm_cache
.update_task(
7451 'scrape_timestamp': s_time
,
7452 'scrape_duration_secs': elapsed
,
7453 'scrape_errors': errors
,
7457 logger
.debug(f
'completed list-daemons scrape - {elapsed}s')
7459 time
.sleep(CephadmDaemon
.loop_delay
)
7460 ctr
+= CephadmDaemon
.loop_delay
7461 logger
.info('list-daemons thread stopped')
7463 def _create_thread(self
, target
: Any
, name
: str, refresh_interval
: Optional
[int] = None) -> Thread
:
7464 if refresh_interval
:
7465 t
= Thread(target
=target
, args
=(refresh_interval
,))
7467 t
= Thread(target
=target
)
7470 self
.cephadm_cache
.update_health(name
, 'active')
7473 start_msg
= f
'Started {name} thread'
7474 if refresh_interval
:
7475 logger
.info(f
'{start_msg}, with a refresh interval of {refresh_interval}s')
7477 logger
.info(f
'{start_msg}')
7480 def reload(self
, *args
: Any
) -> None:
7481 """reload -HUP received
7483 This is a placeholder function only, and serves to provide the hook that could
7484 be exploited later if the exporter evolves to incorporate a config file
7486 logger
.info('Reload request received - ignoring, no action needed')
7488 def shutdown(self
, *args
: Any
) -> None:
7489 logger
.info('Shutdown request received')
7491 self
.http_server
.shutdown()
7493 def run(self
) -> None:
7494 logger
.info(f
"cephadm exporter starting for FSID '{self.fsid}'")
7495 if not self
.can_run
:
7496 logger
.error('Unable to start the exporter daemon')
7497 for e
in self
.errors
:
7501 # register signal handlers for running under systemd control
7502 signal
.signal(signal
.SIGTERM
, self
.shutdown
)
7503 signal
.signal(signal
.SIGINT
, self
.shutdown
)
7504 signal
.signal(signal
.SIGHUP
, self
.reload)
7505 logger
.debug('Signal handlers attached')
7507 host_facts
= self
._create
_thread
(self
._scrape
_host
_facts
, 'host', 5)
7508 self
.workers
.append(host_facts
)
7510 daemons
= self
._create
_thread
(self
._scrape
_list
_daemons
, 'daemons', 20)
7511 self
.workers
.append(daemons
)
7513 disks
= self
._create
_thread
(self
._scrape
_ceph
_volume
, 'disks', 20)
7514 self
.workers
.append(disks
)
7516 self
.http_server
= CephadmHTTPServer(('0.0.0.0', self
.port
), CephadmDaemonHandler
) # IPv4 only
7517 self
.http_server
.socket
= ssl
.wrap_socket(self
.http_server
.socket
,
7518 keyfile
=os
.path
.join(self
.daemon_path
, CephadmDaemon
.key_name
),
7519 certfile
=os
.path
.join(self
.daemon_path
, CephadmDaemon
.crt_name
),
7522 self
.http_server
.cephadm_cache
= self
.cephadm_cache
7523 self
.http_server
.token
= self
.token
7524 server_thread
= self
._create
_thread
(self
.http_server
.serve_forever
, 'http_server')
7525 logger
.info(f
'https server listening on {self.http_server.server_address[0]}:{self.http_server.server_port}')
7528 while server_thread
.is_alive():
7532 if ctr
>= CephadmDaemon
.thread_check_interval
:
7534 for worker
in self
.workers
:
7535 if self
.cephadm_cache
.tasks
[worker
.name
] == 'inactive':
7537 if not worker
.is_alive():
7538 logger
.warning(f
'{worker.name} thread not running')
7539 stop_time
= datetime
.datetime
.now().strftime('%Y/%m/%d %H:%M:%S')
7540 self
.cephadm_cache
.update_health(worker
.name
, 'inactive', f
'{worker.name} stopped at {stop_time}')
7542 time
.sleep(CephadmDaemon
.loop_delay
)
7543 ctr
+= CephadmDaemon
.loop_delay
7545 logger
.info('Main http server thread stopped')
7548 def unit_run(self
) -> str:
7551 {py3} {bin_path} exporter --fsid {fsid} --id {daemon_id} --port {port} &""".format(
7552 py3
=shutil
.which('python3'),
7553 bin_path
=self
.binary_path
,
7555 daemon_id
=self
.daemon_id
,
7560 def unit_file(self
) -> str:
7561 docker
= isinstance(self
.ctx
.container_engine
, Docker
)
7562 return """#generated by cephadm
7564 Description=cephadm exporter service for cluster {fsid}
7565 After=network-online.target{docker_after}
7566 Wants=network-online.target
7569 PartOf=ceph-{fsid}.target
7570 Before=ceph-{fsid}.target
7574 ExecStart=/bin/bash {daemon_path}/unit.run
7575 ExecReload=/bin/kill -HUP $MAINPID
7580 WantedBy=ceph-{fsid}.target
7581 """.format(fsid
=self
.fsid
,
7582 daemon_path
=self
.daemon_path
,
7583 # if docker, we depend on docker.service
7584 docker_after
=' docker.service' if docker
else '',
7585 docker_requires
='Requires=docker.service\n' if docker
else '')
7587 def deploy_daemon_unit(self
, config
: Optional
[dict] = None) -> None:
7588 """deploy a specific unit file for cephadm
7590 The normal deploy_daemon_units doesn't apply for this
7591 daemon since it's not a container, so we just create a
7592 simple service definition and add it to the fsid's target
7595 raise Error('Attempting to deploy cephadm daemon without a config')
7596 assert isinstance(config
, dict)
7598 # Create the required config files in the daemons dir, with restricted permissions
7599 for filename
in config
:
7600 with
open(os
.open(os
.path
.join(self
.daemon_path
, filename
), os
.O_CREAT | os
.O_WRONLY
, mode
=0o600), 'w') as f
:
7601 f
.write(config
[filename
])
7603 # When __file__ is <stdin> we're being invoked over remoto via the orchestrator, so
7604 # we pick up the file from where the orchestrator placed it - otherwise we'll
7605 # copy it to the binary location for this cluster
7606 if not __file__
== '<stdin>':
7608 shutil
.copy(__file__
,
7610 except shutil
.SameFileError
:
7613 with
open(os
.path
.join(self
.daemon_path
, 'unit.run'), 'w') as f
:
7614 f
.write(self
.unit_run
)
7617 os
.path
.join(self
.ctx
.unit_dir
,
7618 f
'{self.unit_name}.new'),
7621 f
.write(self
.unit_file
)
7623 os
.path
.join(self
.ctx
.unit_dir
, f
'{self.unit_name}.new'),
7624 os
.path
.join(self
.ctx
.unit_dir
, self
.unit_name
))
7626 call_throws(self
.ctx
, ['systemctl', 'daemon-reload'])
7627 call(self
.ctx
, ['systemctl', 'stop', self
.unit_name
],
7628 verbosity
=CallVerbosity
.DEBUG
)
7629 call(self
.ctx
, ['systemctl', 'reset-failed', self
.unit_name
],
7630 verbosity
=CallVerbosity
.DEBUG
)
7631 call_throws(self
.ctx
, ['systemctl', 'enable', '--now', self
.unit_name
])
7634 def uninstall(cls
, ctx
: CephadmContext
, fsid
: str, daemon_type
: str, daemon_id
: str) -> None:
7635 unit_name
= CephadmDaemon
._unit
_name
(fsid
, daemon_id
)
7636 unit_path
= os
.path
.join(ctx
.unit_dir
, unit_name
)
7637 unit_run
= os
.path
.join(ctx
.data_dir
, fsid
, f
'{daemon_type}.{daemon_id}', 'unit.run')
7640 with
open(unit_run
, 'r') as u
:
7641 contents
= u
.read().strip(' &')
7643 logger
.warning(f
'Unable to access the unit.run file @ {unit_run}')
7647 for line
in contents
.split('\n'):
7648 if '--port ' in line
:
7650 port
= int(line
.split('--port ')[-1])
7652 logger
.warning('Unexpected format in unit.run file: port is not numeric')
7653 logger
.warning('Unable to remove the systemd file and close the port')
7660 fw
.close_ports([port
])
7661 except RuntimeError:
7662 logger
.error(f
'Unable to close port {port}')
7664 stdout
, stderr
, rc
= call(ctx
, ['rm', '-f', unit_path
])
7666 logger
.error(f
'Unable to remove the systemd file @ {unit_path}')
7668 logger
.info(f
'removed systemd unit file @ {unit_path}')
7669 stdout
, stderr
, rc
= call(ctx
, ['systemctl', 'daemon-reload'])
7672 def command_exporter(ctx
: CephadmContext
) -> None:
7673 exporter
= CephadmDaemon(ctx
, ctx
.fsid
, daemon_id
=ctx
.id, port
=ctx
.port
)
7675 if ctx
.fsid
not in os
.listdir(ctx
.data_dir
):
7676 raise Error(f
"cluster fsid '{ctx.fsid}' not found in '{ctx.data_dir}'")
7680 ##################################
def systemd_target_state(target_name: str, subsystem: str = 'ceph') -> bool:
    return os.path.exists(
        os.path.join(
            UNIT_DIR,
            f'{subsystem}.target.wants',
            target_name
        )
    )

7695 def command_maintenance(ctx
: CephadmContext
) -> str:
7697 raise Error('must pass --fsid to specify cluster')
7699 target
= f
'ceph-{ctx.fsid}.target'
7701 if ctx
.maintenance_action
.lower() == 'enter':
7702 logger
.info('Requested to place host into maintenance')
7703 if systemd_target_state(target
):
7704 _out
, _err
, code
= call(ctx
,
7705 ['systemctl', 'disable', target
],
7706 verbosity
=CallVerbosity
.DEBUG
)
7708 logger
.error(f
'Failed to disable the {target} target')
7709 return 'failed - to disable the target'
7711 # stopping a target waits by default
7712 _out
, _err
, code
= call(ctx
,
7713 ['systemctl', 'stop', target
],
7714 verbosity
=CallVerbosity
.DEBUG
)
7716 logger
.error(f
'Failed to stop the {target} target')
7717 return 'failed - to disable the target'
7719 return f
'success - systemd target {target} disabled'
7722 return 'skipped - target already disabled'
7725 logger
.info('Requested to exit maintenance state')
7726 # exit maintenance request
7727 if not systemd_target_state(target
):
7728 _out
, _err
, code
= call(ctx
,
7729 ['systemctl', 'enable', target
],
7730 verbosity
=CallVerbosity
.DEBUG
)
7732 logger
.error(f
'Failed to enable the {target} target')
7733 return 'failed - unable to enable the target'
7735 # starting a target waits by default
7736 _out
, _err
, code
= call(ctx
,
7737 ['systemctl', 'start', target
],
7738 verbosity
=CallVerbosity
.DEBUG
)
7740 logger
.error(f
'Failed to start the {target} target')
7741 return 'failed - unable to start the target'
7743 return f
'success - systemd target {target} enabled and started'
7744 return f
'success - systemd target {target} enabled and started'
7746 ##################################
7750 # type: () -> argparse.ArgumentParser
7751 parser
= argparse
.ArgumentParser(
7752 description
='Bootstrap Ceph daemons with systemd and containers.',
7753 formatter_class
=argparse
.ArgumentDefaultsHelpFormatter
)
7754 parser
.add_argument(
7756 help='container image. Can also be set via the "CEPHADM_IMAGE" '
7758 parser
.add_argument(
7760 action
='store_true',
7761 help='use docker instead of podman')
7762 parser
.add_argument(
7765 help='base directory for daemon data')
7766 parser
.add_argument(
7769 help='base directory for daemon logs')
7770 parser
.add_argument(
7772 default
=LOGROTATE_DIR
,
7773 help='location of logrotate configuration files')
7774 parser
.add_argument(
7777 help='location of sysctl configuration files')
7778 parser
.add_argument(
7781 help='base directory for systemd units')
7782 parser
.add_argument(
7784 action
='store_true',
7785 help='Show debug-level log messages')
7786 parser
.add_argument(
7789 default
=DEFAULT_TIMEOUT
,
7790 help='timeout in seconds')
7791 parser
.add_argument(
7794 default
=DEFAULT_RETRY
,
7795 help='max number of retries')
7796 parser
.add_argument(
7800 help='set environment variable')
7801 parser
.add_argument(
7802 '--no-container-init',
7803 action
='store_true',
7804 default
=not CONTAINER_INIT
,
7805 help='Do not run podman/docker with `--init`')

    subparsers = parser.add_subparsers(help='sub-command')

    parser_version = subparsers.add_parser(
        'version', help='get ceph version from container')
    parser_version.set_defaults(func=command_version)

    parser_pull = subparsers.add_parser(
        'pull', help='pull latest image version')
    parser_pull.set_defaults(func=command_pull)

    parser_inspect_image = subparsers.add_parser(
        'inspect-image', help='inspect local container image')
    parser_inspect_image.set_defaults(func=command_inspect_image)

    parser_ls = subparsers.add_parser(
        'ls', help='list daemon instances on this host')
    parser_ls.set_defaults(func=command_ls)
    parser_ls.add_argument(
        '--no-detail',
        action='store_true',
        help='Do not include daemon status')
    parser_ls.add_argument(
        '--legacy-dir',
        default='/',
        help='base directory for legacy daemon data')

    parser_list_networks = subparsers.add_parser(
        'list-networks', help='list IP networks')
    parser_list_networks.set_defaults(func=command_list_networks)

    parser_adopt = subparsers.add_parser(
        'adopt', help='adopt daemon deployed with a different tool')
    parser_adopt.set_defaults(func=command_adopt)
    parser_adopt.add_argument(
        '--name', '-n',
        required=True,
        help='daemon name (type.id)')
    parser_adopt.add_argument(
        '--style',
        required=True,
        help='deployment style (legacy, ...)')
    parser_adopt.add_argument(
        '--cluster',
        default='ceph',
        help='cluster name')
    parser_adopt.add_argument(
        '--legacy-dir',
        default='/',
        help='base directory for legacy daemon data')
    parser_adopt.add_argument(
        '--config-json',
        help='Additional configuration information in JSON format')
    parser_adopt.add_argument(
        '--skip-firewalld',
        action='store_true',
        help='Do not configure firewalld')
    parser_adopt.add_argument(
        '--skip-pull',
        action='store_true',
        help='do not pull the latest image before adopting')
    parser_adopt.add_argument(
        '--force-start',
        action='store_true',
        help='start newly adopted daemon, even if it was not running previously')
    parser_adopt.add_argument(
        '--container-init',
        action='store_true',
        default=CONTAINER_INIT,
        help=argparse.SUPPRESS)

    parser_rm_daemon = subparsers.add_parser(
        'rm-daemon', help='remove daemon instance')
    parser_rm_daemon.set_defaults(func=command_rm_daemon)
    parser_rm_daemon.add_argument(
        '--name', '-n',
        required=True,
        action=CustomValidation,
        help='daemon name (type.id)')
    parser_rm_daemon.add_argument(
        '--fsid',
        required=True,
        help='cluster FSID')
    parser_rm_daemon.add_argument(
        '--force',
        action='store_true',
        help='proceed, even though this may destroy valuable data')
    parser_rm_daemon.add_argument(
        '--force-delete-data',
        action='store_true',
        help='delete valuable daemon data instead of making a backup')

    parser_rm_cluster = subparsers.add_parser(
        'rm-cluster', help='remove all daemons for a cluster')
    parser_rm_cluster.set_defaults(func=command_rm_cluster)
    parser_rm_cluster.add_argument(
        '--fsid',
        required=True,
        help='cluster FSID')
    parser_rm_cluster.add_argument(
        '--force',
        action='store_true',
        help='proceed, even though this may destroy valuable data')
    parser_rm_cluster.add_argument(
        '--keep-logs',
        action='store_true',
        help='do not remove log files')
    parser_rm_cluster.add_argument(
        '--zap-osds',
        action='store_true',
        help='zap OSD devices for this cluster')

    parser_run = subparsers.add_parser(
        'run', help='run a ceph daemon, in a container, in the foreground')
    parser_run.set_defaults(func=command_run)
    parser_run.add_argument(
        '--name', '-n',
        required=True,
        help='daemon name (type.id)')
    parser_run.add_argument(
        '--fsid',
        required=True,
        help='cluster FSID')

    parser_shell = subparsers.add_parser(
        'shell', help='run an interactive shell inside a daemon container')
    parser_shell.set_defaults(func=command_shell)
    parser_shell.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_shell.add_argument(
        '--name', '-n',
        help='daemon name (type.id)')
    parser_shell.add_argument(
        '--config', '-c',
        help='ceph.conf to pass through to the container')
    parser_shell.add_argument(
        '--keyring', '-k',
        help='ceph.keyring to pass through to the container')
    parser_shell.add_argument(
        '--mount', '-m',
        help=('mount a file or directory in the container. '
              'Supports multiple mounts. '
              'ie: `--mount /foo /bar:/bar`. '
              'When no destination is passed, default is /mnt'),
        nargs='+')
    parser_shell.add_argument(
        '--env', '-e',
        action='append',
        default=[],
        help='set environment variable')
    parser_shell.add_argument(
        '--volume', '-v',
        action='append',
        default=[],
        help='mount additional volumes in the container')
    parser_shell.add_argument(
        'command', nargs=argparse.REMAINDER,
        help='command (optional)')
    parser_shell.add_argument(
        '--no-hosts',
        action='store_true',
        help="don't pass /etc/hosts through to the container")

    parser_enter = subparsers.add_parser(
        'enter', help='run an interactive shell inside a running daemon container')
    parser_enter.set_defaults(func=command_enter)
    parser_enter.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_enter.add_argument(
        '--name', '-n',
        required=True,
        help='daemon name (type.id)')
    parser_enter.add_argument(
        'command', nargs=argparse.REMAINDER,
        help='command')

    parser_ceph_volume = subparsers.add_parser(
        'ceph-volume', help='run ceph-volume inside a container')
    parser_ceph_volume.set_defaults(func=command_ceph_volume)
    parser_ceph_volume.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_ceph_volume.add_argument(
        '--config-json',
        help='JSON file with config and (client.bootstrap-osd) key')
    parser_ceph_volume.add_argument(
        '--config', '-c',
        help='ceph conf file')
    parser_ceph_volume.add_argument(
        '--keyring', '-k',
        help='ceph.keyring to pass through to the container')
    parser_ceph_volume.add_argument(
        'command', nargs=argparse.REMAINDER,
        help='command')

    parser_zap_osds = subparsers.add_parser(
        'zap-osds', help='zap all OSDs associated with a particular fsid')
    parser_zap_osds.set_defaults(func=command_zap_osds)
    parser_zap_osds.add_argument(
        '--fsid',
        required=True,
        help='cluster FSID')
    parser_zap_osds.add_argument(
        '--force',
        action='store_true',
        help='proceed, even though this may destroy valuable data')

    parser_unit = subparsers.add_parser(
        'unit', help="operate on the daemon's systemd unit")
    parser_unit.set_defaults(func=command_unit)
    parser_unit.add_argument(
        'command',
        help='systemd command (start, stop, restart, enable, disable, ...)')
    parser_unit.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_unit.add_argument(
        '--name', '-n',
        required=True,
        help='daemon name (type.id)')

    parser_logs = subparsers.add_parser(
        'logs', help='print journald logs for a daemon container')
    parser_logs.set_defaults(func=command_logs)
    parser_logs.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_logs.add_argument(
        '--name', '-n',
        required=True,
        help='daemon name (type.id)')
    parser_logs.add_argument(
        'command', nargs='*',
        help='additional journalctl args')

    parser_bootstrap = subparsers.add_parser(
        'bootstrap', help='bootstrap a cluster (mon + mgr daemons)')
    parser_bootstrap.set_defaults(func=command_bootstrap)
    parser_bootstrap.add_argument(
        '--config', '-c',
        help='ceph conf file to incorporate')
    parser_bootstrap.add_argument(
        '--mon-id',
        required=False,
        help='mon id (default: local hostname)')
    parser_bootstrap.add_argument(
        '--mon-addrv',
        help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])')
    parser_bootstrap.add_argument(
        '--mon-ip',
        help='mon IP')
    parser_bootstrap.add_argument(
        '--mgr-id',
        required=False,
        help='mgr id (default: randomly generated)')
    parser_bootstrap.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_bootstrap.add_argument(
        '--output-dir',
        default='/etc/ceph',
        help='directory to write config, keyring, and pub key files')
    parser_bootstrap.add_argument(
        '--output-keyring',
        help='location to write keyring file with new cluster admin and mon keys')
    parser_bootstrap.add_argument(
        '--output-config',
        help='location to write conf file to connect to new cluster')
    parser_bootstrap.add_argument(
        '--output-pub-ssh-key',
        help="location to write the cluster's public SSH key")
    parser_bootstrap.add_argument(
        '--skip-admin-label',
        action='store_true',
        help='do not create admin label for ceph.conf and client.admin keyring distribution')
    parser_bootstrap.add_argument(
        '--skip-ssh',
        action='store_true',
        help='skip setup of ssh key on local host')
    parser_bootstrap.add_argument(
        '--initial-dashboard-user',
        default='admin',
        help='Initial user for the dashboard')
    parser_bootstrap.add_argument(
        '--initial-dashboard-password',
        help='Initial password for the initial dashboard user')
    parser_bootstrap.add_argument(
        '--ssl-dashboard-port',
        type=int,
        default=8443,
        help='Port number used to connect with dashboard using SSL')
    parser_bootstrap.add_argument(
        '--dashboard-key',
        type=argparse.FileType('r'),
        help='Dashboard key')
    parser_bootstrap.add_argument(
        '--dashboard-crt',
        type=argparse.FileType('r'),
        help='Dashboard certificate')

    parser_bootstrap.add_argument(
        '--ssh-config',
        type=argparse.FileType('r'),
        help='SSH config')
    parser_bootstrap.add_argument(
        '--ssh-private-key',
        type=argparse.FileType('r'),
        help='SSH private key')
    parser_bootstrap.add_argument(
        '--ssh-public-key',
        type=argparse.FileType('r'),
        help='SSH public key')
    parser_bootstrap.add_argument(
        '--ssh-user',
        default='root',
        help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users')
    parser_bootstrap.add_argument(
        '--skip-mon-network',
        action='store_true',
        help='set mon public_network based on bootstrap mon ip')
    parser_bootstrap.add_argument(
        '--skip-dashboard',
        action='store_true',
        help='do not enable the Ceph Dashboard')
    parser_bootstrap.add_argument(
        '--dashboard-password-noupdate',
        action='store_true',
        help='stop forced dashboard password change')
    parser_bootstrap.add_argument(
        '--no-minimize-config',
        action='store_true',
        help='do not assimilate and minimize the config file')
    parser_bootstrap.add_argument(
        '--skip-ping-check',
        action='store_true',
        help='do not verify that mon IP is pingable')
    parser_bootstrap.add_argument(
        '--skip-pull',
        action='store_true',
        help='do not pull the latest image before bootstrapping')
    parser_bootstrap.add_argument(
        '--skip-firewalld',
        action='store_true',
        help='Do not configure firewalld')
    parser_bootstrap.add_argument(
        '--allow-overwrite',
        action='store_true',
        help='allow overwrite of existing --output-* config/keyring/ssh files')
    parser_bootstrap.add_argument(
        '--allow-fqdn-hostname',
        action='store_true',
        help='allow hostname that is fully-qualified (contains ".")')
    parser_bootstrap.add_argument(
        '--allow-mismatched-release',
        action='store_true',
        help="allow bootstrap of ceph that doesn't match this version of cephadm")
    parser_bootstrap.add_argument(
        '--skip-prepare-host',
        action='store_true',
        help='Do not prepare host')
    parser_bootstrap.add_argument(
        '--orphan-initial-daemons',
        action='store_true',
        help='Set mon and mgr service to `unmanaged`; do not create the crash service')
    parser_bootstrap.add_argument(
        '--skip-monitoring-stack',
        action='store_true',
        help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
    parser_bootstrap.add_argument(
        '--apply-spec',
        help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
    parser_bootstrap.add_argument(
        '--shared_ceph_folder',
        metavar='CEPH_SOURCE_FOLDER',
        help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')

    parser_bootstrap.add_argument(
        '--registry-url',
        help='url for custom registry')
    parser_bootstrap.add_argument(
        '--registry-username',
        help='username for custom registry')
    parser_bootstrap.add_argument(
        '--registry-password',
        help='password for custom registry')
    parser_bootstrap.add_argument(
        '--registry-json',
        help='json file with custom registry login info (URL, Username, Password)')
    parser_bootstrap.add_argument(
        '--container-init',
        action='store_true',
        default=CONTAINER_INIT,
        help=argparse.SUPPRESS)
    parser_bootstrap.add_argument(
        '--with-exporter',
        action='store_true',
        help='Automatically deploy cephadm metadata exporter to each node')
    parser_bootstrap.add_argument(
        '--exporter-config',
        action=CustomValidation,
        help=f'Exporter configuration information in JSON format (providing: {", ".join(CephadmDaemon.config_requirements)}, port information)')
    parser_bootstrap.add_argument(
        '--cluster-network',
        help='subnet to use for cluster replication, recovery and heartbeats (in CIDR notation network/mask)')
    parser_bootstrap.add_argument(
        '--single-host-defaults',
        action='store_true',
        help='adjust configuration defaults to suit a single-host cluster')
    parser_bootstrap.add_argument(
        '--log-to-file',
        action='store_true',
        help='configure cluster to log to traditional log files in /var/log/ceph/$fsid')
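
    # Rough sketch of how a bootstrap command line maps onto this namespace.
    # Flag spellings follow the reconstruction above and '192.168.0.10' is a
    # placeholder address; this is illustration only, not executed here:
    #
    #     args = _get_parser().parse_args(
    #         ['bootstrap', '--mon-ip', '192.168.0.10', '--single-host-defaults'])
    #     assert args.func is command_bootstrap and args.single_host_defaults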

    parser_deploy = subparsers.add_parser(
        'deploy', help='deploy a daemon')
    parser_deploy.set_defaults(func=command_deploy)
    parser_deploy.add_argument(
        '--name',
        required=True,
        action=CustomValidation,
        help='daemon name (type.id)')
    parser_deploy.add_argument(
        '--fsid',
        required=True,
        help='cluster FSID')
    parser_deploy.add_argument(
        '--config', '-c',
        help='config file for new daemon')
    parser_deploy.add_argument(
        '--config-json',
        help='Additional configuration information in JSON format')
    parser_deploy.add_argument(
        '--keyring',
        help='keyring for new daemon')
    parser_deploy.add_argument(
        '--key',
        help='key for new daemon')
    parser_deploy.add_argument(
        '--osd-fsid',
        help='OSD uuid, if creating an OSD container')
    parser_deploy.add_argument(
        '--skip-firewalld',
        action='store_true',
        help='Do not configure firewalld')
    parser_deploy.add_argument(
        '--tcp-ports',
        help='List of tcp ports to open in the host firewall')
    parser_deploy.add_argument(
        '--reconfig',
        action='store_true',
        help='Reconfigure a previously deployed daemon')
    parser_deploy.add_argument(
        '--allow-ptrace',
        action='store_true',
        help='Allow SYS_PTRACE on daemon container')
    parser_deploy.add_argument(
        '--container-init',
        action='store_true',
        default=CONTAINER_INIT,
        help=argparse.SUPPRESS)
    parser_deploy.add_argument(
        '--memory-request',
        help='Container memory request/target')
    parser_deploy.add_argument(
        '--memory-limit',
        help='Container memory hard limit')
    parser_deploy.add_argument(
        '--meta-json',
        help='JSON dict of additional metadata')

    parser_check_host = subparsers.add_parser(
        'check-host', help='check host configuration')
    parser_check_host.set_defaults(func=command_check_host)
    parser_check_host.add_argument(
        '--expect-hostname',
        help='Check that hostname matches an expected value')

    parser_prepare_host = subparsers.add_parser(
        'prepare-host', help='prepare a host for cephadm use')
    parser_prepare_host.set_defaults(func=command_prepare_host)
    parser_prepare_host.add_argument(
        '--expect-hostname',
        help='Set hostname')

    parser_add_repo = subparsers.add_parser(
        'add-repo', help='configure package repository')
    parser_add_repo.set_defaults(func=command_add_repo)
    parser_add_repo.add_argument(
        '--release',
        help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE))
    parser_add_repo.add_argument(
        '--version',
        help='use specific upstream version (x.y.z)')
    parser_add_repo.add_argument(
        '--dev',
        help='use specified bleeding edge build from git branch or tag')
    parser_add_repo.add_argument(
        '--dev-commit',
        help='use specified bleeding edge build from git commit')
    parser_add_repo.add_argument(
        '--gpg-url',
        help='specify alternative GPG key location')
    parser_add_repo.add_argument(
        '--repo-url',
        default='https://download.ceph.com',
        help='specify alternative repo location')

    parser_rm_repo = subparsers.add_parser(
        'rm-repo', help='remove package repository configuration')
    parser_rm_repo.set_defaults(func=command_rm_repo)

    parser_install = subparsers.add_parser(
        'install', help='install ceph package(s)')
    parser_install.set_defaults(func=command_install)
    parser_install.add_argument(
        'packages', nargs='*',
        default=['cephadm'],
        help='packages')

    parser_registry_login = subparsers.add_parser(
        'registry-login', help='log host into authenticated registry')
    parser_registry_login.set_defaults(func=command_registry_login)
    parser_registry_login.add_argument(
        '--registry-url',
        help='url for custom registry')
    parser_registry_login.add_argument(
        '--registry-username',
        help='username for custom registry')
    parser_registry_login.add_argument(
        '--registry-password',
        help='password for custom registry')
    parser_registry_login.add_argument(
        '--registry-json',
        help='json file with custom registry login info (URL, Username, Password)')
    parser_registry_login.add_argument(
        '--fsid',
        help='cluster FSID')

    parser_gather_facts = subparsers.add_parser(
        'gather-facts', help='gather and return host related information (JSON format)')
    parser_gather_facts.set_defaults(func=command_gather_facts)

    parser_exporter = subparsers.add_parser(
        'exporter', help='Start cephadm in exporter mode (web service), providing host/daemon/disk metadata')
    parser_exporter.add_argument(
        '--fsid',
        required=True,
        type=str,
        help='fsid of the cephadm exporter to run against')
    parser_exporter.add_argument(
        '--port',
        type=int,
        default=int(CephadmDaemon.default_port),
        help='port number for the cephadm exporter service')
    parser_exporter.add_argument(
        '--id',
        type=str,
        default=get_hostname().split('.')[0],
        help='daemon identifier for the exporter')
    parser_exporter.set_defaults(func=command_exporter)

    parser_maintenance = subparsers.add_parser(
        'host-maintenance', help='Manage the maintenance state of a host')
    parser_maintenance.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_maintenance.add_argument(
        'maintenance_action',
        type=str,
        choices=['enter', 'exit'],
        help='Maintenance action - enter maintenance, or exit maintenance')
    parser_maintenance.set_defaults(func=command_maintenance)

    return parser

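# Quick illustration of how the parser wiring above is consumed (a sketch,
# assuming only the command_* handlers referenced in set_defaults calls above):
#
#     parser = _get_parser()
#     args = parser.parse_args(['version'])
#     assert args.func is command_version   # dispatch target chosen by the subparser
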
def _parse_args(av: List[str]) -> argparse.Namespace:
    parser = _get_parser()

    args = parser.parse_args(av)
    if 'command' in args and args.command and args.command[0] == '--':
        args.command.pop(0)

    # workaround argparse to deprecate the subparser `--container-init` flag
    # container_init and no_container_init must always be mutually exclusive
    container_init_args = ('--container-init', '--no-container-init')
    if set(container_init_args).issubset(av):
        parser.error('argument %s: not allowed with argument %s' % (container_init_args))
    elif '--container-init' in av:
        args.no_container_init = not args.container_init
    else:
        args.container_init = not args.no_container_init
    assert args.container_init is not args.no_container_init

    return args

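# Sketch of the deprecation shim above: passing the hidden '--container-init'
# flag simply inverts no_container_init, so exactly one of the two attributes
# ends up True. The bootstrap subcommand is used only because it still accepts
# the hidden flag (illustration, not executed here):
#
#     args = _parse_args(['bootstrap', '--container-init'])
#     assert args.container_init and not args.no_container_init
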
def cephadm_init_ctx(args: List[str]) -> CephadmContext:
    ctx = CephadmContext()
    ctx.set_args(_parse_args(args))
    return ctx

def cephadm_init(args: List[str]) -> CephadmContext:
    global logger
    ctx = cephadm_init_ctx(args)

    # Logger configuration
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    dictConfig(logging_config)
    logger = logging.getLogger()

    if not os.path.exists(ctx.logrotate_dir + '/cephadm'):
        with open(ctx.logrotate_dir + '/cephadm', 'w') as f:
            f.write("""# created by cephadm
/var/log/ceph/cephadm.log {
    rotate 7
    daily
    compress
    missingok
    notifempty
}
""")

    if ctx.verbose:
        for handler in logger.handlers:
            if handler.name == 'console':
                handler.setLevel(logging.DEBUG)

    return ctx

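# Sketch of what the entry point below gets back from this helper (the
# argument list is a placeholder; 'ls' is one of the subcommands registered in
# _get_parser above):
#
#     ctx = cephadm_init(['ls'])
#     # ctx.func is command_ls, and logging/logrotate are now configured
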
def main() -> None:

    # root?
    if os.geteuid() != 0:
        sys.stderr.write('ERROR: cephadm should be run as root\n')
        sys.exit(1)

    av: List[str] = []
    av = sys.argv[1:]

    ctx = cephadm_init(av)
    if not ctx.has_function():
        sys.stderr.write('No command specified; pass -h or --help for usage\n')
        sys.exit(1)

    try:
        # podman or docker?
        ctx.container_engine = find_container_engine(ctx)
        if ctx.func not in \
                [command_check_host, command_prepare_host, command_add_repo, command_install]:
            check_container_engine(ctx)
        # command handler
        r = ctx.func(ctx)
    except Error as e:
        if ctx.verbose:
            raise
        logger.error('ERROR: %s' % e)
        sys.exit(1)
    if not r:
        r = 0
    sys.exit(r)

if __name__ == '__main__':
    main()