4 import asyncio
.subprocess
12 from logging
.config
import dictConfig
29 from typing
import Dict
, List
, Tuple
, Optional
, Union
, Any
, NoReturn
, Callable
, IO
, Sequence
, TypeVar
, cast
, Set
, Iterable
34 from configparser
import ConfigParser
35 from contextlib
import redirect_stdout
36 from functools
import wraps
38 from io
import StringIO
39 from threading
import Thread
, Event
40 from urllib
.error
import HTTPError
, URLError
41 from urllib
.request
import urlopen
, Request
42 from pathlib
import Path
44 FuncT
= TypeVar('FuncT', bound
=Callable
)
46 # Default container images -----------------------------------------------------
47 DEFAULT_IMAGE
= 'quay.io/ceph/ceph:v17'
48 DEFAULT_IMAGE_IS_MASTER
= False
49 DEFAULT_IMAGE_RELEASE
= 'quincy'
50 DEFAULT_PROMETHEUS_IMAGE
= 'quay.io/prometheus/prometheus:v2.33.4'
51 DEFAULT_LOKI_IMAGE
= 'docker.io/grafana/loki:2.4.0'
52 DEFAULT_PROMTAIL_IMAGE
= 'docker.io/grafana/promtail:2.4.0'
53 DEFAULT_NODE_EXPORTER_IMAGE
= 'quay.io/prometheus/node-exporter:v1.3.1'
54 DEFAULT_ALERT_MANAGER_IMAGE
= 'quay.io/prometheus/alertmanager:v0.23.0'
55 DEFAULT_GRAFANA_IMAGE
= 'quay.io/ceph/ceph-grafana:8.3.5'
56 DEFAULT_HAPROXY_IMAGE
= 'quay.io/ceph/haproxy:2.3'
57 DEFAULT_KEEPALIVED_IMAGE
= 'quay.io/ceph/keepalived:2.1.5'
58 DEFAULT_SNMP_GATEWAY_IMAGE
= 'docker.io/maxwo/snmp-notifier:v1.2.1'
59 DEFAULT_REGISTRY
= 'docker.io' # normalize unqualified digests to this
60 # ------------------------------------------------------------------------------
62 LATEST_STABLE_RELEASE
= 'quincy'
63 DATA_DIR
= '/var/lib/ceph'
64 LOG_DIR
= '/var/log/ceph'
65 LOCK_DIR
= '/run/cephadm'
66 LOGROTATE_DIR
= '/etc/logrotate.d'
67 SYSCTL_DIR
= '/etc/sysctl.d'
68 UNIT_DIR
= '/etc/systemd/system'
69 CEPH_CONF_DIR
= 'config'
70 CEPH_CONF
= 'ceph.conf'
71 CEPH_PUBKEY
= 'ceph.pub'
72 CEPH_KEYRING
= 'ceph.client.admin.keyring'
73 CEPH_DEFAULT_CONF
= f
'/etc/ceph/{CEPH_CONF}'
74 CEPH_DEFAULT_KEYRING
= f
'/etc/ceph/{CEPH_KEYRING}'
75 CEPH_DEFAULT_PUBKEY
= f
'/etc/ceph/{CEPH_PUBKEY}'
79 MIN_PODMAN_VERSION
= (2, 0, 2)
80 CGROUPS_SPLIT_PODMAN_VERSION
= (2, 1, 0)
81 CUSTOM_PS1
= r
'[ceph: \u@\h \W]\$ '
82 DEFAULT_TIMEOUT
= None # in seconds
84 DATEFMT
= '%Y-%m-%dT%H:%M:%S.%fZ'
85 QUIET_LOG_LEVEL
= 9 # DEBUG is 10, so using 9 to be lower level than DEBUG
87 logger
: logging
.Logger
= None # type: ignore
90 You can invoke cephadm in two ways:
92 1. The normal way, at the command line.
94 2. By piping the script to the python3 binary. In this latter case, you should
95 prepend one or more lines to the beginning of the script.
103 injected_argv = ['ls']
105 For reading stdin from the '--config-json -' argument,
107 injected_stdin = '...'
112 ##################################
115 async def run_func(func
: Callable
, cmd
: str) -> subprocess
.CompletedProcess
:
116 logger
.debug(f
'running function {func.__name__}, with parms: {cmd}')
121 async def concurrent_tasks(func
: Callable
, cmd_list
: List
[str]) -> List
[Any
]:
124 tasks
.append(run_func(func
, cmd
))
126 data
= await asyncio
.gather(*tasks
)
132 """EndPoint representing an ip:port format"""
134 def __init__(self
, ip
: str, port
: int) -> None:
138 def __str__(self
) -> str:
139 return f
'{self.ip}:{self.port}'
141 def __repr__(self
) -> str:
142 return f
'{self.ip}:{self.port}'
146 def __init__(self
, container_id
: str,
150 version
: str) -> None:
151 self
.container_id
= container_id
152 self
.image_name
= image_name
153 self
.image_id
= image_id
155 self
.version
= version
157 def __eq__(self
, other
: Any
) -> bool:
158 if not isinstance(other
, ContainerInfo
):
159 return NotImplemented
160 return (self
.container_id
== other
.container_id
161 and self
.image_name
== other
.image_name
162 and self
.image_id
== other
.image_id
163 and self
.start
== other
.start
164 and self
.version
== other
.version
)
169 def __init__(self
) -> None:
171 self
.docker
: bool = False
172 self
.data_dir
: str = DATA_DIR
173 self
.log_dir
: str = LOG_DIR
174 self
.logrotate_dir
: str = LOGROTATE_DIR
175 self
.sysctl_dir
: str = SYSCTL_DIR
176 self
.unit_dir
: str = UNIT_DIR
177 self
.verbose
: bool = False
178 self
.timeout
: Optional
[int] = DEFAULT_TIMEOUT
179 self
.retry
: int = DEFAULT_RETRY
180 self
.env
: List
[str] = []
181 self
.memory_request
: Optional
[int] = None
182 self
.memory_limit
: Optional
[int] = None
183 self
.log_to_journald
: Optional
[bool] = None
185 self
.container_init
: bool = CONTAINER_INIT
186 self
.container_engine
: Optional
[ContainerEngine
] = None
188 def set_from_args(self
, args
: argparse
.Namespace
) -> None:
189 argdict
: Dict
[str, Any
] = vars(args
)
190 for k
, v
in argdict
.items():
class CephadmContext:
    """Runtime context that proxies attribute access to the parsed CLI
    arguments (``_args``) and the static defaults (``_conf``).

    Both backing objects live directly in ``__dict__`` so the custom
    ``__getattr__``/``__setattr__`` hooks are not triggered while they
    are being installed.
    """

    def __init__(self) -> None:
        # Write through __dict__ on purpose: __setattr__ would try to
        # forward these names to the (not yet existing) backing objects.
        self.__dict__['_args'] = None
        self.__dict__['_conf'] = BaseConfig()

    def set_args(self, args: argparse.Namespace) -> None:
        """Record parsed argparse results and fold them into the config."""
        self._conf.set_from_args(args)
        # NOTE(review): this assignment reconstructed from upstream cephadm;
        # the original line was not visible here — confirm against source.
        self.__dict__['_args'] = args

    def has_function(self) -> bool:
        # 'func' is the argparse sub-command dispatch target.
        return 'func' in self._args

    def __contains__(self, name: str) -> bool:
        return hasattr(self, name)

    def __getattr__(self, name: str) -> Any:
        # Only called when normal lookup fails: try _conf, then _args,
        # then fall back to the default lookup (which will raise).
        own = self.__dict__
        if '_conf' in own and hasattr(own['_conf'], name):
            return getattr(own['_conf'], name)
        if '_args' in own and hasattr(own['_args'], name):
            return getattr(own['_args'], name)
        return super().__getattribute__(name)

    def __setattr__(self, name: str, value: Any) -> None:
        # Route writes to whichever backing object already owns the name;
        # otherwise store on the instance itself.
        if hasattr(self._conf, name):
            setattr(self._conf, name, value)
        elif hasattr(self._args, name):
            setattr(self._args, name, value)
        else:
            super().__setattr__(name, value)
228 class ContainerEngine
:
229 def __init__(self
) -> None:
230 self
.path
= find_program(self
.EXE
)
235 raise NotImplementedError()
237 def __str__(self
) -> str:
238 return f
'{self.EXE} ({self.path})'
241 class Podman(ContainerEngine
):
244 def __init__(self
) -> None:
246 self
._version
: Optional
[Tuple
[int, ...]] = None
249 def version(self
) -> Tuple
[int, ...]:
250 if self
._version
is None:
251 raise RuntimeError('Please call `get_version` first')
254 def get_version(self
, ctx
: CephadmContext
) -> None:
255 out
, _
, _
= call_throws(ctx
, [self
.path
, 'version', '--format', '{{.Client.Version}}'], verbosity
=CallVerbosity
.QUIET
)
256 self
._version
= _parse_podman_version(out
)
258 def __str__(self
) -> str:
259 version
= '.'.join(map(str, self
.version
))
260 return f
'{self.EXE} ({self.path}) version {version}'
263 class Docker(ContainerEngine
):
267 CONTAINER_PREFERENCE
= (Podman
, Docker
) # prefer podman to docker
270 # During normal cephadm operations (cephadm ls, gather-facts, etc ) we use:
271 # stdout: for JSON output only
272 # stderr: for error, debug, info, etc
275 'disable_existing_loggers': True,
278 'format': '%(asctime)s %(thread)x %(levelname)s %(message)s'
284 'class': 'logging.StreamHandler',
288 'class': 'logging.handlers.WatchedFileHandler',
289 'formatter': 'cephadm',
290 'filename': '%s/cephadm.log' % LOG_DIR
,
296 'handlers': ['console', 'log_file'],
class ExcludeErrorsFilter(logging.Filter):
    """Filter attached to the stdout handler of the interactive logging
    config so warnings/errors appear only on stderr."""

    def filter(self, record: logging.LogRecord) -> bool:
        """Only lets through log messages with log level below WARNING ."""
        below_warning = record.levelno < logging.WARNING
        return below_warning
308 # When cephadm is used as standard binary (bootstrap, rm-cluster, etc) we use:
309 # stdout: for debug and info
310 # stderr: for errors and warnings
311 interactive_logging_config
= {
315 '()': ExcludeErrorsFilter
318 'disable_existing_loggers': True,
321 'format': '%(asctime)s %(thread)x %(levelname)s %(message)s'
327 'class': 'logging.StreamHandler',
328 'filters': ['exclude_errors'],
333 'class': 'logging.StreamHandler',
338 'class': 'logging.handlers.WatchedFileHandler',
339 'formatter': 'cephadm',
340 'filename': '%s/cephadm.log' % LOG_DIR
,
346 'handlers': ['console_stdout', 'console_stderr', 'log_file'],
358 class Error(Exception):
362 class TimeoutExpired(Error
):
366 class UnauthorizedRegistryError(Error
):
369 ##################################
373 daemons
= ('mon', 'mgr', 'osd', 'mds', 'rgw', 'rbd-mirror',
374 'crash', 'cephfs-mirror')
376 ##################################
381 def get_sysctl_settings() -> List
[str]:
383 '# allow a large number of OSDs',
384 'fs.aio-max-nr = 1048576',
385 'kernel.pid_max = 4194304',
389 ##################################
393 """Defines an SNMP gateway between Prometheus and SNMP monitoring Frameworks"""
394 daemon_type
= 'snmp-gateway'
395 SUPPORTED_VERSIONS
= ['V2c', 'V3']
396 default_image
= DEFAULT_SNMP_GATEWAY_IMAGE
398 env_filename
= 'snmp-gateway.conf'
403 daemon_id
: Union
[int, str],
404 config_json
: Dict
[str, Any
],
405 image
: Optional
[str] = None) -> None:
408 self
.daemon_id
= daemon_id
409 self
.image
= image
or SNMPGateway
.default_image
411 self
.uid
= config_json
.get('uid', 0)
412 self
.gid
= config_json
.get('gid', 0)
414 self
.destination
= config_json
.get('destination', '')
415 self
.snmp_version
= config_json
.get('snmp_version', 'V2c')
416 self
.snmp_community
= config_json
.get('snmp_community', 'public')
417 self
.log_level
= config_json
.get('log_level', 'info')
418 self
.snmp_v3_auth_username
= config_json
.get('snmp_v3_auth_username', '')
419 self
.snmp_v3_auth_password
= config_json
.get('snmp_v3_auth_password', '')
420 self
.snmp_v3_auth_protocol
= config_json
.get('snmp_v3_auth_protocol', '')
421 self
.snmp_v3_priv_protocol
= config_json
.get('snmp_v3_priv_protocol', '')
422 self
.snmp_v3_priv_password
= config_json
.get('snmp_v3_priv_password', '')
423 self
.snmp_v3_engine_id
= config_json
.get('snmp_v3_engine_id', '')
428 def init(cls
, ctx
: CephadmContext
, fsid
: str,
429 daemon_id
: Union
[int, str]) -> 'SNMPGateway':
430 assert ctx
.config_json
431 return cls(ctx
, fsid
, daemon_id
,
432 get_parm(ctx
.config_json
), ctx
.image
)
435 def get_version(ctx
: CephadmContext
, fsid
: str, daemon_id
: str) -> Optional
[str]:
436 """Return the version of the notifer from it's http endpoint"""
437 path
= os
.path
.join(ctx
.data_dir
, fsid
, f
'snmp-gateway.{daemon_id}', 'unit.meta')
439 with
open(path
, 'r') as env
:
440 metadata
= json
.loads(env
.read())
441 except (OSError, json
.JSONDecodeError
):
444 ports
= metadata
.get('ports', [])
449 with
urlopen(f
'http://127.0.0.1:{ports[0]}/') as r
:
450 html
= r
.read().decode('utf-8').split('\n')
451 except (HTTPError
, URLError
):
456 if stripped
.startswith(('<pre>', '<PRE>')) and \
457 stripped
.endswith(('</pre>', '</PRE>')):
458 # <pre>(version=1.2.1, branch=HEAD, revision=7...
459 return stripped
.split(',')[0].split('version=')[1]
464 def port(self
) -> int:
465 if not self
.ctx
.tcp_ports
:
466 return self
.DEFAULT_PORT
468 if len(self
.ctx
.tcp_ports
) > 0:
469 return int(self
.ctx
.tcp_ports
.split()[0])
471 return self
.DEFAULT_PORT
473 def get_daemon_args(self
) -> List
[str]:
476 f
'--web.listen-address=:{self.port}',
477 f
'--snmp.destination={self.destination}',
478 f
'--snmp.version={self.snmp_version}',
479 f
'--log.level={self.log_level}',
480 '--snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl'
483 if self
.snmp_version
== 'V3':
484 # common auth settings
486 '--snmp.authentication-enabled',
487 f
'--snmp.authentication-protocol={self.snmp_v3_auth_protocol}',
488 f
'--snmp.security-engine-id={self.snmp_v3_engine_id}'
490 # authPriv setting is applied if we have a privacy protocol setting
491 if self
.snmp_v3_priv_protocol
:
493 '--snmp.private-enabled',
494 f
'--snmp.private-protocol={self.snmp_v3_priv_protocol}'
497 return base_args
+ v3_args
500 def data_dir(self
) -> str:
501 return os
.path
.join(self
.ctx
.data_dir
, self
.ctx
.fsid
, f
'{self.daemon_type}.{self.daemon_id}')
504 def conf_file_path(self
) -> str:
505 return os
.path
.join(self
.data_dir
, self
.env_filename
)
507 def create_daemon_conf(self
) -> None:
508 """Creates the environment file holding 'secrets' passed to the snmp-notifier daemon"""
509 with
open(os
.open(self
.conf_file_path
, os
.O_CREAT | os
.O_WRONLY
, 0o600), 'w') as f
:
510 if self
.snmp_version
== 'V2c':
511 f
.write(f
'SNMP_NOTIFIER_COMMUNITY={self.snmp_community}\n')
513 f
.write(f
'SNMP_NOTIFIER_AUTH_USERNAME={self.snmp_v3_auth_username}\n')
514 f
.write(f
'SNMP_NOTIFIER_AUTH_PASSWORD={self.snmp_v3_auth_password}\n')
515 if self
.snmp_v3_priv_password
:
516 f
.write(f
'SNMP_NOTIFIER_PRIV_PASSWORD={self.snmp_v3_priv_password}\n')
518 def validate(self
) -> None:
519 """Validate the settings
522 Error: if the fsid doesn't look like an fsid
523 Error: if the snmp version is not supported
524 Error: destination IP and port address missing
526 if not is_fsid(self
.fsid
):
527 raise Error(f
'not a valid fsid: {self.fsid}')
529 if self
.snmp_version
not in SNMPGateway
.SUPPORTED_VERSIONS
:
530 raise Error(f
'not a valid snmp version: {self.snmp_version}')
532 if not self
.destination
:
533 raise Error('config is missing destination attribute(<ip>:<port>) of the target SNMP listener')
536 ##################################
537 class Monitoring(object):
538 """Define the configs for the monitoring containers"""
541 'prometheus': [9095], # Avoid default 9090, due to conflict with cockpit UI
542 'node-exporter': [9100],
544 'alertmanager': [9093, 9094],
551 'image': DEFAULT_PROMETHEUS_IMAGE
,
555 '--config.file=/etc/prometheus/prometheus.yml',
556 '--storage.tsdb.path=/prometheus',
558 'config-json-files': [
563 'image': DEFAULT_LOKI_IMAGE
,
567 '--config.file=/etc/loki/loki.yml',
569 'config-json-files': [
574 'image': DEFAULT_PROMTAIL_IMAGE
,
578 '--config.file=/etc/promtail/promtail.yml',
580 'config-json-files': [
585 'image': DEFAULT_NODE_EXPORTER_IMAGE
,
589 '--no-collector.timex',
593 'image': DEFAULT_GRAFANA_IMAGE
,
597 'config-json-files': [
599 'provisioning/datasources/ceph-dashboard.yml',
605 'image': DEFAULT_ALERT_MANAGER_IMAGE
,
609 '--cluster.listen-address=:{}'.format(port_map
['alertmanager'][1]),
611 'config-json-files': [
614 'config-json-args': [
621 def get_version(ctx
, container_id
, daemon_type
):
622 # type: (CephadmContext, str, str) -> str
624 :param: daemon_type Either "prometheus", "alertmanager", "loki", "promtail" or "node-exporter"
626 assert daemon_type
in ('prometheus', 'alertmanager', 'node-exporter', 'loki', 'promtail')
627 cmd
= daemon_type
.replace('-', '_')
631 if daemon_type
== 'alertmanager':
632 for cmd
in ['alertmanager', 'prometheus-alertmanager']:
633 _
, err
, code
= call(ctx
, [
634 ctx
.container_engine
.path
, 'exec', container_id
, cmd
,
636 ], verbosity
=CallVerbosity
.QUIET
)
639 cmd
= 'alertmanager' # reset cmd for version extraction
641 _
, err
, code
= call(ctx
, [
642 ctx
.container_engine
.path
, 'exec', container_id
, cmd
, '--version'
643 ], verbosity
=CallVerbosity
.QUIET
)
645 err
.startswith('%s, version ' % cmd
):
646 version
= err
.split(' ')[2]
649 ##################################
def populate_files(config_dir, config_files, uid, gid):
    # type: (str, Dict, int, int) -> None
    """create config files for different services"""
    for fname in config_files:
        target = os.path.join(config_dir, fname)
        content = dict_get_join(config_files, fname)
        logger.info('Write file: %s' % (target))
        with open(target, 'w', encoding='utf-8') as fh:
            # lock ownership/permissions down before writing any secrets
            os.fchown(fh.fileno(), uid, gid)
            os.fchmod(fh.fileno(), 0o600)
            fh.write(content)
665 class NFSGanesha(object):
666 """Defines a NFS-Ganesha container"""
669 entrypoint
= '/usr/bin/ganesha.nfsd'
670 daemon_args
= ['-F', '-L', 'STDERR']
672 required_files
= ['ganesha.conf']
683 image
=DEFAULT_IMAGE
):
684 # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
687 self
.daemon_id
= daemon_id
690 # config-json options
691 self
.pool
= dict_get(config_json
, 'pool', require
=True)
692 self
.namespace
= dict_get(config_json
, 'namespace')
693 self
.userid
= dict_get(config_json
, 'userid')
694 self
.extra_args
= dict_get(config_json
, 'extra_args', [])
695 self
.files
= dict_get(config_json
, 'files', {})
696 self
.rgw
= dict_get(config_json
, 'rgw', {})
698 # validate the supplied args
702 def init(cls
, ctx
, fsid
, daemon_id
):
703 # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha
704 return cls(ctx
, fsid
, daemon_id
, get_parm(ctx
.config_json
), ctx
.image
)
706 def get_container_mounts(self
, data_dir
):
707 # type: (str) -> Dict[str, str]
709 mounts
[os
.path
.join(data_dir
, 'config')] = '/etc/ceph/ceph.conf:z'
710 mounts
[os
.path
.join(data_dir
, 'keyring')] = '/etc/ceph/keyring:z'
711 mounts
[os
.path
.join(data_dir
, 'etc/ganesha')] = '/etc/ganesha:z'
713 cluster
= self
.rgw
.get('cluster', 'ceph')
714 rgw_user
= self
.rgw
.get('user', 'admin')
715 mounts
[os
.path
.join(data_dir
, 'keyring.rgw')] = \
716 '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster
, rgw_user
)
720 def get_container_envs():
721 # type: () -> List[str]
723 'CEPH_CONF=%s' % (CEPH_DEFAULT_CONF
)
728 def get_version(ctx
, container_id
):
729 # type: (CephadmContext, str) -> Optional[str]
731 out
, err
, code
= call(ctx
,
732 [ctx
.container_engine
.path
, 'exec', container_id
,
733 NFSGanesha
.entrypoint
, '-v'],
734 verbosity
=CallVerbosity
.QUIET
)
736 match
= re
.search(r
'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out
)
738 version
= match
.group(1)
743 if not is_fsid(self
.fsid
):
744 raise Error('not an fsid: %s' % self
.fsid
)
745 if not self
.daemon_id
:
746 raise Error('invalid daemon_id: %s' % self
.daemon_id
)
748 raise Error('invalid image: %s' % self
.image
)
750 # check for the required files
751 if self
.required_files
:
752 for fname
in self
.required_files
:
753 if fname
not in self
.files
:
754 raise Error('required file missing from config-json: %s' % fname
)
756 # check for an RGW config
758 if not self
.rgw
.get('keyring'):
759 raise Error('RGW keyring is missing')
760 if not self
.rgw
.get('user'):
761 raise Error('RGW user is missing')
763 def get_daemon_name(self
):
765 return '%s.%s' % (self
.daemon_type
, self
.daemon_id
)
767 def get_container_name(self
, desc
=None):
768 # type: (Optional[str]) -> str
769 cname
= 'ceph-%s-%s' % (self
.fsid
, self
.get_daemon_name())
771 cname
= '%s-%s' % (cname
, desc
)
774 def get_daemon_args(self
):
775 # type: () -> List[str]
776 return self
.daemon_args
+ self
.extra_args
778 def create_daemon_dirs(self
, data_dir
, uid
, gid
):
779 # type: (str, int, int) -> None
780 """Create files under the container data dir"""
781 if not os
.path
.isdir(data_dir
):
782 raise OSError('data_dir is not a directory: %s' % (data_dir
))
784 logger
.info('Creating ganesha config...')
786 # create the ganesha conf dir
787 config_dir
= os
.path
.join(data_dir
, 'etc/ganesha')
788 makedirs(config_dir
, uid
, gid
, 0o755)
790 # populate files from the config-json
791 populate_files(config_dir
, self
.files
, uid
, gid
)
793 # write the RGW keyring
795 keyring_path
= os
.path
.join(data_dir
, 'keyring.rgw')
796 with
open(keyring_path
, 'w') as f
:
797 os
.fchmod(f
.fileno(), 0o600)
798 os
.fchown(f
.fileno(), uid
, gid
)
799 f
.write(self
.rgw
.get('keyring', ''))
801 ##################################
804 class CephIscsi(object):
805 """Defines a Ceph-Iscsi container"""
807 daemon_type
= 'iscsi'
808 entrypoint
= '/usr/bin/rbd-target-api'
810 required_files
= ['iscsi-gateway.cfg']
817 image
=DEFAULT_IMAGE
):
818 # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
821 self
.daemon_id
= daemon_id
824 # config-json options
825 self
.files
= dict_get(config_json
, 'files', {})
827 # validate the supplied args
831 def init(cls
, ctx
, fsid
, daemon_id
):
832 # type: (CephadmContext, str, Union[int, str]) -> CephIscsi
833 return cls(ctx
, fsid
, daemon_id
,
834 get_parm(ctx
.config_json
), ctx
.image
)
837 def get_container_mounts(data_dir
, log_dir
):
838 # type: (str, str) -> Dict[str, str]
840 mounts
[os
.path
.join(data_dir
, 'config')] = '/etc/ceph/ceph.conf:z'
841 mounts
[os
.path
.join(data_dir
, 'keyring')] = '/etc/ceph/keyring:z'
842 mounts
[os
.path
.join(data_dir
, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
843 mounts
[os
.path
.join(data_dir
, 'configfs')] = '/sys/kernel/config'
844 mounts
[log_dir
] = '/var/log:z'
845 mounts
['/dev'] = '/dev'
849 def get_container_binds():
850 # type: () -> List[List[str]]
852 lib_modules
= ['type=bind',
853 'source=/lib/modules',
854 'destination=/lib/modules',
856 binds
.append(lib_modules
)
860 def get_version(ctx
, container_id
):
861 # type: (CephadmContext, str) -> Optional[str]
863 out
, err
, code
= call(ctx
,
864 [ctx
.container_engine
.path
, 'exec', container_id
,
865 '/usr/bin/python3', '-c', "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"],
866 verbosity
=CallVerbosity
.QUIET
)
868 version
= out
.strip()
873 if not is_fsid(self
.fsid
):
874 raise Error('not an fsid: %s' % self
.fsid
)
875 if not self
.daemon_id
:
876 raise Error('invalid daemon_id: %s' % self
.daemon_id
)
878 raise Error('invalid image: %s' % self
.image
)
880 # check for the required files
881 if self
.required_files
:
882 for fname
in self
.required_files
:
883 if fname
not in self
.files
:
884 raise Error('required file missing from config-json: %s' % fname
)
886 def get_daemon_name(self
):
888 return '%s.%s' % (self
.daemon_type
, self
.daemon_id
)
890 def get_container_name(self
, desc
=None):
891 # type: (Optional[str]) -> str
892 cname
= 'ceph-%s-%s' % (self
.fsid
, self
.get_daemon_name())
894 cname
= '%s-%s' % (cname
, desc
)
897 def create_daemon_dirs(self
, data_dir
, uid
, gid
):
898 # type: (str, int, int) -> None
899 """Create files under the container data dir"""
900 if not os
.path
.isdir(data_dir
):
901 raise OSError('data_dir is not a directory: %s' % (data_dir
))
903 logger
.info('Creating ceph-iscsi config...')
904 configfs_dir
= os
.path
.join(data_dir
, 'configfs')
905 makedirs(configfs_dir
, uid
, gid
, 0o755)
907 # populate files from the config-json
908 populate_files(data_dir
, self
.files
, uid
, gid
)
911 def configfs_mount_umount(data_dir
, mount
=True):
912 # type: (str, bool) -> List[str]
913 mount_path
= os
.path
.join(data_dir
, 'configfs')
915 cmd
= 'if ! grep -qs {0} /proc/mounts; then ' \
916 'mount -t configfs none {0}; fi'.format(mount_path
)
918 cmd
= 'if grep -qs {0} /proc/mounts; then ' \
919 'umount {0}; fi'.format(mount_path
)
922 def get_tcmu_runner_container(self
):
923 # type: () -> CephContainer
924 tcmu_container
= get_container(self
.ctx
, self
.fsid
, self
.daemon_type
, self
.daemon_id
)
925 tcmu_container
.entrypoint
= '/usr/bin/tcmu-runner'
926 tcmu_container
.cname
= self
.get_container_name(desc
='tcmu')
927 # remove extra container args for tcmu container.
928 # extra args could cause issue with forking service type
929 tcmu_container
.container_args
= []
930 return tcmu_container
932 ##################################
935 class HAproxy(object):
936 """Defines an HAproxy container"""
937 daemon_type
= 'haproxy'
938 required_files
= ['haproxy.cfg']
939 default_image
= DEFAULT_HAPROXY_IMAGE
943 fsid
: str, daemon_id
: Union
[int, str],
944 config_json
: Dict
, image
: str) -> None:
947 self
.daemon_id
= daemon_id
950 # config-json options
951 self
.files
= dict_get(config_json
, 'files', {})
956 def init(cls
, ctx
: CephadmContext
,
957 fsid
: str, daemon_id
: Union
[int, str]) -> 'HAproxy':
958 return cls(ctx
, fsid
, daemon_id
, get_parm(ctx
.config_json
),
961 def create_daemon_dirs(self
, data_dir
: str, uid
: int, gid
: int) -> None:
962 """Create files under the container data dir"""
963 if not os
.path
.isdir(data_dir
):
964 raise OSError('data_dir is not a directory: %s' % (data_dir
))
966 # create additional directories in data dir for HAproxy to use
967 if not os
.path
.isdir(os
.path
.join(data_dir
, 'haproxy')):
968 makedirs(os
.path
.join(data_dir
, 'haproxy'), uid
, gid
, DATA_DIR_MODE
)
970 data_dir
= os
.path
.join(data_dir
, 'haproxy')
971 populate_files(data_dir
, self
.files
, uid
, gid
)
973 def get_daemon_args(self
) -> List
[str]:
974 return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg']
978 if not is_fsid(self
.fsid
):
979 raise Error('not an fsid: %s' % self
.fsid
)
980 if not self
.daemon_id
:
981 raise Error('invalid daemon_id: %s' % self
.daemon_id
)
983 raise Error('invalid image: %s' % self
.image
)
985 # check for the required files
986 if self
.required_files
:
987 for fname
in self
.required_files
:
988 if fname
not in self
.files
:
989 raise Error('required file missing from config-json: %s' % fname
)
991 def get_daemon_name(self
):
993 return '%s.%s' % (self
.daemon_type
, self
.daemon_id
)
995 def get_container_name(self
, desc
=None):
996 # type: (Optional[str]) -> str
997 cname
= 'ceph-%s-%s' % (self
.fsid
, self
.get_daemon_name())
999 cname
= '%s-%s' % (cname
, desc
)
1002 def extract_uid_gid_haproxy(self
) -> Tuple
[int, int]:
1003 # better directory for this?
1004 return extract_uid_gid(self
.ctx
, file_path
='/var/lib')
1007 def get_container_mounts(data_dir
: str) -> Dict
[str, str]:
1009 mounts
[os
.path
.join(data_dir
, 'haproxy')] = '/var/lib/haproxy'
1013 def get_sysctl_settings() -> List
[str]:
1016 'net.ipv4.ip_forward = 1',
1019 ##################################
1022 class Keepalived(object):
1023 """Defines an Keepalived container"""
1024 daemon_type
= 'keepalived'
1025 required_files
= ['keepalived.conf']
1026 default_image
= DEFAULT_KEEPALIVED_IMAGE
1029 ctx
: CephadmContext
,
1030 fsid
: str, daemon_id
: Union
[int, str],
1031 config_json
: Dict
, image
: str) -> None:
1034 self
.daemon_id
= daemon_id
1037 # config-json options
1038 self
.files
= dict_get(config_json
, 'files', {})
1043 def init(cls
, ctx
: CephadmContext
, fsid
: str,
1044 daemon_id
: Union
[int, str]) -> 'Keepalived':
1045 return cls(ctx
, fsid
, daemon_id
,
1046 get_parm(ctx
.config_json
), ctx
.image
)
1048 def create_daemon_dirs(self
, data_dir
: str, uid
: int, gid
: int) -> None:
1049 """Create files under the container data dir"""
1050 if not os
.path
.isdir(data_dir
):
1051 raise OSError('data_dir is not a directory: %s' % (data_dir
))
1053 # create additional directories in data dir for keepalived to use
1054 if not os
.path
.isdir(os
.path
.join(data_dir
, 'keepalived')):
1055 makedirs(os
.path
.join(data_dir
, 'keepalived'), uid
, gid
, DATA_DIR_MODE
)
1057 # populate files from the config-json
1058 populate_files(data_dir
, self
.files
, uid
, gid
)
1062 if not is_fsid(self
.fsid
):
1063 raise Error('not an fsid: %s' % self
.fsid
)
1064 if not self
.daemon_id
:
1065 raise Error('invalid daemon_id: %s' % self
.daemon_id
)
1067 raise Error('invalid image: %s' % self
.image
)
1069 # check for the required files
1070 if self
.required_files
:
1071 for fname
in self
.required_files
:
1072 if fname
not in self
.files
:
1073 raise Error('required file missing from config-json: %s' % fname
)
1075 def get_daemon_name(self
):
1077 return '%s.%s' % (self
.daemon_type
, self
.daemon_id
)
1079 def get_container_name(self
, desc
=None):
1080 # type: (Optional[str]) -> str
1081 cname
= 'ceph-%s-%s' % (self
.fsid
, self
.get_daemon_name())
1083 cname
= '%s-%s' % (cname
, desc
)
1087 def get_container_envs():
1088 # type: () -> List[str]
1090 'KEEPALIVED_AUTOCONF=false',
1091 'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf',
1092 'KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf',
1093 'KEEPALIVED_DEBUG=false'
1098 def get_sysctl_settings() -> List
[str]:
1100 '# IP forwarding and non-local bind',
1101 'net.ipv4.ip_forward = 1',
1102 'net.ipv4.ip_nonlocal_bind = 1',
1105 def extract_uid_gid_keepalived(self
) -> Tuple
[int, int]:
1106 # better directory for this?
1107 return extract_uid_gid(self
.ctx
, file_path
='/var/lib')
1110 def get_container_mounts(data_dir
: str) -> Dict
[str, str]:
1112 mounts
[os
.path
.join(data_dir
, 'keepalived.conf')] = '/etc/keepalived/keepalived.conf'
1115 ##################################
1118 class CustomContainer(object):
1119 """Defines a custom container"""
1120 daemon_type
= 'container'
1123 fsid
: str, daemon_id
: Union
[int, str],
1124 config_json
: Dict
, image
: str) -> None:
1126 self
.daemon_id
= daemon_id
1129 # config-json options
1130 self
.entrypoint
= dict_get(config_json
, 'entrypoint')
1131 self
.uid
= dict_get(config_json
, 'uid', 65534) # nobody
1132 self
.gid
= dict_get(config_json
, 'gid', 65534) # nobody
1133 self
.volume_mounts
= dict_get(config_json
, 'volume_mounts', {})
1134 self
.args
= dict_get(config_json
, 'args', [])
1135 self
.envs
= dict_get(config_json
, 'envs', [])
1136 self
.privileged
= dict_get(config_json
, 'privileged', False)
1137 self
.bind_mounts
= dict_get(config_json
, 'bind_mounts', [])
1138 self
.ports
= dict_get(config_json
, 'ports', [])
1139 self
.dirs
= dict_get(config_json
, 'dirs', [])
1140 self
.files
= dict_get(config_json
, 'files', {})
1143 def init(cls
, ctx
: CephadmContext
,
1144 fsid
: str, daemon_id
: Union
[int, str]) -> 'CustomContainer':
1145 return cls(fsid
, daemon_id
,
1146 get_parm(ctx
.config_json
), ctx
.image
)
1148 def create_daemon_dirs(self
, data_dir
: str, uid
: int, gid
: int) -> None:
1150 Create dirs/files below the container data directory.
1152 logger
.info('Creating custom container configuration '
1153 'dirs/files in {} ...'.format(data_dir
))
1155 if not os
.path
.isdir(data_dir
):
1156 raise OSError('data_dir is not a directory: %s' % data_dir
)
1158 for dir_path
in self
.dirs
:
1159 logger
.info('Creating directory: {}'.format(dir_path
))
1160 dir_path
= os
.path
.join(data_dir
, dir_path
.strip('/'))
1161 makedirs(dir_path
, uid
, gid
, 0o755)
1163 for file_path
in self
.files
:
1164 logger
.info('Creating file: {}'.format(file_path
))
1165 content
= dict_get_join(self
.files
, file_path
)
1166 file_path
= os
.path
.join(data_dir
, file_path
.strip('/'))
1167 with
open(file_path
, 'w', encoding
='utf-8') as f
:
1168 os
.fchown(f
.fileno(), uid
, gid
)
1169 os
.fchmod(f
.fileno(), 0o600)
1172 def get_daemon_args(self
) -> List
[str]:
1175 def get_container_args(self
) -> List
[str]:
1178 def get_container_envs(self
) -> List
[str]:
1181 def get_container_mounts(self
, data_dir
: str) -> Dict
[str, str]:
1183 Get the volume mounts. Relative source paths will be located below
1184 `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
1194 /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
1198 for source
, destination
in self
.volume_mounts
.items():
1199 source
= os
.path
.join(data_dir
, source
)
1200 mounts
[source
] = destination
1203 def get_container_binds(self
, data_dir
: str) -> List
[List
[str]]:
1205 Get the bind mounts. Relative `source=...` paths will be located below
1206 `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
1211 'source=lib/modules',
1212 'destination=/lib/modules',
1218 'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
1222 binds
= self
.bind_mounts
.copy()
1224 for index
, value
in enumerate(bind
):
1225 match
= re
.match(r
'^source=(.+)$', value
)
1227 bind
[index
] = 'source={}'.format(os
.path
.join(
1228 data_dir
, match
.group(1)))
1231 ##################################
def touch(file_path: str, uid: Optional[int] = None, gid: Optional[int] = None) -> None:
    """Create *file_path* (or update its mtime), optionally chowning it.

    :param file_path: path of the file to create/touch.
    :param uid: numeric owner to apply; ownership is skipped when omitted.
    :param gid: numeric group to apply; ownership is skipped when omitted.
    """
    Path(file_path).touch()
    # Guard explicitly with `is not None`: the visible code would pass the
    # None defaults straight to os.chown (TypeError), and a truthiness
    # check would wrongly skip uid/gid 0 (root).
    if uid is not None and gid is not None:
        os.chown(file_path, uid, gid)
1240 ##################################
def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
    """
    Helper function to get a key from a dictionary.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :param default: The default value in case the key does not
        exist. Default is `None`.
    :param require: Set to `True` if the key is required. An
        exception will be raised if the key does not exist in
        the given dictionary.
    :return: Returns the value of the given key.
    :raises: :exc:`Error` if the given key does not exist
        and `require` is set to `True`.
    """
    # `key not in d` instead of `key not in d.keys()`: same semantics,
    # idiomatic and avoids building the keys view.
    if require and key not in d:
        raise Error('{} missing from dict'.format(key))
    return d.get(key, default)  # type: ignore
1261 ##################################
def dict_get_join(d: Dict, key: str) -> Any:
    """
    Helper function to get the value of a given key from a dictionary.
    `List` values will be converted to a string by joining them with a
    line break.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :return: Returns the value of the given key. If it was a `list`, it
        will be joining with a line break.
    """
    value = d.get(key)
    if isinstance(value, list):
        value = '\n'.join(map(str, value))
    return value
1279 ##################################
def get_supported_daemons():
    # type: () -> List[str]
    """Return the list of all daemon types cephadm knows how to deploy."""
    supported = list(Ceph.daemons)
    supported.extend(Monitoring.components)
    # One daemon_type each, in the original append order.
    for cls in (NFSGanesha, CephIscsi, CustomContainer, HAproxy,
                Keepalived, CephadmAgent, SNMPGateway):
        supported.append(cls.daemon_type)
    # Sanity: no duplicate daemon type names.
    assert len(supported) == len(set(supported))
    return supported
1296 ##################################
class PortOccupiedError(Error):
    """Raised when a requested TCP port is already bound on this host."""
    pass
def attempt_bind(ctx, s, address, port):
    # type: (CephadmContext, socket.socket, str, int) -> None
    """Try to bind socket *s* to (address, port); raise PortOccupiedError
    if the port is taken. The socket is always closed afterwards."""
    try:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((address, port))
    except OSError as e:
        if e.errno == errno.EADDRINUSE:
            msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
            logger.warning(msg)
            raise PortOccupiedError(msg)
        else:
            raise e
    except Exception as e:
        raise Error(e)
    finally:
        s.close()
def port_in_use(ctx, port_num):
    # type: (CephadmContext, int) -> bool
    """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
    logger.info('Verifying port %d ...' % port_num)

    def _port_in_use(af: socket.AddressFamily, address: str) -> bool:
        try:
            s = socket.socket(af, socket.SOCK_STREAM)
            attempt_bind(ctx, s, address, port_num)
        except PortOccupiedError:
            return True
        except OSError as e:
            if e.errno in (errno.EAFNOSUPPORT, errno.EADDRNOTAVAIL):
                # Ignore EAFNOSUPPORT and EADDRNOTAVAIL as two interfaces are
                # being tested here and one might be intentionally be disabled.
                # In that case no error should be raised.
                return False
            else:
                raise e
        return False

    return any(_port_in_use(af, address) for af, address in (
        (socket.AF_INET, '0.0.0.0'),
        (socket.AF_INET6, '::')
    ))
def check_ip_port(ctx, ep):
    # type: (CephadmContext, EndPoint) -> None
    """Verify the endpoint's ip:port is bindable (unless --skip-ping-check)."""
    if not ctx.skip_ping_check:
        logger.info(f'Verifying IP {ep.ip} port {ep.port} ...')
        # Choose the address family from the literal form of the address.
        # NOTE(review): the branch condition line was elided here; upstream
        # selects AF_INET6 when the address is IPv6 -- confirm.
        if is_ipv6(ep.ip):
            s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
            ip = unwrap_ipv6(ep.ip)
        else:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            ip = ep.ip
        attempt_bind(ctx, s, ip, ep.port)
1359 ##################################
1362 # this is an abbreviated version of
1363 # https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py
1364 # that drops all of the compatibility (this is Unix/Linux only).
class Timeout(TimeoutError):
    """
    Raised when the lock could not be acquired in *timeout* seconds.
    """

    def __init__(self, lock_file: str) -> None:
        #: The path of the file lock.
        self.lock_file = lock_file
        return None

    def __str__(self) -> str:
        temp = "The file lock '{}' could not be acquired."\
               .format(self.lock_file)
        return temp
1385 class _Acquire_ReturnProxy(object):
1386 def __init__(self
, lock
: 'FileLock') -> None:
1390 def __enter__(self
) -> 'FileLock':
1393 def __exit__(self
, exc_type
: Any
, exc_value
: Any
, traceback
: Any
) -> None:
class FileLock(object):
    """flock(2)-based advisory file lock with nested-acquire counting.
    Abbreviated from py-filelock; Unix/Linux only."""

    def __init__(self, ctx: CephadmContext, name: str, timeout: int = -1) -> None:
        if not os.path.exists(LOCK_DIR):
            os.mkdir(LOCK_DIR, 0o700)
        self._lock_file = os.path.join(LOCK_DIR, name + '.lock')

        # The file descriptor for the *_lock_file* as it is returned by the
        # os.open() function.
        # This file lock is only NOT None, if the object currently holds the
        # lock.
        self._lock_file_fd: Optional[int] = None
        self.timeout = timeout
        # The lock counter is used for implementing the nested locking
        # mechanism. Whenever the lock is acquired, the counter is increased and
        # the lock is only released, when this value is 0 again.
        self._lock_counter = 0
        return None

    @property
    def is_locked(self) -> bool:
        # Holding an open fd is the definition of "locked".
        return self._lock_file_fd is not None

    def acquire(self, timeout: Optional[int] = None, poll_intervall: float = 0.05) -> _Acquire_ReturnProxy:
        """
        Acquires the file lock or fails with a :exc:`Timeout` error.

        .. code-block:: python

            # You can use this method in the context manager (recommended)
            with lock.acquire():
                pass

        :arg int timeout:
            The maximum time waited for the file lock.
            If ``timeout < 0``, there is no timeout and this method will
            block until the lock could be acquired.
            If ``timeout`` is None, the default :attr:`~timeout` is used.
        :arg float poll_intervall:
            We check once in *poll_intervall* seconds if we can acquire the
            file lock.
        :raises Timeout:
            if the lock could not be acquired in *timeout* seconds.
        """
        # Use the default timeout, if no timeout is provided.
        if timeout is None:
            timeout = self.timeout

        # Increment the number right at the beginning.
        # We can still undo it, if something fails.
        self._lock_counter += 1

        lock_id = id(self)
        lock_filename = self._lock_file
        start_time = time.time()
        try:
            while True:
                if not self.is_locked:
                    logger.log(QUIET_LOG_LEVEL, 'Acquiring lock %s on %s', lock_id,
                               lock_filename)
                    self._acquire()

                if self.is_locked:
                    logger.log(QUIET_LOG_LEVEL, 'Lock %s acquired on %s', lock_id,
                               lock_filename)
                    break
                elif timeout >= 0 and time.time() - start_time > timeout:
                    logger.warning('Timeout acquiring lock %s on %s', lock_id,
                                   lock_filename)
                    raise Timeout(self._lock_file)
                else:
                    logger.log(
                        QUIET_LOG_LEVEL,
                        'Lock %s not acquired on %s, waiting %s seconds ...',
                        lock_id, lock_filename, poll_intervall
                    )
                    time.sleep(poll_intervall)
        except Exception:
            # Something did go wrong, so decrement the counter.
            self._lock_counter = max(0, self._lock_counter - 1)
            raise
        return _Acquire_ReturnProxy(lock=self)

    def release(self, force: bool = False) -> None:
        """
        Releases the file lock.
        Please note, that the lock is only completly released, if the lock
        counter is 0.
        Also note, that the lock file itself is not automatically deleted.
        :arg bool force:
            If true, the lock counter is ignored and the lock is released in
            every case.
        """
        if self.is_locked:
            self._lock_counter -= 1

            if self._lock_counter == 0 or force:
                # lock_id = id(self)
                # lock_filename = self._lock_file

                # Can't log in shutdown:
                #  File "/usr/lib64/python3.9/logging/__init__.py", line 1175, in _open
                #  NameError: name 'open' is not defined
                # logger.debug('Releasing lock %s on %s', lock_id, lock_filename)
                self._release()
                self._lock_counter = 0
                # logger.debug('Lock %s released on %s', lock_id, lock_filename)

        return None

    def __enter__(self) -> 'FileLock':
        self.acquire()
        return self

    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
        self.release()
        return None

    def __del__(self) -> None:
        # Best-effort unlock at GC time.
        self.release(force=True)
        return None

    def _acquire(self) -> None:
        open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
        fd = os.open(self._lock_file, open_mode)

        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except (IOError, OSError):
            # Lock held elsewhere; leave _lock_file_fd as None.
            os.close(fd)
        else:
            self._lock_file_fd = fd
        return None

    def _release(self) -> None:
        # Do not remove the lockfile:
        #
        #   https://github.com/benediktschmitt/py-filelock/issues/31
        #   https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
        fd = self._lock_file_fd
        self._lock_file_fd = None
        fcntl.flock(fd, fcntl.LOCK_UN)  # type: ignore
        os.close(fd)  # type: ignore
        return None
1552 ##################################
1553 # Popen wrappers, lifted from ceph-volume
class CallVerbosity(Enum):
    # Format: Normal Operation: <log-level-when-no-errors>, Errors: <log-level-when-error>
    # NOTE: QUIET log level is custom level only used when --verbose is passed

    # Normal Operation: None, Errors: None
    SILENT = 0
    # Normal Operation: QUIET, Error: QUIET
    QUIET = 1
    # Normal Operation: DEBUG, Error: DEBUG
    DEBUG = 2
    # Normal Operation: QUIET, Error: INFO
    QUIET_UNLESS_ERROR = 3
    # Normal Operation: DEBUG, Error: INFO
    VERBOSE_ON_FAILURE = 4
    # Normal Operation: INFO, Error: INFO
    VERBOSE = 5

    def success_log_level(self) -> int:
        """Log level used for output of a command that exited cleanly."""
        _verbosity_level_to_log_level = {
            self.SILENT: 0,
            self.QUIET: QUIET_LOG_LEVEL,
            self.DEBUG: logging.DEBUG,
            self.QUIET_UNLESS_ERROR: QUIET_LOG_LEVEL,
            self.VERBOSE_ON_FAILURE: logging.DEBUG,
            self.VERBOSE: logging.INFO
        }
        return _verbosity_level_to_log_level[self]  # type: ignore

    def error_log_level(self) -> int:
        """Log level used for output of a command that failed."""
        _verbosity_level_to_log_level = {
            self.SILENT: 0,
            self.QUIET: QUIET_LOG_LEVEL,
            self.DEBUG: logging.DEBUG,
            self.QUIET_UNLESS_ERROR: logging.INFO,
            self.VERBOSE_ON_FAILURE: logging.INFO,
            self.VERBOSE: logging.INFO
        }
        return _verbosity_level_to_log_level[self]  # type: ignore
if sys.version_info < (3, 8):
    # The ThreadedChildWatcher is only available in python 3.8+; backport it
    # (trimmed from the CPython implementation) so subprocesses can be
    # awaited from non-main threads on older interpreters.
    import itertools
    import threading
    import warnings
    from asyncio import events

    class ThreadedChildWatcher(asyncio.AbstractChildWatcher):
        """Threaded child watcher implementation.
        The watcher uses a thread per process
        for waiting for the process finish.
        It doesn't require subscription on POSIX signal
        but a thread creation is not free.
        The watcher has O(1) complexity, its performance doesn't depend
        on amount of spawn processes.
        """

        def __init__(self) -> None:
            self._pid_counter = itertools.count(0)
            self._threads: Dict[Any, Any] = {}

        def is_active(self) -> bool:
            return True

        def close(self) -> None:
            self._join_threads()

        def _join_threads(self) -> None:
            """Internal: Join all non-daemon threads"""
            threads = [thread for thread in list(self._threads.values())
                       if thread.is_alive() and not thread.daemon]
            for thread in threads:
                thread.join()

        def __enter__(self) -> Any:
            return self

        def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
            pass

        def __del__(self, _warn: Any = warnings.warn) -> None:
            threads = [thread for thread in list(self._threads.values())
                       if thread.is_alive()]
            if threads:
                _warn(f'{self.__class__} has registered but not finished child processes',
                      ResourceWarning,
                      source=self)

        def add_child_handler(self, pid: Any, callback: Any, *args: Any) -> None:
            loop = events.get_event_loop()
            thread = threading.Thread(target=self._do_waitpid,
                                      name=f'waitpid-{next(self._pid_counter)}',
                                      args=(loop, pid, callback, args),
                                      daemon=True)
            self._threads[pid] = thread
            thread.start()

        def remove_child_handler(self, pid: Any) -> bool:
            # asyncio never calls remove_child_handler() !!!
            # The method is no-op but is implemented because
            # abstract base classe requires it
            return True

        def attach_loop(self, loop: Any) -> None:
            pass

        def _do_waitpid(self, loop: Any, expected_pid: Any, callback: Any, args: Any) -> None:
            assert expected_pid > 0

            try:
                pid, status = os.waitpid(expected_pid, 0)
            except ChildProcessError:
                # The child process is already reaped
                # (may happen if waitpid() is called elsewhere).
                pid = expected_pid
                returncode = 255
                logger.warning(
                    'Unknown child process pid %d, will report returncode 255',
                    pid)
            else:
                if os.WIFEXITED(status):
                    returncode = os.WEXITSTATUS(status)
                elif os.WIFSIGNALED(status):
                    returncode = -os.WTERMSIG(status)
                else:
                    raise ValueError(f'unknown wait status {status}')
                if loop.get_debug():
                    logger.debug('process %s exited with returncode %s',
                                 expected_pid, returncode)

            if loop.is_closed():
                logger.warning('Loop %r that handles pid %r is closed', loop, pid)
            else:
                loop.call_soon_threadsafe(callback, pid, returncode, *args)

            self._threads.pop(expected_pid)

    # unlike SafeChildWatcher which handles SIGCHLD in the main thread,
    # ThreadedChildWatcher runs in a separated thread, hence allows us to
    # run create_subprocess_exec() in non-main thread, see
    # https://bugs.python.org/issue35621
    asyncio.set_child_watcher(ThreadedChildWatcher())
try:
    from asyncio import run as async_run   # type: ignore[attr-defined]
except ImportError:
    # Python 3.6 fallback: asyncio.run() only exists on 3.7+.
    def async_run(coro):  # type: ignore
        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)
            return loop.run_until_complete(coro)
        finally:
            try:
                loop.run_until_complete(loop.shutdown_asyncgens())
            finally:
                asyncio.set_event_loop(None)
                loop.close()
def call(ctx: CephadmContext,
         command: List[str],
         desc: Optional[str] = None,
         verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
         timeout: Optional[int] = DEFAULT_TIMEOUT,
         **kwargs: Any) -> Tuple[str, str, int]:
    """
    Wrap subprocess.Popen to

    - log stdout/stderr to a logger,
    - decode utf-8,
    - cleanly return out, err, returncode

    :param timeout: timeout in seconds
    """
    prefix = command[0] if desc is None else desc
    if prefix:
        prefix += ': '
    timeout = timeout or ctx.timeout

    async def tee(reader: asyncio.StreamReader) -> str:
        # Accumulate one output stream into a single decoded string.
        collected = StringIO()
        async for line in reader:
            message = line.decode('utf-8')
            collected.write(message)
        return collected.getvalue()

    async def run_with_timeout() -> Tuple[str, str, int]:
        process = await asyncio.create_subprocess_exec(
            *command,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=os.environ.copy())
        assert process.stdout
        assert process.stderr
        try:
            stdout, stderr = await asyncio.gather(tee(process.stdout),
                                                  tee(process.stderr))
            returncode = await asyncio.wait_for(process.wait(), timeout)
        except asyncio.TimeoutError:
            logger.info(prefix + f'timeout after {timeout} seconds')
            # NOTE(review): the exact timeout handling lines were elided in
            # this view; upstream kills the child and reports a timeout
            # returncode here -- confirm against the full file.
            process.kill()
            returncode = -errno.ETIMEDOUT
        return stdout, stderr, returncode

    stdout, stderr, returncode = async_run(run_with_timeout())
    log_level = verbosity.success_log_level()
    if returncode != 0:
        log_level = verbosity.error_log_level()
        logger.log(log_level, f'Non-zero exit code {returncode} from {" ".join(command)}')
    for line in stdout.splitlines():
        logger.log(log_level, prefix + 'stdout ' + line)
    for line in stderr.splitlines():
        logger.log(log_level, prefix + 'stderr ' + line)
    return stdout, stderr, returncode
def call_throws(
        ctx: CephadmContext,
        command: List[str],
        desc: Optional[str] = None,
        verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
        timeout: Optional[int] = DEFAULT_TIMEOUT,
        **kwargs: Any) -> Tuple[str, str, int]:
    """Like call(), but raise RuntimeError on a non-zero exit code.
    The short form of the command output is included in the message when
    it is readable (at most two lines)."""
    out, err, ret = call(ctx, command, desc, verbosity, timeout, **kwargs)
    if ret:
        for s in (out, err):
            if s.strip() and len(s.splitlines()) <= 2:  # readable message?
                raise RuntimeError(f'Failed command: {" ".join(command)}: {s}')
        raise RuntimeError('Failed command: %s' % ' '.join(command))
    return out, err, ret
def call_timeout(ctx, command, timeout):
    # type: (CephadmContext, List[str], int) -> int
    """Run *command* with a hard timeout; raise TimeoutExpired on expiry."""
    logger.debug('Running command (timeout=%s): %s'
                 % (timeout, ' '.join(command)))

    def raise_timeout(command, timeout):
        # type: (List[str], int) -> NoReturn
        msg = 'Command `%s` timed out after %s seconds' % (command, timeout)
        logger.debug(msg)
        raise TimeoutExpired(msg)

    try:
        return subprocess.call(command, timeout=timeout, env=os.environ.copy())
    except subprocess.TimeoutExpired:
        raise_timeout(command, timeout)
1808 ##################################
def json_loads_retry(cli_func: Callable[[], str]) -> Any:
    """Call *cli_func* and parse its output as JSON, retrying (with
    increasing sleeps) when the output is not yet valid JSON."""
    for sleep_secs in [1, 4, 4]:
        try:
            return json.loads(cli_func())
        except json.JSONDecodeError:
            logger.debug('Invalid JSON. Retrying in %s seconds...' % sleep_secs)
            time.sleep(sleep_secs)
    # Final attempt: let the JSONDecodeError propagate this time.
    return json.loads(cli_func())
def is_available(ctx, what, func):
    # type: (CephadmContext, str, Callable[[], bool]) -> None
    """
    Wait for a service to become available

    :param what: the name of the service
    :param func: the callable object that determines availability
    """
    # NOTE(review): the retry-count / sleep lines were elided in this view;
    # upstream polls func() up to ctx.retry times with a short sleep.
    retry = ctx.retry
    logger.info('Waiting for %s...' % what)
    num = 1
    while True:
        if func():
            logger.info('%s is available'
                        % what)
            break
        elif num > retry:
            raise Error('%s not available after %s tries'
                        % (what, retry))

        logger.info('%s not available, waiting (%s/%s)...'
                    % (what, num, retry))
        num += 1
        time.sleep(2)
1848 def read_config(fn
):
1849 # type: (Optional[str]) -> ConfigParser
def pathify(p):
    # type: (str) -> str
    """Expand ~ and return the absolute form of path *p*."""
    p = os.path.expanduser(p)
    return os.path.abspath(p)
def get_file_timestamp(fn):
    # type: (str) -> Optional[str]
    """Return the file's mtime as a UTC DATEFMT string, or None on error."""
    try:
        mt = os.path.getmtime(fn)
        return datetime.datetime.fromtimestamp(
            mt, tz=datetime.timezone.utc
        ).strftime(DATEFMT)
    except Exception:
        return None
def try_convert_datetime(s):
    # type: (str) -> Optional[str]
    # This is super irritating because
    #  1) podman and docker use different formats
    #  2) python's strptime can't parse either one
    #
    # I've seen:
    #  docker 18.09.7:  2020-03-03T09:21:43.636153304Z
    #  podman 1.7.0:    2020-03-03T15:52:30.136257504-06:00
    #                   2020-03-03 15:52:30.136257504 -0600 CST
    # (In the podman case, there is a different string format for
    # 'inspect' and 'inspect --format {{.Created}}'!!)

    # In *all* cases, the 9 digit second precision is too much for
    # python's strptime. Shorten it to 6 digits.
    p = re.compile(r'(\.[\d]{6})[\d]*')
    s = p.sub(r'\1', s)

    # replace trailing Z with -0000, since (on python 3.6.8) it won't parse
    if s and s[-1] == 'Z':
        s = s[:-1] + '-0000'

    # cut off the redundant 'CST' part that strptime can't parse, if
    # present.
    v = s.split(' ')
    s = ' '.join(v[0:3])

    # try parsing with several format strings
    fmts = [
        '%Y-%m-%dT%H:%M:%S.%f%z',
        '%Y-%m-%d %H:%M:%S.%f %z',
    ]
    for f in fmts:
        try:
            # return timestamp normalized to UTC, rendered as DATEFMT.
            return datetime.datetime.strptime(s, f).astimezone(tz=datetime.timezone.utc).strftime(DATEFMT)
        except ValueError:
            pass
    return None
1914 def _parse_podman_version(version_str
):
1915 # type: (str) -> Tuple[int, ...]
1916 def to_int(val
: str, org_e
: Optional
[Exception] = None) -> int:
1917 if not val
and org_e
:
1921 except ValueError as e
:
1922 return to_int(val
[0:-1], org_e
or e
)
1924 return tuple(map(to_int
, version_str
.split('.')))
def get_hostname():
    # type: () -> str
    """Return the short hostname of this machine."""
    return socket.gethostname()
def get_fqdn():
    # type: () -> str
    """Return the fully-qualified hostname, falling back to the short one."""
    return socket.getfqdn() or socket.gethostname()
def get_arch():
    # type: () -> str
    """Return the machine architecture string (e.g. 'x86_64')."""
    return platform.uname().machine
def generate_service_id():
    # type: () -> str
    """Return '<hostname>.<random-suffix>' used as a service id.

    NOTE(review): the suffix length line was elided in this view; upstream
    uses 6 random lowercase letters -- confirm.
    """
    return get_hostname() + '.' + ''.join(random.choice(string.ascii_lowercase)
                                          for _ in range(6))
def generate_password():
    # type: () -> str
    """Return a random password of 10 lowercase letters / digits.

    Uses the ``secrets`` module (CSPRNG) rather than ``random``, since
    generated passwords are security sensitive.
    """
    import secrets
    return ''.join(secrets.choice(string.ascii_lowercase + string.digits)
                   for i in range(10))
def normalize_container_id(i):
    # type: (str) -> str
    # docker adds the sha256: prefix, but AFAICS both
    # docker (18.09.7 in bionic at least) and podman
    # both always use sha256, so leave off the prefix
    # for consistency.
    prefix = 'sha256:'
    if i.startswith(prefix):
        i = i[len(prefix):]
    return i
def make_fsid():
    # type: () -> str
    """Generate a fresh cluster fsid.

    uuid4 (purely random) instead of uuid1: uuid1 embeds the host MAC
    address and a timestamp in the id, which leaks host information into
    the cluster identifier for no benefit.
    """
    return str(uuid.uuid4())
def is_fsid(s):
    # type: (str) -> bool
    """Return True when *s* parses as a UUID."""
    try:
        uuid.UUID(s)
    except ValueError:
        return False
    return True
def validate_fsid(func: FuncT) -> FuncT:
    """Decorator: fail fast when ctx.fsid is set but not a valid UUID."""
    @wraps(func)
    def _validate_fsid(ctx: CephadmContext) -> Any:
        if 'fsid' in ctx and ctx.fsid:
            if not is_fsid(ctx.fsid):
                raise Error('not an fsid: %s' % ctx.fsid)
        return func(ctx)
    return cast(FuncT, _validate_fsid)
def infer_fsid(func: FuncT) -> FuncT:
    """
    If we only find a single fsid in /var/lib/ceph/*, use that
    """
    @wraps(func)
    def _infer_fsid(ctx: CephadmContext) -> Any:
        if 'fsid' in ctx and ctx.fsid:
            logger.debug('Using specified fsid: %s' % ctx.fsid)
            return func(ctx)

        fsids = set()

        cp = read_config(ctx.config)
        if cp.has_option('global', 'fsid'):
            fsids.add(cp.get('global', 'fsid'))

        daemon_list = list_daemons(ctx, detail=False)
        for daemon in daemon_list:
            if not is_fsid(daemon['fsid']):
                # 'unknown' fsid
                continue
            elif 'name' not in ctx or not ctx.name:
                # ctx.name not specified
                fsids.add(daemon['fsid'])
            elif daemon['name'] == ctx.name:
                # ctx.name is a match
                fsids.add(daemon['fsid'])
        fsids = sorted(fsids)

        if not fsids:
            # some commands do not always require an fsid
            pass
        elif len(fsids) == 1:
            logger.info('Inferring fsid %s' % fsids[0])
            ctx.fsid = fsids[0]
        else:
            raise Error('Cannot infer an fsid, one must be specified (using --fsid): %s' % fsids)
        return func(ctx)

    return cast(FuncT, _infer_fsid)
def infer_config(func: FuncT) -> FuncT:
    """
    Infer the cluster configuration using the following priority order:
     1- if the user has provided custom conf file (-c option) use it
     2- otherwise if daemon --name has been provided use daemon conf
     3- otherwise find the mon daemon conf file and use it (if v1)
     4- otherwise if {ctx.data_dir}/{fsid}/{CEPH_CONF_DIR} dir exists use it
     5- finally: fallback to the default file /etc/ceph/ceph.conf
    """
    @wraps(func)
    def _infer_config(ctx: CephadmContext) -> Any:

        def config_path(daemon_type: str, daemon_name: str) -> str:
            data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_name)
            return os.path.join(data_dir, 'config')

        def get_mon_daemon_name(fsid: str) -> Optional[str]:
            daemon_list = list_daemons(ctx, detail=False)
            for daemon in daemon_list:
                if (
                    daemon.get('name', '').startswith('mon.')
                    and daemon.get('fsid', '') == fsid
                    and daemon.get('style', '') == 'cephadm:v1'
                    and os.path.exists(config_path('mon', daemon['name'].split('.', 1)[1]))
                ):
                    return daemon['name']
            return None

        ctx.config = ctx.config if 'config' in ctx else None
        # check if user has provided conf by using -c option
        if ctx.config and (ctx.config != CEPH_DEFAULT_CONF):
            logger.debug(f'Using specified config: {ctx.config}')
            return func(ctx)

        if 'fsid' in ctx and ctx.fsid:
            name = ctx.name if ('name' in ctx and ctx.name) else get_mon_daemon_name(ctx.fsid)
            if name is not None:
                # daemon name has been specified (or inferred from mon), let's use its conf
                ctx.config = config_path(name.split('.', 1)[0], name.split('.', 1)[1])
            else:
                # no daemon, in case the cluster has a config dir then use it
                ceph_conf = f'{ctx.data_dir}/{ctx.fsid}/{CEPH_CONF_DIR}/{CEPH_CONF}'
                if os.path.exists(ceph_conf):
                    ctx.config = ceph_conf

        if ctx.config:
            logger.info(f'Inferring config {ctx.config}')
        elif os.path.exists(CEPH_DEFAULT_CONF):
            logger.debug(f'Using default config {CEPH_DEFAULT_CONF}')
            ctx.config = CEPH_DEFAULT_CONF
        return func(ctx)

    return cast(FuncT, _infer_config)
def _get_default_image(ctx: CephadmContext) -> str:
    """Return DEFAULT_IMAGE; warn loudly when it is a development build."""
    if DEFAULT_IMAGE_IS_MASTER:
        warn = """This is a development version of cephadm.
For information regarding the latest stable release:
    https://docs.ceph.com/docs/{}/cephadm/install
""".format(LATEST_STABLE_RELEASE)
        for line in warn.splitlines():
            logger.warning('{}{}{}'.format(termcolor.yellow, line, termcolor.end))
    return DEFAULT_IMAGE
def infer_image(func: FuncT) -> FuncT:
    """
    Use the most recent ceph image
    """
    @wraps(func)
    def _infer_image(ctx: CephadmContext) -> Any:
        if not ctx.image:
            ctx.image = os.environ.get('CEPHADM_IMAGE')
        if not ctx.image:
            ctx.image = infer_local_ceph_image(ctx, ctx.container_engine.path)
        if not ctx.image:
            ctx.image = _get_default_image(ctx)
        return func(ctx)

    return cast(FuncT, _infer_image)
def default_image(func: FuncT) -> FuncT:
    """Decorator: pick an image for the daemon named in ctx, falling back
    to $CEPHADM_IMAGE and then the default ceph image."""
    @wraps(func)
    def _default_image(ctx: CephadmContext) -> Any:
        if not ctx.image:
            if 'name' in ctx and ctx.name:
                type_ = ctx.name.split('.', 1)[0]
                if type_ in Monitoring.components:
                    ctx.image = Monitoring.components[type_]['image']
                if type_ == 'haproxy':
                    ctx.image = HAproxy.default_image
                if type_ == 'keepalived':
                    ctx.image = Keepalived.default_image
                if type_ == SNMPGateway.daemon_type:
                    ctx.image = SNMPGateway.default_image
            if not ctx.image:
                ctx.image = os.environ.get('CEPHADM_IMAGE')
            if not ctx.image:
                ctx.image = _get_default_image(ctx)

        return func(ctx)

    return cast(FuncT, _default_image)
def get_container_info(ctx: CephadmContext, daemon_filter: str, by_name: bool) -> Optional[ContainerInfo]:
    """
    :param ctx: Cephadm context
    :param daemon_filter: daemon name or type
    :param by_name: must be set to True if daemon name is provided
    :return: Container information or None
    """
    def daemon_name_or_type(daemon: Dict[str, str]) -> str:
        return daemon['name'] if by_name else daemon['name'].split('.', 1)[0]

    if by_name and '.' not in daemon_filter:
        logger.warning(f'Trying to get container info using invalid daemon name {daemon_filter}')
        return None
    daemons = list_daemons(ctx, detail=False)
    matching_daemons = [d for d in daemons if daemon_name_or_type(d) == daemon_filter and d['fsid'] == ctx.fsid]
    if matching_daemons:
        d_type, d_id = matching_daemons[0]['name'].split('.', 1)
        out, _, code = get_container_stats(ctx, ctx.container_engine.path, ctx.fsid, d_type, d_id)
        if not code:
            (container_id, image_name, image_id, start, version) = out.strip().split(',')
            return ContainerInfo(container_id, image_name, image_id, start, version)
    return None
def infer_local_ceph_image(ctx: CephadmContext, container_path: str) -> Optional[str]:
    """
    Infer the local ceph image based on the following priority criteria:
      1- the image specified by --image arg (if provided).
      2- the same image as the daemon container specified by --name arg (if provided).
      3- image used by any ceph container running on the host. In this case we use daemon types.
      4- if no container is found then we use the most recent ceph image on the host.

    Note: any selected container must have the same fsid inferred previously.

    :return: The most recent local ceph image (already pulled)
    """
    # '|' special character is used to separate the output fields into:
    #  - Repository@digest
    #  - Image id
    #  - Image tag
    #  - Image creation date
    out, _, _ = call_throws(ctx,
                            [container_path, 'images',
                             '--filter', 'label=ceph=True',
                             '--filter', 'dangling=false',
                             '--format', '{{.Repository}}@{{.Digest}}|{{.ID}}|{{.Tag}}|{{.CreatedAt}}'])

    container_info = None
    daemon_name = ctx.name if ('name' in ctx and ctx.name and '.' in ctx.name) else None
    daemons_ls = [daemon_name] if daemon_name is not None else Ceph.daemons  # daemon types: 'mon', 'mgr', etc
    for daemon in daemons_ls:
        container_info = get_container_info(ctx, daemon, daemon_name is not None)
        if container_info is not None:
            logger.debug(f"Using container info for daemon '{daemon}'")
            break

    for image in out.splitlines():
        if image and not image.isspace():
            (digest, image_id, tag, created_date) = image.lstrip().split('|')
            if container_info is not None and image_id not in container_info.image_id:
                # not the image of the running container; keep scanning
                continue
            if digest and not digest.endswith('@'):
                logger.info(f"Using ceph image with id '{image_id}' and tag '{tag}' created on {created_date}\n{digest}")
                return digest
    return None
def write_tmp(s, uid, gid):
    # type: (str, int, int) -> IO[str]
    """Write *s* to a new NamedTemporaryFile owned by uid:gid and return
    the (still-open) file object; the file disappears when it is closed."""
    tmp_f = tempfile.NamedTemporaryFile(mode='w',
                                        prefix='ceph-tmp')
    os.fchown(tmp_f.fileno(), uid, gid)
    tmp_f.write(s)
    tmp_f.flush()

    return tmp_f
def makedirs(dir, uid, gid, mode):
    # type: (str, int, int, int) -> None
    """Create *dir* (if missing) and force ownership and mode on it."""
    if not os.path.exists(dir):
        os.makedirs(dir, mode=mode)
    else:
        os.chmod(dir, mode)
    os.chown(dir, uid, gid)
    os.chmod(dir, mode)   # the above is masked by umask...
def get_data_dir(fsid, data_dir, t, n):
    # type: (str, str, str, Union[int, str]) -> str
    """Return the daemon data dir: <data_dir>/<fsid>/<type>.<id>."""
    return os.path.join(data_dir, fsid, '%s.%s' % (t, n))
def get_log_dir(fsid, log_dir):
    # type: (str, str) -> str
    """Return the per-cluster log dir: <log_dir>/<fsid>."""
    return os.path.join(log_dir, fsid)
def make_data_dir_base(fsid, data_dir, uid, gid):
    # type: (str, str, int, int) -> str
    """Create <data_dir>/<fsid> plus its crash/ and crash/posted/ subdirs."""
    data_dir_base = os.path.join(data_dir, fsid)
    makedirs(data_dir_base, uid, gid, DATA_DIR_MODE)
    makedirs(os.path.join(data_dir_base, 'crash'), uid, gid, DATA_DIR_MODE)
    makedirs(os.path.join(data_dir_base, 'crash', 'posted'), uid, gid,
             DATA_DIR_MODE)
    return data_dir_base
def make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=None, gid=None):
    # type: (CephadmContext, str, str, Union[int, str], Optional[int], Optional[int]) -> str
    """Ensure the base and daemon-specific data dirs exist; return the latter."""
    if uid is None or gid is None:
        uid, gid = extract_uid_gid(ctx)
    make_data_dir_base(fsid, ctx.data_dir, uid, gid)
    data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    makedirs(data_dir, uid, gid, DATA_DIR_MODE)
    return data_dir
def make_log_dir(ctx, fsid, uid=None, gid=None):
    # type: (CephadmContext, str, Optional[int], Optional[int]) -> str
    """Ensure the per-cluster log dir exists; return its path."""
    if uid is None or gid is None:
        uid, gid = extract_uid_gid(ctx)
    log_dir = get_log_dir(fsid, ctx.log_dir)
    makedirs(log_dir, uid, gid, LOG_DIR_MODE)
    return log_dir
def make_var_run(ctx, fsid, uid, gid):
    # type: (CephadmContext, str, int, int) -> None
    """Create /var/run/ceph/<fsid> (0770, owned by uid:gid) via install(1)."""
    call_throws(ctx, ['install', '-d', '-m0770', '-o', str(uid), '-g', str(gid),
                      '/var/run/ceph/%s' % fsid])
def copy_tree(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy a directory tree from src to dst
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_dir in src:
        dst_dir = dst
        if os.path.isdir(dst):
            dst_dir = os.path.join(dst, os.path.basename(src_dir))

        logger.debug('copy directory `%s` -> `%s`' % (src_dir, dst_dir))
        shutil.rmtree(dst_dir, ignore_errors=True)
        shutil.copytree(src_dir, dst_dir)  # dirs_exist_ok needs python 3.8

        for dirpath, dirnames, filenames in os.walk(dst_dir):
            logger.debug('chown %s:%s `%s`' % (uid, gid, dirpath))
            os.chown(dirpath, uid, gid)
            for filename in filenames:
                logger.debug('chown %s:%s `%s`' % (uid, gid, filename))
                os.chown(os.path.join(dirpath, filename), uid, gid)
def copy_files(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy files from src to dst, chowning each destination file.
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_file in src:
        dst_file = dst
        if os.path.isdir(dst):
            dst_file = os.path.join(dst, os.path.basename(src_file))

        logger.debug('copy file `%s` -> `%s`' % (src_file, dst_file))
        shutil.copyfile(src_file, dst_file)

        logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
        os.chown(dst_file, uid, gid)
def move_files(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """
    Move files from src to dst, recreating symlinks and chowning targets.
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_file in src:
        dst_file = dst
        if os.path.isdir(dst):
            dst_file = os.path.join(dst, os.path.basename(src_file))

        if os.path.islink(src_file):
            # shutil.move() in py2 does not handle symlinks correctly
            src_rl = os.readlink(src_file)
            logger.debug("symlink '%s' -> '%s'" % (dst_file, src_rl))
            os.symlink(src_rl, dst_file)
            os.unlink(src_file)
        else:
            logger.debug("move file '%s' -> '%s'" % (src_file, dst_file))
            shutil.move(src_file, dst_file)
            logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
            os.chown(dst_file, uid, gid)
def recursive_chown(path: str, uid: int, gid: int) -> None:
    """chown every directory and file under *path* (and *path* itself)."""
    for dirpath, dirnames, filenames in os.walk(path):
        os.chown(dirpath, uid, gid)
        for filename in filenames:
            os.chown(os.path.join(dirpath, filename), uid, gid)
# copied from distutils
def find_executable(executable: str, path: Optional[str] = None) -> Optional[str]:
    """Tries to find 'executable' in the directories listed in 'path'.
    A string listing directories separated by 'os.pathsep'; defaults to
    os.environ['PATH']. Returns the complete filename or None if not found.
    """
    _, ext = os.path.splitext(executable)
    if (sys.platform == 'win32') and (ext != '.exe'):
        executable = executable + '.exe'

    if os.path.isfile(executable):
        return executable

    if path is None:
        path = os.environ.get('PATH', None)
        if path is None:
            try:
                path = os.confstr('CS_PATH')
            except (AttributeError, ValueError):
                # os.confstr() or CS_PATH is not available
                path = os.defpath
        # bpo-35755: Don't use os.defpath if the PATH environment variable is
        # set to an empty string

    # PATH='' doesn't match, whereas PATH=':' looks in the current directory
    if not path:
        return None

    paths = path.split(os.pathsep)
    for p in paths:
        f = os.path.join(p, executable)
        if os.path.isfile(f):
            # the file exists, we have a shot at spawn working
            return f
    return None
def find_program(filename):
    # type: (str) -> str
    """Return the full path of *filename* on PATH, raising if absent."""
    name = find_executable(filename)
    if name is None:
        raise ValueError('%s not found' % filename)
    return name
def find_container_engine(ctx: CephadmContext) -> Optional[ContainerEngine]:
    """Pick a container engine: the forced docker engine when requested,
    otherwise the first entry of CONTAINER_PREFERENCE whose binary exists."""
    # NOTE(review): most of this body was dropped by extraction; restored
    # from the surviving loop header per upstream cephadm — verify.
    if ctx.docker:
        return Docker()
    else:
        for i in CONTAINER_PREFERENCE:
            try:
                return i()
            except Exception:
                pass
    return None
def check_container_engine(ctx: CephadmContext) -> ContainerEngine:
    """Validate the detected container engine, raising Error if none is
    usable or if podman is older than MIN_PODMAN_VERSION."""
    engine = ctx.container_engine
    if not isinstance(engine, CONTAINER_PREFERENCE):
        # See https://github.com/python/mypy/issues/8993
        exes: List[str] = [i.EXE for i in CONTAINER_PREFERENCE]  # type: ignore
        raise Error('No container engine binary found ({}). Try run `apt/dnf/yum/zypper install <container engine>`'.format(' or '.join(exes)))
    elif isinstance(engine, Podman):
        engine.get_version(ctx)
        if engine.version < MIN_PODMAN_VERSION:
            raise Error('podman version %d.%d.%d or later is required' % MIN_PODMAN_VERSION)
    return engine
def get_unit_name(fsid, daemon_type, daemon_id=None):
    # type: (str, str, Optional[Union[int, str]]) -> str
    """Return the systemd unit name for a daemon of this cluster."""
    # accept either name or type + id
    if daemon_id is not None:
        return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
    else:
        return 'ceph-%s@%s' % (fsid, daemon_type)
def get_unit_name_by_daemon_name(ctx: CephadmContext, fsid: str, name: str) -> str:
    """Look up a daemon's systemd unit name from its daemon description."""
    daemon = get_daemon_description(ctx, fsid, name)
    try:
        return daemon['systemd_unit']
    except KeyError:
        raise Error('Failed to get unit name for {}'.format(daemon))
def check_unit(ctx, unit_name):
    # type: (CephadmContext, str) -> Tuple[bool, str, bool]
    """Query systemd for a unit's state.

    Returns (enabled, state, installed) where state is one of
    'running', 'stopped', 'error', 'unknown'.
    """
    # NOTE(review): flag initialization and state assignments were dropped
    # by extraction; restored per upstream cephadm — verify.
    # NOTE: we ignore the exit code here because systemctl outputs
    # various exit codes based on the state of the service, but the
    # string result is more explicit (and sufficient).
    enabled = False
    installed = False
    try:
        out, err, code = call(ctx, ['systemctl', 'is-enabled', unit_name],
                              verbosity=CallVerbosity.QUIET)
        if code == 0:
            enabled = True
            installed = True
        elif 'disabled' in out:
            installed = True
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        enabled = False
        installed = False

    state = 'unknown'
    try:
        out, err, code = call(ctx, ['systemctl', 'is-active', unit_name],
                              verbosity=CallVerbosity.QUIET)
        out = out.strip()
        if out in ['active']:
            state = 'running'
        elif out in ['inactive']:
            state = 'stopped'
        elif out in ['failed', 'auto-restart']:
            state = 'error'
        else:
            state = 'unknown'
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        state = 'unknown'
    return (enabled, state, installed)
def check_units(ctx, units, enabler=None):
    # type: (CephadmContext, List[str], Optional[Packager]) -> bool
    """Return True if any of *units* is enabled and running; otherwise,
    optionally enable installed units via *enabler* and return False."""
    for u in units:
        (enabled, state, installed) = check_unit(ctx, u)
        if enabled and state == 'running':
            logger.info('Unit %s is enabled and running' % u)
            return True
        if enabler is not None:
            if installed:
                logger.info('Enabling unit %s' % u)
                enabler.enable_service(u)
    return False
def is_container_running(ctx: CephadmContext, c: 'CephContainer') -> bool:
    """Return True if the container for the current daemon is running."""
    if ctx.name.split('.', 1)[0] in ['agent', 'cephadm-exporter']:
        # these are non-containerized daemon types
        return False
    return bool(get_running_container_name(ctx, c))
def get_running_container_name(ctx: CephadmContext, c: 'CephContainer') -> Optional[str]:
    """Return whichever of the container's current/old names is in the
    'running' state, or None if neither is running."""
    for name in [c.cname, c.old_cname]:
        out, err, ret = call(ctx, [
            ctx.container_engine.path, 'container', 'inspect',
            '--format', '{{.State.Status}}', name
        ], verbosity=CallVerbosity.QUIET)
        if out.strip() == 'running':
            return name
    return None
def get_legacy_config_fsid(cluster, legacy_dir=None):
    # type: (str, Optional[str]) -> Optional[str]
    """Read the fsid from a legacy /etc/ceph/<cluster>.conf, if present."""
    config_file = '/etc/ceph/%s.conf' % cluster
    if legacy_dir is not None:
        # legacy_dir prefixes the whole legacy filesystem layout (used by adopt)
        config_file = os.path.abspath(legacy_dir + config_file)

    if os.path.exists(config_file):
        config = read_config(config_file)
        if config.has_section('global') and config.has_option('global', 'fsid'):
            return config.get('global', 'fsid')
    return None
def get_legacy_daemon_fsid(ctx, cluster,
                           daemon_type, daemon_id, legacy_dir=None):
    # type: (CephadmContext, str, str, Union[int, str], Optional[str]) -> Optional[str]
    """Determine the fsid of a legacy (pre-cephadm) daemon: for OSDs read
    the per-daemon ceph_fsid file, otherwise fall back to the cluster conf."""
    # NOTE(review): the try/except and fallthrough lines were dropped by
    # extraction; restored per upstream cephadm — verify.
    fsid = None
    if daemon_type == 'osd':
        try:
            fsid_file = os.path.join(ctx.data_dir,
                                     daemon_type,
                                     'ceph-%s' % daemon_id,
                                     'ceph_fsid')
            if legacy_dir is not None:
                fsid_file = os.path.abspath(legacy_dir + fsid_file)
            with open(fsid_file, 'r') as f:
                fsid = f.read().strip()
        except IOError:
            pass
    if not fsid:
        fsid = get_legacy_config_fsid(cluster, legacy_dir=legacy_dir)
    return fsid
def should_log_to_journald(ctx: CephadmContext) -> bool:
    """Return True if daemons should log to journald: an explicit
    ctx.log_to_journald wins; otherwise only new-enough podman qualifies."""
    if ctx.log_to_journald is not None:
        return ctx.log_to_journald
    return isinstance(ctx.container_engine, Podman) and \
        ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION
2550 def get_daemon_args(ctx
, fsid
, daemon_type
, daemon_id
):
2551 # type: (CephadmContext, str, str, Union[int, str]) -> List[str]
2552 r
= list() # type: List[str]
2554 if daemon_type
in Ceph
.daemons
and daemon_type
!= 'crash':
2556 '--setuser', 'ceph',
2557 '--setgroup', 'ceph',
2558 '--default-log-to-file=false',
2560 log_to_journald
= should_log_to_journald(ctx
)
2563 '--default-log-to-journald=true',
2564 '--default-log-to-stderr=false',
2568 '--default-log-to-stderr=true',
2569 '--default-log-stderr-prefix=debug ',
2571 if daemon_type
== 'mon':
2573 '--default-mon-cluster-log-to-file=false',
2577 '--default-mon-cluster-log-to-journald=true',
2578 '--default-mon-cluster-log-to-stderr=false',
2581 r
+= ['--default-mon-cluster-log-to-stderr=true']
2582 elif daemon_type
in Monitoring
.components
:
2583 metadata
= Monitoring
.components
[daemon_type
]
2584 r
+= metadata
.get('args', list())
2585 # set ip and port to bind to for nodeexporter,alertmanager,prometheus
2586 if daemon_type
not in ['grafana', 'loki', 'promtail']:
2588 port
= Monitoring
.port_map
[daemon_type
][0]
2589 if 'meta_json' in ctx
and ctx
.meta_json
:
2590 meta
= json
.loads(ctx
.meta_json
) or {}
2591 if 'ip' in meta
and meta
['ip']:
2593 if 'ports' in meta
and meta
['ports']:
2594 port
= meta
['ports'][0]
2595 r
+= [f
'--web.listen-address={ip}:{port}']
2596 if daemon_type
== 'prometheus':
2599 r
+= [f
'--web.external-url={scheme}://{host}:{port}']
2600 if daemon_type
== 'alertmanager':
2601 config
= get_parm(ctx
.config_json
)
2602 peers
= config
.get('peers', list()) # type: ignore
2604 r
+= ['--cluster.peer={}'.format(peer
)]
2605 # some alertmanager, by default, look elsewhere for a config
2606 r
+= ['--config.file=/etc/alertmanager/alertmanager.yml']
2607 if daemon_type
== 'promtail':
2608 r
+= ['--config.expand-env']
2609 if daemon_type
== 'node-exporter':
2610 r
+= ['--path.procfs=/host/proc',
2611 '--path.sysfs=/host/sys',
2612 '--path.rootfs=/rootfs']
2613 elif daemon_type
== NFSGanesha
.daemon_type
:
2614 nfs_ganesha
= NFSGanesha
.init(ctx
, fsid
, daemon_id
)
2615 r
+= nfs_ganesha
.get_daemon_args()
2616 elif daemon_type
== HAproxy
.daemon_type
:
2617 haproxy
= HAproxy
.init(ctx
, fsid
, daemon_id
)
2618 r
+= haproxy
.get_daemon_args()
2619 elif daemon_type
== CustomContainer
.daemon_type
:
2620 cc
= CustomContainer
.init(ctx
, fsid
, daemon_id
)
2621 r
.extend(cc
.get_daemon_args())
2622 elif daemon_type
== SNMPGateway
.daemon_type
:
2623 sc
= SNMPGateway
.init(ctx
, fsid
, daemon_id
)
2624 r
.extend(sc
.get_daemon_args())
2629 def create_daemon_dirs(ctx
, fsid
, daemon_type
, daemon_id
, uid
, gid
,
2630 config
=None, keyring
=None):
2631 # type: (CephadmContext, str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None
2632 data_dir
= make_data_dir(ctx
, fsid
, daemon_type
, daemon_id
, uid
=uid
, gid
=gid
)
2634 if daemon_type
in Ceph
.daemons
:
2635 make_log_dir(ctx
, fsid
, uid
=uid
, gid
=gid
)
2638 config_path
= os
.path
.join(data_dir
, 'config')
2639 with
open(config_path
, 'w') as f
:
2640 os
.fchown(f
.fileno(), uid
, gid
)
2641 os
.fchmod(f
.fileno(), 0o600)
2645 keyring_path
= os
.path
.join(data_dir
, 'keyring')
2646 with
open(keyring_path
, 'w') as f
:
2647 os
.fchmod(f
.fileno(), 0o600)
2648 os
.fchown(f
.fileno(), uid
, gid
)
2651 if daemon_type
in Monitoring
.components
.keys():
2652 config_json
: Dict
[str, Any
] = dict()
2653 if 'config_json' in ctx
:
2654 config_json
= get_parm(ctx
.config_json
)
2656 # Set up directories specific to the monitoring component
2659 if daemon_type
== 'prometheus':
2660 data_dir_root
= get_data_dir(fsid
, ctx
.data_dir
,
2661 daemon_type
, daemon_id
)
2662 config_dir
= 'etc/prometheus'
2663 makedirs(os
.path
.join(data_dir_root
, config_dir
), uid
, gid
, 0o755)
2664 makedirs(os
.path
.join(data_dir_root
, config_dir
, 'alerting'), uid
, gid
, 0o755)
2665 makedirs(os
.path
.join(data_dir_root
, 'data'), uid
, gid
, 0o755)
2666 recursive_chown(os
.path
.join(data_dir_root
, 'etc'), uid
, gid
)
2667 recursive_chown(os
.path
.join(data_dir_root
, 'data'), uid
, gid
)
2668 elif daemon_type
== 'grafana':
2669 data_dir_root
= get_data_dir(fsid
, ctx
.data_dir
,
2670 daemon_type
, daemon_id
)
2671 config_dir
= 'etc/grafana'
2672 makedirs(os
.path
.join(data_dir_root
, config_dir
), uid
, gid
, 0o755)
2673 makedirs(os
.path
.join(data_dir_root
, config_dir
, 'certs'), uid
, gid
, 0o755)
2674 makedirs(os
.path
.join(data_dir_root
, config_dir
, 'provisioning/datasources'), uid
, gid
, 0o755)
2675 makedirs(os
.path
.join(data_dir_root
, 'data'), uid
, gid
, 0o755)
2676 touch(os
.path
.join(data_dir_root
, 'data', 'grafana.db'), uid
, gid
)
2677 elif daemon_type
== 'alertmanager':
2678 data_dir_root
= get_data_dir(fsid
, ctx
.data_dir
,
2679 daemon_type
, daemon_id
)
2680 config_dir
= 'etc/alertmanager'
2681 makedirs(os
.path
.join(data_dir_root
, config_dir
), uid
, gid
, 0o755)
2682 makedirs(os
.path
.join(data_dir_root
, config_dir
, 'data'), uid
, gid
, 0o755)
2683 elif daemon_type
== 'promtail':
2684 data_dir_root
= get_data_dir(fsid
, ctx
.data_dir
,
2685 daemon_type
, daemon_id
)
2686 config_dir
= 'etc/promtail'
2687 makedirs(os
.path
.join(data_dir_root
, config_dir
), uid
, gid
, 0o755)
2688 makedirs(os
.path
.join(data_dir_root
, 'data'), uid
, gid
, 0o755)
2689 elif daemon_type
== 'loki':
2690 data_dir_root
= get_data_dir(fsid
, ctx
.data_dir
,
2691 daemon_type
, daemon_id
)
2692 config_dir
= 'etc/loki'
2693 makedirs(os
.path
.join(data_dir_root
, config_dir
), uid
, gid
, 0o755)
2694 makedirs(os
.path
.join(data_dir_root
, 'data'), uid
, gid
, 0o755)
2696 # populate the config directory for the component from the config-json
2697 if 'files' in config_json
:
2698 for fname
in config_json
['files']:
2699 content
= dict_get_join(config_json
['files'], fname
)
2700 if os
.path
.isabs(fname
):
2701 fpath
= os
.path
.join(data_dir_root
, fname
.lstrip(os
.path
.sep
))
2703 fpath
= os
.path
.join(data_dir_root
, config_dir
, fname
)
2704 with
open(fpath
, 'w', encoding
='utf-8') as f
:
2705 os
.fchown(f
.fileno(), uid
, gid
)
2706 os
.fchmod(f
.fileno(), 0o600)
2709 elif daemon_type
== NFSGanesha
.daemon_type
:
2710 nfs_ganesha
= NFSGanesha
.init(ctx
, fsid
, daemon_id
)
2711 nfs_ganesha
.create_daemon_dirs(data_dir
, uid
, gid
)
2713 elif daemon_type
== CephIscsi
.daemon_type
:
2714 ceph_iscsi
= CephIscsi
.init(ctx
, fsid
, daemon_id
)
2715 ceph_iscsi
.create_daemon_dirs(data_dir
, uid
, gid
)
2717 elif daemon_type
== HAproxy
.daemon_type
:
2718 haproxy
= HAproxy
.init(ctx
, fsid
, daemon_id
)
2719 haproxy
.create_daemon_dirs(data_dir
, uid
, gid
)
2721 elif daemon_type
== Keepalived
.daemon_type
:
2722 keepalived
= Keepalived
.init(ctx
, fsid
, daemon_id
)
2723 keepalived
.create_daemon_dirs(data_dir
, uid
, gid
)
2725 elif daemon_type
== CustomContainer
.daemon_type
:
2726 cc
= CustomContainer
.init(ctx
, fsid
, daemon_id
)
2727 cc
.create_daemon_dirs(data_dir
, uid
, gid
)
2729 elif daemon_type
== SNMPGateway
.daemon_type
:
2730 sg
= SNMPGateway
.init(ctx
, fsid
, daemon_id
)
2731 sg
.create_daemon_conf()
2733 _write_custom_conf_files(ctx
, daemon_type
, str(daemon_id
), fsid
, uid
, gid
)
def _write_custom_conf_files(ctx: CephadmContext, daemon_type: str, daemon_id: str, fsid: str, uid: int, gid: int) -> None:
    """Write the daemon's custom config files (from config-json) into
    <data_dir>/<fsid>/custom_config_files/<type>.<id>, owned uid:gid, mode 0600."""
    # mostly making this its own function to make unit testing easier
    if 'config_json' not in ctx or not ctx.config_json:
        return
    config_json = get_custom_config_files(ctx.config_json)
    custom_config_dir = os.path.join(ctx.data_dir, fsid, 'custom_config_files', f'{daemon_type}.{daemon_id}')
    if not os.path.exists(custom_config_dir):
        makedirs(custom_config_dir, uid, gid, 0o755)
    mandatory_keys = ['mount_path', 'content']
    for ccf in config_json['custom_config_files']:
        # entries missing either key are silently skipped
        if all(k in ccf for k in mandatory_keys):
            file_path = os.path.join(custom_config_dir, os.path.basename(ccf['mount_path']))
            with open(file_path, 'w+', encoding='utf-8') as f:
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)
                f.write(ccf['content'])
def get_parm(option: str) -> Dict[str, str]:
    """Parse a config-json option (inline JSON, file path, or '-') into a dict."""
    js = _get_config_json(option)
    # custom_config_files is a special field that may be in the config
    # dict. It is used for mounting custom config files into daemon's containers
    # and should be accessed through the "get_custom_config_files" function.
    # For get_parm we need to discard it.
    js.pop('custom_config_files', None)
    return js
def get_custom_config_files(option: str) -> Dict[str, List[Dict[str, str]]]:
    """Extract only the custom_config_files entry from a config-json option,
    always returning a dict with that key (possibly an empty list)."""
    js = _get_config_json(option)
    res: Dict[str, List[Dict[str, str]]] = {'custom_config_files': []}
    if 'custom_config_files' in js:
        res['custom_config_files'] = js['custom_config_files']
    return res
2772 def _get_config_json(option
: str) -> Dict
[str, Any
]:
2778 if cached_stdin
is not None:
2781 j
= sys
.stdin
.read()
2784 # inline json string
2785 if option
[0] == '{' and option
[-1] == '}':
2788 elif os
.path
.exists(option
):
2789 with
open(option
, 'r') as f
:
2792 raise Error('Config file {} not found'.format(option
))
2796 except ValueError as e
:
2797 raise Error('Invalid JSON in {}: {}'.format(option
, e
))
def get_config_and_keyring(ctx):
    # type: (CephadmContext) -> Tuple[Optional[str], Optional[str]]
    """Resolve the (config, keyring) text pair from ctx: config-json wins
    when it supplies both; otherwise read ctx.config / ctx.key / ctx.keyring."""
    # NOTE(review): initialization and file-read lines were dropped by
    # extraction; restored per upstream cephadm — verify.
    config = None
    keyring = None

    if 'config_json' in ctx and ctx.config_json:
        d = get_parm(ctx.config_json)
        config = d.get('config')
        keyring = d.get('keyring')
        if config and keyring:
            return config, keyring

    if 'config' in ctx and ctx.config:
        try:
            with open(ctx.config, 'r') as f:
                config = f.read()
        except FileNotFoundError as e:
            raise Error(e)

    if 'key' in ctx and ctx.key:
        keyring = '[%s]\n\tkey = %s\n' % (ctx.name, ctx.key)
    elif 'keyring' in ctx and ctx.keyring:
        try:
            with open(ctx.keyring, 'r') as f:
                keyring = f.read()
        except FileNotFoundError as e:
            raise Error(e)

    return config, keyring
def get_container_binds(ctx, fsid, daemon_type, daemon_id):
    # type: (CephadmContext, str, str, Union[int, str, None]) -> List[List[str]]
    """Return the list of extra bind-mount specs for this daemon type
    (only iscsi and custom containers define any)."""
    binds = list()

    if daemon_type == CephIscsi.daemon_type:
        binds.extend(CephIscsi.get_container_binds())
    elif daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        binds.extend(cc.get_container_binds(data_dir))

    return binds
2848 def get_container_mounts(ctx
, fsid
, daemon_type
, daemon_id
,
2850 # type: (CephadmContext, str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str]
2853 if daemon_type
in Ceph
.daemons
:
2855 run_path
= os
.path
.join('/var/run/ceph', fsid
)
2856 if os
.path
.exists(run_path
):
2857 mounts
[run_path
] = '/var/run/ceph:z'
2858 log_dir
= get_log_dir(fsid
, ctx
.log_dir
)
2859 mounts
[log_dir
] = '/var/log/ceph:z'
2860 crash_dir
= '/var/lib/ceph/%s/crash' % fsid
2861 if os
.path
.exists(crash_dir
):
2862 mounts
[crash_dir
] = '/var/lib/ceph/crash:z'
2863 if daemon_type
!= 'crash' and should_log_to_journald(ctx
):
2864 journald_sock_dir
= '/run/systemd/journal'
2865 mounts
[journald_sock_dir
] = journald_sock_dir
2867 if daemon_type
in Ceph
.daemons
and daemon_id
:
2868 data_dir
= get_data_dir(fsid
, ctx
.data_dir
, daemon_type
, daemon_id
)
2869 if daemon_type
== 'rgw':
2870 cdata_dir
= '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id
)
2872 cdata_dir
= '/var/lib/ceph/%s/ceph-%s' % (daemon_type
, daemon_id
)
2873 if daemon_type
!= 'crash':
2874 mounts
[data_dir
] = cdata_dir
+ ':z'
2876 mounts
[data_dir
+ '/config'] = '/etc/ceph/ceph.conf:z'
2877 if daemon_type
in ['rbd-mirror', 'cephfs-mirror', 'crash']:
2878 # these do not search for their keyrings in a data directory
2879 mounts
[data_dir
+ '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type
, daemon_id
)
2881 if daemon_type
in ['mon', 'osd', 'clusterless-ceph-volume']:
2882 mounts
['/dev'] = '/dev' # FIXME: narrow this down?
2883 mounts
['/run/udev'] = '/run/udev'
2884 if daemon_type
in ['osd', 'clusterless-ceph-volume']:
2885 mounts
['/sys'] = '/sys' # for numa.cc, pick_address, cgroups, ...
2886 mounts
['/run/lvm'] = '/run/lvm'
2887 mounts
['/run/lock/lvm'] = '/run/lock/lvm'
2888 if daemon_type
== 'osd':
2889 # selinux-policy in the container may not match the host.
2890 if HostFacts(ctx
).selinux_enabled
:
2891 selinux_folder
= '/var/lib/ceph/%s/selinux' % fsid
2892 if not os
.path
.exists(selinux_folder
):
2893 os
.makedirs(selinux_folder
, mode
=0o755)
2894 mounts
[selinux_folder
] = '/sys/fs/selinux:ro'
2895 mounts
['/'] = '/rootfs'
2898 if ctx
.shared_ceph_folder
: # make easy manager modules/ceph-volume development
2899 ceph_folder
= pathify(ctx
.shared_ceph_folder
)
2900 if os
.path
.exists(ceph_folder
):
2901 mounts
[ceph_folder
+ '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
2902 mounts
[ceph_folder
+ '/src/cephadm/cephadm'] = '/usr/sbin/cephadm'
2903 mounts
[ceph_folder
+ '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
2904 mounts
[ceph_folder
+ '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
2905 mounts
[ceph_folder
+ '/monitoring/ceph-mixin/dashboards_out'] = '/etc/grafana/dashboards/ceph-dashboard'
2906 mounts
[ceph_folder
+ '/monitoring/ceph-mixin/prometheus_alerts.yml'] = '/etc/prometheus/ceph/ceph_default_alerts.yml'
2908 logger
.error('{}{}{}'.format(termcolor
.red
,
2909 'Ceph shared source folder does not exist.',
2911 except AttributeError:
2914 if daemon_type
in Monitoring
.components
and daemon_id
:
2915 data_dir
= get_data_dir(fsid
, ctx
.data_dir
, daemon_type
, daemon_id
)
2916 log_dir
= get_log_dir(fsid
, ctx
.log_dir
)
2917 if daemon_type
== 'prometheus':
2918 mounts
[os
.path
.join(data_dir
, 'etc/prometheus')] = '/etc/prometheus:Z'
2919 mounts
[os
.path
.join(data_dir
, 'data')] = '/prometheus:Z'
2920 elif daemon_type
== 'loki':
2921 mounts
[os
.path
.join(data_dir
, 'etc/loki')] = '/etc/loki:Z'
2922 mounts
[os
.path
.join(data_dir
, 'data')] = '/loki:Z'
2923 elif daemon_type
== 'promtail':
2924 mounts
[os
.path
.join(data_dir
, 'etc/promtail')] = '/etc/promtail:Z'
2925 mounts
[log_dir
] = '/var/log/ceph:z'
2926 mounts
[os
.path
.join(data_dir
, 'data')] = '/promtail:Z'
2927 elif daemon_type
== 'node-exporter':
2928 mounts
['/proc'] = '/host/proc:ro'
2929 mounts
['/sys'] = '/host/sys:ro'
2930 mounts
['/'] = '/rootfs:ro'
2931 elif daemon_type
== 'grafana':
2932 mounts
[os
.path
.join(data_dir
, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
2933 mounts
[os
.path
.join(data_dir
, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
2934 mounts
[os
.path
.join(data_dir
, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
2935 mounts
[os
.path
.join(data_dir
, 'data/grafana.db')] = '/var/lib/grafana/grafana.db:Z'
2936 elif daemon_type
== 'alertmanager':
2937 mounts
[os
.path
.join(data_dir
, 'etc/alertmanager')] = '/etc/alertmanager:Z'
2939 if daemon_type
== NFSGanesha
.daemon_type
:
2941 data_dir
= get_data_dir(fsid
, ctx
.data_dir
, daemon_type
, daemon_id
)
2942 nfs_ganesha
= NFSGanesha
.init(ctx
, fsid
, daemon_id
)
2943 mounts
.update(nfs_ganesha
.get_container_mounts(data_dir
))
2945 if daemon_type
== HAproxy
.daemon_type
:
2947 data_dir
= get_data_dir(fsid
, ctx
.data_dir
, daemon_type
, daemon_id
)
2948 mounts
.update(HAproxy
.get_container_mounts(data_dir
))
2950 if daemon_type
== CephIscsi
.daemon_type
:
2952 data_dir
= get_data_dir(fsid
, ctx
.data_dir
, daemon_type
, daemon_id
)
2953 log_dir
= get_log_dir(fsid
, ctx
.log_dir
)
2954 mounts
.update(CephIscsi
.get_container_mounts(data_dir
, log_dir
))
2956 if daemon_type
== Keepalived
.daemon_type
:
2958 data_dir
= get_data_dir(fsid
, ctx
.data_dir
, daemon_type
, daemon_id
)
2959 mounts
.update(Keepalived
.get_container_mounts(data_dir
))
2961 if daemon_type
== CustomContainer
.daemon_type
:
2963 cc
= CustomContainer
.init(ctx
, fsid
, daemon_id
)
2964 data_dir
= get_data_dir(fsid
, ctx
.data_dir
, daemon_type
, daemon_id
)
2965 mounts
.update(cc
.get_container_mounts(data_dir
))
def get_ceph_volume_container(ctx: CephadmContext,
                              privileged: bool = True,
                              cname: str = '',
                              volume_mounts: Optional[Dict[str, str]] = None,
                              bind_mounts: Optional[List[List[str]]] = None,
                              args: Optional[List[str]] = None,
                              envs: Optional[List[str]] = None) -> 'CephContainer':
    """Build a CephContainer set up to run ceph-volume.

    Note: the upstream signature used mutable defaults ({} / []) for
    volume_mounts and args; replaced with None sentinels — same behavior
    for every caller, without the shared-mutable-default pitfall.
    """
    if volume_mounts is None:
        volume_mounts = {}
    if args is None:
        args = []
    if envs is None:
        envs = []
    envs.append('CEPH_VOLUME_SKIP_RESTORECON=yes')
    envs.append('CEPH_VOLUME_DEBUG=1')

    return CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/sbin/ceph-volume',
        args=args,
        volume_mounts=volume_mounts,
        bind_mounts=bind_mounts,
        envs=envs,
        privileged=privileged,
        cname=cname,
        memory_request=ctx.memory_request,
        memory_limit=ctx.memory_limit,
    )
2997 def get_container(ctx
: CephadmContext
,
2998 fsid
: str, daemon_type
: str, daemon_id
: Union
[int, str],
2999 privileged
: bool = False,
3000 ptrace
: bool = False,
3001 container_args
: Optional
[List
[str]] = None) -> 'CephContainer':
3002 entrypoint
: str = ''
3004 ceph_args
: List
[str] = []
3005 envs
: List
[str] = []
3006 host_network
: bool = True
3008 if daemon_type
in Ceph
.daemons
:
3009 envs
.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728')
3010 if container_args
is None:
3012 if daemon_type
in ['mon', 'osd']:
3013 # mon and osd need privileged in order for libudev to query devices
3015 if daemon_type
== 'rgw':
3016 entrypoint
= '/usr/bin/radosgw'
3017 name
= 'client.rgw.%s' % daemon_id
3018 elif daemon_type
== 'rbd-mirror':
3019 entrypoint
= '/usr/bin/rbd-mirror'
3020 name
= 'client.rbd-mirror.%s' % daemon_id
3021 elif daemon_type
== 'cephfs-mirror':
3022 entrypoint
= '/usr/bin/cephfs-mirror'
3023 name
= 'client.cephfs-mirror.%s' % daemon_id
3024 elif daemon_type
== 'crash':
3025 entrypoint
= '/usr/bin/ceph-crash'
3026 name
= 'client.crash.%s' % daemon_id
3027 elif daemon_type
in ['mon', 'mgr', 'mds', 'osd']:
3028 entrypoint
= '/usr/bin/ceph-' + daemon_type
3029 name
= '%s.%s' % (daemon_type
, daemon_id
)
3030 elif daemon_type
in Monitoring
.components
:
3032 elif daemon_type
== NFSGanesha
.daemon_type
:
3033 entrypoint
= NFSGanesha
.entrypoint
3034 name
= '%s.%s' % (daemon_type
, daemon_id
)
3035 envs
.extend(NFSGanesha
.get_container_envs())
3036 elif daemon_type
== HAproxy
.daemon_type
:
3037 name
= '%s.%s' % (daemon_type
, daemon_id
)
3038 container_args
.extend(['--user=root']) # haproxy 2.4 defaults to a different user
3039 elif daemon_type
== Keepalived
.daemon_type
:
3040 name
= '%s.%s' % (daemon_type
, daemon_id
)
3041 envs
.extend(Keepalived
.get_container_envs())
3042 container_args
.extend(['--cap-add=NET_ADMIN', '--cap-add=NET_RAW'])
3043 elif daemon_type
== CephIscsi
.daemon_type
:
3044 entrypoint
= CephIscsi
.entrypoint
3045 name
= '%s.%s' % (daemon_type
, daemon_id
)
3046 # So the container can modprobe iscsi_target_mod and have write perms
3047 # to configfs we need to make this a privileged container.
3049 elif daemon_type
== CustomContainer
.daemon_type
:
3050 cc
= CustomContainer
.init(ctx
, fsid
, daemon_id
)
3051 entrypoint
= cc
.entrypoint
3052 host_network
= False
3053 envs
.extend(cc
.get_container_envs())
3054 container_args
.extend(cc
.get_container_args())
3056 if daemon_type
in Monitoring
.components
:
3057 uid
, gid
= extract_uid_gid_monitoring(ctx
, daemon_type
)
3061 # FIXME: disable cpu/memory limits for the time being (not supported
3062 # by ubuntu 18.04 kernel!)
3064 container_args
.extend(monitoring_args
)
3065 if daemon_type
== 'node-exporter':
3066 # in order to support setting '--path.procfs=/host/proc','--path.sysfs=/host/sys',
3067 # '--path.rootfs=/rootfs' for node-exporter we need to disable selinux separation
3068 # between the node-exporter container and the host to avoid selinux denials
3069 container_args
.extend(['--security-opt', 'label=disable'])
3070 elif daemon_type
== 'crash':
3071 ceph_args
= ['-n', name
]
3072 elif daemon_type
in Ceph
.daemons
:
3073 ceph_args
= ['-n', name
, '-f']
3074 elif daemon_type
== SNMPGateway
.daemon_type
:
3075 sg
= SNMPGateway
.init(ctx
, fsid
, daemon_id
)
3076 container_args
.append(
3077 f
'--env-file={sg.conf_file_path}'
3080 # if using podman, set -d, --conmon-pidfile & --cidfile flags
3081 # so service can have Type=Forking
3082 if isinstance(ctx
.container_engine
, Podman
):
3083 runtime_dir
= '/run'
3084 container_args
.extend([
3085 '-d', '--log-driver', 'journald',
3087 runtime_dir
+ '/ceph-%s@%s.%s.service-pid' % (fsid
, daemon_type
, daemon_id
),
3089 runtime_dir
+ '/ceph-%s@%s.%s.service-cid' % (fsid
, daemon_type
, daemon_id
),
3091 if ctx
.container_engine
.version
>= CGROUPS_SPLIT_PODMAN_VERSION
:
3092 container_args
.append('--cgroups=split')
3094 return CephContainer
.for_daemon(
3097 daemon_type
=daemon_type
,
3098 daemon_id
=str(daemon_id
),
3099 entrypoint
=entrypoint
,
3100 args
=ceph_args
+ get_daemon_args(ctx
, fsid
, daemon_type
, daemon_id
),
3101 container_args
=container_args
,
3102 volume_mounts
=get_container_mounts(ctx
, fsid
, daemon_type
, daemon_id
),
3103 bind_mounts
=get_container_binds(ctx
, fsid
, daemon_type
, daemon_id
),
3105 privileged
=privileged
,
3107 host_network
=host_network
,
def extract_uid_gid(ctx, img='', file_path='/var/lib/ceph'):
    # type: (CephadmContext, str, Union[str, List[str]]) -> Tuple[int, int]
    """Determine the uid/gid owning *file_path* inside the container image
    by running `stat` in it; tries each path in turn when given a list."""
    # NOTE(review): the loop/branch skeleton was dropped by extraction;
    # restored per upstream cephadm — verify.
    if not img:
        img = ctx.image

    if isinstance(file_path, str):
        paths = [file_path]
    else:
        paths = file_path

    ex: Optional[Tuple[str, RuntimeError]] = None

    for fp in paths:
        try:
            out = CephContainer(
                ctx,
                image=img,
                entrypoint='stat',
                args=['-c', '%u %g', fp]
            ).run(verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
            uid, gid = out.split(' ')
            return int(uid), int(gid)
        except RuntimeError as e:
            ex = (fp, e)
    if ex:
        raise Error(f'Failed to extract uid/gid for path {ex[0]}: {ex[1]}')

    raise RuntimeError('uid/gid not found')
3142 def deploy_daemon(ctx
, fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
3143 config
=None, keyring
=None,
3147 # type: (CephadmContext, str, str, Union[int, str], Optional[CephContainer], int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None
3150 if any([port_in_use(ctx
, port
) for port
in ports
]):
3151 if daemon_type
== 'mgr':
3152 # non-fatal for mgr when we are in mgr_standby_modules=false, but we can't
3153 # tell whether that is the case here.
3155 f
"ceph-mgr TCP port(s) {','.join(map(str, ports))} already in use"
3158 raise Error("TCP Port(s) '{}' required for {} already in use".format(','.join(map(str, ports
)), daemon_type
))
3160 data_dir
= get_data_dir(fsid
, ctx
.data_dir
, daemon_type
, daemon_id
)
3161 if reconfig
and not os
.path
.exists(data_dir
):
3162 raise Error('cannot reconfig, data path %s does not exist' % data_dir
)
3163 if daemon_type
== 'mon' and not os
.path
.exists(data_dir
):
3167 tmp_keyring
= write_tmp(keyring
, uid
, gid
)
3170 tmp_config
= write_tmp(config
, uid
, gid
)
3173 create_daemon_dirs(ctx
, fsid
, daemon_type
, daemon_id
, uid
, gid
)
3174 mon_dir
= get_data_dir(fsid
, ctx
.data_dir
, 'mon', daemon_id
)
3175 log_dir
= get_log_dir(fsid
, ctx
.log_dir
)
3179 entrypoint
='/usr/bin/ceph-mon',
3182 '-i', str(daemon_id
),
3184 '-c', '/tmp/config',
3185 '--keyring', '/tmp/keyring',
3186 ] + get_daemon_args(ctx
, fsid
, 'mon', daemon_id
),
3188 log_dir
: '/var/log/ceph:z',
3189 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id
),
3190 tmp_keyring
.name
: '/tmp/keyring:z',
3191 tmp_config
.name
: '/tmp/config:z',
3196 with
open(mon_dir
+ '/config', 'w') as f
:
3197 os
.fchown(f
.fileno(), uid
, gid
)
3198 os
.fchmod(f
.fileno(), 0o600)
3201 # dirs, conf, keyring
3204 fsid
, daemon_type
, daemon_id
,
3209 if daemon_type
== CephadmAgent
.daemon_type
:
3210 if ctx
.config_json
== '-':
3211 config_js
= get_parm('-')
3213 config_js
= get_parm(ctx
.config_json
)
3214 assert isinstance(config_js
, dict)
3216 cephadm_agent
= CephadmAgent(ctx
, fsid
, daemon_id
)
3217 cephadm_agent
.deploy_daemon_unit(config_js
)
3220 deploy_daemon_units(ctx
, fsid
, uid
, gid
, daemon_type
, daemon_id
,
3221 c
, osd_fsid
=osd_fsid
, ports
=ports
)
3223 raise RuntimeError('attempting to deploy a daemon without a container image')
3225 if not os
.path
.exists(data_dir
+ '/unit.created'):
3226 with
open(data_dir
+ '/unit.created', 'w') as f
:
3227 os
.fchmod(f
.fileno(), 0o600)
3228 os
.fchown(f
.fileno(), uid
, gid
)
3229 f
.write('mtime is time the daemon deployment was created\n')
3231 with
open(data_dir
+ '/unit.configured', 'w') as f
:
3232 f
.write('mtime is time we were last configured\n')
3233 os
.fchmod(f
.fileno(), 0o600)
3234 os
.fchown(f
.fileno(), uid
, gid
)
3236 update_firewalld(ctx
, daemon_type
)
3238 # Open ports explicitly required for the daemon
3241 fw
.open_ports(ports
)
3244 if reconfig
and daemon_type
not in Ceph
.daemons
:
3245 # ceph daemons do not need a restart; others (presumably) do to pick
3247 call_throws(ctx
, ['systemctl', 'reset-failed',
3248 get_unit_name(fsid
, daemon_type
, daemon_id
)])
3249 call_throws(ctx
, ['systemctl', 'restart',
3250 get_unit_name(fsid
, daemon_type
, daemon_id
)])
def _write_container_cmd_to_bash(ctx, file_obj, container, comment=None, background=False):
    # type: (CephadmContext, IO[str], CephContainer, Optional[str], Optional[bool]) -> None
    """Emit shell lines into *file_obj* that remove any stale container
    (current and old names, plus podman --storage variants) and then run it."""
    if comment:
        # Sometimes adding a comment, especially if there are multiple containers in one
        # unit file, makes it easier to read and grok.
        file_obj.write('# ' + comment + '\n')
    # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
    file_obj.write('! ' + ' '.join(container.rm_cmd(old_cname=True)) + ' 2> /dev/null\n')
    file_obj.write('! ' + ' '.join(container.rm_cmd()) + ' 2> /dev/null\n')
    # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
    if isinstance(ctx.container_engine, Podman):
        file_obj.write(
            '! '
            + ' '.join([shlex.quote(a) for a in container.rm_cmd(storage=True)])
            + ' 2> /dev/null\n')
        file_obj.write(
            '! '
            + ' '.join([shlex.quote(a) for a in container.rm_cmd(old_cname=True, storage=True)])
            + ' 2> /dev/null\n')

    # container run command
    file_obj.write(
        ' '.join([shlex.quote(a) for a in container.run_cmd()])
        + (' &' if background else '') + '\n')
def clean_cgroup(ctx: CephadmContext, fsid: str, unit_name: str) -> None:
    """Remove a stale cgroup-v2 directory left behind for this unit."""
    # systemd may fail to cleanup cgroups from previous stopped unit, which will cause next "systemctl start" to fail.
    # see https://tracker.ceph.com/issues/50998

    CGROUPV2_PATH = Path('/sys/fs/cgroup')
    if not (CGROUPV2_PATH / 'system.slice').exists():
        # Only unified cgroup is affected, skip if not the case
        return

    slice_name = 'system-ceph\\x2d{}.slice'.format(fsid.replace('-', '\\x2d'))
    cg_path = CGROUPV2_PATH / 'system.slice' / slice_name / f'{unit_name}.service'
    if not cg_path.exists():
        return

    def cg_trim(path: Path) -> None:
        # depth-first removal: cgroup dirs can only be rmdir'd when empty
        for p in path.iterdir():
            if p.is_dir():
                cg_trim(p)
        path.rmdir()
    try:
        cg_trim(cg_path)
    except OSError:
        logger.warning(f'Failed to trim old cgroups {cg_path}')
def deploy_daemon_units(
    ctx: CephadmContext,
    fsid: str,
    uid: int,
    gid: int,
    daemon_type: str,
    daemon_id: Union[int, str],
    c: 'CephContainer',
    enable: bool = True,
    start: bool = True,
    osd_fsid: Optional[str] = None,
    ports: Optional[List[int]] = None,
) -> None:
    """Write the daemon's unit.run/unit.meta/unit.poststop/unit.stop/unit.image
    files, install the systemd unit, and optionally enable and start it.

    NOTE(review): reconstructed from a fragmented extraction with missing
    lines — verify against upstream cephadm (quincy) before relying on it.

    :param ctx: cephadm context (data_dir, unit_dir, meta_json, ...)
    :param fsid: cluster fsid
    :param uid: owner uid for /var/run/ceph and OSD device chown
    :param gid: owner gid for /var/run/ceph and OSD device chown
    :param daemon_type: e.g. 'mon', 'osd', 'iscsi', ...
    :param daemon_id: daemon identifier within the cluster
    :param c: the main daemon container
    :param enable: run 'systemctl enable' on the unit
    :param start: run 'systemctl start' on the unit
    :param osd_fsid: OSD uuid; required when daemon_type == 'osd'
    :param ports: ports recorded in unit.meta for this daemon
    """
    # cmd
    data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    with open(data_dir + '/unit.run.new', 'w') as f, \
            open(data_dir + '/unit.meta.new', 'w') as metaf:
        f.write('set -e\n')

        if daemon_type in Ceph.daemons:
            install_path = find_program('install')
            f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))

        # pre-start cmd(s)
        if daemon_type == 'osd':
            # osds have a pre-start step
            assert osd_fsid
            simple_fn = os.path.join('/etc/ceph/osd',
                                     '%s-%s.json.adopted-by-cephadm' % (daemon_id, osd_fsid))
            if os.path.exists(simple_fn):
                f.write('# Simple OSDs need chown on startup:\n')
                for n in ['block', 'block.db', 'block.wal']:
                    p = os.path.join(data_dir, n)
                    f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
            else:
                # if ceph-volume does not support 'ceph-volume activate', we must
                # do 'ceph-volume lvm activate'.
                test_cv = get_ceph_volume_container(
                    ctx,
                    args=['activate', '--bad-option'],
                    volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                    bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                    cname='ceph-%s-%s.%s-activate-test' % (fsid, daemon_type, daemon_id),
                )
                out, err, ret = call(ctx, test_cv.run_cmd(), verbosity=CallVerbosity.SILENT)
                #  bad: ceph-volume: error: unrecognized arguments: activate --bad-option
                # good: ceph-volume: error: unrecognized arguments: --bad-option
                if 'unrecognized arguments: activate' in err:
                    # older ceph-volume without top-level activate or --no-tmpfs
                    cmd = [
                        'lvm', 'activate',
                        str(daemon_id), osd_fsid,
                        '--no-systemd',
                    ]
                else:
                    cmd = [
                        'activate',
                        '--osd-id', str(daemon_id),
                        '--osd-uuid', osd_fsid,
                        '--no-systemd',
                        '--no-tmpfs',
                    ]

                prestart = get_ceph_volume_container(
                    ctx,
                    args=cmd,
                    volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                    bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                    cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
                )
                _write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate')
        elif daemon_type == CephIscsi.daemon_type:
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
            ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            # tcmu-runner runs as a second, backgrounded container
            _write_container_cmd_to_bash(ctx, f, tcmu_container, 'iscsi tcmu-runner container', background=True)

        _write_container_cmd_to_bash(ctx, f, c, '%s.%s' % (daemon_type, str(daemon_id)))

        # some metadata about the deploy
        meta: Dict[str, Any] = {}
        if 'meta_json' in ctx and ctx.meta_json:
            meta = json.loads(ctx.meta_json) or {}
        meta.update({
            'memory_request': int(ctx.memory_request) if ctx.memory_request else None,
            'memory_limit': int(ctx.memory_limit) if ctx.memory_limit else None,
        })
        if not meta.get('ports'):
            meta['ports'] = ports
        metaf.write(json.dumps(meta, indent=4) + '\n')

        # write-then-rename so readers never see a partial file
        os.fchmod(f.fileno(), 0o600)
        os.fchmod(metaf.fileno(), 0o600)
        os.rename(data_dir + '/unit.run.new',
                  data_dir + '/unit.run')
        os.rename(data_dir + '/unit.meta.new',
                  data_dir + '/unit.meta')

    # post-stop command(s)
    with open(data_dir + '/unit.poststop.new', 'w') as f:
        if daemon_type == 'osd':
            assert osd_fsid
            poststop = get_ceph_volume_container(
                ctx,
                args=[
                    'lvm', 'deactivate',
                    str(daemon_id), osd_fsid,
                ],
                volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type,
                                                    daemon_id),
            )
            _write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd')
        elif daemon_type == CephIscsi.daemon_type:
            # make sure we also stop the tcmu container
            ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            f.write('! ' + ' '.join(tcmu_container.stop_cmd()) + '\n')
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
        os.fchmod(f.fileno(), 0o600)
        os.rename(data_dir + '/unit.poststop.new',
                  data_dir + '/unit.poststop')

    # stop command(s)
    with open(data_dir + '/unit.stop.new', 'w') as f:
        # following generated script basically checks if the container exists
        # before stopping it. Exit code will be success either if it doesn't
        # exist or if it exists and is stopped successfully.
        container_exists = f'{ctx.container_engine.path} inspect %s &>/dev/null'
        f.write(f'! {container_exists % c.old_cname} || {" ".join(c.stop_cmd(old_cname=True))} \n')
        f.write(f'! {container_exists % c.cname} || {" ".join(c.stop_cmd())} \n')
        os.fchmod(f.fileno(), 0o600)
        os.rename(data_dir + '/unit.stop.new',
                  data_dir + '/unit.stop')

    with open(data_dir + '/unit.image.new', 'w') as f:
        f.write(c.image + '\n')
        os.fchmod(f.fileno(), 0o600)
        os.rename(data_dir + '/unit.image.new',
                  data_dir + '/unit.image')

    # sysctl
    install_sysctl(ctx, fsid, daemon_type)

    # systemd
    install_base_units(ctx, fsid)
    unit = get_unit_file(ctx, fsid)
    unit_file = 'ceph-%s@.service' % (fsid)
    with open(ctx.unit_dir + '/' + unit_file + '.new', 'w') as f:
        f.write(unit)
        os.rename(ctx.unit_dir + '/' + unit_file + '.new',
                  ctx.unit_dir + '/' + unit_file)
    call_throws(ctx, ['systemctl', 'daemon-reload'])

    unit_name = get_unit_name(fsid, daemon_type, daemon_id)
    call(ctx, ['systemctl', 'stop', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'reset-failed', unit_name],
         verbosity=CallVerbosity.DEBUG)
    if enable:
        call_throws(ctx, ['systemctl', 'enable', unit_name])
    if start:
        # stale cgroups from a previous run can make 'systemctl start' fail
        clean_cgroup(ctx, fsid, unit_name)
        call_throws(ctx, ['systemctl', 'start', unit_name])
class Firewalld(object):
    """Thin wrapper over ``firewall-cmd`` for enabling services and ports
    in the current zone. All operations are no-ops when firewalld is not
    installed, not enabled, or not running.

    NOTE(review): reconstructed from a fragmented extraction — verify
    against upstream cephadm.
    """

    def __init__(self, ctx):
        # type: (CephadmContext) -> None
        self.ctx = ctx
        # probe once; every public method checks self.available first
        self.available = self.check()

    def check(self):
        # type: () -> bool
        """Return True iff firewall-cmd exists and firewalld.service is
        enabled and running."""
        self.cmd = find_executable('firewall-cmd')
        if not self.cmd:
            logger.debug('firewalld does not appear to be present')
            return False
        (enabled, state, _) = check_unit(self.ctx, 'firewalld.service')
        if not enabled:
            logger.debug('firewalld.service is not enabled')
            return False
        if state != 'running':
            logger.debug('firewalld.service is not running')
            return False

        logger.info('firewalld ready')
        return True

    def enable_service_for(self, daemon_type):
        # type: (str) -> None
        """Enable the firewalld service that corresponds to a ceph daemon
        type ('ceph-mon' for mon, 'ceph' for mgr/mds/osd, 'nfs' for
        NFSGanesha); other daemon types are ignored."""
        if not self.available:
            logger.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type)
            return

        if daemon_type == 'mon':
            svc = 'ceph-mon'
        elif daemon_type in ['mgr', 'mds', 'osd']:
            svc = 'ceph'
        elif daemon_type == NFSGanesha.daemon_type:
            svc = 'nfs'
        else:
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        # query first so we only mutate (and log) when needed
        out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG)
        if ret:
            logger.info('Enabling firewalld service %s in current zone...' % svc)
            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-service', svc])
            if ret:
                raise RuntimeError(
                    'unable to add service %s to current zone: %s' % (svc, err))
        else:
            logger.debug('firewalld service %s is enabled in current zone' % svc)

    def open_ports(self, fw_ports):
        # type: (List[int]) -> None
        """Permanently open the given TCP ports in the current zone."""
        if not self.available:
            logger.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports)
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        for port in fw_ports:
            tcp_port = str(port) + '/tcp'
            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
            if ret:
                logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
                out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-port', tcp_port])
                if ret:
                    raise RuntimeError('unable to add port %s to current zone: %s' %
                                       (tcp_port, err))
            else:
                logger.debug('firewalld port %s is enabled in current zone' % tcp_port)

    def close_ports(self, fw_ports):
        # type: (List[int]) -> None
        """Permanently close the given TCP ports in the current zone."""
        if not self.available:
            logger.debug('Not possible to close ports <%s>. firewalld.service is not available' % fw_ports)
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        for port in fw_ports:
            tcp_port = str(port) + '/tcp'
            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
            if not ret:
                # port is currently open -> remove it
                logger.info('Disabling port %s in current zone...' % tcp_port)
                out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--remove-port', tcp_port])
                if ret:
                    raise RuntimeError('unable to remove port %s from current zone: %s' %
                                       (tcp_port, err))
                logger.info(f'Port {tcp_port} disabled')
            else:
                logger.info(f'firewalld port {tcp_port} already closed')

    def apply_rules(self):
        # type: () -> None
        """Reload firewalld so '--permanent' changes take effect."""
        if not self.available:
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        call_throws(self.ctx, [self.cmd, '--reload'])
def update_firewalld(ctx, daemon_type):
    # type: (CephadmContext, str) -> None
    """Open the firewalld service for this daemon type and reload rules,
    unless the context asks to skip firewalld entirely."""
    if 'skip_firewalld' in ctx and ctx.skip_firewalld:
        return
    firewall = Firewalld(ctx)
    firewall.enable_service_for(daemon_type)
    firewall.apply_rules()
def install_sysctl(ctx: CephadmContext, fsid: str, daemon_type: str) -> None:
    """
    Set up sysctl settings for the given daemon type.

    Writes a 90-ceph-{fsid}-{daemon_type}.conf file into ctx.sysctl_dir and
    applies it with 'sysctl --system'. Daemon types without settings are a
    no-op.

    NOTE(review): reconstructed from a fragmented extraction — verify
    against upstream cephadm.
    """
    def _write(conf: Path, lines: List[str]) -> None:
        # wrap the settings with a provenance header and trailing newline
        lines = [
            '# created by cephadm',
            '',
        ] + lines + [
            '',
        ]
        with open(conf, 'w') as f:
            f.write('\n'.join(lines))

    conf = Path(ctx.sysctl_dir).joinpath(f'90-ceph-{fsid}-{daemon_type}.conf')
    lines: Optional[List] = None

    if daemon_type == 'osd':
        lines = OSD.get_sysctl_settings()
    elif daemon_type == 'haproxy':
        lines = HAproxy.get_sysctl_settings()
    elif daemon_type == 'keepalived':
        lines = Keepalived.get_sysctl_settings()

    # apply the sysctl settings
    if lines:
        Path(ctx.sysctl_dir).mkdir(mode=0o755, exist_ok=True)
        _write(conf, lines)
        call_throws(ctx, ['sysctl', '--system'])
def migrate_sysctl_dir(ctx: CephadmContext, fsid: str) -> None:
    """
    Cephadm once used '/usr/lib/sysctl.d' for storing sysctl configuration.
    This moves it to '/etc/sysctl.d'.

    Files that already exist at the destination are deleted from the
    deprecated location; a summary of the migration outcome is logged.

    NOTE(review): reconstructed from a fragmented extraction — verify
    against upstream cephadm.
    """
    deprecated_location: str = '/usr/lib/sysctl.d'
    deprecated_confs: List[str] = glob(f'{deprecated_location}/90-ceph-{fsid}-*.conf')
    if not deprecated_confs:
        return

    file_count: int = len(deprecated_confs)
    logger.info(f'Found sysctl {file_count} files in deprecated location {deprecated_location}. Starting Migration.')
    for conf in deprecated_confs:
        try:
            shutil.move(conf, ctx.sysctl_dir)
            file_count -= 1
        except shutil.Error as err:
            if str(err).endswith('already exists'):
                # already migrated previously; just remove the stale copy
                logger.warning(f'Destination file already exists. Deleting {conf}.')
                try:
                    os.unlink(conf)
                    file_count -= 1
                except OSError as del_err:
                    logger.warning(f'Could not remove {conf}: {del_err}.')
            else:
                logger.warning(f'Could not move {conf} from {deprecated_location} to {ctx.sysctl_dir}: {err}')

    # Log successful migration
    if file_count == 0:
        logger.info(f'Successfully migrated sysctl config to {ctx.sysctl_dir}.')
        return

    # Log partially successful / unsuccessful migration
    files_processed: int = len(deprecated_confs)
    if file_count < files_processed:
        status: str = f'partially successful (failed {file_count}/{files_processed})'
    elif file_count == files_processed:
        status = 'unsuccessful'
    logger.warning(f'Migration of sysctl configuration {status}. You may want to perform a migration manually.')
def install_base_units(ctx, fsid):
    # type: (CephadmContext, str) -> None
    """
    Set up ceph.target and ceph-$fsid.target units, plus the per-cluster
    logrotate configuration.

    NOTE(review): reconstructed from a fragmented extraction (unit-file
    text and guards were dropped) — verify against upstream cephadm.
    """
    # global unit
    existed = os.path.exists(ctx.unit_dir + '/ceph.target')
    with open(ctx.unit_dir + '/ceph.target.new', 'w') as f:
        f.write('[Unit]\n'
                'Description=All Ceph clusters and services\n'
                '\n'
                '[Install]\n'
                'WantedBy=multi-user.target\n')
    os.rename(ctx.unit_dir + '/ceph.target.new',
              ctx.unit_dir + '/ceph.target')
    if not existed:
        # we disable before enable in case a different ceph.target
        # (from the traditional package) is present; while newer
        # systemd is smart enough to disable the old
        # (/lib/systemd/...) and enable the new (/etc/systemd/...),
        # some older versions of systemd error out with EEXIST.
        call_throws(ctx, ['systemctl', 'disable', 'ceph.target'])
        call_throws(ctx, ['systemctl', 'enable', 'ceph.target'])
        call_throws(ctx, ['systemctl', 'start', 'ceph.target'])

    # cluster unit
    existed = os.path.exists(ctx.unit_dir + '/ceph-%s.target' % fsid)
    with open(ctx.unit_dir + '/ceph-%s.target.new' % fsid, 'w') as f:
        f.write(
            '[Unit]\n'
            'Description=Ceph cluster {fsid}\n'
            'PartOf=ceph.target\n'
            'Before=ceph.target\n'
            '\n'
            '[Install]\n'
            'WantedBy=multi-user.target ceph.target\n'.format(
                fsid=fsid)
        )
    os.rename(ctx.unit_dir + '/ceph-%s.target.new' % fsid,
              ctx.unit_dir + '/ceph-%s.target' % fsid)
    if not existed:
        call_throws(ctx, ['systemctl', 'enable', 'ceph-%s.target' % fsid])
        call_throws(ctx, ['systemctl', 'start', 'ceph-%s.target' % fsid])

    # logrotate for the cluster
    with open(ctx.logrotate_dir + '/ceph-%s' % fsid, 'w') as f:
        # This is a bit sloppy in that the killall/pkill will touch all ceph daemons
        # in all containers, but I don't see an elegant way to send SIGHUP *just* to
        # the daemons for this cluster.  (1) systemd kill -s will get the signal to
        # podman, but podman will exit.  (2) podman kill will get the signal to the
        # first child (bash), but that isn't the ceph daemon.  This is simpler and
        # should be harmless.
        f.write("""# created by cephadm
/var/log/ceph/%s/*.log {
    rotate 7
    daily
    compress
    sharedscripts
    postrotate
        killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror cephfs-mirror || pkill -1 -x 'ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror|cephfs-mirror' || true
    endscript
    missingok
    notifempty
    su root root
}
""" % fsid)
def get_unit_file(ctx, fsid):
    # type: (CephadmContext, str) -> str
    """Return the text of the templated ceph-$fsid@.service systemd unit.

    Podman gets pid/cid-file handling (and cgroup delegation on new enough
    versions); docker gets an ordering/requirement on docker.service.

    NOTE(review): unit-file template reconstructed from a fragmented
    extraction — verify against upstream cephadm.
    """
    extra_args = ''
    if isinstance(ctx.container_engine, Podman):
        extra_args = ('ExecStartPre=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
                      'ExecStopPost=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
                      'Type=forking\n'
                      'PIDFile=%t/%n-pid\n')
        if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION:
            extra_args += 'Delegate=yes\n'

    docker = isinstance(ctx.container_engine, Docker)
    u = """# generated by cephadm
[Unit]
Description=Ceph %i for {fsid}

# According to:
#   http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
# these can be removed once ceph-mon will dynamically change network
# configuration.
After=network-online.target local-fs.target time-sync.target{docker_after}
Wants=network-online.target local-fs.target time-sync.target
{docker_requires}
PartOf=ceph-{fsid}.target
Before=ceph-{fsid}.target

[Service]
LimitNOFILE=1048576
LimitNPROC=1048576
EnvironmentFile=-/etc/environment
ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
ExecStop=-/bin/bash -c 'bash {data_dir}/{fsid}/%i/unit.stop'
ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
KillMode=none
Restart=on-failure
RestartSec=10s
TimeoutStartSec=120
TimeoutStopSec=120
StartLimitInterval=30min
StartLimitBurst=5
{extra_args}
[Install]
WantedBy=ceph-{fsid}.target
""".format(fsid=fsid,
           data_dir=ctx.data_dir,
           extra_args=extra_args,
           # if docker, we depend on docker.service
           docker_after=' docker.service' if docker else '',
           docker_requires='Requires=docker.service\n' if docker else '')

    return u
3782 ##################################
class CephContainer:
    """Builds container-engine command lines (run/shell/exec/rm/stop) for a
    single ceph container.

    NOTE(review): reconstructed from a fragmented extraction with missing
    lines — verify against upstream cephadm. Mutable default arguments
    (args/volume_mounts) are kept for interface fidelity with upstream;
    they are never mutated here.
    """

    def __init__(self,
                 ctx: CephadmContext,
                 image: str,
                 entrypoint: str,
                 args: List[str] = [],
                 volume_mounts: Dict[str, str] = {},
                 cname: str = '',
                 container_args: List[str] = [],
                 envs: Optional[List[str]] = None,
                 privileged: bool = False,
                 ptrace: bool = False,
                 bind_mounts: Optional[List[List[str]]] = None,
                 init: Optional[bool] = None,
                 host_network: bool = True,
                 memory_request: Optional[str] = None,
                 memory_limit: Optional[str] = None,
                 ) -> None:
        self.ctx = ctx
        self.image = image
        self.entrypoint = entrypoint
        self.args = args
        self.volume_mounts = volume_mounts
        self.cname = cname  # stored via the property setter into _cname
        self.container_args = container_args
        self.envs = envs
        self.privileged = privileged
        self.ptrace = ptrace
        self.bind_mounts = bind_mounts if bind_mounts else []
        self.init = init if init else ctx.container_init
        self.host_network = host_network
        self.memory_request = memory_request
        self.memory_limit = memory_limit

    @classmethod
    def for_daemon(cls,
                   ctx: CephadmContext,
                   fsid: str,
                   daemon_type: str,
                   daemon_id: str,
                   entrypoint: str,
                   args: List[str] = [],
                   volume_mounts: Dict[str, str] = {},
                   container_args: List[str] = [],
                   envs: Optional[List[str]] = None,
                   privileged: bool = False,
                   ptrace: bool = False,
                   bind_mounts: Optional[List[List[str]]] = None,
                   init: Optional[bool] = None,
                   host_network: bool = True,
                   memory_request: Optional[str] = None,
                   memory_limit: Optional[str] = None,
                   ) -> 'CephContainer':
        """Alternate constructor: derives the container name from the
        daemon identity and uses the context's configured image."""
        return cls(
            ctx,
            image=ctx.image,
            entrypoint=entrypoint,
            args=args,
            volume_mounts=volume_mounts,
            cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id),
            container_args=container_args,
            envs=envs,
            privileged=privileged,
            ptrace=ptrace,
            bind_mounts=bind_mounts,
            init=init,
            host_network=host_network,
            memory_request=memory_request,
            memory_limit=memory_limit,
        )

    @property
    def cname(self) -> str:
        """
        podman adds the current container name to the /etc/hosts
        file. Turns out, python's `socket.getfqdn()` differs from
        `hostname -f`, when we have the container names containing
        dots in it, e.g.:

            # podman run --name foo.bar.baz.com ceph/ceph /bin/bash
            [root@sebastians-laptop /]# cat /etc/hosts
            127.0.1.1   sebastians-laptop foo.bar.baz.com
            [root@sebastians-laptop /]# hostname -f
            sebastians-laptop
            [root@sebastians-laptop /]# python3 -c 'import socket; print(socket.getfqdn())'
            foo.bar.baz.com

        Fascinatingly, this doesn't happen when using dashes.
        """
        return self._cname.replace('.', '-')

    @cname.setter
    def cname(self, val: str) -> None:
        self._cname = val

    @property
    def old_cname(self) -> str:
        # the original (dotted) name, kept for stopping/removing containers
        # created before the dot->dash rename
        return self._cname

    def run_cmd(self) -> List[str]:
        """Build the full 'run' command line for this container."""
        cmd_args: List[str] = [
            str(self.ctx.container_engine.path),
            'run',
            '--rm',
            '--ipc=host',
            # some containers (ahem, haproxy) override this, but we want a fast
            # shutdown always (and, more importantly, a successful exit even if we
            # fall back to SIGKILL).
            '--stop-signal=SIGTERM',
        ]

        if isinstance(self.ctx.container_engine, Podman):
            if os.path.exists('/etc/ceph/podman-auth.json'):
                cmd_args.append('--authfile=/etc/ceph/podman-auth.json')

        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        vols: List[str] = []
        binds: List[str] = []

        if self.memory_request:
            cmd_args.extend(['-e', 'POD_MEMORY_REQUEST', str(self.memory_request)])
        if self.memory_limit:
            cmd_args.extend(['-e', 'POD_MEMORY_LIMIT', str(self.memory_limit)])
            cmd_args.extend(['--memory', str(self.memory_limit)])

        if self.host_network:
            cmd_args.append('--net=host')
        if self.entrypoint:
            cmd_args.extend(['--entrypoint', self.entrypoint])
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk'])
        if self.ptrace and not self.privileged:
            # if privileged, the SYS_PTRACE cap is already added
            # in addition, --cap-add and --privileged are mutually
            # exclusive since podman >= 2.0
            cmd_args.append('--cap-add=SYS_PTRACE')
        if self.init:
            cmd_args.append('--init')
            envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
        if self.cname:
            cmd_args.extend(['--name', self.cname])
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        binds = sum([['--mount', '{}'.format(','.join(bind))]
                     for bind in self.bind_mounts], [])

        return \
            cmd_args + self.container_args + \
            envs + vols + binds + \
            [self.image] + self.args  # type: ignore

    def shell_cmd(self, cmd: List[str]) -> List[str]:
        """Build a one-off 'run' command line that executes ``cmd`` with
        cmd[0] as the entrypoint."""
        cmd_args: List[str] = [
            str(self.ctx.container_engine.path),
            'run',
            '--rm',
            '--ipc=host',
        ]
        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        vols: List[str] = []
        binds: List[str] = []

        if self.host_network:
            cmd_args.append('--net=host')
        if self.ctx.no_hosts:
            cmd_args.append('--no-hosts')
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk'])
        if self.init:
            cmd_args.append('--init')
            envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        binds = sum([['--mount', '{}'.format(','.join(bind))]
                     for bind in self.bind_mounts], [])

        return cmd_args + self.container_args + envs + vols + binds + [
            '--entrypoint', cmd[0],
            self.image,
        ] + cmd[1:]

    def exec_cmd(self, cmd):
        # type: (List[str]) -> List[str]
        """Build an 'exec' command line against the running container."""
        cname = get_running_container_name(self.ctx, self)
        if not cname:
            raise Error('unable to find container "{}"'.format(self.cname))
        return [
            str(self.ctx.container_engine.path),
            'exec',
        ] + self.container_args + [
            cname,
        ] + cmd

    def rm_cmd(self, old_cname: bool = False, storage: bool = False) -> List[str]:
        """Build an 'rm -f' command line; ``storage`` adds podman's
        --storage flag, ``old_cname`` targets the pre-rename name."""
        ret = [
            str(self.ctx.container_engine.path),
            'rm', '-f',
        ]
        if storage:
            ret.append('--storage')
        if old_cname:
            ret.append(self.old_cname)
        else:
            ret.append(self.cname)
        return ret

    def stop_cmd(self, old_cname: bool = False) -> List[str]:
        """Build a 'stop' command line for this container."""
        ret = [
            str(self.ctx.container_engine.path),
            'stop', self.old_cname if old_cname else self.cname,
        ]
        return ret

    def run(self, timeout=DEFAULT_TIMEOUT, verbosity=CallVerbosity.VERBOSE_ON_FAILURE):
        # type: (Optional[int], CallVerbosity) -> str
        """Run the container and return its stdout; raises on failure."""
        out, _, _ = call_throws(self.ctx, self.run_cmd(),
                                desc=self.entrypoint, timeout=timeout, verbosity=verbosity)
        return out
4030 #####################################
class MgrListener(Thread):
    """TLS server thread through which the mgr pushes config/ack messages
    to the cephadm agent. Messages are length-prefixed (10 ASCII digits)
    JSON payloads over a client-cert-verified socket.

    NOTE(review): reconstructed from a fragmented extraction — verify
    against upstream cephadm.
    """

    def __init__(self, agent: 'CephadmAgent') -> None:
        self.agent = agent
        # polled by run(); set via shutdown()
        self.stop = False
        super(MgrListener, self).__init__(target=self.run)

    def run(self) -> None:
        listenSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        listenSocket.bind(('0.0.0.0', int(self.agent.listener_port)))
        # timeout so the accept loop can notice self.stop periodically
        listenSocket.settimeout(60)
        listenSocket.listen(1)
        ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
        # require and verify the mgr's client certificate
        ssl_ctx.verify_mode = ssl.CERT_REQUIRED
        ssl_ctx.load_cert_chain(self.agent.listener_cert_path, self.agent.listener_key_path)
        ssl_ctx.load_verify_locations(self.agent.ca_path)
        secureListenSocket = ssl_ctx.wrap_socket(listenSocket, server_side=True)
        while not self.stop:
            try:
                try:
                    conn, _ = secureListenSocket.accept()
                except socket.timeout:
                    continue
                try:
                    # first 10 bytes: decimal length of the JSON payload
                    length: int = int(conn.recv(10).decode())
                except Exception as e:
                    err_str = f'Failed to extract length of payload from message: {e}'
                    conn.send(err_str.encode())
                    logger.error(err_str)
                    continue
                payload = conn.recv(length).decode()
                try:
                    data: Dict[Any, Any] = json.loads(payload)
                    self.handle_json_payload(data)
                except Exception as e:
                    err_str = f'Failed to extract json payload from message: {e}'
                    conn.send(err_str.encode())
                    logger.error(err_str)
                else:
                    conn.send(b'ACK')
                    if 'config' in data:
                        self.agent.wakeup()
                    self.agent.ls_gatherer.wakeup()
                    self.agent.volume_gatherer.wakeup()
                    logger.debug(f'Got mgr message {data}')
            except Exception as e:
                logger.error(f'Mgr Listener encountered exception: {e}')

    def shutdown(self) -> None:
        # causes run() to exit after the current accept timeout
        self.stop = True

    def handle_json_payload(self, data: Dict[Any, Any]) -> None:
        """Record the mgr's counter and, if present, write the pushed config
        files (restricted perms, write-then-rename) and reload settings."""
        self.agent.ack = int(data['counter'])
        if 'config' in data:
            logger.info('Received new config from mgr')
            config = data['config']
            for filename in config:
                if filename in self.agent.required_files:
                    file_path = os.path.join(self.agent.daemon_dir, filename)
                    with open(os.open(file_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
                        f.write(config[filename])
                        os.rename(file_path + '.new', file_path)
            self.agent.pull_conf_settings()
4099 class CephadmAgent():
4101 daemon_type
= 'agent'
4114 def __init__(self
, ctx
: CephadmContext
, fsid
: str, daemon_id
: Union
[int, str] = ''):
4117 self
.daemon_id
= daemon_id
4118 self
.starting_port
= 14873
4120 self
.target_port
= ''
4122 self
.daemon_dir
= os
.path
.join(ctx
.data_dir
, self
.fsid
, f
'{self.daemon_type}.{self.daemon_id}')
4123 self
.config_path
= os
.path
.join(self
.daemon_dir
, 'agent.json')
4124 self
.keyring_path
= os
.path
.join(self
.daemon_dir
, 'keyring')
4125 self
.ca_path
= os
.path
.join(self
.daemon_dir
, 'root_cert.pem')
4126 self
.listener_cert_path
= os
.path
.join(self
.daemon_dir
, 'listener.crt')
4127 self
.listener_key_path
= os
.path
.join(self
.daemon_dir
, 'listener.key')
4128 self
.listener_port
= ''
4130 self
.event
= Event()
4131 self
.mgr_listener
= MgrListener(self
)
4132 self
.ls_gatherer
= AgentGatherer(self
, lambda: self
._get
_ls
(), 'Ls')
4133 self
.volume_gatherer
= AgentGatherer(self
, lambda: self
._ceph
_volume
(enhanced
=False), 'Volume')
4134 self
.device_enhanced_scan
= False
4135 self
.recent_iteration_run_times
: List
[float] = [0.0, 0.0, 0.0]
4136 self
.recent_iteration_index
: int = 0
4137 self
.cached_ls_values
: Dict
[str, Dict
[str, str]] = {}
4139 def validate(self
, config
: Dict
[str, str] = {}) -> None:
4140 # check for the required files
4141 for fname
in self
.required_files
:
4142 if fname
not in config
:
4143 raise Error('required file missing from config: %s' % fname
)
4145 def deploy_daemon_unit(self
, config
: Dict
[str, str] = {}) -> None:
4147 raise Error('Agent needs a config')
4148 assert isinstance(config
, dict)
4149 self
.validate(config
)
4151 # Create the required config files in the daemons dir, with restricted permissions
4152 for filename
in config
:
4153 if filename
in self
.required_files
:
4154 file_path
= os
.path
.join(self
.daemon_dir
, filename
)
4155 with
open(os
.open(file_path
+ '.new', os
.O_CREAT | os
.O_WRONLY
, 0o600), 'w') as f
:
4156 f
.write(config
[filename
])
4157 os
.rename(file_path
+ '.new', file_path
)
4159 unit_run_path
= os
.path
.join(self
.daemon_dir
, 'unit.run')
4160 with
open(os
.open(unit_run_path
+ '.new', os
.O_CREAT | os
.O_WRONLY
, 0o600), 'w') as f
:
4161 f
.write(self
.unit_run())
4162 os
.rename(unit_run_path
+ '.new', unit_run_path
)
4164 meta
: Dict
[str, Any
] = {}
4165 meta_file_path
= os
.path
.join(self
.daemon_dir
, 'unit.meta')
4166 if 'meta_json' in self
.ctx
and self
.ctx
.meta_json
:
4167 meta
= json
.loads(self
.ctx
.meta_json
) or {}
4168 with
open(os
.open(meta_file_path
+ '.new', os
.O_CREAT | os
.O_WRONLY
, 0o600), 'w') as f
:
4169 f
.write(json
.dumps(meta
, indent
=4) + '\n')
4170 os
.rename(meta_file_path
+ '.new', meta_file_path
)
4172 unit_file_path
= os
.path
.join(self
.ctx
.unit_dir
, self
.unit_name())
4173 with
open(os
.open(unit_file_path
+ '.new', os
.O_CREAT | os
.O_WRONLY
, 0o600), 'w') as f
:
4174 f
.write(self
.unit_file())
4175 os
.rename(unit_file_path
+ '.new', unit_file_path
)
4177 call_throws(self
.ctx
, ['systemctl', 'daemon-reload'])
4178 call(self
.ctx
, ['systemctl', 'stop', self
.unit_name()],
4179 verbosity
=CallVerbosity
.DEBUG
)
4180 call(self
.ctx
, ['systemctl', 'reset-failed', self
.unit_name()],
4181 verbosity
=CallVerbosity
.DEBUG
)
4182 call_throws(self
.ctx
, ['systemctl', 'enable', '--now', self
.unit_name()])
4184 def unit_name(self
) -> str:
4185 return '{}.service'.format(get_unit_name(self
.fsid
, self
.daemon_type
, self
.daemon_id
))
4187 def unit_run(self
) -> str:
4188 py3
= shutil
.which('python3')
4189 binary_path
= os
.path
.realpath(sys
.argv
[0])
4190 return ('set -e\n' + f
'{py3} {binary_path} agent --fsid {self.fsid} --daemon-id {self.daemon_id} &\n')
4192 def unit_file(self
) -> str:
4193 return """#generated by cephadm
4195 Description=cephadm agent for cluster {fsid}
4197 PartOf=ceph-{fsid}.target
4198 Before=ceph-{fsid}.target
4202 ExecStart=/bin/bash {data_dir}/unit.run
4207 WantedBy=ceph-{fsid}.target
4210 data_dir
=self
.daemon_dir
4213 def shutdown(self
) -> None:
4215 if self
.mgr_listener
.is_alive():
4216 self
.mgr_listener
.shutdown()
4218 def wakeup(self
) -> None:
4221 def pull_conf_settings(self
) -> None:
4223 with
open(self
.config_path
, 'r') as f
:
4224 config
= json
.load(f
)
4225 self
.target_ip
= config
['target_ip']
4226 self
.target_port
= config
['target_port']
4227 self
.loop_interval
= int(config
['refresh_period'])
4228 self
.starting_port
= int(config
['listener_port'])
4229 self
.host
= config
['host']
4230 use_lsm
= config
['device_enhanced_scan']
4231 except Exception as e
:
4233 raise Error(f
'Failed to get agent target ip and port from config: {e}')
4236 with
open(self
.keyring_path
, 'r') as f
:
4237 self
.keyring
= f
.read()
4238 except Exception as e
:
4240 raise Error(f
'Failed to get agent keyring: {e}')
4242 assert self
.target_ip
and self
.target_port
4244 self
.device_enhanced_scan
= False
4245 if use_lsm
.lower() == 'true':
4246 self
.device_enhanced_scan
= True
4247 self
.volume_gatherer
.update_func(lambda: self
._ceph
_volume
(enhanced
=self
.device_enhanced_scan
))
4249 def run(self
) -> None:
4250 self
.pull_conf_settings()
4253 for _
in range(1001):
4254 if not port_in_use(self
.ctx
, self
.starting_port
):
4255 self
.listener_port
= str(self
.starting_port
)
4257 self
.starting_port
+= 1
4258 if not self
.listener_port
:
4259 raise Error(f
'All 1000 ports starting at {str(self.starting_port - 1001)} taken.')
4260 except Exception as e
:
4261 raise Error(f
'Failed to pick port for agent to listen on: {e}')
4263 if not self
.mgr_listener
.is_alive():
4264 self
.mgr_listener
.start()
4266 if not self
.ls_gatherer
.is_alive():
4267 self
.ls_gatherer
.start()
4269 if not self
.volume_gatherer
.is_alive():
4270 self
.volume_gatherer
.start()
4272 ssl_ctx
= ssl
.create_default_context()
4273 ssl_ctx
.check_hostname
= True
4274 ssl_ctx
.verify_mode
= ssl
.CERT_REQUIRED
4275 ssl_ctx
.load_verify_locations(self
.ca_path
)
4277 while not self
.stop
:
4278 start_time
= time
.monotonic()
4281 # part of the networks info is returned as a set which is not JSON
4282 # serializable. The set must be converted to a list
4283 networks
= list_networks(self
.ctx
)
4285 for key
in networks
.keys():
4286 for k
, v
in networks
[key
].items():
4287 networks_list
[key
] = {k
: list(v
)}
4289 data
= json
.dumps({'host': self
.host
,
4290 'ls': (self
.ls_gatherer
.data
if self
.ack
== self
.ls_gatherer
.ack
4291 and self
.ls_gatherer
.data
is not None else []),
4292 'networks': networks_list
,
4293 'facts': HostFacts(self
.ctx
).dump(),
4294 'volume': (self
.volume_gatherer
.data
if self
.ack
== self
.volume_gatherer
.ack
4295 and self
.volume_gatherer
.data
is not None else ''),
4297 'keyring': self
.keyring
,
4298 'port': self
.listener_port
})
4299 data
= data
.encode('ascii')
4301 url
= f
'https://{self.target_ip}:{self.target_port}/data'
4303 req
= Request(url
, data
, {'Content-Type': 'application/json'})
4304 send_time
= time
.monotonic()
4305 with
urlopen(req
, context
=ssl_ctx
) as response
:
4306 response_str
= response
.read()
4307 response_json
= json
.loads(response_str
)
4308 total_request_time
= datetime
.timedelta(seconds
=(time
.monotonic() - send_time
)).total_seconds()
4309 logger
.info(f
'Received mgr response: "{response_json["result"]}" {total_request_time} seconds after sending request.')
4310 except Exception as e
:
4311 logger
.error(f
'Failed to send metadata to mgr: {e}')
4313 end_time
= time
.monotonic()
4314 run_time
= datetime
.timedelta(seconds
=(end_time
- start_time
))
4315 self
.recent_iteration_run_times
[self
.recent_iteration_index
] = run_time
.total_seconds()
4316 self
.recent_iteration_index
= (self
.recent_iteration_index
+ 1) % 3
4317 run_time_average
= sum(self
.recent_iteration_run_times
, 0.0) / len([t
for t
in self
.recent_iteration_run_times
if t
])
4319 self
.event
.wait(max(self
.loop_interval
- int(run_time_average
), 0))
4322 def _ceph_volume(self
, enhanced
: bool = False) -> Tuple
[str, bool]:
4323 self
.ctx
.command
= 'inventory --format=json'.split()
4325 self
.ctx
.command
.append('--with-lsm')
4326 self
.ctx
.fsid
= self
.fsid
4328 stream
= io
.StringIO()
4329 with
redirect_stdout(stream
):
4330 command_ceph_volume(self
.ctx
)
4332 stdout
= stream
.getvalue()
4335 return (stdout
, False)
4337 raise Exception('ceph-volume returned empty value')
4339 def _daemon_ls_subset(self
) -> Dict
[str, Dict
[str, Any
]]:
4340 # gets a subset of ls info quickly. The results of this will tell us if our
4341 # cached info is still good or if we need to run the full ls again.
4342 # for legacy containers, we just grab the full info. For cephadmv1 containers,
4343 # we only grab enabled, state, mem_usage and container id. If container id has
4344 # not changed for any daemon, we assume our cached info is good.
4345 daemons
: Dict
[str, Dict
[str, Any
]] = {}
4346 data_dir
= self
.ctx
.data_dir
4347 seen_memusage
= {} # type: Dict[str, int]
4348 out
, err
, code
= call(
4350 [self
.ctx
.container_engine
.path
, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
4351 verbosity
=CallVerbosity
.DEBUG
4353 seen_memusage_cid_len
, seen_memusage
= _parse_mem_usage(code
, out
)
4354 # we need a mapping from container names to ids. Later we will convert daemon
4355 # names to container names to get daemons container id to see if it has changed
4356 out
, err
, code
= call(
4358 [self
.ctx
.container_engine
.path
, 'ps', '--format', '{{.ID}},{{.Names}}', '--no-trunc'],
4359 verbosity
=CallVerbosity
.DEBUG
4361 name_id_mapping
: Dict
[str, str] = self
._parse
_container
_id
_name
(code
, out
)
4362 for i
in os
.listdir(data_dir
):
4363 if i
in ['mon', 'osd', 'mds', 'mgr']:
4365 for j
in os
.listdir(os
.path
.join(data_dir
, i
)):
4368 (cluster
, daemon_id
) = j
.split('-', 1)
4369 legacy_unit_name
= 'ceph-%s@%s' % (daemon_type
, daemon_id
)
4370 (enabled
, state
, _
) = check_unit(self
.ctx
, legacy_unit_name
)
4371 daemons
[f
'{daemon_type}.{daemon_id}'] = {
4373 'name': '%s.%s' % (daemon_type
, daemon_id
),
4374 'fsid': self
.ctx
.fsid
if self
.ctx
.fsid
is not None else 'unknown',
4375 'systemd_unit': legacy_unit_name
,
4376 'enabled': 'true' if enabled
else 'false',
4380 fsid
= str(i
) # convince mypy that fsid is a str here
4381 for j
in os
.listdir(os
.path
.join(data_dir
, i
)):
4382 if '.' in j
and os
.path
.isdir(os
.path
.join(data_dir
, fsid
, j
)):
4383 (daemon_type
, daemon_id
) = j
.split('.', 1)
4384 unit_name
= get_unit_name(fsid
, daemon_type
, daemon_id
)
4385 (enabled
, state
, _
) = check_unit(self
.ctx
, unit_name
)
4387 'style': 'cephadm:v1',
4388 'systemd_unit': unit_name
,
4389 'enabled': 'true' if enabled
else 'false',
4392 c
= CephContainer
.for_daemon(self
.ctx
, self
.ctx
.fsid
, daemon_type
, daemon_id
, 'bash')
4393 container_id
: Optional
[str] = None
4394 for name
in (c
.cname
, c
.old_cname
):
4395 if name
in name_id_mapping
:
4396 container_id
= name_id_mapping
[name
]
4398 daemons
[j
]['container_id'] = container_id
4400 daemons
[j
]['memory_usage'] = seen_memusage
.get(container_id
[0:seen_memusage_cid_len
])
4403 def _parse_container_id_name(self
, code
: int, out
: str) -> Dict
[str, str]:
4404 # map container names to ids from ps output
4405 name_id_mapping
= {} # type: Dict[str, str]
4407 for line
in out
.splitlines():
4408 id, name
= line
.split(',')
4409 name_id_mapping
[name
] = id
4410 return name_id_mapping
4412 def _get_ls(self
) -> Tuple
[List
[Dict
[str, str]], bool]:
4413 if not self
.cached_ls_values
:
4414 logger
.info('No cached ls output. Running full daemon ls')
4415 ls
= list_daemons(self
.ctx
)
4417 self
.cached_ls_values
[d
['name']] = d
4420 ls_subset
= self
._daemon
_ls
_subset
()
4421 need_full_ls
= False
4422 state_change
= False
4423 if set(self
.cached_ls_values
.keys()) != set(ls_subset
.keys()):
4424 # case for a new daemon in ls or an old daemon no longer appearing.
4425 # If that happens we need a full ls
4426 logger
.info('Change detected in state of daemons. Running full daemon ls')
4427 ls
= list_daemons(self
.ctx
)
4429 self
.cached_ls_values
[d
['name']] = d
4431 for daemon
, info
in self
.cached_ls_values
.items():
4432 if info
['style'] == 'legacy':
4433 # for legacy containers, ls_subset just grabs all the info
4434 self
.cached_ls_values
[daemon
] = ls_subset
[daemon
]
4436 if info
['container_id'] != ls_subset
[daemon
]['container_id']:
4437 # case for container id having changed. We need full ls as
4438 # info we didn't grab like version and start time could have changed
4442 # want to know if a daemons state change because in those cases we want
4443 # to report back quicker
4445 self
.cached_ls_values
[daemon
]['enabled'] != ls_subset
[daemon
]['enabled']
4446 or self
.cached_ls_values
[daemon
]['state'] != ls_subset
[daemon
]['state']
4449 # if we reach here, container id matched. Update the few values we do track
4450 # from ls subset: state, enabled, memory_usage.
4451 self
.cached_ls_values
[daemon
]['enabled'] = ls_subset
[daemon
]['enabled']
4452 self
.cached_ls_values
[daemon
]['state'] = ls_subset
[daemon
]['state']
4453 if 'memory_usage' in ls_subset
[daemon
]:
4454 self
.cached_ls_values
[daemon
]['memory_usage'] = ls_subset
[daemon
]['memory_usage']
4456 logger
.info('Change detected in state of daemons. Running full daemon ls')
4457 ls
= list_daemons(self
.ctx
)
4459 self
.cached_ls_values
[d
['name']] = d
4462 ls
= [info
for daemon
, info
in self
.cached_ls_values
.items()]
4463 return (ls
, state_change
)
4466 class AgentGatherer(Thread
):
4467 def __init__(self
, agent
: 'CephadmAgent', func
: Callable
, gatherer_type
: str = 'Unnamed', initial_ack
: int = 0) -> None:
4470 self
.gatherer_type
= gatherer_type
4471 self
.ack
= initial_ack
4472 self
.event
= Event()
4473 self
.data
: Any
= None
4475 self
.recent_iteration_run_times
: List
[float] = [0.0, 0.0, 0.0]
4476 self
.recent_iteration_index
: int = 0
4477 super(AgentGatherer
, self
).__init
__(target
=self
.run
)
4479 def run(self
) -> None:
4480 while not self
.stop
:
4482 start_time
= time
.monotonic()
4484 ack
= self
.agent
.ack
4487 self
.data
, change
= self
.func()
4488 except Exception as e
:
4489 logger
.error(f
'{self.gatherer_type} Gatherer encountered exception gathering data: {e}')
4491 if ack
!= self
.ack
or change
:
4495 end_time
= time
.monotonic()
4496 run_time
= datetime
.timedelta(seconds
=(end_time
- start_time
))
4497 self
.recent_iteration_run_times
[self
.recent_iteration_index
] = run_time
.total_seconds()
4498 self
.recent_iteration_index
= (self
.recent_iteration_index
+ 1) % 3
4499 run_time_average
= sum(self
.recent_iteration_run_times
, 0.0) / len([t
for t
in self
.recent_iteration_run_times
if t
])
4501 self
.event
.wait(max(self
.agent
.loop_interval
- int(run_time_average
), 0))
4503 except Exception as e
:
4504 logger
.error(f
'{self.gatherer_type} Gatherer encountered exception: {e}')
4506 def shutdown(self
) -> None:
4509 def wakeup(self
) -> None:
4512 def update_func(self
, func
: Callable
) -> None:
4516 def command_agent(ctx
: CephadmContext
) -> None:
4517 agent
= CephadmAgent(ctx
, ctx
.fsid
, ctx
.daemon_id
)
4519 if not os
.path
.isdir(agent
.daemon_dir
):
4520 raise Error(f
'Agent daemon directory {agent.daemon_dir} does not exist. Perhaps agent was never deployed?')
4525 ##################################
4529 def command_version(ctx
):
4530 # type: (CephadmContext) -> int
4531 c
= CephContainer(ctx
, ctx
.image
, 'ceph', ['--version'])
4532 out
, err
, ret
= call(ctx
, c
.run_cmd(), desc
=c
.entrypoint
)
4537 ##################################
4541 def command_pull(ctx
):
4542 # type: (CephadmContext) -> int
4545 _pull_image(ctx
, ctx
.image
, ctx
.insecure
)
4546 except UnauthorizedRegistryError
:
4547 err_str
= 'Failed to pull container image. Check that host(s) are logged into the registry'
4548 logger
.debug(f
'Pulling image for `command_pull` failed: {err_str}')
4549 raise Error(err_str
)
4550 return command_inspect_image(ctx
)
4553 def _pull_image(ctx
, image
, insecure
=False):
4554 # type: (CephadmContext, str, bool) -> None
4555 logger
.info('Pulling container image %s...' % image
)
4558 'error creating read-write layer with ID',
4559 'net/http: TLS handshake timeout',
4560 'Digest did not match, expected',
4563 cmd
= [ctx
.container_engine
.path
, 'pull', image
]
4564 if isinstance(ctx
.container_engine
, Podman
):
4566 cmd
.append('--tls-verify=false')
4568 if os
.path
.exists('/etc/ceph/podman-auth.json'):
4569 cmd
.append('--authfile=/etc/ceph/podman-auth.json')
4570 cmd_str
= ' '.join(cmd
)
4572 for sleep_secs
in [1, 4, 25]:
4573 out
, err
, ret
= call(ctx
, cmd
, verbosity
=CallVerbosity
.QUIET_UNLESS_ERROR
)
4577 if 'unauthorized' in err
:
4578 raise UnauthorizedRegistryError()
4580 if not any(pattern
in err
for pattern
in ignorelist
):
4581 raise Error('Failed command: %s' % cmd_str
)
4583 logger
.info('`%s` failed transiently. Retrying. waiting %s seconds...' % (cmd_str
, sleep_secs
))
4584 time
.sleep(sleep_secs
)
4586 raise Error('Failed command: %s: maximum retries reached' % cmd_str
)
4588 ##################################
4592 def command_inspect_image(ctx
):
4593 # type: (CephadmContext) -> int
4594 out
, err
, ret
= call_throws(ctx
, [
4595 ctx
.container_engine
.path
, 'inspect',
4596 '--format', '{{.ID}},{{.RepoDigests}}',
4600 info_from
= get_image_info_from_inspect(out
.strip(), ctx
.image
)
4602 ver
= CephContainer(ctx
, ctx
.image
, 'ceph', ['--version']).run().strip()
4603 info_from
['ceph_version'] = ver
4605 print(json
.dumps(info_from
, indent
=4, sort_keys
=True))
def normalize_image_digest(digest: str) -> str:
    """Prepend DEFAULT_REGISTRY to known unqualified shortname images.

    Normal case:
    >>> normalize_image_digest('ceph/ceph')
    'docker.io/ceph/ceph'

    No change:
    >>> normalize_image_digest('quay.ceph.io/ceph/ceph')
    'quay.ceph.io/ceph/ceph'

    >>> normalize_image_digest('docker.io/ubuntu')
    'docker.io/ubuntu'

    >>> normalize_image_digest('localhost/ceph')
    'localhost/ceph'
    """
    # only these well-known shortnames are normalized; anything already
    # carrying a registry prefix is passed through untouched
    known_shortnames = [
        'ceph/ceph',
        'ceph/daemon',
        'ceph/daemon-base',
    ]
    for image in known_shortnames:
        if digest.startswith(image):
            return f'{DEFAULT_REGISTRY}/{digest}'
    return digest
def get_image_info_from_inspect(out, image):
    # type: (str, str) -> Dict[str, Union[str, List[str]]]
    """Parse `<engine> inspect --format {{.ID}},{{.RepoDigests}}` output.

    :param out: raw inspect output, '<id>,[<digest> <digest> ...]'
    :param image: image name, used only in the error message
    :returns: dict with 'image_id' and, when digests exist, 'repo_digests'
    :raises Error: when the inspect output is empty
    """
    # validate BEFORE splitting: ''.split(',', 1) cannot be unpacked into
    # two values and would raise ValueError instead of the intended Error
    if not out:
        raise Error('inspect {}: empty result'.format(image))
    image_id, digests = out.split(',', 1)
    r = {
        'image_id': normalize_container_id(image_id)
    }  # type: Dict[str, Union[str, List[str]]]
    if digests:
        # digests looks like '[repo@sha256:... repo@sha256:...]'
        r['repo_digests'] = list(map(normalize_image_digest, digests[1: -1].split(' ')))
    return r
4648 ##################################
def check_subnet(subnets: str) -> Tuple[int, List[int], str]:
    """Determine whether the given string is a valid subnet

    :param subnets: subnet string, a single definition or comma separated list of CIDR subnets
    :returns: return code, IP version list of the subnets and msg describing any errors validation errors
    """
    rc = 0
    versions: Set[int] = set()
    errors: List[str] = []

    for candidate in (entry.strip() for entry in subnets.split(',')):
        # every entry must look like address/netmask before we try to parse it
        if not re.search(r'\/\d+$', candidate):
            rc = 1
            errors.append(f'{candidate} is not in CIDR format (address/netmask)')
            continue
        try:
            versions.add(ipaddress.ip_network(candidate).version)
        except ValueError as e:
            rc = 1
            errors.append(f'{candidate} invalid: {str(e)}')

    return rc, list(versions), ', '.join(errors)
def unwrap_ipv6(address):
    # type: (str) -> str
    """Strip enclosing square brackets from a bracketed IPv6 address.

    Anything not wrapped in '[...]' is returned unchanged.
    """
    bracketed = address.startswith('[') and address.endswith(']')
    return address[1:-1] if bracketed else address
def wrap_ipv6(address):
    # type: (str) -> str
    """Wrap *address* in square brackets when it parses as IPv6.

    We cannot assume it's already wrapped or even an IPv6 address: if it is
    already wrapped (or is a hostname) parsing fails, and the value is
    returned untouched.
    """
    try:
        needs_brackets = ipaddress.ip_address(address).version == 6
    except ValueError:
        needs_brackets = False
    return f'[{address}]' if needs_brackets else address
def is_ipv6(address):
    # type: (str) -> bool
    """Report whether *address* parses as IPv6; enclosing brackets tolerated."""
    candidate = unwrap_ipv6(address)
    try:
        parsed = ipaddress.ip_address(candidate)
    except ValueError:
        logger.warning('Address: {} is not a valid IP address'.format(candidate))
        return False
    return parsed.version == 6
def ip_in_subnets(ip_addr: str, subnets: str) -> bool:
    """Determine if the ip_addr belongs to any of the subnets list."""
    for subnet in [entry.strip() for entry in subnets.split(',')]:
        # bracketed IPv6 must be unwrapped before ipaddress can parse it
        plain_ip = unwrap_ipv6(ip_addr) if is_ipv6(ip_addr) else ip_addr
        if ipaddress.ip_address(plain_ip) in ipaddress.ip_network(subnet):
            return True
    return False
def parse_mon_addrv(addrv_arg: str) -> List[EndPoint]:
    """Parse mon-addrv param into a list of mon end points.

    :param addrv_arg: bracketed, comma separated list such as
        '[v2:10.0.0.1:3300,v1:10.0.0.1:6789]'
    :returns: list of EndPoint(ip, port)
    :raises Error: when the value is not bracketed or an entry has no port
    """
    r = re.compile(r':(\d+)$')
    addrv_args = []
    addr_arg = addrv_arg
    if addr_arg[0] != '[' or addr_arg[-1] != ']':
        raise Error(f'--mon-addrv value {addr_arg} must use square brackets')

    for addr in addr_arg[1: -1].split(','):
        hasport = r.findall(addr)
        if not hasport:
            raise Error(f'--mon-addrv value {addr_arg} must include port number')
        port_str = hasport[0]
        addr = re.sub(r'^v\d+:', '', addr)  # strip off v1: or v2: prefix
        # drop the trailing ':<port>' to recover the bare address
        base_ip = addr[0:-(len(port_str)) - 1]
        addrv_args.append(EndPoint(base_ip, int(port_str)))

    return addrv_args
def parse_mon_ip(mon_ip: str) -> List[EndPoint]:
    """Parse mon-ip param into a list of mon end points."""
    endpoints: List[EndPoint] = []
    port_match = re.compile(r':(\d+)$').findall(mon_ip)
    if port_match:
        # explicit port: strip the trailing ':<port>' to get the bare address
        port_str = port_match[0]
        endpoints.append(EndPoint(mon_ip[:-(len(port_str) + 1)], int(port_str)))
    else:
        # No port provided: use fixed ports for ceph monitor
        endpoints.append(EndPoint(mon_ip, 3300))
        endpoints.append(EndPoint(mon_ip, 6789))
    return endpoints
4758 def build_addrv_params(addrv
: List
[EndPoint
]) -> str:
4759 """Convert mon end-points (ip:port) into the format: [v[1|2]:ip:port1]"""
4761 raise Error('Detected a local mon-addrv list with more than 2 entries.')
4762 port_to_ver
: Dict
[int, str] = {6789: 'v1', 3300: 'v2'}
4763 addr_arg_list
: List
[str] = []
4765 if ep
.port
in port_to_ver
:
4766 ver
= port_to_ver
[ep
.port
]
4768 ver
= 'v2' # default mon protocol version if port is not provided
4769 logger
.warning(f
'Using msgr2 protocol for unrecognized port {ep}')
4770 addr_arg_list
.append(f
'{ver}:{ep.ip}:{ep.port}')
4772 addr_arg
= '[{0}]'.format(','.join(addr_arg_list
))
4776 def get_public_net_from_cfg(ctx
: CephadmContext
) -> Optional
[str]:
4777 """Get mon public network from configuration file."""
4778 cp
= read_config(ctx
.config
)
4779 if not cp
.has_option('global', 'public_network'):
4782 # Ensure all public CIDR networks are valid
4783 public_network
= cp
.get('global', 'public_network')
4784 rc
, _
, err_msg
= check_subnet(public_network
)
4786 raise Error(f
'Invalid public_network {public_network} parameter: {err_msg}')
4788 # Ensure all public CIDR networks are configured locally
4789 configured_subnets
= set([x
.strip() for x
in public_network
.split(',')])
4790 local_subnets
= set([x
[0] for x
in list_networks(ctx
).items()])
4791 valid_public_net
= False
4792 for net
in configured_subnets
:
4793 if net
in local_subnets
:
4794 valid_public_net
= True
4796 logger
.warning(f
'The public CIDR network {net} (from -c conf file) is not configured locally.')
4797 if not valid_public_net
:
4798 raise Error(f
'None of the public CIDR network(s) {configured_subnets} (from -c conf file) is configured locally.')
4800 # Ensure public_network is compatible with the provided mon-ip (or mon-addrv)
4802 if not ip_in_subnets(ctx
.mon_ip
, public_network
):
4803 raise Error(f
'The provided --mon-ip {ctx.mon_ip} does not belong to any public_network(s) {public_network}')
4805 addrv_args
= parse_mon_addrv(ctx
.mon_addrv
)
4806 for addrv
in addrv_args
:
4807 if not ip_in_subnets(addrv
.ip
, public_network
):
4808 raise Error(f
'The provided --mon-addrv {addrv.ip} ip does not belong to any public_network(s) {public_network}')
4810 logger
.debug(f
'Using mon public network from configuration file {public_network}')
4811 return public_network
4814 def infer_mon_network(ctx
: CephadmContext
, mon_eps
: List
[EndPoint
]) -> Optional
[str]:
4815 """Infer mon public network from local network."""
4816 # Make sure IP is configured locally, and then figure out the CIDR network
4818 for net
, ifaces
in list_networks(ctx
).items():
4819 # build local_ips list for the specified network
4820 local_ips
: List
[str] = []
4821 for _
, ls
in ifaces
.items():
4822 local_ips
.extend([ipaddress
.ip_address(ip
) for ip
in ls
])
4824 # check if any of mon ips belong to this net
4825 for mon_ep
in mon_eps
:
4827 if ipaddress
.ip_address(unwrap_ipv6(mon_ep
.ip
)) in local_ips
:
4828 mon_networks
.append(net
)
4829 logger
.info(f
'Mon IP `{mon_ep.ip}` is in CIDR network `{net}`')
4830 except ValueError as e
:
4831 logger
.warning(f
'Cannot infer CIDR network for mon IP `{mon_ep.ip}` : {e}')
4833 if not mon_networks
:
4834 raise Error('Cannot infer CIDR network. Pass --skip-mon-network to configure it later')
4836 logger
.debug(f
'Inferred mon public CIDR from local network configuration {mon_networks}')
4838 mon_networks
= list(set(mon_networks
)) # remove duplicates
4839 return ','.join(mon_networks
)
4842 def prepare_mon_addresses(ctx
: CephadmContext
) -> Tuple
[str, bool, Optional
[str]]:
4843 """Get mon public network configuration."""
4845 addrv_args
: List
[EndPoint
] = []
4846 mon_addrv
: str = '' # i.e: [v2:192.168.100.1:3300,v1:192.168.100.1:6789]
4849 ipv6
= is_ipv6(ctx
.mon_ip
)
4851 ctx
.mon_ip
= wrap_ipv6(ctx
.mon_ip
)
4852 addrv_args
= parse_mon_ip(ctx
.mon_ip
)
4853 mon_addrv
= build_addrv_params(addrv_args
)
4855 ipv6
= ctx
.mon_addrv
.count('[') > 1
4856 addrv_args
= parse_mon_addrv(ctx
.mon_addrv
)
4857 mon_addrv
= ctx
.mon_addrv
4859 raise Error('must specify --mon-ip or --mon-addrv')
4862 for end_point
in addrv_args
:
4863 check_ip_port(ctx
, end_point
)
4865 logger
.debug(f
'Base mon IP(s) is {addrv_args}, mon addrv is {mon_addrv}')
4867 if not ctx
.skip_mon_network
:
4868 mon_network
= get_public_net_from_cfg(ctx
) or infer_mon_network(ctx
, addrv_args
)
4870 return (mon_addrv
, ipv6
, mon_network
)
4873 def prepare_cluster_network(ctx
: CephadmContext
) -> Tuple
[str, bool]:
4874 # the cluster network may not exist on this node, so all we can do is
4875 # validate that the address given is valid ipv4 or ipv6 subnet
4876 ipv6_cluster_network
= False
4877 cp
= read_config(ctx
.config
)
4878 cluster_network
= ctx
.cluster_network
4879 if cluster_network
is None and cp
.has_option('global', 'cluster_network'):
4880 cluster_network
= cp
.get('global', 'cluster_network')
4883 cluser_nets
= set([x
.strip() for x
in cluster_network
.split(',')])
4884 local_subnets
= set([x
[0] for x
in list_networks(ctx
).items()])
4885 for net
in cluser_nets
:
4886 if net
not in local_subnets
:
4887 logger
.warning(f
'The cluster CIDR network {net} is not configured locally.')
4889 rc
, versions
, err_msg
= check_subnet(cluster_network
)
4891 raise Error(f
'Invalid --cluster-network parameter: {err_msg}')
4892 ipv6_cluster_network
= True if 6 in versions
else False
4894 logger
.info('Internal network (--cluster-network) has not '
4895 'been provided, OSD replication will default to '
4896 'the public_network')
4898 return cluster_network
, ipv6_cluster_network
4901 def create_initial_keys(
4902 ctx
: CephadmContext
,
4905 ) -> Tuple
[str, str, str, Any
, Any
]: # type: ignore
4909 # create some initial keys
4910 logger
.info('Creating initial keys...')
4911 mon_key
= CephContainer(
4914 entrypoint
='/usr/bin/ceph-authtool',
4915 args
=['--gen-print-key'],
4917 admin_key
= CephContainer(
4920 entrypoint
='/usr/bin/ceph-authtool',
4921 args
=['--gen-print-key'],
4923 mgr_key
= CephContainer(
4926 entrypoint
='/usr/bin/ceph-authtool',
4927 args
=['--gen-print-key'],
4930 keyring
= ('[mon.]\n'
4932 '\tcaps mon = allow *\n'
4935 '\tcaps mon = allow *\n'
4936 '\tcaps mds = allow *\n'
4937 '\tcaps mgr = allow *\n'
4938 '\tcaps osd = allow *\n'
4941 '\tcaps mon = profile mgr\n'
4942 '\tcaps mds = allow *\n'
4943 '\tcaps osd = allow *\n'
4944 % (mon_key
, admin_key
, mgr_id
, mgr_key
))
4946 admin_keyring
= write_tmp('[client.admin]\n'
4947 '\tkey = ' + admin_key
+ '\n',
4951 bootstrap_keyring
= write_tmp(keyring
, uid
, gid
)
4952 return (mon_key
, mgr_key
, admin_key
,
4953 bootstrap_keyring
, admin_keyring
)
4956 def create_initial_monmap(
4957 ctx
: CephadmContext
,
4960 mon_id
: str, mon_addr
: str
4962 logger
.info('Creating initial monmap...')
4963 monmap
= write_tmp('', 0, 0)
4964 out
= CephContainer(
4967 entrypoint
='/usr/bin/monmaptool',
4972 '--addv', mon_id
, mon_addr
,
4976 monmap
.name
: '/tmp/monmap:z',
4979 logger
.debug(f
'monmaptool for {mon_id} {mon_addr} on {out}')
4981 # pass monmap file to ceph user for use by ceph-mon --mkfs below
4982 os
.fchown(monmap
.fileno(), uid
, gid
)
4986 def prepare_create_mon(
4987 ctx
: CephadmContext
,
4989 fsid
: str, mon_id
: str,
4990 bootstrap_keyring_path
: str,
4992 ) -> Tuple
[str, str]:
4993 logger
.info('Creating mon...')
4994 create_daemon_dirs(ctx
, fsid
, 'mon', mon_id
, uid
, gid
)
4995 mon_dir
= get_data_dir(fsid
, ctx
.data_dir
, 'mon', mon_id
)
4996 log_dir
= get_log_dir(fsid
, ctx
.log_dir
)
4997 out
= CephContainer(
5000 entrypoint
='/usr/bin/ceph-mon',
5006 '--monmap', '/tmp/monmap',
5007 '--keyring', '/tmp/keyring',
5008 ] + get_daemon_args(ctx
, fsid
, 'mon', mon_id
),
5010 log_dir
: '/var/log/ceph:z',
5011 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id
),
5012 bootstrap_keyring_path
: '/tmp/keyring:z',
5013 monmap_path
: '/tmp/monmap:z',
5016 logger
.debug(f
'create mon.{mon_id} on {out}')
5017 return (mon_dir
, log_dir
)
5021 ctx
: CephadmContext
,
5023 fsid
: str, mon_id
: str
5025 mon_c
= get_container(ctx
, fsid
, 'mon', mon_id
)
5026 ctx
.meta_json
= json
.dumps({'service_name': 'mon'})
5027 deploy_daemon(ctx
, fsid
, 'mon', mon_id
, mon_c
, uid
, gid
,
5028 config
=None, keyring
=None)
5032 ctx
: CephadmContext
,
5033 mon_id
: str, mon_dir
: str,
5034 admin_keyring_path
: str, config_path
: str
5036 logger
.info('Waiting for mon to start...')
5040 entrypoint
='/usr/bin/ceph',
5044 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id
),
5045 admin_keyring_path
: '/etc/ceph/ceph.client.admin.keyring:z',
5046 config_path
: '/etc/ceph/ceph.conf:z',
5050 # wait for the service to become available
5051 def is_mon_available():
5053 timeout
= ctx
.timeout
if ctx
.timeout
else 60 # seconds
5054 out
, err
, ret
= call(ctx
, c
.run_cmd(),
5057 verbosity
=CallVerbosity
.QUIET_UNLESS_ERROR
)
5060 is_available(ctx
, 'mon', is_mon_available
)
5064 ctx
: CephadmContext
,
5066 fsid
: str, mgr_id
: str, mgr_key
: str,
5067 config
: str, clifunc
: Callable
5069 logger
.info('Creating mgr...')
5070 mgr_keyring
= '[mgr.%s]\n\tkey = %s\n' % (mgr_id
, mgr_key
)
5071 mgr_c
= get_container(ctx
, fsid
, 'mgr', mgr_id
)
5072 # Note:the default port used by the Prometheus node exporter is opened in fw
5073 ctx
.meta_json
= json
.dumps({'service_name': 'mgr'})
5074 deploy_daemon(ctx
, fsid
, 'mgr', mgr_id
, mgr_c
, uid
, gid
,
5075 config
=config
, keyring
=mgr_keyring
, ports
=[9283])
5077 # wait for the service to become available
5078 logger
.info('Waiting for mgr to start...')
5080 def is_mgr_available():
5082 timeout
= ctx
.timeout
if ctx
.timeout
else 60 # seconds
5084 out
= clifunc(['status', '-f', 'json-pretty'],
5086 verbosity
=CallVerbosity
.QUIET_UNLESS_ERROR
)
5088 return j
.get('mgrmap', {}).get('available', False)
5089 except Exception as e
:
5090 logger
.debug('status failed: %s' % e
)
5092 is_available(ctx
, 'mgr', is_mgr_available
)
5096 ctx
: CephadmContext
,
5097 cli
: Callable
, wait_for_mgr_restart
: Callable
5100 cli(['cephadm', 'set-user', ctx
.ssh_user
])
5103 logger
.info('Using provided ssh config...')
5105 pathify(ctx
.ssh_config
.name
): '/tmp/cephadm-ssh-config:z',
5107 cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts
=mounts
)
5109 if ctx
.ssh_private_key
and ctx
.ssh_public_key
:
5110 logger
.info('Using provided ssh keys...')
5112 pathify(ctx
.ssh_private_key
.name
): '/tmp/cephadm-ssh-key:z',
5113 pathify(ctx
.ssh_public_key
.name
): '/tmp/cephadm-ssh-key.pub:z'
5115 cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts
=mounts
)
5116 cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts
=mounts
)
5117 ssh_pub
= cli(['cephadm', 'get-pub-key'])
5119 logger
.info('Generating ssh key...')
5120 cli(['cephadm', 'generate-key'])
5121 ssh_pub
= cli(['cephadm', 'get-pub-key'])
5122 with
open(ctx
.output_pub_ssh_key
, 'w') as f
:
5124 logger
.info('Wrote public SSH key to %s' % ctx
.output_pub_ssh_key
)
5126 authorize_ssh_key(ssh_pub
, ctx
.ssh_user
)
5128 host
= get_hostname()
5129 logger
.info('Adding host %s...' % host
)
5131 args
= ['orch', 'host', 'add', host
]
5133 args
.append(unwrap_ipv6(ctx
.mon_ip
))
5135 addrv_args
= parse_mon_addrv(ctx
.mon_addrv
)
5136 args
.append(unwrap_ipv6(addrv_args
[0].ip
))
5138 except RuntimeError as e
:
5139 raise Error('Failed to add host <%s>: %s' % (host
, e
))
5141 for t
in ['mon', 'mgr']:
5142 if not ctx
.orphan_initial_daemons
:
5143 logger
.info('Deploying %s service with default placement...' % t
)
5144 cli(['orch', 'apply', t
])
5146 logger
.info('Deploying unmanaged %s service...' % t
)
5147 cli(['orch', 'apply', t
, '--unmanaged'])
5149 if not ctx
.orphan_initial_daemons
:
5150 logger
.info('Deploying crash service with default placement...')
5151 cli(['orch', 'apply', 'crash'])
5153 if not ctx
.skip_monitoring_stack
:
5154 for t
in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
5155 logger
.info('Deploying %s service with default placement...' % t
)
5156 cli(['orch', 'apply', t
])
5158 if ctx
.with_centralized_logging
:
5159 for t
in ['loki', 'promtail']:
5160 logger
.info('Deploying %s service with default placement...' % t
)
5161 cli(['orch', 'apply', t
])
def enable_cephadm_mgr_module(
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:
    """Enable the cephadm mgr module and select cephadm as orch backend.

    :param cli: callable that runs a `ceph` CLI command given as an argv list
    :param wait_for_mgr_restart: callable that blocks until the mgr is back up
    """
    logger.info('Enabling cephadm module...')
    cli(['mgr', 'module', 'enable', 'cephadm'])
    # enabling the module restarts the active mgr; wait for it to come back
    # before pointing the orchestrator backend at it
    wait_for_mgr_restart()
    logger.info('Setting orchestrator backend to cephadm...')
    cli(['orch', 'set', 'backend', 'cephadm'])
5175 def prepare_dashboard(
5176 ctx
: CephadmContext
,
5178 cli
: Callable
, wait_for_mgr_restart
: Callable
5181 # Configure SSL port (cephadm only allows to configure dashboard SSL port)
5182 # if the user does not want to use SSL he can change this setting once the cluster is up
5183 cli(['config', 'set', 'mgr', 'mgr/dashboard/ssl_server_port', str(ctx
.ssl_dashboard_port
)])
5185 # configuring dashboard parameters
5186 logger
.info('Enabling the dashboard module...')
5187 cli(['mgr', 'module', 'enable', 'dashboard'])
5188 wait_for_mgr_restart()
5190 # dashboard crt and key
5191 if ctx
.dashboard_key
and ctx
.dashboard_crt
:
5192 logger
.info('Using provided dashboard certificate...')
5194 pathify(ctx
.dashboard_crt
.name
): '/tmp/dashboard.crt:z',
5195 pathify(ctx
.dashboard_key
.name
): '/tmp/dashboard.key:z'
5197 cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts
=mounts
)
5198 cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts
=mounts
)
5200 logger
.info('Generating a dashboard self-signed certificate...')
5201 cli(['dashboard', 'create-self-signed-cert'])
5203 logger
.info('Creating initial admin user...')
5204 password
= ctx
.initial_dashboard_password
or generate_password()
5205 tmp_password_file
= write_tmp(password
, uid
, gid
)
5206 cmd
= ['dashboard', 'ac-user-create', ctx
.initial_dashboard_user
, '-i', '/tmp/dashboard.pw', 'administrator', '--force-password']
5207 if not ctx
.dashboard_password_noupdate
:
5208 cmd
.append('--pwd-update-required')
5209 cli(cmd
, extra_mounts
={pathify(tmp_password_file
.name
): '/tmp/dashboard.pw:z'})
5210 logger
.info('Fetching dashboard port number...')
5211 out
= cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
5214 # Open dashboard port
5215 if not ('skip_firewalld' in ctx
and ctx
.skip_firewalld
):
5217 fw
.open_ports([port
])
5220 logger
.info('Ceph Dashboard is now available at:\n\n'
5221 '\t URL: https://%s:%s/\n'
5223 '\tPassword: %s\n' % (
5225 ctx
.initial_dashboard_user
,
5229 def prepare_bootstrap_config(
5230 ctx
: CephadmContext
,
5231 fsid
: str, mon_addr
: str, image
: str
5235 cp
= read_config(ctx
.config
)
5236 if not cp
.has_section('global'):
5237 cp
.add_section('global')
5238 cp
.set('global', 'fsid', fsid
)
5239 cp
.set('global', 'mon_host', mon_addr
)
5240 cp
.set('global', 'container_image', image
)
5242 if not cp
.has_section('mon'):
5243 cp
.add_section('mon')
5245 not cp
.has_option('mon', 'auth_allow_insecure_global_id_reclaim')
5246 and not cp
.has_option('mon', 'auth allow insecure global id reclaim')
5248 cp
.set('mon', 'auth_allow_insecure_global_id_reclaim', 'false')
5250 if ctx
.single_host_defaults
:
5251 logger
.info('Adjusting default settings to suit single-host cluster...')
5252 # replicate across osds, not hosts
5254 not cp
.has_option('global', 'osd_crush_chooseleaf_type')
5255 and not cp
.has_option('global', 'osd crush chooseleaf type')
5257 cp
.set('global', 'osd_crush_chooseleaf_type', '0')
5260 not cp
.has_option('global', 'osd_pool_default_size')
5261 and not cp
.has_option('global', 'osd pool default size')
5263 cp
.set('global', 'osd_pool_default_size', '2')
5264 # disable mgr standby modules (so we can colocate multiple mgrs on one host)
5265 if not cp
.has_section('mgr'):
5266 cp
.add_section('mgr')
5268 not cp
.has_option('mgr', 'mgr_standby_modules')
5269 and not cp
.has_option('mgr', 'mgr standby modules')
5271 cp
.set('mgr', 'mgr_standby_modules', 'false')
5273 cp
.set('global', 'log_to_file', 'true')
5274 cp
.set('global', 'log_to_stderr', 'false')
5275 cp
.set('global', 'log_to_journald', 'false')
5276 cp
.set('global', 'mon_cluster_log_to_file', 'true')
5277 cp
.set('global', 'mon_cluster_log_to_stderr', 'false')
5278 cp
.set('global', 'mon_cluster_log_to_journald', 'false')
5282 config
= cpf
.getvalue()
5284 if ctx
.registry_json
or ctx
.registry_url
:
5285 command_registry_login(ctx
)
5290 def finish_bootstrap_config(
5291 ctx
: CephadmContext
,
5294 mon_id
: str, mon_dir
: str,
5295 mon_network
: Optional
[str], ipv6
: bool,
5297 cluster_network
: Optional
[str], ipv6_cluster_network
: bool
5300 if not ctx
.no_minimize_config
:
5301 logger
.info('Assimilating anything we can from ceph.conf...')
5303 'config', 'assimilate-conf',
5304 '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
5306 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
5308 logger
.info('Generating new minimal ceph.conf...')
5310 'config', 'generate-minimal-conf',
5311 '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
5313 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
5315 # re-read our minimized config
5316 with
open(mon_dir
+ '/config', 'r') as f
:
5318 logger
.info('Restarting the monitor...')
5322 get_unit_name(fsid
, 'mon', mon_id
)
5324 elif 'image' in ctx
and ctx
.image
:
5325 # we still want to assimilate the given container image if provided
5326 cli(['config', 'set', 'global', 'container_image', f
'{ctx.image}'])
5329 logger
.info(f
'Setting mon public_network to {mon_network}')
5330 cli(['config', 'set', 'mon', 'public_network', mon_network
])
5333 logger
.info(f
'Setting cluster_network to {cluster_network}')
5334 cli(['config', 'set', 'global', 'cluster_network', cluster_network
])
5336 if ipv6
or ipv6_cluster_network
:
5337 logger
.info('Enabling IPv6 (ms_bind_ipv6) binding')
5338 cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])
5340 with
open(ctx
.output_config
, 'w') as f
:
5342 logger
.info('Wrote config to %s' % ctx
.output_config
)
5346 # funcs to process spec file for apply spec
5347 def _parse_yaml_docs(f
: Iterable
[str]) -> List
[List
[str]]:
5349 current_doc
= [] # type: List[str]
5351 if re
.search(r
'^---\s+', line
):
5353 docs
.append(current_doc
)
5356 current_doc
.append(line
.rstrip())
5358 docs
.append(current_doc
)
5362 def _parse_yaml_obj(doc
: List
[str]) -> Dict
[str, str]:
5363 # note: this only parses the first layer of yaml
5364 obj
= {} # type: Dict[str, str]
5367 if line
.startswith(' '):
5368 obj
[current_key
] += line
.strip()
5369 elif line
.endswith(':'):
5370 current_key
= line
.strip(':')
5371 obj
[current_key
] = ''
5373 current_key
, val
= line
.split(':')
5374 obj
[current_key
] = val
.strip()
def parse_yaml_objs(f: Iterable[str]) -> List[Dict[str, str]]:
    """Parse a yaml stream into one flat dict per '---'-separated document."""
    return [_parse_yaml_obj(doc) for doc in _parse_yaml_docs(f)]
def _distribute_ssh_keys(ctx: CephadmContext, host_spec: Dict[str, str], bootstrap_hostname: str) -> int:
    # copy ssh key to hosts in host spec (used for apply spec);
    # returns 0 on success or nothing-to-do, 1 when ssh-copy-id failed
    ssh_key = CEPH_DEFAULT_PUBKEY
    if ctx.ssh_public_key:
        ssh_key = ctx.ssh_public_key.name

    if bootstrap_hostname == host_spec['hostname']:
        # the bootstrap host already has the key in place
        return 0

    # prefer an explicit address from the spec, fall back to the hostname
    addr = host_spec.get('addr', host_spec['hostname'])

    out, err, code = call(ctx, ['sudo', '-u', ctx.ssh_user, 'ssh-copy-id', '-f', '-i', ssh_key, '-o StrictHostKeyChecking=no', '%s@%s' % (ctx.ssh_user, addr)])
    if code:
        logger.info('\nCopying ssh key to host %s at address %s failed!\n' % (host_spec['hostname'], addr))
        return 1
    logger.info('Added ssh key to host %s at address %s\n' % (host_spec['hostname'], addr))
    return 0
def save_cluster_config(ctx: CephadmContext, uid: int, gid: int, fsid: str) -> None:
    """Save cluster configuration to the per fsid directory """
    def copy_file(src: str, dst: str) -> None:
        # src may be empty when the caller didn't produce that file
        if src:
            shutil.copyfile(src, dst)

    conf_dir = f'{ctx.data_dir}/{fsid}/{CEPH_CONF_DIR}'
    makedirs(conf_dir, uid, gid, DATA_DIR_MODE)
    if not os.path.exists(conf_dir):
        logger.warning(f'Cannot create cluster configuration directory {conf_dir}')
        return

    logger.info(f'Saving cluster configuration to {conf_dir} directory')
    copy_file(ctx.output_config, os.path.join(conf_dir, CEPH_CONF))
    copy_file(ctx.output_keyring, os.path.join(conf_dir, CEPH_KEYRING))
    # ctx.output_pub_ssh_key may not exist if user has provided custom ssh keys
    if os.path.exists(ctx.output_pub_ssh_key):
        copy_file(ctx.output_pub_ssh_key, os.path.join(conf_dir, CEPH_PUBKEY))
5425 def command_bootstrap(ctx
):
5426 # type: (CephadmContext) -> int
5430 if not ctx
.output_config
:
5431 ctx
.output_config
= os
.path
.join(ctx
.output_dir
, CEPH_CONF
)
5432 if not ctx
.output_keyring
:
5433 ctx
.output_keyring
= os
.path
.join(ctx
.output_dir
, CEPH_KEYRING
)
5434 if not ctx
.output_pub_ssh_key
:
5435 ctx
.output_pub_ssh_key
= os
.path
.join(ctx
.output_dir
, CEPH_PUBKEY
)
5437 if bool(ctx
.ssh_private_key
) is not bool(ctx
.ssh_public_key
):
5438 raise Error('--ssh-private-key and --ssh-public-key must be provided together or not at all.')
5441 data_dir_base
= os
.path
.join(ctx
.data_dir
, ctx
.fsid
)
5442 if os
.path
.exists(data_dir_base
):
5443 raise Error(f
"A cluster with the same fsid '{ctx.fsid}' already exists.")
5445 logger
.warning('Specifying an fsid for your cluster offers no advantages and may increase the likelihood of fsid conflicts.')
5447 # verify output files
5448 for f
in [ctx
.output_config
, ctx
.output_keyring
,
5449 ctx
.output_pub_ssh_key
]:
5450 if not ctx
.allow_overwrite
:
5451 if os
.path
.exists(f
):
5452 raise Error('%s already exists; delete or pass '
5453 '--allow-overwrite to overwrite' % f
)
5454 dirname
= os
.path
.dirname(f
)
5455 if dirname
and not os
.path
.exists(dirname
):
5456 fname
= os
.path
.basename(f
)
5457 logger
.info(f
'Creating directory {dirname} for {fname}')
5459 # use makedirs to create intermediate missing dirs
5460 os
.makedirs(dirname
, 0o755)
5461 except PermissionError
:
5462 raise Error(f
'Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.')
5464 (user_conf
, _
) = get_config_and_keyring(ctx
)
5466 if ctx
.ssh_user
!= 'root':
5467 check_ssh_connectivity(ctx
)
5469 if not ctx
.skip_prepare_host
:
5470 command_prepare_host(ctx
)
5472 logger
.info('Skip prepare_host')
5475 fsid
= ctx
.fsid
or make_fsid()
5476 if not is_fsid(fsid
):
5477 raise Error('not an fsid: %s' % fsid
)
5478 logger
.info('Cluster fsid: %s' % fsid
)
5480 hostname
= get_hostname()
5481 if '.' in hostname
and not ctx
.allow_fqdn_hostname
:
5482 raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname
, hostname
.split('.')[0]))
5483 mon_id
= ctx
.mon_id
or hostname
5484 mgr_id
= ctx
.mgr_id
or generate_service_id()
5486 lock
= FileLock(ctx
, fsid
)
5489 (addr_arg
, ipv6
, mon_network
) = prepare_mon_addresses(ctx
)
5490 cluster_network
, ipv6_cluster_network
= prepare_cluster_network(ctx
)
5492 config
= prepare_bootstrap_config(ctx
, fsid
, addr_arg
, ctx
.image
)
5494 if not ctx
.skip_pull
:
5496 _pull_image(ctx
, ctx
.image
)
5497 except UnauthorizedRegistryError
:
5498 err_str
= 'Failed to pull container image. Check that correct registry credentials are provided in bootstrap by --registry-url, --registry-username, --registry-password, or supply --registry-json with credentials'
5499 logger
.debug(f
'Pulling image for bootstrap on {hostname} failed: {err_str}')
5500 raise Error(err_str
)
5502 image_ver
= CephContainer(ctx
, ctx
.image
, 'ceph', ['--version']).run().strip()
5503 logger
.info(f
'Ceph version: {image_ver}')
5505 if not ctx
.allow_mismatched_release
:
5506 image_release
= image_ver
.split()[4]
5507 if image_release
not in \
5508 [DEFAULT_IMAGE_RELEASE
, LATEST_STABLE_RELEASE
]:
5510 f
'Container release {image_release} != cephadm release {DEFAULT_IMAGE_RELEASE};'
5511 ' please use matching version of cephadm (pass --allow-mismatched-release to continue anyway)'
5514 logger
.info('Extracting ceph user uid/gid from container image...')
5515 (uid
, gid
) = extract_uid_gid(ctx
)
5517 # create some initial keys
5518 (mon_key
, mgr_key
, admin_key
, bootstrap_keyring
, admin_keyring
) = create_initial_keys(ctx
, uid
, gid
, mgr_id
)
5520 monmap
= create_initial_monmap(ctx
, uid
, gid
, fsid
, mon_id
, addr_arg
)
5521 (mon_dir
, log_dir
) = prepare_create_mon(ctx
, uid
, gid
, fsid
, mon_id
,
5522 bootstrap_keyring
.name
, monmap
.name
)
5524 with
open(mon_dir
+ '/config', 'w') as f
:
5525 os
.fchown(f
.fileno(), uid
, gid
)
5526 os
.fchmod(f
.fileno(), 0o600)
5529 make_var_run(ctx
, fsid
, uid
, gid
)
5530 create_mon(ctx
, uid
, gid
, fsid
, mon_id
)
5532 # config to issue various CLI commands
5533 tmp_config
= write_tmp(config
, uid
, gid
)
5535 # a CLI helper to reduce our typing
5536 def cli(cmd
, extra_mounts
={}, timeout
=DEFAULT_TIMEOUT
, verbosity
=CallVerbosity
.VERBOSE_ON_FAILURE
):
5537 # type: (List[str], Dict[str, str], Optional[int], CallVerbosity) -> str
5539 log_dir
: '/var/log/ceph:z',
5540 admin_keyring
.name
: '/etc/ceph/ceph.client.admin.keyring:z',
5541 tmp_config
.name
: '/etc/ceph/ceph.conf:z',
5543 for k
, v
in extra_mounts
.items():
5545 timeout
= timeout
or ctx
.timeout
5546 return CephContainer(
5549 entrypoint
='/usr/bin/ceph',
5551 volume_mounts
=mounts
,
5552 ).run(timeout
=timeout
, verbosity
=verbosity
)
5554 wait_for_mon(ctx
, mon_id
, mon_dir
, admin_keyring
.name
, tmp_config
.name
)
5556 finish_bootstrap_config(ctx
, fsid
, config
, mon_id
, mon_dir
,
5557 mon_network
, ipv6
, cli
,
5558 cluster_network
, ipv6_cluster_network
)
5561 with
open(ctx
.output_keyring
, 'w') as f
:
5562 os
.fchmod(f
.fileno(), 0o600)
5563 f
.write('[client.admin]\n'
5564 '\tkey = ' + admin_key
+ '\n')
5565 logger
.info('Wrote keyring to %s' % ctx
.output_keyring
)
5568 create_mgr(ctx
, uid
, gid
, fsid
, mgr_id
, mgr_key
, config
, cli
)
5571 # user given config settings were already assimilated earlier
5572 # but if the given settings contained any attributes in
5573 # the mgr (e.g. mgr/cephadm/container_image_prometheus)
5574 # they don't seem to be stored if there isn't a mgr yet.
5575 # Since re-assimilating the same conf settings should be
5576 # idempotent we can just do it again here.
5577 with tempfile
.NamedTemporaryFile(buffering
=0) as tmp
:
5578 tmp
.write(user_conf
.encode('utf-8'))
5579 cli(['config', 'assimilate-conf',
5580 '-i', '/var/lib/ceph/user.conf'],
5581 {tmp
.name
: '/var/lib/ceph/user.conf:z'})
5583 # wait for mgr to restart (after enabling a module)
5584 def wait_for_mgr_restart() -> None:
5585 # first get latest mgrmap epoch from the mon. try newer 'mgr
5586 # stat' command first, then fall back to 'mgr dump' if
5589 j
= json_loads_retry(lambda: cli(['mgr', 'stat'], verbosity
=CallVerbosity
.QUIET_UNLESS_ERROR
))
5591 j
= json_loads_retry(lambda: cli(['mgr', 'dump'], verbosity
=CallVerbosity
.QUIET_UNLESS_ERROR
))
5594 # wait for mgr to have it
5595 logger
.info('Waiting for the mgr to restart...')
5597 def mgr_has_latest_epoch():
5600 out
= cli(['tell', 'mgr', 'mgr_status'])
5602 return j
['mgrmap_epoch'] >= epoch
5603 except Exception as e
:
5604 logger
.debug('tell mgr mgr_status failed: %s' % e
)
5606 is_available(ctx
, 'mgr epoch %d' % epoch
, mgr_has_latest_epoch
)
5608 enable_cephadm_mgr_module(cli
, wait_for_mgr_restart
)
5611 if not ctx
.skip_ssh
:
5612 prepare_ssh(ctx
, cli
, wait_for_mgr_restart
)
5614 if ctx
.registry_url
and ctx
.registry_username
and ctx
.registry_password
:
5615 registry_credentials
= {'url': ctx
.registry_url
, 'username': ctx
.registry_username
, 'password': ctx
.registry_password
}
5616 cli(['config-key', 'set', 'mgr/cephadm/registry_credentials', json
.dumps(registry_credentials
)])
5618 cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(ctx
.container_init
), '--force'])
5620 if not ctx
.skip_dashboard
:
5621 prepare_dashboard(ctx
, uid
, gid
, cli
, wait_for_mgr_restart
)
5623 if ctx
.output_config
== CEPH_DEFAULT_CONF
and not ctx
.skip_admin_label
and not ctx
.no_minimize_config
:
5624 logger
.info('Enabling client.admin keyring and conf on hosts with "admin" label')
5626 cli(['orch', 'client-keyring', 'set', 'client.admin', 'label:_admin'])
5627 cli(['orch', 'host', 'label', 'add', get_hostname(), '_admin'])
5629 logger
.info('Unable to set up "admin" label; assuming older version of Ceph')
5632 logger
.info('Applying %s to cluster' % ctx
.apply_spec
)
5633 # copy ssh key to hosts in spec file
5634 with
open(ctx
.apply_spec
) as f
:
5636 for spec
in parse_yaml_objs(f
):
5637 if spec
.get('service_type') == 'host':
5638 _distribute_ssh_keys(ctx
, spec
, hostname
)
5640 logger
.info('Unable to parse %s succesfully' % ctx
.apply_spec
)
5643 mounts
[pathify(ctx
.apply_spec
)] = '/tmp/spec.yml:ro'
5645 out
= cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts
=mounts
)
5648 ctx
.error_code
= -errno
.EINVAL
5649 logger
.info('\nApplying %s to cluster failed!\n' % ctx
.apply_spec
)
5651 save_cluster_config(ctx
, uid
, gid
, fsid
)
5653 # enable autotune for osd_memory_target
5654 logger
.info('Enabling autotune for osd_memory_target')
5655 cli(['config', 'set', 'osd', 'osd_memory_target_autotune', 'true'])
5657 # Notify the Dashboard to show the 'Expand cluster' page on first log in.
5658 cli(['config-key', 'set', 'mgr/dashboard/cluster/status', 'INSTALLED'])
5660 logger
.info('You can access the Ceph CLI as following in case of multi-cluster or non-default config:\n\n'
5661 '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
5665 ctx
.output_keyring
))
5667 logger
.info('Or, if you are only running a single cluster on this host:\n\n\tsudo %s shell \n' % (sys
.argv
[0]))
5669 logger
.info('Please consider enabling telemetry to help improve Ceph:\n\n'
5670 '\tceph telemetry on\n\n'
5671 'For more information see:\n\n'
5672 '\thttps://docs.ceph.com/docs/master/mgr/telemetry/\n')
5673 logger
.info('Bootstrap complete.')
5674 return ctx
.error_code
5676 ##################################
def command_registry_login(ctx: CephadmContext) -> int:
    """Log the container engine into a custom registry.

    Credentials come either from --registry-json or from the trio of
    --registry-url/--registry-username/--registry-password flags.
    """
    if ctx.registry_json:
        logger.info('Pulling custom registry login info from %s.' % ctx.registry_json)
        d = get_parm(ctx.registry_json)
        if d.get('url') and d.get('username') and d.get('password'):
            # stash credentials on the context so later steps can reuse them
            ctx.registry_url = d.get('url')
            ctx.registry_username = d.get('username')
            ctx.registry_password = d.get('password')
            registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
        else:
            raise Error('json provided for custom registry login did not include all necessary fields. '
                        'Please setup json file as\n'
                        '{\n'
                        ' "url": "REGISTRY_URL",\n'
                        ' "username": "REGISTRY_USERNAME",\n'
                        ' "password": "REGISTRY_PASSWORD"\n'
                        '}\n')
    elif ctx.registry_url and ctx.registry_username and ctx.registry_password:
        registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
    else:
        raise Error('Invalid custom registry arguments received. To login to a custom registry include '
                    '--registry-url, --registry-username and --registry-password '
                    'options or --registry-json option')
    return 0
def registry_login(ctx: CephadmContext, url: Optional[str], username: Optional[str], password: Optional[str]) -> None:
    """Run `<engine> login` against the given registry; raise Error on any failure."""
    logger.info('Logging into custom registry.')
    try:
        engine = ctx.container_engine
        cmd = [engine.path, 'login',
               '-u', username, '-p', password,
               url]
        if isinstance(engine, Podman):
            cmd.append('--authfile=/etc/ceph/podman-auth.json')
        out, _, _ = call_throws(ctx, cmd)
        if isinstance(engine, Podman):
            # the auth file holds credentials; restrict it to root only
            os.chmod('/etc/ceph/podman-auth.json', 0o600)
    except Exception:
        raise Error('Failed to login to custom registry @ %s as %s with given password' % (ctx.registry_url, ctx.registry_username))
5720 ##################################
def extract_uid_gid_monitoring(ctx, daemon_type):
    # type: (CephadmContext, str) -> Tuple[int, int]
    """Return the (uid, gid) a monitoring-stack daemon should run as,
    probing the ownership of a well-known path inside its image."""
    if daemon_type == 'node-exporter':
        # node-exporter runs as 'nobody'; no need to probe the image
        return 65534, 65534

    # path(s) inside each image whose ownership identifies the daemon user
    probe_paths = {
        'prometheus': '/etc/prometheus',
        'grafana': '/var/lib/grafana',
        'loki': '/etc/loki',
        'promtail': '/etc/promtail',
        'alertmanager': ['/etc/alertmanager', '/etc/prometheus'],
    }
    try:
        file_path = probe_paths[daemon_type]
    except KeyError:
        raise Error('{} not implemented yet'.format(daemon_type))
    uid, gid = extract_uid_gid(ctx, file_path=file_path)
    return uid, gid
def get_deployment_container(ctx: CephadmContext,
                             fsid: str, daemon_type: str, daemon_id: Union[int, str],
                             privileged: bool = False,
                             ptrace: bool = False,
                             container_args: Optional[List[str]] = None) -> 'CephContainer':
    # wrapper for get_container specifically for containers made during the `cephadm deploy`
    # command. Adds some extra things such as extra container args and custom config files
    c = get_container(ctx, fsid, daemon_type, daemon_id, privileged, ptrace, container_args)
    if 'extra_container_args' in ctx and ctx.extra_container_args:
        c.container_args.extend(ctx.extra_container_args)
    if 'config_json' in ctx and ctx.config_json:
        conf_files = get_custom_config_files(ctx.config_json)
        mandatory_keys = ['mount_path', 'content']
        for conf in conf_files['custom_config_files']:
            # entries missing either mandatory key are silently skipped
            if all(k in conf for k in mandatory_keys):
                mount_path = conf['mount_path']
                # the file content is written under the daemon's data dir;
                # bind-mount it into the container at the requested path
                file_path = os.path.join(
                    ctx.data_dir,
                    fsid,
                    'custom_config_files',
                    f'{daemon_type}.{daemon_id}',
                    os.path.basename(mount_path)
                )
                c.volume_mounts[file_path] = mount_path
    return c
5771 def command_deploy(ctx
):
5772 # type: (CephadmContext) -> None
5773 daemon_type
, daemon_id
= ctx
.name
.split('.', 1)
5775 lock
= FileLock(ctx
, ctx
.fsid
)
5778 if daemon_type
not in get_supported_daemons():
5779 raise Error('daemon type %s not recognized' % daemon_type
)
5782 unit_name
= get_unit_name(ctx
.fsid
, daemon_type
, daemon_id
)
5783 (_
, state
, _
) = check_unit(ctx
, unit_name
)
5784 if state
== 'running' or is_container_running(ctx
, CephContainer
.for_daemon(ctx
, ctx
.fsid
, daemon_type
, daemon_id
, 'bash')):
5788 logger
.info('%s daemon %s ...' % ('Reconfig', ctx
.name
))
5790 logger
.info('%s daemon %s ...' % ('Redeploy', ctx
.name
))
5792 logger
.info('%s daemon %s ...' % ('Deploy', ctx
.name
))
5794 # Migrate sysctl conf files from /usr/lib to /etc
5795 migrate_sysctl_dir(ctx
, ctx
.fsid
)
5797 # Get and check ports explicitly required to be opened
5798 daemon_ports
= [] # type: List[int]
5800 # only check port in use if not reconfig or redeploy since service
5801 # we are redeploying/reconfiguring will already be using the port
5802 if not ctx
.reconfig
and not redeploy
:
5804 daemon_ports
= list(map(int, ctx
.tcp_ports
.split()))
5806 if daemon_type
in Ceph
.daemons
:
5807 config
, keyring
= get_config_and_keyring(ctx
)
5808 uid
, gid
= extract_uid_gid(ctx
)
5809 make_var_run(ctx
, ctx
.fsid
, uid
, gid
)
5811 c
= get_deployment_container(ctx
, ctx
.fsid
, daemon_type
, daemon_id
,
5812 ptrace
=ctx
.allow_ptrace
)
5813 deploy_daemon(ctx
, ctx
.fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
5814 config
=config
, keyring
=keyring
,
5815 osd_fsid
=ctx
.osd_fsid
,
5816 reconfig
=ctx
.reconfig
,
5819 elif daemon_type
in Monitoring
.components
:
5820 # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
5822 # make sure provided config-json is sufficient
5823 config
= get_parm(ctx
.config_json
) # type: ignore
5824 required_files
= Monitoring
.components
[daemon_type
].get('config-json-files', list())
5825 required_args
= Monitoring
.components
[daemon_type
].get('config-json-args', list())
5827 if not config
or not all(c
in config
.get('files', {}).keys() for c
in required_files
): # type: ignore
5828 raise Error('{} deployment requires config-json which must '
5829 'contain file content for {}'.format(daemon_type
.capitalize(), ', '.join(required_files
)))
5831 if not config
or not all(c
in config
.keys() for c
in required_args
): # type: ignore
5832 raise Error('{} deployment requires config-json which must '
5833 'contain arg for {}'.format(daemon_type
.capitalize(), ', '.join(required_args
)))
5835 uid
, gid
= extract_uid_gid_monitoring(ctx
, daemon_type
)
5836 c
= get_deployment_container(ctx
, ctx
.fsid
, daemon_type
, daemon_id
)
5837 deploy_daemon(ctx
, ctx
.fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
5838 reconfig
=ctx
.reconfig
,
5841 elif daemon_type
== NFSGanesha
.daemon_type
:
5842 if not ctx
.reconfig
and not redeploy
and not daemon_ports
:
5843 daemon_ports
= list(NFSGanesha
.port_map
.values())
5845 config
, keyring
= get_config_and_keyring(ctx
)
5846 # TODO: extract ganesha uid/gid (997, 994) ?
5847 uid
, gid
= extract_uid_gid(ctx
)
5848 c
= get_deployment_container(ctx
, ctx
.fsid
, daemon_type
, daemon_id
)
5849 deploy_daemon(ctx
, ctx
.fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
5850 config
=config
, keyring
=keyring
,
5851 reconfig
=ctx
.reconfig
,
5854 elif daemon_type
== CephIscsi
.daemon_type
:
5855 config
, keyring
= get_config_and_keyring(ctx
)
5856 uid
, gid
= extract_uid_gid(ctx
)
5857 c
= get_deployment_container(ctx
, ctx
.fsid
, daemon_type
, daemon_id
)
5858 deploy_daemon(ctx
, ctx
.fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
5859 config
=config
, keyring
=keyring
,
5860 reconfig
=ctx
.reconfig
,
5863 elif daemon_type
== HAproxy
.daemon_type
:
5864 haproxy
= HAproxy
.init(ctx
, ctx
.fsid
, daemon_id
)
5865 uid
, gid
= haproxy
.extract_uid_gid_haproxy()
5866 c
= get_deployment_container(ctx
, ctx
.fsid
, daemon_type
, daemon_id
)
5867 deploy_daemon(ctx
, ctx
.fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
5868 reconfig
=ctx
.reconfig
,
5871 elif daemon_type
== Keepalived
.daemon_type
:
5872 keepalived
= Keepalived
.init(ctx
, ctx
.fsid
, daemon_id
)
5873 uid
, gid
= keepalived
.extract_uid_gid_keepalived()
5874 c
= get_deployment_container(ctx
, ctx
.fsid
, daemon_type
, daemon_id
)
5875 deploy_daemon(ctx
, ctx
.fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
5876 reconfig
=ctx
.reconfig
,
5879 elif daemon_type
== CustomContainer
.daemon_type
:
5880 cc
= CustomContainer
.init(ctx
, ctx
.fsid
, daemon_id
)
5881 if not ctx
.reconfig
and not redeploy
:
5882 daemon_ports
.extend(cc
.ports
)
5883 c
= get_deployment_container(ctx
, ctx
.fsid
, daemon_type
, daemon_id
,
5884 privileged
=cc
.privileged
,
5885 ptrace
=ctx
.allow_ptrace
)
5886 deploy_daemon(ctx
, ctx
.fsid
, daemon_type
, daemon_id
, c
,
5887 uid
=cc
.uid
, gid
=cc
.gid
, config
=None,
5888 keyring
=None, reconfig
=ctx
.reconfig
,
5891 elif daemon_type
== CephadmAgent
.daemon_type
:
5892 # get current user gid and uid
5895 deploy_daemon(ctx
, ctx
.fsid
, daemon_type
, daemon_id
, None,
5896 uid
, gid
, ports
=daemon_ports
)
5898 elif daemon_type
== SNMPGateway
.daemon_type
:
5899 sc
= SNMPGateway
.init(ctx
, ctx
.fsid
, daemon_id
)
5900 c
= get_deployment_container(ctx
, ctx
.fsid
, daemon_type
, daemon_id
)
5901 deploy_daemon(ctx
, ctx
.fsid
, daemon_type
, daemon_id
, c
,
5906 raise Error('daemon type {} not implemented in command_deploy function'
5907 .format(daemon_type
))
5909 ##################################
def command_run(ctx):
    # type: (CephadmContext) -> int
    """Run a daemon's container in the foreground; return its exit code."""
    daemon_type, daemon_id = ctx.name.split('.', 1)
    container = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
    return call_timeout(ctx, container.run_cmd(), ctx.timeout)
5920 ##################################
5927 def command_shell(ctx
):
5928 # type: (CephadmContext) -> int
5929 cp
= read_config(ctx
.config
)
5930 if cp
.has_option('global', 'fsid') and \
5931 cp
.get('global', 'fsid') != ctx
.fsid
:
5932 raise Error('fsid does not match ceph.conf')
5936 (daemon_type
, daemon_id
) = ctx
.name
.split('.', 1)
5938 daemon_type
= ctx
.name
5941 daemon_type
= 'osd' # get the most mounts
5944 if ctx
.fsid
and daemon_type
in Ceph
.daemons
:
5945 make_log_dir(ctx
, ctx
.fsid
)
5947 if daemon_id
and not ctx
.fsid
:
5948 raise Error('must pass --fsid to specify cluster')
5950 # in case a dedicated keyring for the specified fsid is found we us it.
5951 # Otherwise, use /etc/ceph files by default, if present. We do this instead of
5952 # making these defaults in the arg parser because we don't want an error
5953 # if they don't exist.
5955 keyring_file
= f
'{ctx.data_dir}/{ctx.fsid}/{CEPH_CONF_DIR}/{CEPH_KEYRING}'
5956 if os
.path
.exists(keyring_file
):
5957 ctx
.keyring
= keyring_file
5958 elif os
.path
.exists(CEPH_DEFAULT_KEYRING
):
5959 ctx
.keyring
= CEPH_DEFAULT_KEYRING
5961 container_args
: List
[str] = ['-i']
5962 mounts
= get_container_mounts(ctx
, ctx
.fsid
, daemon_type
, daemon_id
,
5963 no_config
=True if ctx
.config
else False)
5964 binds
= get_container_binds(ctx
, ctx
.fsid
, daemon_type
, daemon_id
)
5966 mounts
[pathify(ctx
.config
)] = '/etc/ceph/ceph.conf:z'
5968 mounts
[pathify(ctx
.keyring
)] = '/etc/ceph/ceph.keyring:z'
5970 for _mount
in ctx
.mount
:
5971 split_src_dst
= _mount
.split(':')
5972 mount
= pathify(split_src_dst
[0])
5973 filename
= os
.path
.basename(split_src_dst
[0])
5974 if len(split_src_dst
) > 1:
5975 dst
= split_src_dst
[1]
5976 if len(split_src_dst
) == 3:
5977 dst
= '{}:{}'.format(dst
, split_src_dst
[2])
5980 mounts
[mount
] = '/mnt/{}'.format(filename
)
5982 command
= ctx
.command
5988 '-e', 'PS1=%s' % CUSTOM_PS1
,
5991 home
= os
.path
.join(ctx
.data_dir
, ctx
.fsid
, 'home')
5992 if not os
.path
.exists(home
):
5993 logger
.debug('Creating root home at %s' % home
)
5994 makedirs(home
, 0, 0, 0o660)
5995 if os
.path
.exists('/etc/skel'):
5996 for f
in os
.listdir('/etc/skel'):
5997 if f
.startswith('.bash'):
5998 shutil
.copyfile(os
.path
.join('/etc/skel', f
),
5999 os
.path
.join(home
, f
))
6000 mounts
[home
] = '/root'
6002 for i
in ctx
.volume
:
6003 a
, b
= i
.split(':', 1)
6009 entrypoint
='doesnotmatter',
6011 container_args
=container_args
,
6012 volume_mounts
=mounts
,
6016 command
= c
.shell_cmd(command
)
6018 return call_timeout(ctx
, command
, ctx
.timeout
)
6020 ##################################
def command_enter(ctx):
    # type: (CephadmContext) -> int
    """Exec a command (default: an interactive shell) inside a running
    daemon's container."""
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')
    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    container_args = ['-i']  # type: List[str]
    if ctx.command:
        command = ctx.command
    else:
        # no explicit command: drop into an interactive shell with a
        # recognizable prompt
        command = ['sh']
        container_args += [
            '-t',
            '-e', 'LANG=C',
            '-e', 'PS1=%s' % CUSTOM_PS1,
        ]
    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='doesnotmatter',
        container_args=container_args,
        cname='ceph-%s-%s.%s' % (ctx.fsid, daemon_type, daemon_id),
    )
    command = c.exec_cmd(command)
    return call_timeout(ctx, command, ctx.timeout)
6049 ##################################
def command_ceph_volume(ctx):
    # type: (CephadmContext) -> None
    """Run a ceph-volume command inside a (root) container with the
    osd mounts, config and bootstrap-osd keyring in place."""
    cp = read_config(ctx.config)
    if cp.has_option('global', 'fsid') and \
       cp.get('global', 'fsid') != ctx.fsid:
        raise Error('fsid does not match ceph.conf')

    if ctx.fsid:
        make_log_dir(ctx, ctx.fsid)

        lock = FileLock(ctx, ctx.fsid)
        lock.acquire()

    (uid, gid) = (0, 0)  # ceph-volume runs as root
    mounts = get_container_mounts(ctx, ctx.fsid, 'osd', None)

    tmp_config = None
    tmp_keyring = None

    (config, keyring) = get_config_and_keyring(ctx)

    if config:
        # tmp config file
        tmp_config = write_tmp(config, uid, gid)
        mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'
    if keyring:
        # tmp keyring file
        tmp_keyring = write_tmp(keyring, uid, gid)
        mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'

    c = get_ceph_volume_container(
        ctx,
        envs=ctx.env,
        args=ctx.command,
        volume_mounts=mounts,
    )

    out, err, code = call_throws(ctx, c.run_cmd(), verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
    if not code:
        print(out)
def command_unit(ctx):
    # type: (CephadmContext) -> int
    """Pass a systemctl verb (start/stop/restart/...) through to a
    daemon's systemd unit; return systemctl's exit code."""
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    _, _, code = call(
        ctx,
        ['systemctl', ctx.command, unit_name],
        verbosity=CallVerbosity.VERBOSE,
        desc=''
    )
    return code
def command_logs(ctx):
    # type: (CephadmContext) -> None
    """Show a daemon's journalctl logs, passing any extra args through."""
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    cmd = [find_program('journalctl'), '-u', unit_name]
    if ctx.command:
        cmd.extend(ctx.command)

    # call this directly, without our wrapper, so that we get an unmolested
    # stdout with logger prefixing.
    logger.debug('Running command: %s' % ' '.join(cmd))
    subprocess.call(cmd, env=os.environ.copy())  # type: ignore
6137 ##################################
def list_networks(ctx):
    # type: (CephadmContext) -> Dict[str,Dict[str, Set[str]]]
    """Return {network: {interface: {ips}}} for IPv4 and IPv6 combined."""
    # sadly, 18.04's iproute2 4.15.0-2ubun doesn't support the -j flag,
    # so we'll need to use a regex to parse 'ip' command output.
    #
    # out, _, _ = call_throws(['ip', '-j', 'route', 'ls'])
    # j = json.loads(out)
    res = _list_ipv4_networks(ctx)
    res.update(_list_ipv6_networks(ctx))
    return res
def _list_ipv4_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]:
    """Run `ip route ls` and parse the host's IPv4 networks out of it."""
    execstr: Optional[str] = find_executable('ip')
    if not execstr:
        raise FileNotFoundError("unable to find 'ip' command")
    out, _, _ = call_throws(ctx, [execstr, 'route', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
    return _parse_ipv4_route(out)
6162 def _parse_ipv4_route(out
: str) -> Dict
[str, Dict
[str, Set
[str]]]:
6163 r
= {} # type: Dict[str, Dict[str, Set[str]]]
6164 p
= re
.compile(r
'^(\S+) (?:via \S+)? ?dev (\S+) (.*)scope link (.*)src (\S+)')
6165 for line
in out
.splitlines():
6170 if '/' not in net
: # aggregate /32 mask for single host sub-networks
6176 if iface
not in r
[net
]:
6177 r
[net
][iface
] = set()
6178 r
[net
][iface
].add(ip
)
def _list_ipv6_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]:
    """Run `ip -6 route ls` and `ip -6 addr ls` and parse the host's
    IPv6 networks out of them."""
    execstr: Optional[str] = find_executable('ip')
    if not execstr:
        raise FileNotFoundError("unable to find 'ip' command")
    routes, _, _ = call_throws(ctx, [execstr, '-6', 'route', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
    ips, _, _ = call_throws(ctx, [execstr, '-6', 'addr', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
    return _parse_ipv6_route(routes, ips)
6191 def _parse_ipv6_route(routes
: str, ips
: str) -> Dict
[str, Dict
[str, Set
[str]]]:
6192 r
= {} # type: Dict[str, Dict[str, Set[str]]]
6193 route_p
= re
.compile(r
'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$')
6194 ip_p
= re
.compile(r
'^\s+inet6 (\S+)/(.*)scope (.*)$')
6195 iface_p
= re
.compile(r
'^(\d+): (\S+): (.*)$')
6196 for line
in routes
.splitlines():
6197 m
= route_p
.findall(line
)
6198 if not m
or m
[0][0].lower() == 'default':
6201 if '/' not in net
: # aggregate /128 mask for single host sub-networks
6204 if iface
== 'lo': # skip loopback devices
6208 if iface
not in r
[net
]:
6209 r
[net
][iface
] = set()
6212 for line
in ips
.splitlines():
6213 m
= ip_p
.findall(line
)
6215 m
= iface_p
.findall(line
)
6217 # drop @... suffix, if present
6218 iface
= m
[0][1].split('@')[0]
6221 # find the network it belongs to
6222 net
= [n
for n
in r
.keys()
6223 if ipaddress
.ip_address(ip
) in ipaddress
.ip_network(n
)]
6224 if net
and iface
in r
[net
[0]]:
6226 r
[net
[0]][iface
].add(ip
)
def command_list_networks(ctx):
    # type: (CephadmContext) -> None
    """Print this host's networks/interfaces/addresses as JSON."""
    networks = list_networks(ctx)

    def serialize_sets(obj: Any) -> Any:
        # json can't encode sets; emit them as lists
        return list(obj) if isinstance(obj, set) else obj

    print(json.dumps(networks, indent=4, default=serialize_sets))
6240 ##################################
def command_ls(ctx):
    # type: (CephadmContext) -> None
    """Print all daemons found on this host as JSON."""
    daemons = list_daemons(ctx, detail=not ctx.no_detail,
                           legacy_dir=ctx.legacy_dir)
    print(json.dumps(daemons, indent=4))
def with_units_to_int(v: str) -> int:
    """Convert a size string like '123', '4KiB', '2MB' or '1.5G' to bytes.

    Accepts an optional 'B'/'iB' suffix and an optional K/M/G/T
    (case-insensitive, binary, i.e. powers of 1024) multiplier.
    """
    # strip the byte-unit suffix first ('KiB' -> 'K', '2MB' -> '2M')
    if v.endswith('iB'):
        v = v[:-2]
    elif v.endswith('B'):
        v = v[:-1]
    multipliers = {'K': 1024, 'M': 1024 ** 2, 'G': 1024 ** 3, 'T': 1024 ** 4}
    mult = multipliers.get(v[-1].upper(), 1)
    if mult != 1:
        v = v[:-1]
    return int(float(v) * mult)
6271 def list_daemons(ctx
, detail
=True, legacy_dir
=None):
6272 # type: (CephadmContext, bool, Optional[str]) -> List[Dict[str, str]]
6273 host_version
: Optional
[str] = None
6275 container_path
= ctx
.container_engine
.path
6277 data_dir
= ctx
.data_dir
6278 if legacy_dir
is not None:
6279 data_dir
= os
.path
.abspath(legacy_dir
+ data_dir
)
6281 # keep track of ceph versions we see
6282 seen_versions
= {} # type: Dict[str, Optional[str]]
6284 # keep track of image digests
6285 seen_digests
= {} # type: Dict[str, List[str]]
6287 # keep track of memory and cpu usage we've seen
6288 seen_memusage
= {} # type: Dict[str, int]
6289 seen_cpuperc
= {} # type: Dict[str, str]
6290 out
, err
, code
= call(
6292 [container_path
, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
6293 verbosity
=CallVerbosity
.QUIET
6295 seen_memusage_cid_len
, seen_memusage
= _parse_mem_usage(code
, out
)
6297 out
, err
, code
= call(
6299 [container_path
, 'stats', '--format', '{{.ID}},{{.CPUPerc}}', '--no-stream'],
6300 verbosity
=CallVerbosity
.QUIET
6302 seen_cpuperc_cid_len
, seen_cpuperc
= _parse_cpu_perc(code
, out
)
6305 if os
.path
.exists(data_dir
):
6306 for i
in os
.listdir(data_dir
):
6307 if i
in ['mon', 'osd', 'mds', 'mgr']:
6309 for j
in os
.listdir(os
.path
.join(data_dir
, i
)):
6312 (cluster
, daemon_id
) = j
.split('-', 1)
6313 fsid
= get_legacy_daemon_fsid(ctx
,
6314 cluster
, daemon_type
, daemon_id
,
6315 legacy_dir
=legacy_dir
)
6316 legacy_unit_name
= 'ceph-%s@%s' % (daemon_type
, daemon_id
)
6317 val
: Dict
[str, Any
] = {
6319 'name': '%s.%s' % (daemon_type
, daemon_id
),
6320 'fsid': fsid
if fsid
is not None else 'unknown',
6321 'systemd_unit': legacy_unit_name
,
6324 (val
['enabled'], val
['state'], _
) = check_unit(ctx
, legacy_unit_name
)
6325 if not host_version
:
6327 out
, err
, code
= call(ctx
,
6329 verbosity
=CallVerbosity
.QUIET
)
6330 if not code
and out
.startswith('ceph version '):
6331 host_version
= out
.split(' ')[2]
6334 val
['host_version'] = host_version
6337 fsid
= str(i
) # convince mypy that fsid is a str here
6338 for j
in os
.listdir(os
.path
.join(data_dir
, i
)):
6339 if '.' in j
and os
.path
.isdir(os
.path
.join(data_dir
, fsid
, j
)):
6341 (daemon_type
, daemon_id
) = j
.split('.', 1)
6342 unit_name
= get_unit_name(fsid
,
6348 'style': 'cephadm:v1',
6351 'systemd_unit': unit_name
,
6355 (val
['enabled'], val
['state'], _
) = check_unit(ctx
, unit_name
)
6359 image_digests
= None
6363 out
, err
, code
= get_container_stats(ctx
, container_path
, fsid
, daemon_type
, daemon_id
)
6365 (container_id
, image_name
, image_id
, start
,
6366 version
) = out
.strip().split(',')
6367 image_id
= normalize_container_id(image_id
)
6368 daemon_type
= name
.split('.', 1)[0]
6369 start_stamp
= try_convert_datetime(start
)
6371 # collect digests for this image id
6372 image_digests
= seen_digests
.get(image_id
)
6373 if not image_digests
:
6374 out
, err
, code
= call(
6377 container_path
, 'image', 'inspect', image_id
,
6378 '--format', '{{.RepoDigests}}',
6380 verbosity
=CallVerbosity
.QUIET
)
6382 image_digests
= list(set(map(
6383 normalize_image_digest
,
6384 out
.strip()[1:-1].split(' '))))
6385 seen_digests
[image_id
] = image_digests
6387 # identify software version inside the container (if we can)
6388 if not version
or '.' not in version
:
6389 version
= seen_versions
.get(image_id
, None)
6390 if daemon_type
== NFSGanesha
.daemon_type
:
6391 version
= NFSGanesha
.get_version(ctx
, container_id
)
6392 if daemon_type
== CephIscsi
.daemon_type
:
6393 version
= CephIscsi
.get_version(ctx
, container_id
)
6395 if daemon_type
in Ceph
.daemons
:
6396 out
, err
, code
= call(ctx
,
6397 [container_path
, 'exec', container_id
,
6399 verbosity
=CallVerbosity
.QUIET
)
6401 out
.startswith('ceph version '):
6402 version
= out
.split(' ')[2]
6403 seen_versions
[image_id
] = version
6404 elif daemon_type
== 'grafana':
6405 out
, err
, code
= call(ctx
,
6406 [container_path
, 'exec', container_id
,
6407 'grafana-server', '-v'],
6408 verbosity
=CallVerbosity
.QUIET
)
6410 out
.startswith('Version '):
6411 version
= out
.split(' ')[1]
6412 seen_versions
[image_id
] = version
6413 elif daemon_type
in ['prometheus',
6418 version
= Monitoring
.get_version(ctx
, container_id
, daemon_type
)
6419 seen_versions
[image_id
] = version
6420 elif daemon_type
== 'haproxy':
6421 out
, err
, code
= call(ctx
,
6422 [container_path
, 'exec', container_id
,
6424 verbosity
=CallVerbosity
.QUIET
)
6426 out
.startswith('HA-Proxy version '):
6427 version
= out
.split(' ')[2]
6428 seen_versions
[image_id
] = version
6429 elif daemon_type
== 'keepalived':
6430 out
, err
, code
= call(ctx
,
6431 [container_path
, 'exec', container_id
,
6432 'keepalived', '--version'],
6433 verbosity
=CallVerbosity
.QUIET
)
6435 err
.startswith('Keepalived '):
6436 version
= err
.split(' ')[1]
6437 if version
[0] == 'v':
6438 version
= version
[1:]
6439 seen_versions
[image_id
] = version
6440 elif daemon_type
== CustomContainer
.daemon_type
:
6441 # Because a custom container can contain
6442 # everything, we do not know which command
6443 # to execute to get the version.
6445 elif daemon_type
== SNMPGateway
.daemon_type
:
6446 version
= SNMPGateway
.get_version(ctx
, fsid
, daemon_id
)
6447 seen_versions
[image_id
] = version
6449 logger
.warning('version for unknown daemon type %s' % daemon_type
)
6451 vfile
= os
.path
.join(data_dir
, fsid
, j
, 'unit.image') # type: ignore
6453 with
open(vfile
, 'r') as f
:
6454 image_name
= f
.read().strip() or None
6459 mfile
= os
.path
.join(data_dir
, fsid
, j
, 'unit.meta') # type: ignore
6461 with
open(mfile
, 'r') as f
:
6462 meta
= json
.loads(f
.read())
6467 val
['container_id'] = container_id
6468 val
['container_image_name'] = image_name
6469 val
['container_image_id'] = image_id
6470 val
['container_image_digests'] = image_digests
6472 val
['memory_usage'] = seen_memusage
.get(container_id
[0:seen_memusage_cid_len
])
6473 val
['cpu_percentage'] = seen_cpuperc
.get(container_id
[0:seen_cpuperc_cid_len
])
6474 val
['version'] = version
6475 val
['started'] = start_stamp
6476 val
['created'] = get_file_timestamp(
6477 os
.path
.join(data_dir
, fsid
, j
, 'unit.created')
6479 val
['deployed'] = get_file_timestamp(
6480 os
.path
.join(data_dir
, fsid
, j
, 'unit.image'))
6481 val
['configured'] = get_file_timestamp(
6482 os
.path
.join(data_dir
, fsid
, j
, 'unit.configured'))
6488 def _parse_mem_usage(code
: int, out
: str) -> Tuple
[int, Dict
[str, int]]:
6489 # keep track of memory usage we've seen
6490 seen_memusage
= {} # type: Dict[str, int]
6491 seen_memusage_cid_len
= 0
6493 for line
in out
.splitlines():
6494 (cid
, usage
) = line
.split(',')
6495 (used
, limit
) = usage
.split(' / ')
6497 seen_memusage
[cid
] = with_units_to_int(used
)
6498 if not seen_memusage_cid_len
:
6499 seen_memusage_cid_len
= len(cid
)
6501 logger
.info('unable to parse memory usage line\n>{}'.format(line
))
6503 return seen_memusage_cid_len
, seen_memusage
6506 def _parse_cpu_perc(code
: int, out
: str) -> Tuple
[int, Dict
[str, str]]:
6508 seen_cpuperc_cid_len
= 0
6510 for line
in out
.splitlines():
6511 (cid
, cpuperc
) = line
.split(',')
6513 seen_cpuperc
[cid
] = cpuperc
6514 if not seen_cpuperc_cid_len
:
6515 seen_cpuperc_cid_len
= len(cid
)
6517 logger
.info('unable to parse cpu percentage line\n>{}'.format(line
))
6519 return seen_cpuperc_cid_len
, seen_cpuperc
def get_daemon_description(ctx, fsid, name, detail=False, legacy_dir=None):
    # type: (CephadmContext, str, str, bool, Optional[str]) -> Dict[str, str]
    """Return the list_daemons() entry matching this fsid and daemon name.

    Raises Error when no matching daemon exists on this host.
    """
    for entry in list_daemons(ctx, detail=detail, legacy_dir=legacy_dir):
        if entry['fsid'] == fsid and entry['name'] == name:
            return entry
    raise Error('Daemon not found: {}. See `cephadm ls`'.format(name))
def get_container_stats(ctx: CephadmContext, container_path: str, fsid: str, daemon_type: str, daemon_id: str) -> Tuple[str, str, int]:
    """Inspect the daemon's container, trying current then legacy names.

    Returns the raw (out, err, returncode) of `<engine> inspect` using a
    comma-separated format of: id, image name, image id, created time, and
    the 'io.ceph.version' label.
    """
    container = CephContainer.for_daemon(ctx, fsid, daemon_type, daemon_id, 'bash')
    out, err, code = '', '', -1
    # prefer the current container name; fall back to the legacy one
    for name in (container.cname, container.old_cname):
        cmd = [
            container_path, 'inspect',
            '--format', '{{.Id}},{{.Config.Image}},{{.Image}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}',
            name
        ]
        out, err, code = call(ctx, cmd, verbosity=CallVerbosity.QUIET)
        if not code:
            break
    return out, err, code
6548 ##################################
def command_adopt(ctx):
    # type: (CephadmContext) -> None
    """Entry point for `cephadm adopt`: take over a legacy-deployed daemon."""

    if not ctx.skip_pull:
        try:
            _pull_image(ctx, ctx.image)
        except UnauthorizedRegistryError:
            err_str = 'Failed to pull container image. Host may not be logged into container registry. Try `cephadm registry-login --registry-url <url> --registry-username <username> --registry-password <password>` or supply login info via a json file with `cephadm registry-login --registry-json <file>`'
            logger.debug(f'Pulling image for `command_adopt` failed: {err_str}')
            raise Error(err_str)

    (daemon_type, daemon_id) = ctx.name.split('.', 1)

    # only the 'legacy' deployment style can be adopted
    if ctx.style != 'legacy':
        raise Error('adoption of style %s not implemented' % ctx.style)

    # figure out the cluster fsid and take the per-fsid lock
    fsid = get_legacy_daemon_fsid(ctx,
                                  ctx.cluster,
                                  daemon_type,
                                  daemon_id,
                                  legacy_dir=ctx.legacy_dir)
    if not fsid:
        raise Error('could not detect legacy fsid; set fsid in ceph.conf')
    lock = FileLock(ctx, fsid)
    lock.acquire()

    # dispatch to the daemon-type-specific adoption routine
    if daemon_type in Ceph.daemons:
        command_adopt_ceph(ctx, daemon_type, daemon_id, fsid)
    elif daemon_type == 'prometheus':
        command_adopt_prometheus(ctx, daemon_id, fsid)
    elif daemon_type == 'grafana':
        command_adopt_grafana(ctx, daemon_id, fsid)
    elif daemon_type == 'node-exporter':
        raise Error('adoption of node-exporter not implemented')
    elif daemon_type == 'alertmanager':
        command_adopt_alertmanager(ctx, daemon_id, fsid)
    else:
        raise Error('daemon type %s not recognized' % daemon_type)
class AdoptOsd(object):
    """Probe a legacy OSD's data dir to discover its fsid and objectstore type."""

    def __init__(self, ctx, osd_data_dir, osd_id):
        # type: (CephadmContext, str, str) -> None
        self.ctx = ctx
        self.osd_data_dir = osd_data_dir
        self.osd_id = osd_id

    def check_online_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Read fsid/type straight out of a mounted (online) OSD data dir."""
        osd_fsid, osd_type = None, None

        path = os.path.join(self.osd_data_dir, 'fsid')
        try:
            with open(path, 'r') as f:
                osd_fsid = f.read().strip()
            logger.info('Found online OSD at %s' % path)
        except IOError:
            logger.info('Unable to read OSD fsid from %s' % path)
        if os.path.exists(os.path.join(self.osd_data_dir, 'type')):
            with open(os.path.join(self.osd_data_dir, 'type')) as f:
                osd_type = f.read().strip()
        else:
            logger.info('"type" file missing for OSD data dir')

        return osd_fsid, osd_type

    def check_offline_lvm_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Ask `ceph-volume lvm list` about an OSD that is not running."""
        osd_fsid, osd_type = None, None

        c = get_ceph_volume_container(
            self.ctx,
            args=['lvm', 'list', '--format=json'],
        )
        out, err, code = call_throws(self.ctx, c.run_cmd())
        if not code:
            try:
                js = json.loads(out)
                if self.osd_id in js:
                    logger.info('Found offline LVM OSD {}'.format(self.osd_id))
                    osd_fsid = js[self.osd_id][0]['tags']['ceph.osd_fsid']
                    for device in js[self.osd_id]:
                        # a 'block' device implies bluestore; 'data' implies filestore
                        if device['tags']['ceph.type'] == 'block':
                            osd_type = 'bluestore'
                            break
                        if device['tags']['ceph.type'] == 'data':
                            osd_type = 'filestore'
                            break
            except ValueError as e:
                logger.info('Invalid JSON in ceph-volume lvm list: {}'.format(e))

        return osd_fsid, osd_type

    def check_offline_simple_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Look for a ceph-volume 'simple' scan json describing this OSD."""
        osd_fsid, osd_type = None, None

        osd_file = glob('/etc/ceph/osd/{}-[a-f0-9-]*.json'.format(self.osd_id))
        if len(osd_file) == 1:
            with open(osd_file[0], 'r') as f:
                try:
                    js = json.loads(f.read())
                    logger.info('Found offline simple OSD {}'.format(self.osd_id))
                    osd_fsid = js['fsid']
                    osd_type = js['type']
                    if osd_type != 'filestore':
                        # need this to be mounted for the adopt to work, as it
                        # needs to move files from this directory
                        call_throws(self.ctx, ['mount', js['data']['path'], self.osd_data_dir])
                except ValueError as e:
                    logger.info('Invalid JSON in {}: {}'.format(osd_file, e))

        return osd_fsid, osd_type

    def change_cluster_name(self) -> None:
        """Best-effort: retag this OSD's LVs from a custom cluster name to 'ceph'."""
        logger.info('Attempting to convert osd cluster name to ceph . . .')
        c = get_ceph_volume_container(
            self.ctx,
            args=['lvm', 'list', '{}'.format(self.osd_id), '--format=json'],
        )
        try:
            out, err, code = call_throws(self.ctx, c.run_cmd())
            if code:
                raise Exception(f'Failed to get list of LVs: {err}\nceph-volume failed with rc {code}')
            js = json.loads(out)
            if not js:
                raise RuntimeError(f'Failed to find osd.{self.osd_id}')
            device: Optional[Dict[Any, Any]] = None
            for d in js[self.osd_id]:
                if d['type'] == 'block':
                    device = d
                    break
            if not device:
                raise RuntimeError(f'Failed to find block device for osd.{self.osd_id}')
            vg = device['vg_name']
            out, err, code = call_throws(self.ctx, ['lvchange', '--deltag', f'ceph.cluster_name={self.ctx.cluster}', vg])
            if code:
                raise RuntimeError(f"Can't delete tag ceph.cluster_name={self.ctx.cluster} on osd.{self.osd_id}.\nlvchange failed with rc {code}")
            out, err, code = call_throws(self.ctx, ['lvchange', '--addtag', 'ceph.cluster_name=ceph', vg])
            if code:
                raise RuntimeError(f"Can't add tag ceph.cluster_name=ceph on osd.{self.osd_id}.\nlvchange failed with rc {code}")
            logger.info('Successfully converted osd cluster name')
        except (Exception, RuntimeError) as e:
            # conversion is best-effort; adoption continues either way
            logger.info(f'Failed to convert osd cluster name: {e}')
def command_adopt_ceph(ctx, daemon_type, daemon_id, fsid):
    # type: (CephadmContext, str, str, str) -> None
    """Adopt a legacy (non-containerized) ceph daemon into cephadm management."""

    (uid, gid) = extract_uid_gid(ctx)

    data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
                    (daemon_type, ctx.cluster, daemon_id))
    data_dir_src = os.path.abspath(ctx.legacy_dir + data_dir_src)

    if not os.path.exists(data_dir_src):
        raise Error("{}.{} data directory '{}' does not exist. "
                    'Incorrect ID specified, or daemon already adopted?'.format(
                        daemon_type, daemon_id, data_dir_src))

    osd_fsid = None
    if daemon_type == 'osd':
        # probe online, then offline-lvm, then offline-simple layouts
        adopt_osd = AdoptOsd(ctx, data_dir_src, daemon_id)
        osd_fsid, osd_type = adopt_osd.check_online_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
        if not osd_fsid:
            raise Error('Unable to find OSD {}'.format(daemon_id))
        elif ctx.cluster != 'ceph':
            adopt_osd.change_cluster_name()
        logger.info('objectstore_type is %s' % osd_type)
        if osd_type == 'filestore':
            raise Error('FileStore is not supported by cephadm')

    # NOTE: implicit assumption here that the units correspond to the
    # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph

    unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
    (enabled, state, _) = check_unit(ctx, unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'disable', unit_name])

    # data
    logger.info('Moving data...')
    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)
    move_files(ctx, glob(os.path.join(data_dir_src, '*')),
               data_dir_dst,
               uid=uid, gid=gid)
    logger.debug('Remove dir `%s`' % (data_dir_src))
    if os.path.ismount(data_dir_src):
        call_throws(ctx, ['umount', data_dir_src])
    os.rmdir(data_dir_src)

    logger.info('Chowning content...')
    call_throws(ctx, ['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])

    if daemon_type == 'mon':
        # rename *.ldb -> *.sst, in case they are coming from ubuntu
        store = os.path.join(data_dir_dst, 'store.db')
        num_renamed = 0
        if os.path.exists(store):
            for oldf in os.listdir(store):
                if oldf.endswith('.ldb'):
                    newf = oldf.replace('.ldb', '.sst')
                    oldp = os.path.join(store, oldf)
                    newp = os.path.join(store, newf)
                    logger.debug('Renaming %s -> %s' % (oldp, newp))
                    os.rename(oldp, newp)
                    num_renamed += 1
        if num_renamed:
            logger.info('Renamed %d leveldb *.ldb files to *.sst',
                        num_renamed)
    if daemon_type == 'osd':
        for n in ['block', 'block.db', 'block.wal']:
            p = os.path.join(data_dir_dst, n)
            if os.path.exists(p):
                logger.info('Chowning %s...' % p)
                os.chown(p, uid, gid)
        # disable the ceph-volume 'simple' mode files on the host
        simple_fn = os.path.join('/etc/ceph/osd',
                                 '%s-%s.json' % (daemon_id, osd_fsid))
        if os.path.exists(simple_fn):
            new_fn = simple_fn + '.adopted-by-cephadm'
            logger.info('Renaming %s -> %s', simple_fn, new_fn)
            os.rename(simple_fn, new_fn)
            logger.info('Disabling host unit ceph-volume@ simple unit...')
            call(ctx, ['systemctl', 'disable',
                       'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
        else:
            # assume this is an 'lvm' c-v for now, but don't error
            # out if it's not
            logger.info('Disabling host unit ceph-volume@ lvm unit...')
            call(ctx, ['systemctl', 'disable',
                       'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])

    # config
    config_src = '/etc/ceph/%s.conf' % (ctx.cluster)
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'config')
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # logs
    logger.info('Moving logs...')
    log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
                   (ctx.cluster, daemon_type, daemon_id))
    log_dir_src = os.path.abspath(ctx.legacy_dir + log_dir_src)
    log_dir_dst = make_log_dir(ctx, fsid, uid=uid, gid=gid)
    move_files(ctx, glob(log_dir_src),
               log_dir_dst,
               uid=uid, gid=gid)

    logger.info('Creating new units...')
    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True,  # unconditionally enable the new unit
                        start=(state == 'running' or ctx.force_start),
                        osd_fsid=osd_fsid)
    update_firewalld(ctx, daemon_type)
def command_adopt_prometheus(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy prometheus deployment: stop it, then copy config + data."""
    daemon_type = 'prometheus'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)

    _stop_and_disable(ctx, 'prometheus')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = '/etc/prometheus/prometheus.yml'
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/prometheus')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # data
    data_src = '/var/lib/prometheus/metrics/'
    data_src = os.path.abspath(ctx.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)
def command_adopt_grafana(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy grafana deployment: config, provisioning, certs, data."""

    daemon_type = 'grafana'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)

    _stop_and_disable(ctx, 'grafana-server')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = '/etc/grafana/grafana.ini'
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/grafana')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    prov_src = '/etc/grafana/provisioning/'
    prov_src = os.path.abspath(ctx.legacy_dir + prov_src)
    prov_dst = os.path.join(data_dir_dst, 'etc/grafana')
    copy_tree(ctx, [prov_src], prov_dst, uid=uid, gid=gid)

    # ssl: only migrated when both cert and key exist on the host
    cert = '/etc/grafana/grafana.crt'
    key = '/etc/grafana/grafana.key'
    if os.path.exists(cert) and os.path.exists(key):
        cert_src = '/etc/grafana/grafana.crt'
        cert_src = os.path.abspath(ctx.legacy_dir + cert_src)
        makedirs(os.path.join(data_dir_dst, 'etc/grafana/certs'), uid, gid, 0o755)
        cert_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_file')
        copy_files(ctx, [cert_src], cert_dst, uid=uid, gid=gid)

        key_src = '/etc/grafana/grafana.key'
        key_src = os.path.abspath(ctx.legacy_dir + key_src)
        key_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_key')
        copy_files(ctx, [key_src], key_dst, uid=uid, gid=gid)

        _adjust_grafana_ini(os.path.join(config_dst, 'grafana.ini'))
    else:
        logger.debug('Skipping ssl, missing cert {} or key {}'.format(cert, key))

    # data - possible custom dashboards/plugins
    data_src = '/var/lib/grafana/'
    data_src = os.path.abspath(ctx.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)
def command_adopt_alertmanager(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy alertmanager deployment: stop it, copy config + data."""

    daemon_type = 'alertmanager'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)

    _stop_and_disable(ctx, 'prometheus-alertmanager')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = '/etc/prometheus/alertmanager.yml'
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/alertmanager')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # data
    data_src = '/var/lib/prometheus/alertmanager/'
    data_src = os.path.abspath(ctx.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'etc/alertmanager/data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)
6937 def _adjust_grafana_ini(filename
):
6938 # type: (str) -> None
6940 # Update cert_file, cert_key pathnames in server section
6941 # ConfigParser does not preserve comments
6943 with
open(filename
, 'r') as grafana_ini
:
6944 lines
= grafana_ini
.readlines()
6945 with
open('{}.new'.format(filename
), 'w') as grafana_ini
:
6946 server_section
= False
6948 if line
.startswith('['):
6949 server_section
= False
6950 if line
.startswith('[server]'):
6951 server_section
= True
6953 line
= re
.sub(r
'^cert_file.*',
6954 'cert_file = /etc/grafana/certs/cert_file', line
)
6955 line
= re
.sub(r
'^cert_key.*',
6956 'cert_key = /etc/grafana/certs/cert_key', line
)
6957 grafana_ini
.write(line
)
6958 os
.rename('{}.new'.format(filename
), filename
)
6959 except OSError as err
:
6960 raise Error('Cannot update {}: {}'.format(filename
, err
))
def _stop_and_disable(ctx, unit_name):
    # type: (CephadmContext, str) -> None
    """Stop (if running) and disable (if enabled) a legacy systemd unit."""
    (enabled, state, _) = check_unit(ctx, unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'disable', unit_name])
6974 ##################################
def command_rm_daemon(ctx):
    # type: (CephadmContext) -> None
    """Remove one daemon: stop/disable its unit, then delete or back up its data."""
    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    if daemon_type in ['mon', 'osd'] and not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    call(ctx, ['systemctl', 'stop', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'reset-failed', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'disable', unit_name],
         verbosity=CallVerbosity.DEBUG)
    data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_id)
    if daemon_type in ['mon', 'osd', 'prometheus'] and \
       not ctx.force_delete_data:
        # rename it out of the way -- do not delete
        backup_dir = os.path.join(ctx.data_dir, ctx.fsid, 'removed')
        if not os.path.exists(backup_dir):
            makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
        dirname = '%s.%s_%s' % (daemon_type, daemon_id,
                                datetime.datetime.utcnow().strftime(DATEFMT))
        os.rename(data_dir,
                  os.path.join(backup_dir, dirname))
    else:
        call_throws(ctx, ['rm', '-rf', data_dir])

    if 'tcp_ports' in ctx and ctx.tcp_ports is not None:
        ports: List[int] = [int(p) for p in ctx.tcp_ports.split()]
        try:
            fw = Firewalld(ctx)
            fw.close_ports(ports)
        except RuntimeError as e:
            # in case we cannot close the ports we will remove
            # the daemon but keep them open.
            logger.warning(f' Error when trying to close ports: {e}')
7021 ##################################
def _zap(ctx: CephadmContext, what: str) -> None:
    """Destroy the given device/LV via `ceph-volume lvm zap --destroy`."""
    mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
    c = get_ceph_volume_container(ctx,
                                  args=['lvm', 'zap', '--destroy', what],
                                  volume_mounts=mounts,
                                  envs=ctx.env)
    logger.info(f'Zapping {what}...')
    out, err, code = call_throws(ctx, c.run_cmd())
def _zap_osds(ctx: CephadmContext) -> None:
    """Zap every device whose LVs all belong to this cluster's fsid."""
    # assume fsid lock already held

    # list all devices + LVs via ceph-volume inventory
    mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
    c = get_ceph_volume_container(ctx,
                                  args=['inventory', '--format', 'json'],
                                  volume_mounts=mounts,
                                  envs=ctx.env)
    out, err, code = call_throws(ctx, c.run_cmd())
    if code:
        raise Error('failed to list osd inventory')
    try:
        ls = json.loads(out)
    except ValueError as e:
        raise Error(f'Invalid JSON in ceph-volume inventory: {e}')

    for i in ls:
        matches = [lv.get('cluster_fsid') == ctx.fsid and i.get('ceph_device') for lv in i.get('lvs', [])]
        if any(matches) and all(matches):
            # every LV on the device belongs to us: zap the whole device
            _zap(ctx, i.get('path'))
        elif any(matches):
            lv_names = [lv['name'] for lv in i.get('lvs', [])]
            # TODO: we need to map the lv_names back to device paths (the vg
            # id isn't part of the output here!)
            logger.warning(f'Not zapping LVs (not implemented): {lv_names}')
def command_zap_osds(ctx: CephadmContext) -> None:
    """Entry point for `cephadm zap-osds`; requires --force, holds the fsid lock."""
    if not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    _zap_osds(ctx)
7073 ##################################
def get_ceph_cluster_count(ctx: CephadmContext) -> int:
    """Count fsid-named cluster directories under the top-level data dir."""
    entries = os.listdir(ctx.data_dir)
    return len([name for name in entries if is_fsid(name)])
def command_rm_cluster(ctx):
    # type: (CephadmContext) -> None
    """Remove every trace of the cluster identified by ctx.fsid from this host."""
    if not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    def disable_systemd_service(unit_name: str) -> None:
        # stop + reset-failed + disable; failures are tolerated (DEBUG verbosity)
        call(ctx, ['systemctl', 'stop', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'reset-failed', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'disable', unit_name],
             verbosity=CallVerbosity.DEBUG)

    # stop + disable individual daemon units
    for d in list_daemons(ctx, detail=False):
        if d['fsid'] != ctx.fsid:
            continue
        if d['style'] != 'cephadm:v1':
            continue
        disable_systemd_service(get_unit_name(ctx.fsid, d['name']))

    # cluster units
    for unit_name in ['ceph-%s.target' % ctx.fsid]:
        disable_systemd_service(unit_name)

    slice_name = 'system-ceph\\x2d{}.slice'.format(ctx.fsid.replace('-', '\\x2d'))
    call(ctx, ['systemctl', 'stop', slice_name],
         verbosity=CallVerbosity.DEBUG)

    # rm units
    call_throws(ctx, ['rm', '-f', ctx.unit_dir
                      + '/ceph-%s@.service' % ctx.fsid])
    call_throws(ctx, ['rm', '-f', ctx.unit_dir
                      + '/ceph-%s.target' % ctx.fsid])
    call_throws(ctx, ['rm', '-rf',
                      ctx.unit_dir + '/ceph-%s.target.wants' % ctx.fsid])
    # rm data
    call_throws(ctx, ['rm', '-rf', ctx.data_dir + '/' + ctx.fsid])

    if not ctx.keep_logs:
        # rm logs
        call_throws(ctx, ['rm', '-rf', ctx.log_dir + '/' + ctx.fsid])
        call_throws(ctx, ['rm', '-rf', ctx.log_dir
                          + '/*.wants/ceph-%s@*' % ctx.fsid])

    # rm logrotate config
    call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/ceph-%s' % ctx.fsid])

    # if last cluster on host remove shared files
    if get_ceph_cluster_count(ctx) == 0:
        disable_systemd_service('ceph.target')

        # rm shared ceph target files
        call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/multi-user.target.wants/ceph.target'])
        call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/ceph.target'])

        # rm cephadm logrotate config
        call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/cephadm'])

        if not ctx.keep_logs:
            # remove all cephadm logs
            for fname in glob(f'{ctx.log_dir}/cephadm.log*'):
                os.remove(fname)

    # rm sysctl settings
    sysctl_dirs: List[Path] = [Path(ctx.sysctl_dir), Path('/usr/lib/sysctl.d')]

    for sysctl_dir in sysctl_dirs:
        for p in sysctl_dir.glob(f'90-ceph-{ctx.fsid}-*.conf'):
            p.unlink()

    # cleanup remaining ceph directories
    ceph_dirs = [f'/run/ceph/{ctx.fsid}', f'/tmp/var/lib/ceph/{ctx.fsid}', f'/var/run/ceph/{ctx.fsid}']
    for dd in ceph_dirs:
        shutil.rmtree(dd, ignore_errors=True)

    # clean up config, keyring, and pub key files
    files = [CEPH_DEFAULT_CONF, CEPH_DEFAULT_PUBKEY, CEPH_DEFAULT_KEYRING]
    if os.path.exists(files[0]):
        valid_fsid = False
        with open(files[0]) as f:
            # only remove /etc/ceph files when they refer to this cluster
            if ctx.fsid in f.read():
                valid_fsid = True
        if valid_fsid:
            # rm configuration files on /etc/ceph
            for n in range(0, len(files)):
                if os.path.exists(files[n]):
                    os.remove(files[n])
7177 ##################################
def check_time_sync(ctx, enabler=None):
    # type: (CephadmContext, Optional[Packager]) -> bool
    """Return True when any known time-sync systemd unit is active on the host."""
    units = [
        'chrony.service',  # 18.04 (at least)
        'chronyd.service',  # el / opensuse
        'systemd-timesyncd.service',
        'ntpd.service',  # el7 (at least)
        'ntp.service',  # 18.04 (at least)
        'ntpsec.service',  # 20.04 (at least) / buster
        'openntpd.service',  # ubuntu / debian
    ]
    if not check_units(ctx, units, enabler):
        logger.warning('No time sync service is running; checked for %s' % units)
        return False
    return True
def command_check_host(ctx: CephadmContext) -> None:
    """Verify host prerequisites (container engine, binaries, time sync, hostname).

    Collects all problems and raises a single Error listing them; logs
    'Host looks OK' when nothing is wrong.
    """
    errors = []
    commands = ['systemctl', 'lvcreate']

    try:
        engine = check_container_engine(ctx)
        logger.info(f'{engine} is present')
    except Error as e:
        errors.append(str(e))

    for command in commands:
        try:
            find_program(command)
            logger.info('%s is present' % command)
        except ValueError:
            errors.append('%s binary does not appear to be installed' % command)

    # check for configured+running chronyd or ntp
    if not check_time_sync(ctx):
        errors.append('No time synchronization is active')

    if 'expect_hostname' in ctx and ctx.expect_hostname:
        if get_hostname().lower() != ctx.expect_hostname.lower():
            errors.append('hostname "%s" does not match expected hostname "%s"' % (
                get_hostname(), ctx.expect_hostname))
        else:
            logger.info('Hostname "%s" matches what is expected.',
                        ctx.expect_hostname)

    if errors:
        raise Error('\nERROR: '.join(errors))

    logger.info('Host looks OK')
7231 ##################################
def get_ssh_vars(ssh_user: str) -> Tuple[int, int, str]:
    """Resolve (uid, gid, ~/.ssh dir) for the given ssh user via passwd.

    Raises Error when the user does not exist.
    """
    try:
        s_pwd = pwd.getpwnam(ssh_user)
    except KeyError:
        raise Error('Cannot find uid/gid for ssh-user: %s' % (ssh_user))

    ssh_uid = s_pwd.pw_uid
    ssh_gid = s_pwd.pw_gid
    ssh_dir = os.path.join(s_pwd.pw_dir, '.ssh')
    return ssh_uid, ssh_gid, ssh_dir
def authorize_ssh_key(ssh_pub_key: str, ssh_user: str) -> bool:
    """Authorize the public key for the provided ssh user.

    Returns True when the key was appended, False when it was already present.
    Raises Error on an empty key.
    """

    def key_in_file(path: str, key: str) -> bool:
        # True if any line of `path` equals the stripped key
        if not os.path.exists(path):
            return False
        with open(path) as f:
            for line in f.readlines():
                if line.strip() == key.strip():
                    return True
        return False

    logger.info(f'Adding key to {ssh_user}@localhost authorized_keys...')
    if ssh_pub_key is None or ssh_pub_key.isspace():
        raise Error('Trying to authorize an empty ssh key')

    ssh_pub_key = ssh_pub_key.strip()
    ssh_uid, ssh_gid, ssh_dir = get_ssh_vars(ssh_user)
    if not os.path.exists(ssh_dir):
        makedirs(ssh_dir, ssh_uid, ssh_gid, 0o700)

    auth_keys_file = '%s/authorized_keys' % ssh_dir
    if key_in_file(auth_keys_file, ssh_pub_key):
        logger.info(f'key already in {ssh_user}@localhost authorized_keys...')
        return False

    # if the file exists and doesn't end in a newline, we must add one
    # before appending our key
    add_newline = False
    if os.path.exists(auth_keys_file):
        with open(auth_keys_file, 'r') as f:
            f.seek(0, os.SEEK_END)
            if f.tell() > 0:
                f.seek(f.tell() - 1, os.SEEK_SET)  # go to last char
                if f.read() != '\n':
                    add_newline = True

    with open(auth_keys_file, 'a') as f:
        os.fchown(f.fileno(), ssh_uid, ssh_gid)  # just in case we created it
        os.fchmod(f.fileno(), 0o600)  # just in case we created it
        if add_newline:
            f.write('\n')
        f.write(ssh_pub_key + '\n')

    return True
def revoke_ssh_key(key: str, ssh_user: str) -> None:
    """Revoke the public key authorization for the ssh user.

    Rewrites the user's authorized_keys without any line matching `key`;
    logs a warning when the key cannot be found.
    """
    ssh_uid, ssh_gid, ssh_dir = get_ssh_vars(ssh_user)
    auth_keys_file = '%s/authorized_keys' % ssh_dir
    deleted = False
    if os.path.exists(auth_keys_file):
        with open(auth_keys_file, 'r') as f:
            lines = f.readlines()
        # mkstemp returns an *open* fd; close it immediately (the original
        # discarded it, leaking a file descriptor) and reopen via the path
        fd, filename = tempfile.mkstemp()
        os.close(fd)
        with open(filename, 'w') as f:
            os.fchown(f.fileno(), ssh_uid, ssh_gid)
            os.fchmod(f.fileno(), 0o600)  # secure access to the keys file
            for line in lines:
                if line.strip() == key.strip():
                    deleted = True
                else:
                    f.write(line)
        if deleted:
            shutil.move(filename, auth_keys_file)
        else:
            # don't leave the unused temp copy behind
            os.remove(filename)
    if not deleted:
        logger.warning('Cannot find the ssh key to be deleted')
def check_ssh_connectivity(ctx: CephadmContext) -> None:
    """Verify we can ssh to ourselves as ctx.ssh_user (with passwordless sudo).

    Uses the user-provided key pair when available, otherwise generates a
    throw-away RSA key pair just for this check. The key is only revoked
    afterwards if we added it ourselves.

    :raises Error: when the ssh loopback connection fails
    """

    def cmd_is_available(cmd: str) -> bool:
        if shutil.which(cmd) is None:
            logger.warning(f'Command not found: {cmd}')
            return False
        return True

    if not cmd_is_available('ssh') or not cmd_is_available('ssh-keygen'):
        logger.warning('Cannot check ssh connectivity. Skipping...')
        return

    logger.info('Verifying ssh connectivity ...')
    if ctx.ssh_private_key and ctx.ssh_public_key:
        # let's use the keys provided by the user
        ssh_priv_key_path = pathify(ctx.ssh_private_key.name)
        ssh_pub_key_path = pathify(ctx.ssh_public_key.name)
    else:
        # no custom keys, let's generate some random keys just for this check
        ssh_priv_key_path = f'/tmp/ssh_key_{uuid.uuid1()}'
        ssh_pub_key_path = f'{ssh_priv_key_path}.pub'
        ssh_key_gen_cmd = ['ssh-keygen', '-q', '-t', 'rsa', '-N', '', '-C', '', '-f', ssh_priv_key_path]
        _, _, code = call(ctx, ssh_key_gen_cmd)
        if code != 0:
            logger.warning('Cannot generate keys to check ssh connectivity.')
            return

    with open(ssh_pub_key_path, 'r') as f:
        key = f.read().strip()
    new_key = authorize_ssh_key(key, ctx.ssh_user)
    ssh_cfg_file_arg = ['-F', pathify(ctx.ssh_config.name)] if ctx.ssh_config else []
    _, _, code = call(ctx, ['ssh', '-o StrictHostKeyChecking=no',
                            *ssh_cfg_file_arg, '-i', ssh_priv_key_path,
                            '-o PasswordAuthentication=no',
                            f'{ctx.ssh_user}@{get_hostname()}',
                            'sudo echo'])

    # we only remove the key if it's a new one. In case the user has provided
    # some already existing key then we don't alter authorized_keys file
    if new_key:
        revoke_ssh_key(key, ctx.ssh_user)

    pub_key_msg = '- The public key file configured by --ssh-public-key is valid\n' if ctx.ssh_public_key else ''
    prv_key_msg = '- The private key file configured by --ssh-private-key is valid\n' if ctx.ssh_private_key else ''
    ssh_cfg_msg = '- The ssh configuration file configured by --ssh-config is valid\n' if ctx.ssh_config else ''
    err_msg = f"""
** Please verify your user's ssh configuration and make sure:
- User {ctx.ssh_user} must have passwordless sudo access
{pub_key_msg}{prv_key_msg}{ssh_cfg_msg}
"""
    if code != 0:
        raise Error(err_msg)
def command_prepare_host(ctx: CephadmContext) -> None:
    """Prepare the host for cephadm: container engine, lvm2, time sync,
    and (optionally) the expected hostname; then re-run the host checks."""
    logger.info('Verifying podman|docker is present...')
    pkg = None
    try:
        check_container_engine(ctx)
    except Error as e:
        logger.warning(str(e))
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install_podman()

    logger.info('Verifying lvm2 is present...')
    if not find_executable('lvcreate'):
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install(['lvm2'])

    logger.info('Verifying time synchronization is in place...')
    if not check_time_sync(ctx):
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install(['chrony'])
        # check again, and this time try to enable
        # the service
        check_time_sync(ctx, enabler=pkg)

    if 'expect_hostname' in ctx and ctx.expect_hostname and ctx.expect_hostname != get_hostname():
        logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), ctx.expect_hostname))
        call_throws(ctx, ['hostname', ctx.expect_hostname])
        with open('/etc/hostname', 'w') as f:
            f.write(ctx.expect_hostname + '\n')

    logger.info('Repeating the final host check...')
    command_check_host(ctx)
7405 ##################################
class CustomValidation(argparse.Action):
    """argparse Action that validates daemon names of the form <type>.<id>."""

    def _check_name(self, values: str) -> None:
        try:
            (daemon_type, daemon_id) = values.split('.', 1)
        except ValueError:
            raise argparse.ArgumentError(
                self,
                'must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com')

        daemons = get_supported_daemons()
        if daemon_type not in daemons:
            raise argparse.ArgumentError(
                self,
                'name must declare the type of daemon e.g. '
                '{}'.format(', '.join(daemons)))

    def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace, values: Union[str, Sequence[Any], None],
                 option_string: Optional[str] = None) -> None:
        assert isinstance(values, str)
        # currently only the --name argument uses this action
        if self.dest == 'name':
            self._check_name(values)
            setattr(namespace, self.dest, values)
7430 ##################################
def get_distro():
    # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
    """Parse /etc/os-release and return (distro id, version id, codename),
    all lower-cased; any field missing from the file is returned as None."""
    distro = None
    distro_version = None
    distro_codename = None
    with open('/etc/os-release', 'r') as f:
        for line in f.readlines():
            line = line.strip()
            # skip blank/comment lines and anything that is not KEY=value
            if '=' not in line or line.startswith('#'):
                continue
            (var, val) = line.split('=', 1)
            if val[0] == '"' and val[-1] == '"':
                val = val[1:-1]
            if var == 'ID':
                distro = val.lower()
            elif var == 'VERSION_ID':
                distro_version = val.lower()
            elif var == 'VERSION_CODENAME':
                distro_codename = val.lower()
    return distro, distro_version, distro_codename
class Packager(object):
    """Base class for distro package/repo management backends."""

    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str] = None, version: Optional[str] = None,
                 branch: Optional[str] = None, commit: Optional[str] = None):
        # exactly one of: stable release, version, branch(+commit), or nothing
        assert \
            (stable and not version and not branch and not commit) or \
            (not stable and version and not branch and not commit) or \
            (not stable and not version and branch) or \
            (not stable and not version and not branch and not commit)
        self.ctx = ctx
        self.stable = stable
        self.version = version
        self.branch = branch
        self.commit = commit

    def validate(self) -> None:
        """Validate parameters before writing any state to disk."""
        pass

    def add_repo(self) -> None:
        raise NotImplementedError

    def rm_repo(self) -> None:
        raise NotImplementedError

    def install(self, ls: List[str]) -> None:
        raise NotImplementedError

    def install_podman(self) -> None:
        raise NotImplementedError

    def query_shaman(self, distro: str, distro_version: Any, branch: Optional[str], commit: Optional[str]) -> str:
        """Resolve a dev build's repo file content via shaman -> chacra.

        :returns: the repo file content fetched from chacra
        :raises Error: if either service does not know the build
        """
        logger.info('Fetching repo metadata from shaman and chacra...')
        shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
            distro=distro,
            distro_version=distro_version,
            branch=branch,
            sha1=commit or 'latest',
            arch=get_arch()  # NOTE(review): arch kwarg reconstructed — confirm against upstream
        )
        try:
            shaman_response = urlopen(shaman_url)
        except HTTPError as err:
            logger.error('repository not found in shaman (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, shaman_url))
        chacra_url = ''
        try:
            # shaman redirects to the chacra-hosted repo file
            chacra_url = shaman_response.geturl()
            chacra_response = urlopen(chacra_url)
        except HTTPError as err:
            logger.error('repository not found in chacra (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, chacra_url))
        return chacra_response.read().decode('utf-8')

    def repo_gpgkey(self) -> Tuple[str, str]:
        """Return (gpg key url, short name) for the configured repo source."""
        if self.ctx.gpg_url:
            return self.ctx.gpg_url, 'manual'
        if self.stable or self.version:
            return 'https://download.ceph.com/keys/release.gpg', 'release'
        else:
            return 'https://download.ceph.com/keys/autobuild.gpg', 'autobuild'

    def enable_service(self, service: str) -> None:
        """
        Start and enable the service (typically using systemd).
        """
        call_throws(self.ctx, ['systemctl', 'enable', '--now', service])
class Apt(Packager):
    """Debian/Ubuntu repo + package management via apt."""

    DISTRO_NAMES = {
        'ubuntu': 'ubuntu',
        'debian': 'debian',
    }

    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
                 distro: Optional[str], distro_version: Optional[str], distro_codename: Optional[str]) -> None:
        super(Apt, self).__init__(ctx, stable=stable, version=version,
                                  branch=branch, commit=commit)
        assert distro
        self.ctx = ctx
        self.distro = self.DISTRO_NAMES[distro]
        self.distro_codename = distro_codename
        self.distro_version = distro_version

    def repo_path(self) -> str:
        return '/etc/apt/sources.list.d/ceph.list'

    def add_repo(self) -> None:
        """Install the ceph.com (or shaman) apt repo and its GPG key."""
        url, name = self.repo_gpgkey()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read()
        with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'wb') as f:
            f.write(key)

        if self.version:
            content = 'deb %s/debian-%s/ %s main\n' % (
                self.ctx.repo_url, self.version, self.distro_codename)
        elif self.stable:
            content = 'deb %s/debian-%s/ %s main\n' % (
                self.ctx.repo_url, self.stable, self.distro_codename)
        else:
            content = self.query_shaman(self.distro, self.distro_codename, self.branch,
                                        self.commit)

        logger.info('Installing repo file at %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        self.update()

    def rm_repo(self) -> None:
        """Remove any ceph repo files and GPG keys we may have installed."""
        for name in ['autobuild', 'release', 'manual']:
            p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
            if os.path.exists(p):
                logger.info('Removing repo GPG key %s...' % p)
                os.unlink(p)

        if os.path.exists(self.repo_path()):
            logger.info('Removing repo at %s...' % self.repo_path())
            os.unlink(self.repo_path())

        if self.distro == 'ubuntu':
            self.rm_kubic_repo()

    def install(self, ls: List[str]) -> None:
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, ['apt-get', 'install', '-y'] + ls)

    def update(self) -> None:
        logger.info('Updating package list...')
        call_throws(self.ctx, ['apt-get', 'update'])

    def install_podman(self) -> None:
        """Try podman (adding the kubic repo on ubuntu); fall back to docker."""
        if self.distro == 'ubuntu':
            logger.info('Setting up repo for podman...')
            self.add_kubic_repo()
            self.update()

        logger.info('Attempting podman install...')
        try:
            self.install(['podman'])
        except Error:
            logger.info('Podman did not work. Falling back to docker...')
            self.install(['docker.io'])

    def kubic_repo_url(self) -> str:
        return 'https://download.opensuse.org/repositories/devel:/kubic:/' \
               'libcontainers:/stable/xUbuntu_%s/' % self.distro_version

    def kubic_repo_path(self) -> str:
        return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list'

    def kubric_repo_gpgkey_url(self) -> str:
        return '%s/Release.key' % self.kubic_repo_url()

    def kubric_repo_gpgkey_path(self) -> str:
        return '/etc/apt/trusted.gpg.d/kubic.release.gpg'

    def add_kubic_repo(self) -> None:
        """Install the openSUSE kubic (podman) repo and key on Ubuntu."""
        url = self.kubric_repo_gpgkey_url()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        tmp_key = write_tmp(key, 0, 0)
        keyring = self.kubric_repo_gpgkey_path()
        call_throws(self.ctx, ['apt-key', '--keyring', keyring, 'add', tmp_key.name])

        logger.info('Installing repo file at %s...' % self.kubic_repo_path())
        content = 'deb %s /\n' % self.kubic_repo_url()
        with open(self.kubic_repo_path(), 'w') as f:
            f.write(content)

    def rm_kubic_repo(self) -> None:
        keyring = self.kubric_repo_gpgkey_path()
        if os.path.exists(keyring):
            logger.info('Removing repo GPG key %s...' % keyring)
            os.unlink(keyring)

        p = self.kubic_repo_path()
        if os.path.exists(p):
            logger.info('Removing repo at %s...' % p)
            os.unlink(p)
class YumDnf(Packager):
    """RHEL-family repo + package management via yum/dnf/tdnf."""

    DISTRO_NAMES = {
        'centos': ('centos', 'el'),
        'rhel': ('centos', 'el'),
        'scientific': ('centos', 'el'),
        'rocky': ('centos', 'el'),
        'almalinux': ('centos', 'el'),
        'ol': ('centos', 'el'),
        'fedora': ('fedora', 'fc'),
        'mariner': ('mariner', 'cm'),
    }

    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
                 distro: Optional[str], distro_version: Optional[str]) -> None:
        super(YumDnf, self).__init__(ctx, stable=stable, version=version,
                                     branch=branch, commit=commit)
        assert distro
        assert distro_version
        self.ctx = ctx
        self.major = int(distro_version.split('.')[0])
        self.distro_normalized = self.DISTRO_NAMES[distro][0]
        # distro_code embeds the major version, e.g. 'el8', 'fc35', 'cm2'
        code_prefix = self.DISTRO_NAMES[distro][1]
        self.distro_code = code_prefix + str(self.major)
        # BUGFIX: the original compared self.distro_code (e.g. 'el8') against
        # the bare prefix ('el'), which could never match; compare the prefix.
        if (code_prefix == 'fc' and self.major >= 30) or \
           (code_prefix == 'el' and self.major >= 8):
            self.tool = 'dnf'
        elif code_prefix == 'cm':
            self.tool = 'tdnf'
        else:
            self.tool = 'yum'

    def custom_repo(self, **kw: Any) -> str:
        """
        Repo files need special care in that a whole line should not be present
        if there is no value for it. Because we were using `format()` we could
        not conditionally add a line for a repo file. So the end result would
        contain a key with a missing value (say if we were passing `None`).

        For example, it could look like::

            [ceph repo]
            name= ceph repo
            proxy=
            gpgcheck=

        Which breaks. This function allows us to conditionally add lines,
        preserving an order and be more careful.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self) -> str:
        return '/etc/yum.repos.d/ceph.repo'

    def repo_baseurl(self) -> str:
        assert self.stable or self.version
        if self.version:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.version,
                                     self.distro_code)
        else:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.stable,
                                     self.distro_code)

    def validate(self) -> None:
        if self.distro_code.startswith('fc'):
            raise Error('Ceph team does not build Fedora specific packages and therefore cannot add repos for this distro')
        if self.distro_code == 'el7':
            if self.stable and self.stable >= 'pacific':
                raise Error('Ceph does not support pacific or later for this version of this linux distro and therefore cannot add a repo for it')
            if self.version and self.version.split('.')[0] >= '16':
                raise Error('Ceph does not support 16.y.z or later for this version of this linux distro and therefore cannot add a repo for it')

        if self.stable or self.version:
            # we know that yum & dnf require there to be a
            # $base_url/$arch/repodata/repomd.xml so we can test if this URL
            # is gettable in order to validate the inputs
            test_url = self.repo_baseurl() + '/noarch/repodata/repomd.xml'
            try:
                urlopen(test_url)
            except HTTPError as err:
                logger.error('unable to fetch repo metadata: %r', err)
                raise Error('failed to fetch repository metadata. please check'
                            ' the provided parameters are correct and try again')

    def add_repo(self) -> None:
        """Write a ceph.repo file (release build or shaman dev build)."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro_normalized, self.major,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        if self.distro_code.startswith('el'):
            logger.info('Enabling EPEL...')
            call_throws(self.ctx, [self.tool, 'install', '-y', 'epel-release'])

    def rm_repo(self) -> None:
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls: List[str]) -> None:
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, [self.tool, 'install', '-y'] + ls)

    def install_podman(self) -> None:
        self.install(['podman'])
class Zypper(Packager):
    """SUSE-family repo + package management via zypper."""

    DISTRO_NAMES = [
        'sles',
        'opensuse-tumbleweed',
        'opensuse-leap'
    ]

    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
                 distro: Optional[str], distro_version: Optional[str]) -> None:
        super(Zypper, self).__init__(ctx, stable=stable, version=version,
                                     branch=branch, commit=commit)
        assert distro is not None
        self.ctx = ctx
        self.tool = 'zypper'
        self.distro = 'opensuse'
        self.distro_version = '15.1'
        if 'tumbleweed' not in distro and distro_version is not None:
            self.distro_version = distro_version

    def custom_repo(self, **kw: Any) -> str:
        """
        See YumDnf for format explanation.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self) -> str:
        return '/etc/zypp/repos.d/ceph.repo'

    def repo_baseurl(self) -> str:
        assert self.stable or self.version
        # BUGFIX: both branches previously used self.stable; the version
        # branch must use self.version (mirrors YumDnf.repo_baseurl).
        if self.version:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url,
                                     self.version, self.distro)
        else:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url,
                                     self.stable, self.distro)

    def add_repo(self) -> None:
        """Write a ceph.repo file (release build or shaman dev build)."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro, self.distro_version,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self) -> None:
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls: List[str]) -> None:
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, [self.tool, 'in', '-y'] + ls)

    def install_podman(self) -> None:
        self.install(['podman'])
def create_packager(ctx: CephadmContext,
                    stable: Optional[str] = None, version: Optional[str] = None,
                    branch: Optional[str] = None, commit: Optional[str] = None) -> Packager:
    """Return the Packager implementation matching the detected distro.

    :raises Error: when the distro is not supported by any backend
    """
    distro, distro_version, distro_codename = get_distro()
    if distro in YumDnf.DISTRO_NAMES:
        return YumDnf(ctx, stable=stable, version=version,
                      branch=branch, commit=commit,
                      distro=distro, distro_version=distro_version)
    elif distro in Apt.DISTRO_NAMES:
        return Apt(ctx, stable=stable, version=version,
                   branch=branch, commit=commit,
                   distro=distro, distro_version=distro_version,
                   distro_codename=distro_codename)
    elif distro in Zypper.DISTRO_NAMES:
        return Zypper(ctx, stable=stable, version=version,
                      branch=branch, commit=commit,
                      distro=distro, distro_version=distro_version)
    raise Error('Distro %s version %s not supported' % (distro, distro_version))
def command_add_repo(ctx: CephadmContext) -> None:
    """Validate the --release/--version/--dev[-commit] arguments and install
    the matching package repository on this host."""
    if ctx.version and ctx.release:
        raise Error('you can specify either --release or --version but not both')
    if not ctx.version and not ctx.release and not ctx.dev and not ctx.dev_commit:
        raise Error('please supply a --release, --version, --dev or --dev-commit argument')
    if ctx.version:
        try:
            (x, y, z) = ctx.version.split('.')
        except Exception:
            raise Error('version must be in the form x.y.z (e.g., 15.2.0)')
    if ctx.release:
        # Pacific =/= pacific in this case; lower-case to avoid confusion
        ctx.release = ctx.release.lower()

    pkg = create_packager(ctx, stable=ctx.release,
                          version=ctx.version,
                          branch=ctx.dev,
                          commit=ctx.dev_commit)
    pkg.validate()
    pkg.add_repo()
    logger.info('Completed adding repo.')
def command_rm_repo(ctx: CephadmContext) -> None:
    """Remove the ceph package repository installed by add-repo."""
    pkg = create_packager(ctx)
    pkg.rm_repo()
def command_install(ctx: CephadmContext) -> None:
    """Install the packages listed on the command line via the distro packager."""
    pkg = create_packager(ctx)
    pkg.install(ctx.packages)
def command_rescan_disks(ctx: CephadmContext) -> str:
    """Trigger a SCSI rescan on all compatible host adapters.

    :returns: a short human-readable summary string
    """

    def probe_hba(scan_path: str) -> None:
        """Tell the adapter to rescan"""
        with open(scan_path, 'w') as f:
            f.write('- - -')

    cmd = ctx.func.__name__.replace('command_', '')
    logger.info(f'{cmd}: starting')
    start = time.time()

    all_scan_files = glob('/sys/class/scsi_host/*/scan')
    scan_files = []
    skipped = []
    for scan_path in all_scan_files:
        adapter_name = os.path.basename(os.path.dirname(scan_path))
        proc_name = read_file([os.path.join(os.path.dirname(scan_path), 'proc_name')])
        if proc_name in ['unknown', 'usb-storage']:
            skipped.append(os.path.basename(scan_path))
            logger.info(f'{cmd}: rescan skipping incompatible host adapter {adapter_name} : {proc_name}')
            continue

        scan_files.append(scan_path)

    if not scan_files:
        logger.info(f'{cmd}: no compatible HBAs found')
        return 'Ok. No compatible HBAs found'

    # fire the rescans concurrently; a truthy entry in responses = a failure
    responses = async_run(concurrent_tasks(probe_hba, scan_files))
    failures = [r for r in responses if r]

    logger.info(f'{cmd}: Complete. {len(scan_files)} adapters rescanned, {len(failures)} failures, {len(skipped)} skipped')

    elapsed = time.time() - start
    if failures:
        plural = 's' if len(failures) > 1 else ''
        if len(failures) == len(scan_files):
            return f'Failed. All {len(scan_files)} rescan requests failed'
        else:
            return f'Partial. {len(scan_files) - len(failures)} successful, {len(failures)} failure{plural} against: {", ".join(failures)}'

    return f'Ok. {len(all_scan_files)} adapters detected: {len(scan_files)} rescanned, {len(skipped)} skipped, {len(failures)} failed ({elapsed:.2f}s)'
8008 ##################################
def get_ipv4_address(ifname):
    # type: (str) -> str
    """Return 'addr/prefixlen' for the given interface, or '' when the
    interface has no IPv4 address (or does not exist)."""
    def _extract(sock: socket.socket, offset: int) -> str:
        return socket.inet_ntop(
            socket.AF_INET,
            fcntl.ioctl(
                sock.fileno(),
                offset,
                struct.pack('256s', bytes(ifname[:15], 'utf-8'))
            )[20:24])

    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        addr = _extract(s, 35093)  # '0x8915' = SIOCGIFADDR
        dq_mask = _extract(s, 35099)  # 0x891b = SIOCGIFNETMASK
    except OSError:
        # interface does not have an ipv4 address
        return ''

    # convert the dotted-quad netmask to a CIDR prefix length
    dec_mask = sum([bin(int(i)).count('1')
                    for i in dq_mask.split('.')])
    return '{}/{}'.format(addr, dec_mask)
def get_ipv6_address(ifname):
    # type: (str) -> str
    """Return 'addr/scope' for the given interface from /proc/net/if_inet6,
    or '' when unavailable."""
    if not os.path.exists('/proc/net/if_inet6'):
        return ''

    raw = read_file(['/proc/net/if_inet6'])
    data = raw.splitlines()
    # based on docs @ https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/ch11s04.html
    # field 0 is ipv6, field 2 is scope
    for iface_setting in data:
        field = iface_setting.split()
        if field[-1] == ifname:
            ipv6_raw = field[0]
            # re-insert the ':' separators dropped by the proc format
            ipv6_fmtd = ':'.join([ipv6_raw[_p:_p + 4] for _p in range(0, len(field[0]), 4)])
            # apply naming rules using ipaddress module
            ipv6 = ipaddress.ip_address(ipv6_fmtd)
            return '{}/{}'.format(str(ipv6), int('0x{}'.format(field[2]), 16))
    return ''
def bytes_to_human(num, mode='decimal'):
    # type: (float, str) -> str
    """Convert a bytes value into its human-readable form.

    :param num: number, in bytes, to convert
    :param mode: Either decimal (default) or binary to determine divisor
    :returns: string representing the bytes value in a more readable format
    """
    unit_list = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
    divisor = 1000.0
    yotta = 'YB'

    if mode == 'binary':
        unit_list = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
        divisor = 1024.0
        yotta = 'YiB'

    for unit in unit_list:
        if abs(num) < divisor:
            return '%3.1f%s' % (num, unit)
        num /= divisor
    # fell off the end of the list: yottabytes
    return '%.1f%s' % (num, yotta)
def read_file(path_list, file_name=''):
    # type: (List[str], str) -> str
    """Returns the content of the first file found within the `path_list`

    :param path_list: list of file paths to search
    :param file_name: optional file_name to be applied to a file path
    :returns: content of the file or 'Unknown'
    """
    for path in path_list:
        if file_name:
            file_path = os.path.join(path, file_name)
        else:
            file_path = path
        if os.path.exists(file_path):
            with open(file_path, 'r') as f:
                try:
                    content = f.read().strip()
                except OSError:
                    # sysfs may populate the file, but for devices like
                    # virtio reads can fail
                    return 'Unknown'
                else:
                    return content
    return 'Unknown'
8104 ##################################
8108 _dmi_path_list
= ['/sys/class/dmi/id']
8109 _nic_path_list
= ['/sys/class/net']
8110 _apparmor_path_list
= ['/etc/apparmor']
8111 _disk_vendor_workarounds
= {
8112 '0x1af4': 'Virtio Block Device'
8114 _excluded_block_devices
= ('sr', 'zram', 'dm-')
def __init__(self, ctx: CephadmContext):
    """Gather static host facts (cpu, memory, NICs, arch, kernel) eagerly."""
    self.ctx: CephadmContext = ctx
    self.cpu_model: str = 'Unknown'
    self.cpu_count: int = 0
    self.cpu_cores: int = 0
    self.cpu_threads: int = 0
    self.interfaces: Dict[str, Any] = {}

    self._meminfo: List[str] = read_file(['/proc/meminfo']).splitlines()
    self._get_cpuinfo()
    self._process_nics()
    self.arch: str = platform.processor()
    self.kernel: str = platform.release()
def _get_cpuinfo(self):
    # type: () -> None
    """Determine cpu information via /proc/cpuinfo"""
    raw = read_file(['/proc/cpuinfo'])
    output = raw.splitlines()
    cpu_set = set()

    for line in output:
        field = [f.strip() for f in line.split(':')]
        if 'model name' in line:
            self.cpu_model = field[1]
        if 'physical id' in line:
            # one entry per physical socket
            cpu_set.add(field[1])
        if 'siblings' in line:
            self.cpu_threads = int(field[1].strip())
        if 'cpu cores' in line:
            self.cpu_cores = int(field[1].strip())

    self.cpu_count = len(cpu_set)
def _get_block_devs(self):
    # type: () -> List[str]
    """Determine the list of block devices by looking at /sys/block"""
    return [dev for dev in os.listdir('/sys/block')
            if not dev.startswith(HostFacts._excluded_block_devices)]
def _get_devs_by_type(self, rota='0'):
    # type: (str) -> List[str]
    """Filter block devices by a given rotational attribute (0=flash, 1=spinner)"""
    devs = []
    for blk_dev in self._get_block_devs():
        rot_path = '/sys/block/{}/queue/rotational'.format(blk_dev)
        rot_value = read_file([rot_path])
        if rot_value == rota:
            devs.append(blk_dev)
    return devs
@property
def operating_system(self):
    # type: () -> str
    """Determine OS version"""
    raw_info = read_file(['/etc/os-release'])
    os_release = raw_info.splitlines()
    rel_str = 'Unknown'
    rel_dict = dict()

    for line in os_release:
        if '=' in line:
            var_name, var_value = line.split('=')
            rel_dict[var_name] = var_value.strip('"')

    # Would normally use PRETTY_NAME, but NAME and VERSION are more
    # consistent across distros
    if all(_v in rel_dict for _v in ['NAME', 'VERSION']):
        rel_str = '{} {}'.format(rel_dict['NAME'], rel_dict['VERSION'])
    return rel_str
@property
def hostname(self):
    # type: () -> str
    """Return the hostname"""
    return platform.node()
@property
def subscribed(self):
    # type: () -> str
    """Highlevel check to see if the host is subscribed to receive updates/support"""

    def _red_hat():
        # type: () -> str
        # subscription exists when entitlement PEM certs are present
        # NOTE(review): detection logic reconstructed — confirm threshold
        entitlements_dir = '/etc/pki/entitlement'
        if os.path.exists(entitlements_dir):
            pems = glob('{}/*.pem'.format(entitlements_dir))
            if len(pems) >= 2:  # cert and key
                return 'Yes'
        return 'No'

    os_name = self.operating_system
    if os_name.upper().startswith('RED HAT'):
        return _red_hat()
    return 'Unknown'
@property
def hdd_count(self):
    # type: () -> int
    """Return a count of HDDs (spinners)"""
    return len(self._get_devs_by_type(rota='1'))
def _get_capacity(self, dev):
    # type: (str) -> int
    """Determine the size of a given device"""
    # sysfs reports size in logical blocks; multiply by the block size
    size_path = os.path.join('/sys/block', dev, 'size')
    size_blocks = int(read_file([size_path]))
    blk_path = os.path.join('/sys/block', dev, 'queue', 'logical_block_size')
    blk_count = int(read_file([blk_path]))
    return size_blocks * blk_count
def _get_capacity_by_type(self, rota='0'):
    # type: (str) -> int
    """Return the total capacity of a category of device (flash or hdd)"""
    devs = self._get_devs_by_type(rota=rota)
    capacity = 0
    for dev in devs:
        capacity += self._get_capacity(dev)
    return capacity
def _dev_list(self, dev_list):
    # type: (List[str]) -> List[Dict[str, object]]
    """Return a 'pretty' name list for each device in the `dev_list`"""
    disk_list = list()

    for dev in dev_list:
        disk_model = read_file(['/sys/block/{}/device/model'.format(dev)]).strip()
        disk_rev = read_file(['/sys/block/{}/device/rev'.format(dev)]).strip()
        disk_wwid = read_file(['/sys/block/{}/device/wwid'.format(dev)]).strip()
        vendor = read_file(['/sys/block/{}/device/vendor'.format(dev)]).strip()
        # map PCI-id style vendors (e.g. virtio) to a readable name
        disk_vendor = HostFacts._disk_vendor_workarounds.get(vendor, vendor)
        disk_size_bytes = self._get_capacity(dev)
        disk_list.append({
            'description': '{} {} ({})'.format(disk_vendor, disk_model, bytes_to_human(disk_size_bytes)),
            'vendor': disk_vendor,
            'model': disk_model,
            'rev': disk_rev,
            'wwid': disk_wwid,
            'dev_name': dev,
            'disk_size_bytes': disk_size_bytes,
        })
    return disk_list
@property
def hdd_list(self):
    # type: () -> List[Dict[str, object]]
    """Return a list of devices that are HDDs (spinners)"""
    devs = self._get_devs_by_type(rota='1')
    return self._dev_list(devs)
@property
def flash_list(self):
    # type: () -> List[Dict[str, object]]
    """Return a list of devices that are flash based (SSD, NVMe)"""
    devs = self._get_devs_by_type(rota='0')
    return self._dev_list(devs)
@property
def hdd_capacity_bytes(self):
    # type: () -> int
    """Return the total capacity for all HDD devices (bytes)"""
    return self._get_capacity_by_type(rota='1')
@property
def hdd_capacity(self):
    # type: () -> str
    """Return the total capacity for all HDD devices (human readable format)"""
    return bytes_to_human(self.hdd_capacity_bytes)
@property
def cpu_load(self):
    # type: () -> Dict[str, float]
    """Return the cpu load average data for the host"""
    raw = read_file(['/proc/loadavg']).strip()
    data = raw.split()
    return {
        '1min': float(data[0]),
        '5min': float(data[1]),
        '15min': float(data[2]),
    }
@property
def flash_count(self):
    # type: () -> int
    """Return the number of flash devices in the system (SSD, NVMe)"""
    return len(self._get_devs_by_type(rota='0'))
@property
def flash_capacity_bytes(self):
    # type: () -> int
    """Return the total capacity for all flash devices (bytes)"""
    return self._get_capacity_by_type(rota='0')
@property
def flash_capacity(self):
    # type: () -> str
    """Return the total capacity for all Flash devices (human readable format)"""
    return bytes_to_human(self.flash_capacity_bytes)
def _process_nics(self):
    # type: () -> None
    """Look at the NIC devices and extract network related metadata"""
    # from https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
    hw_lookup = {
        '1': 'ethernet',
        '32': 'infiniband',
        '772': 'loopback',
    }

    for nic_path in HostFacts._nic_path_list:
        if not os.path.exists(nic_path):
            continue
        for iface in os.listdir(nic_path):

            if os.path.exists(os.path.join(nic_path, iface, 'bridge')):
                nic_type = 'bridge'
            elif os.path.exists(os.path.join(nic_path, iface, 'bonding')):
                nic_type = 'bonding'
            else:
                nic_type = hw_lookup.get(read_file([os.path.join(nic_path, iface, 'type')]), 'Unknown')

            if nic_type == 'loopback':  # skip loopback devices
                continue

            lower_devs_list = [os.path.basename(link.replace('lower_', '')) for link in glob(os.path.join(nic_path, iface, 'lower_*'))]
            upper_devs_list = [os.path.basename(link.replace('upper_', '')) for link in glob(os.path.join(nic_path, iface, 'upper_*'))]

            try:
                mtu = int(read_file([os.path.join(nic_path, iface, 'mtu')]))
            except ValueError:
                mtu = 0

            operstate = read_file([os.path.join(nic_path, iface, 'operstate')])
            try:
                speed = int(read_file([os.path.join(nic_path, iface, 'speed')]))
            except (OSError, ValueError):
                # OSError : device doesn't support the ethtool get_link_ksettings
                # ValueError : raised when the read fails, and returns Unknown
                #
                # Either way, we show a -1 when speed isn't available
                speed = -1

            dev_link = os.path.join(nic_path, iface, 'device')
            if os.path.exists(dev_link):
                iftype = 'physical'
                driver_path = os.path.join(dev_link, 'driver')
                if os.path.exists(driver_path):
                    driver = os.path.basename(os.path.realpath(driver_path))
                else:
                    driver = 'Unknown'
            else:
                iftype = 'logical'
                driver = ''

            self.interfaces[iface] = {
                'mtu': mtu,
                'upper_devs_list': upper_devs_list,
                'lower_devs_list': lower_devs_list,
                'operstate': operstate,
                'iftype': iftype,
                'nic_type': nic_type,
                'driver': driver,
                'speed': speed,
                'ipv4_address': get_ipv4_address(iface),
                'ipv6_address': get_ipv6_address(iface),
            }
8387 def nic_count(self
):
8389 """Return a total count of all physical NICs detected in the host"""
8391 for iface
in self
.interfaces
:
8392 if self
.interfaces
[iface
]['iftype'] == 'physical':
8393 phys_devs
.append(iface
)
8394 return len(phys_devs
)
# Scan the cached meminfo lines for ``field_name`` and return its value (kB).
# NOTE(review): the lines extracting the numeric value and the fallback when
# the field is absent fall outside this extract.
8396 def _get_mem_data(self
, field_name
):
8397 # type: (str) -> int
# self._meminfo presumably holds the lines of /proc/meminfo -- the place
# where it is populated is not visible here; confirm against __init__.
8398 for line
in self
._meminfo
:
8399 if line
.startswith(field_name
):
8405 def memory_total_kb(self
):
8407 """Determine the memory installed (kb)"""
8408 return self
._get
_mem
_data
('MemTotal')
8411 def memory_free_kb(self
):
8413 """Determine the memory free (not cache, immediately usable)"""
8414 return self
._get
_mem
_data
('MemFree')
8417 def memory_available_kb(self
):
8419 """Determine the memory available to new applications without swapping"""
8420 return self
._get
_mem
_data
('MemAvailable')
8425 """Determine server vendor from DMI data in sysfs"""
8426 return read_file(HostFacts
._dmi
_path
_list
, 'sys_vendor')
8431 """Determine server model information from DMI data in sysfs"""
8432 family
= read_file(HostFacts
._dmi
_path
_list
, 'product_family')
8433 product
= read_file(HostFacts
._dmi
_path
_list
, 'product_name')
8434 if family
== 'Unknown' and product
:
8435 return '{}'.format(product
)
8437 return '{} ({})'.format(family
, product
)
8440 def bios_version(self
):
8442 """Determine server BIOS version from DMI data in sysfs"""
8443 return read_file(HostFacts
._dmi
_path
_list
, 'bios_version')
8446 def bios_date(self
):
8448 """Determine server BIOS date from DMI data in sysfs"""
8449 return read_file(HostFacts
._dmi
_path
_list
, 'bios_date')
# Property-style accessor for the current time.
# NOTE(review): the return statement (presumably the current epoch time,
# e.g. time.time()) is not visible in this extract.
8452 def timestamp(self
):
8454 """Return the current time as Epoch seconds"""
8458 def system_uptime(self
):
8460 """Return the system uptime (in secs)"""
8461 raw_time
= read_file(['/proc/uptime'])
8462 up_secs
, _
= raw_time
.split()
8463 return float(up_secs
)
# Inspect /sys/kernel/security/lsm and report the active Linux Security
# Module (SELinux or AppArmor) as a {'type': ..., 'description': ...} dict.
8466 def kernel_security(self
):
8467 # type: () -> Dict[str, str]
8468 """Determine the security features enabled in the kernel - SELinux, AppArmor"""
# Helper: shell out to sestatus and summarise SELinux state.
8469 def _fetch_selinux() -> Dict
[str, str]:
8470 """Get the selinux status"""
8473 out
, err
, code
= call(self
.ctx
, ['sestatus'],
8474 verbosity
=CallVerbosity
.QUIET
)
8475 security
['type'] = 'SELinux'
8476 status
, mode
, policy
= '', '', ''
# Parse the 'key: value' lines of sestatus output.
8477 for line
in out
.split('\n'):
8478 if line
.startswith('SELinux status:'):
8479 k
, v
= line
.split(':')
8481 elif line
.startswith('Current mode:'):
8482 k
, v
= line
.split(':')
8484 elif line
.startswith('Loaded policy name:'):
8485 k
, v
= line
.split(':')
8487 if status
== 'disabled':
8488 security
['description'] = 'SELinux: Disabled'
8490 security
['description'] = 'SELinux: Enabled({}, {})'.format(mode
, policy
)
# Best effort: failure to run/parse sestatus is logged, not raised.
8491 except Exception as e
:
8492 logger
.info('unable to get selinux status: %s' % e
)
# Helper: read the AppArmor profiles file directly and summarise by mode.
8495 def _fetch_apparmor() -> Dict
[str, str]:
8496 """Read the apparmor profiles directly, returning an overview of AppArmor status"""
8498 for apparmor_path
in HostFacts
._apparmor
_path
_list
:
8499 if os
.path
.exists(apparmor_path
):
8500 security
['type'] = 'AppArmor'
8501 security
['description'] = 'AppArmor: Enabled'
8503 profiles
= read_file(['/sys/kernel/security/apparmor/profiles'])
8504 if len(profiles
) == 0:
# Tally profiles per enforcement mode, e.g. {'enforce': N}.
8509 summary
= {} # type: Dict[str, int]
8510 for line
in profiles
.split('\n'):
8511 item
, mode
= line
.split(' ')
8512 mode
= mode
.strip('()')
8517 summary_str
= ','.join(['{} {}'.format(v
, k
) for k
, v
in summary
.items()])
8518 security
= {**security
, **summary
} # type: ignore
8519 security
['description'] += '({})'.format(summary_str
)
# Dispatch on the active LSM list exposed by the kernel.
8525 if os
.path
.exists('/sys/kernel/security/lsm'):
8526 lsm
= read_file(['/sys/kernel/security/lsm']).strip()
8527 if 'selinux' in lsm
:
8528 ret
= _fetch_selinux()
8529 elif 'apparmor' in lsm
:
8530 ret
= _fetch_apparmor()
# Fallback descriptions when neither module (or no LSM framework) is found.
8534 'description': 'Linux Security Module framework is active, but is not using SELinux or AppArmor'
8542 'description': 'Linux Security Module framework is not available'
8546 def selinux_enabled(self
) -> bool:
8547 return (self
.kernel_security
['type'] == 'SELinux') and \
8548 (self
.kernel_security
['description'] != 'SELinux: Disabled')
# Collect the sysctl settings Ceph cares about into a name -> value dict.
8551 def kernel_parameters(self
):
8552 # type: () -> Dict[str, str]
8553 """Get kernel parameters required/used in Ceph clusters"""
# Dump every kernel parameter; SILENT verbosity keeps the huge output
# out of the logs.
8556 out
, _
, _
= call_throws(self
.ctx
, ['sysctl', '-a'], verbosity
=CallVerbosity
.SILENT
)
8558 param_list
= out
.split('\n')
# Each line is 'name = value'; [-1] tolerates values containing ' = '.
8559 param_dict
= {param
.split(' = ')[0]: param
.split(' = ')[-1] for param
in param_list
}
8561 # return only desired parameters
# NOTE(review): the initialisation of k_param (presumably {}) and the
# final return are not visible in this extract.
8562 if 'net.ipv4.ip_nonlocal_bind' in param_dict
:
8563 k_param
['net.ipv4.ip_nonlocal_bind'] = param_dict
['net.ipv4.ip_nonlocal_bind']
# Parse a /proc/net/{tcp,tcp6,udp,udp6} table and return the local ports
# whose connection state marks them as listening.
8568 def _process_net_data(tcp_file
: str, protocol
: str = 'tcp') -> List
[int]:
8569 listening_ports
= []
8570 # Connections state documentation
8571 # tcp - https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/net/tcp_states.h
8572 # udp - uses 07 (TCP_CLOSE or UNCONN, since udp is stateless. test with netcat -ul <port>)
# NOTE(review): the listening_state mapping (protocol -> state code) is
# defined on lines not visible in this extract.
8578 if protocol
not in listening_state
.keys():
8581 if os
.path
.exists(tcp_file
):
# First line of the proc table is the column header; skip it.
8582 with
open(tcp_file
) as f
:
8583 tcp_data
= f
.readlines()[1:]
8585 for con
in tcp_data
:
8586 con_info
= con
.strip().split()
# Column 3 is the connection state code.
8587 if con_info
[3] == listening_state
[protocol
]:
# Column 1 is 'addr:port' with the port in hex.
8588 local_port
= int(con_info
[1].split(':')[1], 16)
8589 listening_ports
.append(local_port
)
8591 return listening_ports
8594 def tcp_ports_used(self
) -> List
[int]:
8595 return HostFacts
._process
_net
_data
('/proc/net/tcp')
8598 def tcp6_ports_used(self
) -> List
[int]:
8599 return HostFacts
._process
_net
_data
('/proc/net/tcp6')
8602 def udp_ports_used(self
) -> List
[int]:
8603 return HostFacts
._process
_net
_data
('/proc/net/udp', 'udp')
8606 def udp6_ports_used(self
) -> List
[int]:
8607 return HostFacts
._process
_net
_data
('/proc/net/udp6', 'udp')
8611 """Return the attributes of this HostFacts object as json"""
8613 k
: getattr(self
, k
) for k
in dir(self
)
8614 if not k
.startswith('_')
8615 and isinstance(getattr(self
, k
), (float, int, str, list, dict, tuple))
8617 return json
.dumps(data
, indent
=2, sort_keys
=True)
8619 ##################################
def command_gather_facts(ctx: CephadmContext) -> None:
    """gather-facts: print host related metadata (JSON) for the caller.

    Fixes the 'releated' typo in the original docstring.
    """
    host = HostFacts(ctx)
    # HostFacts.dump() renders the collected facts as a JSON document.
    print(host.dump())
8628 ##################################
# Report whether ``target_name`` is wanted by the given subsystem target,
# i.e. whether its symlink exists under <unit_dir>/<subsystem>.target.wants.
# NOTE(review): the remaining os.path.join arguments (presumably
# ctx.unit_dir and target_name) are on lines not visible in this extract.
8631 def systemd_target_state(ctx
: CephadmContext
, target_name
: str, subsystem
: str = 'ceph') -> bool:
8633 return os
.path
.exists(
8636 f
'{subsystem}.target.wants',
def target_exists(ctx: CephadmContext) -> bool:
    """Report whether the ceph.target systemd unit file is present."""
    unit_path = ctx.unit_dir + '/ceph.target'
    return os.path.exists(unit_path)
# Enter/exit host maintenance by disabling+stopping or enabling+starting the
# per-cluster systemd target ceph-<fsid>.target; returns a status string.
# NOTE(review): several guard lines (e.g. the fsid check preceding the
# raise, the 'if code:' error checks, and else branches) are not visible in
# this extract.
8647 def command_maintenance(ctx
: CephadmContext
) -> str:
8649 raise Error('failed - must pass --fsid to specify cluster')
8651 target
= f
'ceph-{ctx.fsid}.target'
# --- enter maintenance: disable and stop the cluster target -------------
8653 if ctx
.maintenance_action
.lower() == 'enter':
8654 logger
.info('Requested to place host into maintenance')
8655 if systemd_target_state(ctx
, target
):
8656 _out
, _err
, code
= call(ctx
,
8657 ['systemctl', 'disable', target
],
8658 verbosity
=CallVerbosity
.DEBUG
)
8660 logger
.error(f
'Failed to disable the {target} target')
8661 return 'failed - to disable the target'
8663 # stopping a target waits by default
8664 _out
, _err
, code
= call(ctx
,
8665 ['systemctl', 'stop', target
],
8666 verbosity
=CallVerbosity
.DEBUG
)
8668 logger
.error(f
'Failed to stop the {target} target')
8669 return 'failed - to disable the target'
8671 return f
'success - systemd target {target} disabled'
8674 return 'skipped - target already disabled'
# --- exit maintenance: enable and start the cluster target --------------
8677 logger
.info('Requested to exit maintenance state')
8678 # if we've never deployed a daemon on this host there will be no systemd
8679 # target to disable so attempting a disable will fail. We still need to
8680 # return success here or host will be permanently stuck in maintenance mode
8681 # as no daemons can be deployed so no systemd target will ever exist to disable.
8682 if not target_exists(ctx
):
8683 return 'skipped - systemd target not present on this host. Host removed from maintenance mode.'
8684 # exit maintenance request
8685 if not systemd_target_state(ctx
, target
):
8686 _out
, _err
, code
= call(ctx
,
8687 ['systemctl', 'enable', target
],
8688 verbosity
=CallVerbosity
.DEBUG
)
8690 logger
.error(f
'Failed to enable the {target} target')
8691 return 'failed - unable to enable the target'
8693 # starting a target waits by default
8694 _out
, _err
, code
= call(ctx
,
8695 ['systemctl', 'start', target
],
8696 verbosity
=CallVerbosity
.DEBUG
)
8698 logger
.error(f
'Failed to start the {target} target')
8699 return 'failed - unable to start the target'
8701 return f
'success - systemd target {target} enabled and started'
# NOTE(review): line 8702 repeats line 8701 verbatim; presumably the second
# return is the fall-through for a target that was already enabled (the
# distinguishing indentation is lost in this extract) -- confirm upstream.
8702 return f
'success - systemd target {target} enabled and started'
8704 ##################################
8708 # type: () -> argparse.ArgumentParser
8709 parser
= argparse
.ArgumentParser(
8710 description
='Bootstrap Ceph daemons with systemd and containers.',
8711 formatter_class
=argparse
.ArgumentDefaultsHelpFormatter
)
8712 parser
.add_argument(
8714 help='container image. Can also be set via the "CEPHADM_IMAGE" '
8716 parser
.add_argument(
8718 action
='store_true',
8719 help='use docker instead of podman')
8720 parser
.add_argument(
8723 help='base directory for daemon data')
8724 parser
.add_argument(
8727 help='base directory for daemon logs')
8728 parser
.add_argument(
8730 default
=LOGROTATE_DIR
,
8731 help='location of logrotate configuration files')
8732 parser
.add_argument(
8735 help='location of sysctl configuration files')
8736 parser
.add_argument(
8739 help='base directory for systemd units')
8740 parser
.add_argument(
8742 action
='store_true',
8743 help='Show debug-level log messages')
8744 parser
.add_argument(
8747 default
=DEFAULT_TIMEOUT
,
8748 help='timeout in seconds')
8749 parser
.add_argument(
8752 default
=DEFAULT_RETRY
,
8753 help='max number of retries')
8754 parser
.add_argument(
8758 help='set environment variable')
8759 parser
.add_argument(
8760 '--no-container-init',
8761 action
='store_true',
8762 default
=not CONTAINER_INIT
,
8763 help='Do not run podman/docker with `--init`')
8765 subparsers
= parser
.add_subparsers(help='sub-command')
8767 parser_version
= subparsers
.add_parser(
8768 'version', help='get ceph version from container')
8769 parser_version
.set_defaults(func
=command_version
)
8771 parser_pull
= subparsers
.add_parser(
8772 'pull', help='pull the default container image')
8773 parser_pull
.set_defaults(func
=command_pull
)
8774 parser_pull
.add_argument(
8776 action
='store_true',
8777 help=argparse
.SUPPRESS
,
8780 parser_inspect_image
= subparsers
.add_parser(
8781 'inspect-image', help='inspect local container image')
8782 parser_inspect_image
.set_defaults(func
=command_inspect_image
)
8784 parser_ls
= subparsers
.add_parser(
8785 'ls', help='list daemon instances on this host')
8786 parser_ls
.set_defaults(func
=command_ls
)
8787 parser_ls
.add_argument(
8789 action
='store_true',
8790 help='Do not include daemon status')
8791 parser_ls
.add_argument(
8794 help='base directory for legacy daemon data')
8796 parser_list_networks
= subparsers
.add_parser(
8797 'list-networks', help='list IP networks')
8798 parser_list_networks
.set_defaults(func
=command_list_networks
)
8800 parser_adopt
= subparsers
.add_parser(
8801 'adopt', help='adopt daemon deployed with a different tool')
8802 parser_adopt
.set_defaults(func
=command_adopt
)
8803 parser_adopt
.add_argument(
8806 help='daemon name (type.id)')
8807 parser_adopt
.add_argument(
8810 help='deployment style (legacy, ...)')
8811 parser_adopt
.add_argument(
8814 help='cluster name')
8815 parser_adopt
.add_argument(
8818 help='base directory for legacy daemon data')
8819 parser_adopt
.add_argument(
8821 help='Additional configuration information in JSON format')
8822 parser_adopt
.add_argument(
8824 action
='store_true',
8825 help='Do not configure firewalld')
8826 parser_adopt
.add_argument(
8828 action
='store_true',
8829 help='do not pull the default image before adopting')
8830 parser_adopt
.add_argument(
8832 action
='store_true',
8833 help='start newly adoped daemon, even if it was not running previously')
8834 parser_adopt
.add_argument(
8836 action
='store_true',
8837 default
=CONTAINER_INIT
,
8838 help=argparse
.SUPPRESS
)
8840 parser_rm_daemon
= subparsers
.add_parser(
8841 'rm-daemon', help='remove daemon instance')
8842 parser_rm_daemon
.set_defaults(func
=command_rm_daemon
)
8843 parser_rm_daemon
.add_argument(
8846 action
=CustomValidation
,
8847 help='daemon name (type.id)')
8848 parser_rm_daemon
.add_argument(
8850 help='List of tcp ports to close in the host firewall')
8851 parser_rm_daemon
.add_argument(
8854 help='cluster FSID')
8855 parser_rm_daemon
.add_argument(
8857 action
='store_true',
8858 help='proceed, even though this may destroy valuable data')
8859 parser_rm_daemon
.add_argument(
8860 '--force-delete-data',
8861 action
='store_true',
8862 help='delete valuable daemon data instead of making a backup')
8864 parser_rm_cluster
= subparsers
.add_parser(
8865 'rm-cluster', help='remove all daemons for a cluster')
8866 parser_rm_cluster
.set_defaults(func
=command_rm_cluster
)
8867 parser_rm_cluster
.add_argument(
8870 help='cluster FSID')
8871 parser_rm_cluster
.add_argument(
8873 action
='store_true',
8874 help='proceed, even though this may destroy valuable data')
8875 parser_rm_cluster
.add_argument(
8877 action
='store_true',
8878 help='do not remove log files')
8879 parser_rm_cluster
.add_argument(
8881 action
='store_true',
8882 help='zap OSD devices for this cluster')
8884 parser_run
= subparsers
.add_parser(
8885 'run', help='run a ceph daemon, in a container, in the foreground')
8886 parser_run
.set_defaults(func
=command_run
)
8887 parser_run
.add_argument(
8890 help='daemon name (type.id)')
8891 parser_run
.add_argument(
8894 help='cluster FSID')
8896 parser_shell
= subparsers
.add_parser(
8897 'shell', help='run an interactive shell inside a daemon container')
8898 parser_shell
.set_defaults(func
=command_shell
)
8899 parser_shell
.add_argument(
8900 '--shared_ceph_folder',
8901 metavar
='CEPH_SOURCE_FOLDER',
8902 help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
8903 parser_shell
.add_argument(
8905 help='cluster FSID')
8906 parser_shell
.add_argument(
8908 help='daemon name (type.id)')
8909 parser_shell
.add_argument(
8911 help='ceph.conf to pass through to the container')
8912 parser_shell
.add_argument(
8914 help='ceph.keyring to pass through to the container')
8915 parser_shell
.add_argument(
8917 help=('mount a file or directory in the container. '
8918 'Support multiple mounts. '
8919 'ie: `--mount /foo /bar:/bar`. '
8920 'When no destination is passed, default is /mnt'),
8922 parser_shell
.add_argument(
8926 help='set environment variable')
8927 parser_shell
.add_argument(
8931 help='set environment variable')
8932 parser_shell
.add_argument(
8933 'command', nargs
=argparse
.REMAINDER
,
8934 help='command (optional)')
8935 parser_shell
.add_argument(
8937 action
='store_true',
8938 help='dont pass /etc/hosts through to the container')
8940 parser_enter
= subparsers
.add_parser(
8941 'enter', help='run an interactive shell inside a running daemon container')
8942 parser_enter
.set_defaults(func
=command_enter
)
8943 parser_enter
.add_argument(
8945 help='cluster FSID')
8946 parser_enter
.add_argument(
8949 help='daemon name (type.id)')
8950 parser_enter
.add_argument(
8951 'command', nargs
=argparse
.REMAINDER
,
8954 parser_ceph_volume
= subparsers
.add_parser(
8955 'ceph-volume', help='run ceph-volume inside a container')
8956 parser_ceph_volume
.set_defaults(func
=command_ceph_volume
)
8957 parser_ceph_volume
.add_argument(
8958 '--shared_ceph_folder',
8959 metavar
='CEPH_SOURCE_FOLDER',
8960 help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
8961 parser_ceph_volume
.add_argument(
8963 help='cluster FSID')
8964 parser_ceph_volume
.add_argument(
8966 help='JSON file with config and (client.bootstrap-osd) key')
8967 parser_ceph_volume
.add_argument(
8969 help='ceph conf file')
8970 parser_ceph_volume
.add_argument(
8972 help='ceph.keyring to pass through to the container')
8973 parser_ceph_volume
.add_argument(
8974 'command', nargs
=argparse
.REMAINDER
,
8977 parser_zap_osds
= subparsers
.add_parser(
8978 'zap-osds', help='zap all OSDs associated with a particular fsid')
8979 parser_zap_osds
.set_defaults(func
=command_zap_osds
)
8980 parser_zap_osds
.add_argument(
8983 help='cluster FSID')
8984 parser_zap_osds
.add_argument(
8986 action
='store_true',
8987 help='proceed, even though this may destroy valuable data')
8989 parser_unit
= subparsers
.add_parser(
8990 'unit', help="operate on the daemon's systemd unit")
8991 parser_unit
.set_defaults(func
=command_unit
)
8992 parser_unit
.add_argument(
8994 help='systemd command (start, stop, restart, enable, disable, ...)')
8995 parser_unit
.add_argument(
8997 help='cluster FSID')
8998 parser_unit
.add_argument(
9001 help='daemon name (type.id)')
9003 parser_logs
= subparsers
.add_parser(
9004 'logs', help='print journald logs for a daemon container')
9005 parser_logs
.set_defaults(func
=command_logs
)
9006 parser_logs
.add_argument(
9008 help='cluster FSID')
9009 parser_logs
.add_argument(
9012 help='daemon name (type.id)')
9013 parser_logs
.add_argument(
9014 'command', nargs
='*',
9015 help='additional journalctl args')
9017 parser_bootstrap
= subparsers
.add_parser(
9018 'bootstrap', help='bootstrap a cluster (mon + mgr daemons)')
9019 parser_bootstrap
.set_defaults(func
=command_bootstrap
)
9020 parser_bootstrap
.add_argument(
9022 help='ceph conf file to incorporate')
9023 parser_bootstrap
.add_argument(
9026 help='mon id (default: local hostname)')
9027 group
= parser_bootstrap
.add_mutually_exclusive_group()
9030 help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])')
9034 parser_bootstrap
.add_argument(
9037 help='mgr id (default: randomly generated)')
9038 parser_bootstrap
.add_argument(
9040 help='cluster FSID')
9041 parser_bootstrap
.add_argument(
9043 default
='/etc/ceph',
9044 help='directory to write config, keyring, and pub key files')
9045 parser_bootstrap
.add_argument(
9047 help='location to write keyring file with new cluster admin and mon keys')
9048 parser_bootstrap
.add_argument(
9050 help='location to write conf file to connect to new cluster')
9051 parser_bootstrap
.add_argument(
9052 '--output-pub-ssh-key',
9053 help="location to write the cluster's public SSH key")
9054 parser_bootstrap
.add_argument(
9055 '--skip-admin-label',
9056 action
='store_true',
9057 help='do not create admin label for ceph.conf and client.admin keyring distribution')
9058 parser_bootstrap
.add_argument(
9060 action
='store_true',
9061 help='skip setup of ssh key on local host')
9062 parser_bootstrap
.add_argument(
9063 '--initial-dashboard-user',
9065 help='Initial user for the dashboard')
9066 parser_bootstrap
.add_argument(
9067 '--initial-dashboard-password',
9068 help='Initial password for the initial dashboard user')
9069 parser_bootstrap
.add_argument(
9070 '--ssl-dashboard-port',
9073 help='Port number used to connect with dashboard using SSL')
9074 parser_bootstrap
.add_argument(
9076 type=argparse
.FileType('r'),
9077 help='Dashboard key')
9078 parser_bootstrap
.add_argument(
9080 type=argparse
.FileType('r'),
9081 help='Dashboard certificate')
9083 parser_bootstrap
.add_argument(
9085 type=argparse
.FileType('r'),
9087 parser_bootstrap
.add_argument(
9088 '--ssh-private-key',
9089 type=argparse
.FileType('r'),
9090 help='SSH private key')
9091 parser_bootstrap
.add_argument(
9093 type=argparse
.FileType('r'),
9094 help='SSH public key')
9095 parser_bootstrap
.add_argument(
9098 help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users')
9099 parser_bootstrap
.add_argument(
9100 '--skip-mon-network',
9101 action
='store_true',
9102 help='set mon public_network based on bootstrap mon ip')
9103 parser_bootstrap
.add_argument(
9105 action
='store_true',
9106 help='do not enable the Ceph Dashboard')
9107 parser_bootstrap
.add_argument(
9108 '--dashboard-password-noupdate',
9109 action
='store_true',
9110 help='stop forced dashboard password change')
9111 parser_bootstrap
.add_argument(
9112 '--no-minimize-config',
9113 action
='store_true',
9114 help='do not assimilate and minimize the config file')
9115 parser_bootstrap
.add_argument(
9116 '--skip-ping-check',
9117 action
='store_true',
9118 help='do not verify that mon IP is pingable')
9119 parser_bootstrap
.add_argument(
9121 action
='store_true',
9122 help='do not pull the default image before bootstrapping')
9123 parser_bootstrap
.add_argument(
9125 action
='store_true',
9126 help='Do not configure firewalld')
9127 parser_bootstrap
.add_argument(
9128 '--allow-overwrite',
9129 action
='store_true',
9130 help='allow overwrite of existing --output-* config/keyring/ssh files')
9131 parser_bootstrap
.add_argument(
9132 '--allow-fqdn-hostname',
9133 action
='store_true',
9134 help='allow hostname that is fully-qualified (contains ".")')
9135 parser_bootstrap
.add_argument(
9136 '--allow-mismatched-release',
9137 action
='store_true',
9138 help="allow bootstrap of ceph that doesn't match this version of cephadm")
9139 parser_bootstrap
.add_argument(
9140 '--skip-prepare-host',
9141 action
='store_true',
9142 help='Do not prepare host')
9143 parser_bootstrap
.add_argument(
9144 '--orphan-initial-daemons',
9145 action
='store_true',
9146 help='Set mon and mgr service to `unmanaged`, Do not create the crash service')
9147 parser_bootstrap
.add_argument(
9148 '--skip-monitoring-stack',
9149 action
='store_true',
9150 help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
9151 parser_bootstrap
.add_argument(
9152 '--with-centralized-logging',
9153 action
='store_true',
9154 help='Automatically provision centralized logging (promtail, loki)')
9155 parser_bootstrap
.add_argument(
9157 help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
9158 parser_bootstrap
.add_argument(
9159 '--shared_ceph_folder',
9160 metavar
='CEPH_SOURCE_FOLDER',
9161 help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
9163 parser_bootstrap
.add_argument(
9165 help='url for custom registry')
9166 parser_bootstrap
.add_argument(
9167 '--registry-username',
9168 help='username for custom registry')
9169 parser_bootstrap
.add_argument(
9170 '--registry-password',
9171 help='password for custom registry')
9172 parser_bootstrap
.add_argument(
9174 help='json file with custom registry login info (URL, Username, Password)')
9175 parser_bootstrap
.add_argument(
9177 action
='store_true',
9178 default
=CONTAINER_INIT
,
9179 help=argparse
.SUPPRESS
)
9180 parser_bootstrap
.add_argument(
9181 '--cluster-network',
9182 help='subnet to use for cluster replication, recovery and heartbeats (in CIDR notation network/mask)')
9183 parser_bootstrap
.add_argument(
9184 '--single-host-defaults',
9185 action
='store_true',
9186 help='adjust configuration defaults to suit a single-host cluster')
9187 parser_bootstrap
.add_argument(
9189 action
='store_true',
9190 help='configure cluster to log to traditional log files in /var/log/ceph/$fsid')
9192 parser_deploy
= subparsers
.add_parser(
9193 'deploy', help='deploy a daemon')
9194 parser_deploy
.set_defaults(func
=command_deploy
)
9195 parser_deploy
.add_argument(
9198 action
=CustomValidation
,
9199 help='daemon name (type.id)')
9200 parser_deploy
.add_argument(
9203 help='cluster FSID')
9204 parser_deploy
.add_argument(
9206 help='config file for new daemon')
9207 parser_deploy
.add_argument(
9209 help='Additional configuration information in JSON format')
9210 parser_deploy
.add_argument(
9212 help='keyring for new daemon')
9213 parser_deploy
.add_argument(
9215 help='key for new daemon')
9216 parser_deploy
.add_argument(
9218 help='OSD uuid, if creating an OSD container')
9219 parser_deploy
.add_argument(
9221 action
='store_true',
9222 help='Do not configure firewalld')
9223 parser_deploy
.add_argument(
9225 help='List of tcp ports to open in the host firewall')
9226 parser_deploy
.add_argument(
9228 action
='store_true',
9229 help='Reconfigure a previously deployed daemon')
9230 parser_deploy
.add_argument(
9232 action
='store_true',
9233 help='Allow SYS_PTRACE on daemon container')
9234 parser_deploy
.add_argument(
9236 action
='store_true',
9237 default
=CONTAINER_INIT
,
9238 help=argparse
.SUPPRESS
)
9239 parser_deploy
.add_argument(
9241 help='Container memory request/target'
9243 parser_deploy
.add_argument(
9245 help='Container memory hard limit'
9247 parser_deploy
.add_argument(
9249 help='JSON dict of additional metadata'
9251 parser_deploy
.add_argument(
9252 '--extra-container-args',
9255 help='Additional container arguments to apply to deamon'
9258 parser_check_host
= subparsers
.add_parser(
9259 'check-host', help='check host configuration')
9260 parser_check_host
.set_defaults(func
=command_check_host
)
9261 parser_check_host
.add_argument(
9262 '--expect-hostname',
9263 help='Check that hostname matches an expected value')
9265 parser_prepare_host
= subparsers
.add_parser(
9266 'prepare-host', help='prepare a host for cephadm use')
9267 parser_prepare_host
.set_defaults(func
=command_prepare_host
)
9268 parser_prepare_host
.add_argument(
9269 '--expect-hostname',
9270 help='Set hostname')
9272 parser_add_repo
= subparsers
.add_parser(
9273 'add-repo', help='configure package repository')
9274 parser_add_repo
.set_defaults(func
=command_add_repo
)
9275 parser_add_repo
.add_argument(
9277 help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE
))
9278 parser_add_repo
.add_argument(
9280 help='use specific upstream version (x.y.z)')
9281 parser_add_repo
.add_argument(
9283 help='use specified bleeding edge build from git branch or tag')
9284 parser_add_repo
.add_argument(
9286 help='use specified bleeding edge build from git commit')
9287 parser_add_repo
.add_argument(
9289 help='specify alternative GPG key location')
9290 parser_add_repo
.add_argument(
9292 default
='https://download.ceph.com',
9293 help='specify alternative repo location')
9296 parser_rm_repo
= subparsers
.add_parser(
9297 'rm-repo', help='remove package repository configuration')
9298 parser_rm_repo
.set_defaults(func
=command_rm_repo
)
9300 parser_install
= subparsers
.add_parser(
9301 'install', help='install ceph package(s)')
9302 parser_install
.set_defaults(func
=command_install
)
9303 parser_install
.add_argument(
9304 'packages', nargs
='*',
9305 default
=['cephadm'],
9308 parser_registry_login
= subparsers
.add_parser(
9309 'registry-login', help='log host into authenticated registry')
9310 parser_registry_login
.set_defaults(func
=command_registry_login
)
9311 parser_registry_login
.add_argument(
9313 help='url for custom registry')
9314 parser_registry_login
.add_argument(
9315 '--registry-username',
9316 help='username for custom registry')
9317 parser_registry_login
.add_argument(
9318 '--registry-password',
9319 help='password for custom registry')
9320 parser_registry_login
.add_argument(
9322 help='json file with custom registry login info (URL, Username, Password)')
9323 parser_registry_login
.add_argument(
9325 help='cluster FSID')
9327 parser_gather_facts
= subparsers
.add_parser(
9328 'gather-facts', help='gather and return host related information (JSON format)')
9329 parser_gather_facts
.set_defaults(func
=command_gather_facts
)
9331 parser_maintenance
= subparsers
.add_parser(
9332 'host-maintenance', help='Manage the maintenance state of a host')
9333 parser_maintenance
.add_argument(
9335 help='cluster FSID')
9336 parser_maintenance
.add_argument(
9337 'maintenance_action',
9339 choices
=['enter', 'exit'],
9340 help='Maintenance action - enter maintenance, or exit maintenance')
9341 parser_maintenance
.set_defaults(func
=command_maintenance
)
9343 parser_agent
= subparsers
.add_parser(
9344 'agent', help='start cephadm agent')
9345 parser_agent
.set_defaults(func
=command_agent
)
9346 parser_agent
.add_argument(
9349 help='cluster FSID')
9350 parser_agent
.add_argument(
9352 help='daemon id for agent')
9354 parser_disk_rescan
= subparsers
.add_parser(
9355 'disk-rescan', help='rescan all HBAs to detect new/removed devices')
9356 parser_disk_rescan
.set_defaults(func
=command_rescan_disks
)
def _parse_args(av: List[str]) -> argparse.Namespace:
    """Parse the cephadm command line into a Namespace.

    Strips a leading '--' from a subcommand's REMAINDER args and reconciles
    the deprecated ``--container-init`` flag with ``--no-container-init``.
    """
    parser = _get_parser()

    args = parser.parse_args(av)
    if 'command' in args and args.command and args.command[0] == '--':
        args.command.pop(0)

    # workaround argparse to deprecate the subparser `--container-init` flag
    # container_init and no_container_init must always be mutually exclusive
    init_flags = ('--container-init', '--no-container-init')
    if set(init_flags).issubset(av):
        parser.error('argument %s: not allowed with argument %s' % (init_flags))
    elif '--container-init' in av:
        args.no_container_init = not args.container_init
    else:
        args.container_init = not args.no_container_init
    # After reconciliation the two flags must be exact opposites.
    assert args.container_init is not args.no_container_init

    return args
def cephadm_init_ctx(args: List[str]) -> CephadmContext:
    """Build a CephadmContext populated from the parsed command line."""
    ctx = CephadmContext()
    parsed = _parse_args(args)
    ctx.set_args(parsed)
    return ctx
8704-style region: logging bootstrap for cephadm.
9388 def cephadm_init_logging(ctx
: CephadmContext
, args
: List
[str]) -> None:
9389 """Configure the logging for cephadm as well as updating the system
9390 to have the expected log dir and logrotate configuration.
# Register the custom QUIET level name with the logging module.
9392 logging
.addLevelName(QUIET_LOG_LEVEL
, 'QUIET')
# Ensure /var/log/ceph exists before any file handler is configured.
9394 if not os
.path
.exists(LOG_DIR
):
9395 os
.makedirs(LOG_DIR
)
# bootstrap/rm-cluster are interactive; pick the richer console config.
9396 operations
= ['bootstrap', 'rm-cluster']
9397 if any(op
in args
for op
in operations
):
9398 dictConfig(interactive_logging_config
)
9400 dictConfig(logging_config
)
9402 logger
= logging
.getLogger()
9403 logger
.setLevel(QUIET_LOG_LEVEL
)
# Install a logrotate policy for cephadm.log on first run.
# NOTE(review): most of the logrotate file contents (original lines
# 9409-9418) are not visible in this extract, nor is the condition
# (presumably ctx.verbose) guarding the handler-level loop below.
9405 if not os
.path
.exists(ctx
.logrotate_dir
+ '/cephadm'):
9406 with
open(ctx
.logrotate_dir
+ '/cephadm', 'w') as f
:
9407 f
.write("""# created by cephadm
9408 /var/log/ceph/cephadm.log {
9419 for handler
in logger
.handlers
:
9420 if handler
.name
in ['console', 'log_file', 'console_stdout']:
9421 handler
.setLevel(QUIET_LOG_LEVEL
)
9422 logger
.debug('%s\ncephadm %s' % ('-' * 80, args
))
def cephadm_require_root() -> None:
    """Exit if the process is not running as root."""
    if os.geteuid() == 0:
        return
    sys.stderr.write('ERROR: cephadm should be run as root\n')
    sys.exit(1)
9436 ctx
= cephadm_init_ctx(av
)
9437 if not ctx
.has_function():
9438 sys
.stderr
.write('No command specified; pass -h or --help for usage\n')
9441 cephadm_require_root()
9442 cephadm_init_logging(ctx
, av
)
9445 ctx
.container_engine
= find_container_engine(ctx
)
9446 if ctx
.func
not in \
9449 command_prepare_host
,
9454 check_container_engine(ctx
)
9460 logger
.error('ERROR: %s' % e
)
9467 if __name__
== '__main__':