4 import asyncio
.subprocess
12 from logging
.config
import dictConfig
29 from typing
import Dict
, List
, Tuple
, Optional
, Union
, Any
, NoReturn
, Callable
, IO
, Sequence
, TypeVar
, cast
, Set
, Iterable
, TextIO
34 from configparser
import ConfigParser
35 from contextlib
import redirect_stdout
36 from functools
import wraps
38 from io
import StringIO
39 from threading
import Thread
, Event
40 from urllib
.error
import HTTPError
, URLError
41 from urllib
.request
import urlopen
, Request
42 from pathlib
import Path
44 FuncT
= TypeVar('FuncT', bound
=Callable
)
46 # Default container images -----------------------------------------------------
47 DEFAULT_IMAGE
= 'quay.io/ceph/ceph:v17'
48 DEFAULT_IMAGE_IS_MASTER
= False
49 DEFAULT_IMAGE_RELEASE
= 'quincy'
50 DEFAULT_PROMETHEUS_IMAGE
= 'quay.io/prometheus/prometheus:v2.33.4'
51 DEFAULT_LOKI_IMAGE
= 'docker.io/grafana/loki:2.4.0'
52 DEFAULT_PROMTAIL_IMAGE
= 'docker.io/grafana/promtail:2.4.0'
53 DEFAULT_NODE_EXPORTER_IMAGE
= 'quay.io/prometheus/node-exporter:v1.3.1'
54 DEFAULT_ALERT_MANAGER_IMAGE
= 'quay.io/prometheus/alertmanager:v0.23.0'
55 DEFAULT_GRAFANA_IMAGE
= 'quay.io/ceph/ceph-grafana:8.3.5'
56 DEFAULT_HAPROXY_IMAGE
= 'quay.io/ceph/haproxy:2.3'
57 DEFAULT_KEEPALIVED_IMAGE
= 'quay.io/ceph/keepalived:2.1.5'
58 DEFAULT_SNMP_GATEWAY_IMAGE
= 'docker.io/maxwo/snmp-notifier:v1.2.1'
59 DEFAULT_REGISTRY
= 'docker.io' # normalize unqualified digests to this
60 # ------------------------------------------------------------------------------
62 LATEST_STABLE_RELEASE
= 'quincy'
63 DATA_DIR
= '/var/lib/ceph'
64 LOG_DIR
= '/var/log/ceph'
65 LOCK_DIR
= '/run/cephadm'
66 LOGROTATE_DIR
= '/etc/logrotate.d'
67 SYSCTL_DIR
= '/etc/sysctl.d'
68 UNIT_DIR
= '/etc/systemd/system'
69 CEPH_CONF_DIR
= 'config'
70 CEPH_CONF
= 'ceph.conf'
71 CEPH_PUBKEY
= 'ceph.pub'
72 CEPH_KEYRING
= 'ceph.client.admin.keyring'
73 CEPH_DEFAULT_CONF
= f
'/etc/ceph/{CEPH_CONF}'
74 CEPH_DEFAULT_KEYRING
= f
'/etc/ceph/{CEPH_KEYRING}'
75 CEPH_DEFAULT_PUBKEY
= f
'/etc/ceph/{CEPH_PUBKEY}'
79 MIN_PODMAN_VERSION
= (2, 0, 2)
80 CGROUPS_SPLIT_PODMAN_VERSION
= (2, 1, 0)
81 PIDS_LIMIT_UNLIMITED_PODMAN_VERSION
= (3, 4, 1)
82 CUSTOM_PS1
= r
'[ceph: \u@\h \W]\$ '
83 DEFAULT_TIMEOUT
= None # in seconds
85 DATEFMT
= '%Y-%m-%dT%H:%M:%S.%fZ'
86 QUIET_LOG_LEVEL
= 9 # DEBUG is 10, so using 9 to be lower level than DEBUG
88 logger
: logging
.Logger
= None # type: ignore
91 You can invoke cephadm in two ways:
93 1. The normal way, at the command line.
95 2. By piping the script to the python3 binary. In this latter case, you should
96 prepend one or more lines to the beginning of the script.
100 injected_argv = [...]
104 injected_argv = ['ls']
106 For reading stdin from the '--config-json -' argument,
108 injected_stdin = '...'
113 ##################################
116 async def run_func(func
: Callable
, cmd
: str) -> subprocess
.CompletedProcess
:
117 logger
.debug(f
'running function {func.__name__}, with parms: {cmd}')
122 async def concurrent_tasks(func
: Callable
, cmd_list
: List
[str]) -> List
[Any
]:
125 tasks
.append(run_func(func
, cmd
))
127 data
= await asyncio
.gather(*tasks
)
class EndPoint:
    """EndPoint representing an ip:port format"""

    def __init__(self, ip: str, port: int) -> None:
        # Keep the raw parts; rendering happens on demand.
        self.ip = ip
        self.port = port

    def _render(self) -> str:
        # Single place that defines the ip:port textual form.
        return f'{self.ip}:{self.port}'

    def __str__(self) -> str:
        return self._render()

    def __repr__(self) -> str:
        return self._render()
class ContainerInfo:
    """Plain value object describing one container: id, image, start time, version."""

    def __init__(self, container_id: str,
                 image_name: str,
                 image_id: str,
                 start: str,
                 version: str) -> None:
        self.container_id = container_id
        self.image_name = image_name
        self.image_id = image_id
        self.start = start
        self.version = version

    def _key(self) -> Tuple[str, str, str, str, str]:
        # Tuple of all identity-relevant fields, used for equality.
        return (self.container_id, self.image_name, self.image_id,
                self.start, self.version)

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, ContainerInfo):
            return NotImplemented
        return self._key() == other._key()
170 def __init__(self
) -> None:
172 self
.docker
: bool = False
173 self
.data_dir
: str = DATA_DIR
174 self
.log_dir
: str = LOG_DIR
175 self
.logrotate_dir
: str = LOGROTATE_DIR
176 self
.sysctl_dir
: str = SYSCTL_DIR
177 self
.unit_dir
: str = UNIT_DIR
178 self
.verbose
: bool = False
179 self
.timeout
: Optional
[int] = DEFAULT_TIMEOUT
180 self
.retry
: int = DEFAULT_RETRY
181 self
.env
: List
[str] = []
182 self
.memory_request
: Optional
[int] = None
183 self
.memory_limit
: Optional
[int] = None
184 self
.log_to_journald
: Optional
[bool] = None
186 self
.container_init
: bool = CONTAINER_INIT
187 self
.container_engine
: Optional
[ContainerEngine
] = None
189 def set_from_args(self
, args
: argparse
.Namespace
) -> None:
190 argdict
: Dict
[str, Any
] = vars(args
)
191 for k
, v
in argdict
.items():
196 class CephadmContext
:
198 def __init__(self
) -> None:
199 self
.__dict
__['_args'] = None
200 self
.__dict
__['_conf'] = BaseConfig()
202 def set_args(self
, args
: argparse
.Namespace
) -> None:
203 self
._conf
.set_from_args(args
)
206 def has_function(self
) -> bool:
207 return 'func' in self
._args
209 def __contains__(self
, name
: str) -> bool:
210 return hasattr(self
, name
)
212 def __getattr__(self
, name
: str) -> Any
:
213 if '_conf' in self
.__dict
__ and hasattr(self
._conf
, name
):
214 return getattr(self
._conf
, name
)
215 elif '_args' in self
.__dict
__ and hasattr(self
._args
, name
):
216 return getattr(self
._args
, name
)
218 return super().__getattribute
__(name
)
220 def __setattr__(self
, name
: str, value
: Any
) -> None:
221 if hasattr(self
._conf
, name
):
222 setattr(self
._conf
, name
, value
)
223 elif hasattr(self
._args
, name
):
224 setattr(self
._args
, name
, value
)
226 super().__setattr
__(name
, value
)
229 class ContainerEngine
:
230 def __init__(self
) -> None:
231 self
.path
= find_program(self
.EXE
)
236 raise NotImplementedError()
238 def __str__(self
) -> str:
239 return f
'{self.EXE} ({self.path})'
242 class Podman(ContainerEngine
):
245 def __init__(self
) -> None:
247 self
._version
: Optional
[Tuple
[int, ...]] = None
250 def version(self
) -> Tuple
[int, ...]:
251 if self
._version
is None:
252 raise RuntimeError('Please call `get_version` first')
255 def get_version(self
, ctx
: CephadmContext
) -> None:
256 out
, _
, _
= call_throws(ctx
, [self
.path
, 'version', '--format', '{{.Client.Version}}'], verbosity
=CallVerbosity
.QUIET
)
257 self
._version
= _parse_podman_version(out
)
259 def __str__(self
) -> str:
260 version
= '.'.join(map(str, self
.version
))
261 return f
'{self.EXE} ({self.path}) version {version}'
264 class Docker(ContainerEngine
):
268 CONTAINER_PREFERENCE
= (Podman
, Docker
) # prefer podman to docker
271 # During normal cephadm operations (cephadm ls, gather-facts, etc ) we use:
272 # stdout: for JSON output only
273 # stderr: for error, debug, info, etc
276 'disable_existing_loggers': True,
279 'format': '%(asctime)s %(thread)x %(levelname)s %(message)s'
285 'class': 'logging.StreamHandler',
289 'class': 'logging.handlers.WatchedFileHandler',
290 'formatter': 'cephadm',
291 'filename': '%s/cephadm.log' % LOG_DIR
,
297 'handlers': ['console', 'log_file'],
class ExcludeErrorsFilter(logging.Filter):
    """Logging filter that drops records at WARNING level and above.

    Registered via '()' in the interactive logging config so that one
    handler can carry only sub-WARNING output.
    """

    def filter(self, record: logging.LogRecord) -> bool:
        """Only lets through log messages with log level below WARNING ."""
        is_warning_or_worse = record.levelno >= logging.WARNING
        return not is_warning_or_worse
309 # When cephadm is used as standard binary (bootstrap, rm-cluster, etc) we use:
310 # stdout: for debug and info
311 # stderr: for errors and warnings
312 interactive_logging_config
= {
316 '()': ExcludeErrorsFilter
319 'disable_existing_loggers': True,
322 'format': '%(asctime)s %(thread)x %(levelname)s %(message)s'
328 'class': 'logging.StreamHandler',
329 'filters': ['exclude_errors'],
334 'class': 'logging.StreamHandler',
339 'class': 'logging.handlers.WatchedFileHandler',
340 'formatter': 'cephadm',
341 'filename': '%s/cephadm.log' % LOG_DIR
,
347 'handlers': ['console_stdout', 'console_stderr', 'log_file'],
359 class Error(Exception):
363 class TimeoutExpired(Error
):
367 class UnauthorizedRegistryError(Error
):
370 ##################################
374 daemons
= ('mon', 'mgr', 'osd', 'mds', 'rgw', 'rbd-mirror',
375 'crash', 'cephfs-mirror', 'ceph-exporter')
376 gateways
= ('iscsi', 'nfs')
378 ##################################
383 def get_sysctl_settings() -> List
[str]:
385 '# allow a large number of OSDs',
386 'fs.aio-max-nr = 1048576',
387 'kernel.pid_max = 4194304',
391 ##################################
395 """Defines an SNMP gateway between Prometheus and SNMP monitoring Frameworks"""
396 daemon_type
= 'snmp-gateway'
397 SUPPORTED_VERSIONS
= ['V2c', 'V3']
398 default_image
= DEFAULT_SNMP_GATEWAY_IMAGE
400 env_filename
= 'snmp-gateway.conf'
405 daemon_id
: Union
[int, str],
406 config_json
: Dict
[str, Any
],
407 image
: Optional
[str] = None) -> None:
410 self
.daemon_id
= daemon_id
411 self
.image
= image
or SNMPGateway
.default_image
413 self
.uid
= config_json
.get('uid', 0)
414 self
.gid
= config_json
.get('gid', 0)
416 self
.destination
= config_json
.get('destination', '')
417 self
.snmp_version
= config_json
.get('snmp_version', 'V2c')
418 self
.snmp_community
= config_json
.get('snmp_community', 'public')
419 self
.log_level
= config_json
.get('log_level', 'info')
420 self
.snmp_v3_auth_username
= config_json
.get('snmp_v3_auth_username', '')
421 self
.snmp_v3_auth_password
= config_json
.get('snmp_v3_auth_password', '')
422 self
.snmp_v3_auth_protocol
= config_json
.get('snmp_v3_auth_protocol', '')
423 self
.snmp_v3_priv_protocol
= config_json
.get('snmp_v3_priv_protocol', '')
424 self
.snmp_v3_priv_password
= config_json
.get('snmp_v3_priv_password', '')
425 self
.snmp_v3_engine_id
= config_json
.get('snmp_v3_engine_id', '')
430 def init(cls
, ctx
: CephadmContext
, fsid
: str,
431 daemon_id
: Union
[int, str]) -> 'SNMPGateway':
432 assert ctx
.config_json
433 return cls(ctx
, fsid
, daemon_id
,
434 get_parm(ctx
.config_json
), ctx
.image
)
437 def get_version(ctx
: CephadmContext
, fsid
: str, daemon_id
: str) -> Optional
[str]:
438 """Return the version of the notifer from it's http endpoint"""
439 path
= os
.path
.join(ctx
.data_dir
, fsid
, f
'snmp-gateway.{daemon_id}', 'unit.meta')
441 with
open(path
, 'r') as env
:
442 metadata
= json
.loads(env
.read())
443 except (OSError, json
.JSONDecodeError
):
446 ports
= metadata
.get('ports', [])
451 with
urlopen(f
'http://127.0.0.1:{ports[0]}/') as r
:
452 html
= r
.read().decode('utf-8').split('\n')
453 except (HTTPError
, URLError
):
458 if stripped
.startswith(('<pre>', '<PRE>')) and \
459 stripped
.endswith(('</pre>', '</PRE>')):
460 # <pre>(version=1.2.1, branch=HEAD, revision=7...
461 return stripped
.split(',')[0].split('version=')[1]
466 def port(self
) -> int:
467 if not self
.ctx
.tcp_ports
:
468 return self
.DEFAULT_PORT
470 if len(self
.ctx
.tcp_ports
) > 0:
471 return int(self
.ctx
.tcp_ports
.split()[0])
473 return self
.DEFAULT_PORT
475 def get_daemon_args(self
) -> List
[str]:
478 f
'--web.listen-address=:{self.port}',
479 f
'--snmp.destination={self.destination}',
480 f
'--snmp.version={self.snmp_version}',
481 f
'--log.level={self.log_level}',
482 '--snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl'
485 if self
.snmp_version
== 'V3':
486 # common auth settings
488 '--snmp.authentication-enabled',
489 f
'--snmp.authentication-protocol={self.snmp_v3_auth_protocol}',
490 f
'--snmp.security-engine-id={self.snmp_v3_engine_id}'
492 # authPriv setting is applied if we have a privacy protocol setting
493 if self
.snmp_v3_priv_protocol
:
495 '--snmp.private-enabled',
496 f
'--snmp.private-protocol={self.snmp_v3_priv_protocol}'
499 return base_args
+ v3_args
502 def data_dir(self
) -> str:
503 return os
.path
.join(self
.ctx
.data_dir
, self
.ctx
.fsid
, f
'{self.daemon_type}.{self.daemon_id}')
506 def conf_file_path(self
) -> str:
507 return os
.path
.join(self
.data_dir
, self
.env_filename
)
509 def create_daemon_conf(self
) -> None:
510 """Creates the environment file holding 'secrets' passed to the snmp-notifier daemon"""
511 with
open(os
.open(self
.conf_file_path
, os
.O_CREAT | os
.O_WRONLY
, 0o600), 'w') as f
:
512 if self
.snmp_version
== 'V2c':
513 f
.write(f
'SNMP_NOTIFIER_COMMUNITY={self.snmp_community}\n')
515 f
.write(f
'SNMP_NOTIFIER_AUTH_USERNAME={self.snmp_v3_auth_username}\n')
516 f
.write(f
'SNMP_NOTIFIER_AUTH_PASSWORD={self.snmp_v3_auth_password}\n')
517 if self
.snmp_v3_priv_password
:
518 f
.write(f
'SNMP_NOTIFIER_PRIV_PASSWORD={self.snmp_v3_priv_password}\n')
520 def validate(self
) -> None:
521 """Validate the settings
524 Error: if the fsid doesn't look like an fsid
525 Error: if the snmp version is not supported
526 Error: destination IP and port address missing
528 if not is_fsid(self
.fsid
):
529 raise Error(f
'not a valid fsid: {self.fsid}')
531 if self
.snmp_version
not in SNMPGateway
.SUPPORTED_VERSIONS
:
532 raise Error(f
'not a valid snmp version: {self.snmp_version}')
534 if not self
.destination
:
535 raise Error('config is missing destination attribute(<ip>:<port>) of the target SNMP listener')
538 ##################################
539 class Monitoring(object):
540 """Define the configs for the monitoring containers"""
543 'prometheus': [9095], # Avoid default 9090, due to conflict with cockpit UI
544 'node-exporter': [9100],
546 'alertmanager': [9093, 9094],
553 'image': DEFAULT_PROMETHEUS_IMAGE
,
557 '--config.file=/etc/prometheus/prometheus.yml',
558 '--storage.tsdb.path=/prometheus',
560 'config-json-files': [
565 'image': DEFAULT_LOKI_IMAGE
,
569 '--config.file=/etc/loki/loki.yml',
571 'config-json-files': [
576 'image': DEFAULT_PROMTAIL_IMAGE
,
580 '--config.file=/etc/promtail/promtail.yml',
582 'config-json-files': [
587 'image': DEFAULT_NODE_EXPORTER_IMAGE
,
591 '--no-collector.timex',
595 'image': DEFAULT_GRAFANA_IMAGE
,
599 'config-json-files': [
601 'provisioning/datasources/ceph-dashboard.yml',
607 'image': DEFAULT_ALERT_MANAGER_IMAGE
,
611 '--cluster.listen-address=:{}'.format(port_map
['alertmanager'][1]),
613 'config-json-files': [
616 'config-json-args': [
623 def get_version(ctx
, container_id
, daemon_type
):
624 # type: (CephadmContext, str, str) -> str
626 :param: daemon_type Either "prometheus", "alertmanager", "loki", "promtail" or "node-exporter"
628 assert daemon_type
in ('prometheus', 'alertmanager', 'node-exporter', 'loki', 'promtail')
629 cmd
= daemon_type
.replace('-', '_')
634 if daemon_type
== 'alertmanager':
635 for cmd
in ['alertmanager', 'prometheus-alertmanager']:
636 out
, err
, code
= call(ctx
, [
637 ctx
.container_engine
.path
, 'exec', container_id
, cmd
,
639 ], verbosity
=CallVerbosity
.QUIET
)
642 cmd
= 'alertmanager' # reset cmd for version extraction
644 out
, err
, code
= call(ctx
, [
645 ctx
.container_engine
.path
, 'exec', container_id
, cmd
, '--version'
646 ], verbosity
=CallVerbosity
.QUIET
)
648 if err
.startswith('%s, version ' % cmd
):
649 version
= err
.split(' ')[2]
650 elif out
.startswith('%s, version ' % cmd
):
651 version
= out
.split(' ')[2]
654 ##################################
def populate_files(config_dir, config_files, uid, gid):
    # type: (str, Dict, int, int) -> None
    """create config files for different services"""
    for fname in config_files:
        target = os.path.join(config_dir, fname)
        payload = dict_get_join(config_files, fname)
        logger.info('Write file: %s' % (target))
        # Chown/chmod on the open fd so the file is never observable
        # with ownership or permissions looser than 0600.
        with open(target, 'w', encoding='utf-8') as handle:
            os.fchown(handle.fileno(), uid, gid)
            os.fchmod(handle.fileno(), 0o600)
            handle.write(payload)
670 class NFSGanesha(object):
671 """Defines a NFS-Ganesha container"""
674 entrypoint
= '/usr/bin/ganesha.nfsd'
675 daemon_args
= ['-F', '-L', 'STDERR']
677 required_files
= ['ganesha.conf']
688 image
=DEFAULT_IMAGE
):
689 # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
692 self
.daemon_id
= daemon_id
695 # config-json options
696 self
.pool
= dict_get(config_json
, 'pool', require
=True)
697 self
.namespace
= dict_get(config_json
, 'namespace')
698 self
.userid
= dict_get(config_json
, 'userid')
699 self
.extra_args
= dict_get(config_json
, 'extra_args', [])
700 self
.files
= dict_get(config_json
, 'files', {})
701 self
.rgw
= dict_get(config_json
, 'rgw', {})
703 # validate the supplied args
707 def init(cls
, ctx
, fsid
, daemon_id
):
708 # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha
709 return cls(ctx
, fsid
, daemon_id
, get_parm(ctx
.config_json
), ctx
.image
)
711 def get_container_mounts(self
, data_dir
):
712 # type: (str) -> Dict[str, str]
714 mounts
[os
.path
.join(data_dir
, 'config')] = '/etc/ceph/ceph.conf:z'
715 mounts
[os
.path
.join(data_dir
, 'keyring')] = '/etc/ceph/keyring:z'
716 mounts
[os
.path
.join(data_dir
, 'etc/ganesha')] = '/etc/ganesha:z'
718 cluster
= self
.rgw
.get('cluster', 'ceph')
719 rgw_user
= self
.rgw
.get('user', 'admin')
720 mounts
[os
.path
.join(data_dir
, 'keyring.rgw')] = \
721 '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster
, rgw_user
)
725 def get_container_envs():
726 # type: () -> List[str]
728 'CEPH_CONF=%s' % (CEPH_DEFAULT_CONF
)
733 def get_version(ctx
, container_id
):
734 # type: (CephadmContext, str) -> Optional[str]
736 out
, err
, code
= call(ctx
,
737 [ctx
.container_engine
.path
, 'exec', container_id
,
738 NFSGanesha
.entrypoint
, '-v'],
739 verbosity
=CallVerbosity
.QUIET
)
741 match
= re
.search(r
'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out
)
743 version
= match
.group(1)
748 if not is_fsid(self
.fsid
):
749 raise Error('not an fsid: %s' % self
.fsid
)
750 if not self
.daemon_id
:
751 raise Error('invalid daemon_id: %s' % self
.daemon_id
)
753 raise Error('invalid image: %s' % self
.image
)
755 # check for the required files
756 if self
.required_files
:
757 for fname
in self
.required_files
:
758 if fname
not in self
.files
:
759 raise Error('required file missing from config-json: %s' % fname
)
761 # check for an RGW config
763 if not self
.rgw
.get('keyring'):
764 raise Error('RGW keyring is missing')
765 if not self
.rgw
.get('user'):
766 raise Error('RGW user is missing')
768 def get_daemon_name(self
):
770 return '%s.%s' % (self
.daemon_type
, self
.daemon_id
)
772 def get_container_name(self
, desc
=None):
773 # type: (Optional[str]) -> str
774 cname
= 'ceph-%s-%s' % (self
.fsid
, self
.get_daemon_name())
776 cname
= '%s-%s' % (cname
, desc
)
779 def get_daemon_args(self
):
780 # type: () -> List[str]
781 return self
.daemon_args
+ self
.extra_args
783 def create_daemon_dirs(self
, data_dir
, uid
, gid
):
784 # type: (str, int, int) -> None
785 """Create files under the container data dir"""
786 if not os
.path
.isdir(data_dir
):
787 raise OSError('data_dir is not a directory: %s' % (data_dir
))
789 logger
.info('Creating ganesha config...')
791 # create the ganesha conf dir
792 config_dir
= os
.path
.join(data_dir
, 'etc/ganesha')
793 makedirs(config_dir
, uid
, gid
, 0o755)
795 # populate files from the config-json
796 populate_files(config_dir
, self
.files
, uid
, gid
)
798 # write the RGW keyring
800 keyring_path
= os
.path
.join(data_dir
, 'keyring.rgw')
801 with
open(keyring_path
, 'w') as f
:
802 os
.fchmod(f
.fileno(), 0o600)
803 os
.fchown(f
.fileno(), uid
, gid
)
804 f
.write(self
.rgw
.get('keyring', ''))
806 ##################################
809 class CephIscsi(object):
810 """Defines a Ceph-Iscsi container"""
812 daemon_type
= 'iscsi'
813 entrypoint
= '/usr/bin/rbd-target-api'
815 required_files
= ['iscsi-gateway.cfg']
822 image
=DEFAULT_IMAGE
):
823 # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
826 self
.daemon_id
= daemon_id
829 # config-json options
830 self
.files
= dict_get(config_json
, 'files', {})
832 # validate the supplied args
836 def init(cls
, ctx
, fsid
, daemon_id
):
837 # type: (CephadmContext, str, Union[int, str]) -> CephIscsi
838 return cls(ctx
, fsid
, daemon_id
,
839 get_parm(ctx
.config_json
), ctx
.image
)
842 def get_container_mounts(data_dir
, log_dir
):
843 # type: (str, str) -> Dict[str, str]
845 mounts
[os
.path
.join(data_dir
, 'config')] = '/etc/ceph/ceph.conf:z'
846 mounts
[os
.path
.join(data_dir
, 'keyring')] = '/etc/ceph/keyring:z'
847 mounts
[os
.path
.join(data_dir
, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
848 mounts
[os
.path
.join(data_dir
, 'configfs')] = '/sys/kernel/config'
849 mounts
[log_dir
] = '/var/log:z'
850 mounts
['/dev'] = '/dev'
854 def get_container_binds():
855 # type: () -> List[List[str]]
857 lib_modules
= ['type=bind',
858 'source=/lib/modules',
859 'destination=/lib/modules',
861 binds
.append(lib_modules
)
865 def get_version(ctx
, container_id
):
866 # type: (CephadmContext, str) -> Optional[str]
868 out
, err
, code
= call(ctx
,
869 [ctx
.container_engine
.path
, 'exec', container_id
,
870 '/usr/bin/python3', '-c', "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"],
871 verbosity
=CallVerbosity
.QUIET
)
873 version
= out
.strip()
878 if not is_fsid(self
.fsid
):
879 raise Error('not an fsid: %s' % self
.fsid
)
880 if not self
.daemon_id
:
881 raise Error('invalid daemon_id: %s' % self
.daemon_id
)
883 raise Error('invalid image: %s' % self
.image
)
885 # check for the required files
886 if self
.required_files
:
887 for fname
in self
.required_files
:
888 if fname
not in self
.files
:
889 raise Error('required file missing from config-json: %s' % fname
)
891 def get_daemon_name(self
):
893 return '%s.%s' % (self
.daemon_type
, self
.daemon_id
)
895 def get_container_name(self
, desc
=None):
896 # type: (Optional[str]) -> str
897 cname
= 'ceph-%s-%s' % (self
.fsid
, self
.get_daemon_name())
899 cname
= '%s-%s' % (cname
, desc
)
902 def create_daemon_dirs(self
, data_dir
, uid
, gid
):
903 # type: (str, int, int) -> None
904 """Create files under the container data dir"""
905 if not os
.path
.isdir(data_dir
):
906 raise OSError('data_dir is not a directory: %s' % (data_dir
))
908 logger
.info('Creating ceph-iscsi config...')
909 configfs_dir
= os
.path
.join(data_dir
, 'configfs')
910 makedirs(configfs_dir
, uid
, gid
, 0o755)
912 # populate files from the config-json
913 populate_files(data_dir
, self
.files
, uid
, gid
)
916 def configfs_mount_umount(data_dir
, mount
=True):
917 # type: (str, bool) -> List[str]
918 mount_path
= os
.path
.join(data_dir
, 'configfs')
920 cmd
= 'if ! grep -qs {0} /proc/mounts; then ' \
921 'mount -t configfs none {0}; fi'.format(mount_path
)
923 cmd
= 'if grep -qs {0} /proc/mounts; then ' \
924 'umount {0}; fi'.format(mount_path
)
927 def get_tcmu_runner_container(self
):
928 # type: () -> CephContainer
929 tcmu_container
= get_container(self
.ctx
, self
.fsid
, self
.daemon_type
, self
.daemon_id
)
930 tcmu_container
.entrypoint
= '/usr/bin/tcmu-runner'
931 tcmu_container
.cname
= self
.get_container_name(desc
='tcmu')
932 # remove extra container args for tcmu container.
933 # extra args could cause issue with forking service type
934 tcmu_container
.container_args
= []
935 set_pids_limit_unlimited(self
.ctx
, tcmu_container
.container_args
)
936 return tcmu_container
938 ##################################
941 class CephExporter(object):
942 """Defines a Ceph exporter container"""
944 daemon_type
= 'ceph-exporter'
945 entrypoint
= '/usr/bin/ceph-exporter'
948 'ceph-exporter': DEFAULT_PORT
,
953 fsid
: str, daemon_id
: Union
[int, str],
954 config_json
: Dict
[str, Any
],
955 image
: str = DEFAULT_IMAGE
) -> None:
958 self
.daemon_id
= daemon_id
961 self
.sock_dir
= config_json
.get('sock-dir', '/var/run/ceph/')
962 self
.addrs
= config_json
.get('addrs', socket
.gethostbyname(socket
.gethostname()))
963 self
.port
= config_json
.get('port', self
.DEFAULT_PORT
)
964 self
.prio_limit
= config_json
.get('prio-limit', 5)
965 self
.stats_period
= config_json
.get('stats-period', 5)
970 def init(cls
, ctx
: CephadmContext
, fsid
: str,
971 daemon_id
: Union
[int, str]) -> 'CephExporter':
972 return cls(ctx
, fsid
, daemon_id
,
973 get_parm(ctx
.config_json
), ctx
.image
)
976 def get_container_mounts() -> Dict
[str, str]:
978 mounts
['/var/run/ceph'] = '/var/run/ceph:z'
981 def get_daemon_args(self
) -> List
[str]:
983 f
'--sock-dir={self.sock_dir}',
984 f
'--addrs={self.addrs}',
985 f
'--port={self.port}',
986 f
'--prio-limit={self.prio_limit}',
987 f
'--stats-period={self.stats_period}',
991 def validate(self
) -> None:
992 if not os
.path
.isdir(self
.sock_dir
):
993 raise Error(f
'Directory does not exist. Got: {self.sock_dir}')
996 ##################################
999 class HAproxy(object):
1000 """Defines an HAproxy container"""
1001 daemon_type
= 'haproxy'
1002 required_files
= ['haproxy.cfg']
1003 default_image
= DEFAULT_HAPROXY_IMAGE
1006 ctx
: CephadmContext
,
1007 fsid
: str, daemon_id
: Union
[int, str],
1008 config_json
: Dict
, image
: str) -> None:
1011 self
.daemon_id
= daemon_id
1014 # config-json options
1015 self
.files
= dict_get(config_json
, 'files', {})
1020 def init(cls
, ctx
: CephadmContext
,
1021 fsid
: str, daemon_id
: Union
[int, str]) -> 'HAproxy':
1022 return cls(ctx
, fsid
, daemon_id
, get_parm(ctx
.config_json
),
1025 def create_daemon_dirs(self
, data_dir
: str, uid
: int, gid
: int) -> None:
1026 """Create files under the container data dir"""
1027 if not os
.path
.isdir(data_dir
):
1028 raise OSError('data_dir is not a directory: %s' % (data_dir
))
1030 # create additional directories in data dir for HAproxy to use
1031 if not os
.path
.isdir(os
.path
.join(data_dir
, 'haproxy')):
1032 makedirs(os
.path
.join(data_dir
, 'haproxy'), uid
, gid
, DATA_DIR_MODE
)
1034 data_dir
= os
.path
.join(data_dir
, 'haproxy')
1035 populate_files(data_dir
, self
.files
, uid
, gid
)
1037 def get_daemon_args(self
) -> List
[str]:
1038 return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg']
1042 if not is_fsid(self
.fsid
):
1043 raise Error('not an fsid: %s' % self
.fsid
)
1044 if not self
.daemon_id
:
1045 raise Error('invalid daemon_id: %s' % self
.daemon_id
)
1047 raise Error('invalid image: %s' % self
.image
)
1049 # check for the required files
1050 if self
.required_files
:
1051 for fname
in self
.required_files
:
1052 if fname
not in self
.files
:
1053 raise Error('required file missing from config-json: %s' % fname
)
1055 def get_daemon_name(self
):
1057 return '%s.%s' % (self
.daemon_type
, self
.daemon_id
)
1059 def get_container_name(self
, desc
=None):
1060 # type: (Optional[str]) -> str
1061 cname
= 'ceph-%s-%s' % (self
.fsid
, self
.get_daemon_name())
1063 cname
= '%s-%s' % (cname
, desc
)
1066 def extract_uid_gid_haproxy(self
) -> Tuple
[int, int]:
1067 # better directory for this?
1068 return extract_uid_gid(self
.ctx
, file_path
='/var/lib')
1071 def get_container_mounts(data_dir
: str) -> Dict
[str, str]:
1073 mounts
[os
.path
.join(data_dir
, 'haproxy')] = '/var/lib/haproxy'
1077 def get_sysctl_settings() -> List
[str]:
1079 '# IP forwarding and non-local bind',
1080 'net.ipv4.ip_forward = 1',
1081 'net.ipv4.ip_nonlocal_bind = 1',
1084 ##################################
1087 class Keepalived(object):
1088 """Defines an Keepalived container"""
1089 daemon_type
= 'keepalived'
1090 required_files
= ['keepalived.conf']
1091 default_image
= DEFAULT_KEEPALIVED_IMAGE
1094 ctx
: CephadmContext
,
1095 fsid
: str, daemon_id
: Union
[int, str],
1096 config_json
: Dict
, image
: str) -> None:
1099 self
.daemon_id
= daemon_id
1102 # config-json options
1103 self
.files
= dict_get(config_json
, 'files', {})
1108 def init(cls
, ctx
: CephadmContext
, fsid
: str,
1109 daemon_id
: Union
[int, str]) -> 'Keepalived':
1110 return cls(ctx
, fsid
, daemon_id
,
1111 get_parm(ctx
.config_json
), ctx
.image
)
1113 def create_daemon_dirs(self
, data_dir
: str, uid
: int, gid
: int) -> None:
1114 """Create files under the container data dir"""
1115 if not os
.path
.isdir(data_dir
):
1116 raise OSError('data_dir is not a directory: %s' % (data_dir
))
1118 # create additional directories in data dir for keepalived to use
1119 if not os
.path
.isdir(os
.path
.join(data_dir
, 'keepalived')):
1120 makedirs(os
.path
.join(data_dir
, 'keepalived'), uid
, gid
, DATA_DIR_MODE
)
1122 # populate files from the config-json
1123 populate_files(data_dir
, self
.files
, uid
, gid
)
1127 if not is_fsid(self
.fsid
):
1128 raise Error('not an fsid: %s' % self
.fsid
)
1129 if not self
.daemon_id
:
1130 raise Error('invalid daemon_id: %s' % self
.daemon_id
)
1132 raise Error('invalid image: %s' % self
.image
)
1134 # check for the required files
1135 if self
.required_files
:
1136 for fname
in self
.required_files
:
1137 if fname
not in self
.files
:
1138 raise Error('required file missing from config-json: %s' % fname
)
1140 def get_daemon_name(self
):
1142 return '%s.%s' % (self
.daemon_type
, self
.daemon_id
)
1144 def get_container_name(self
, desc
=None):
1145 # type: (Optional[str]) -> str
1146 cname
= 'ceph-%s-%s' % (self
.fsid
, self
.get_daemon_name())
1148 cname
= '%s-%s' % (cname
, desc
)
1152 def get_container_envs():
1153 # type: () -> List[str]
1155 'KEEPALIVED_AUTOCONF=false',
1156 'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf',
1157 'KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf',
1158 'KEEPALIVED_DEBUG=false'
1163 def get_sysctl_settings() -> List
[str]:
1165 '# IP forwarding and non-local bind',
1166 'net.ipv4.ip_forward = 1',
1167 'net.ipv4.ip_nonlocal_bind = 1',
1170 def extract_uid_gid_keepalived(self
) -> Tuple
[int, int]:
1171 # better directory for this?
1172 return extract_uid_gid(self
.ctx
, file_path
='/var/lib')
1175 def get_container_mounts(data_dir
: str) -> Dict
[str, str]:
1177 mounts
[os
.path
.join(data_dir
, 'keepalived.conf')] = '/etc/keepalived/keepalived.conf'
1180 ##################################
1183 class CustomContainer(object):
1184 """Defines a custom container"""
1185 daemon_type
= 'container'
1188 fsid
: str, daemon_id
: Union
[int, str],
1189 config_json
: Dict
, image
: str) -> None:
1191 self
.daemon_id
= daemon_id
1194 # config-json options
1195 self
.entrypoint
= dict_get(config_json
, 'entrypoint')
1196 self
.uid
= dict_get(config_json
, 'uid', 65534) # nobody
1197 self
.gid
= dict_get(config_json
, 'gid', 65534) # nobody
1198 self
.volume_mounts
= dict_get(config_json
, 'volume_mounts', {})
1199 self
.args
= dict_get(config_json
, 'args', [])
1200 self
.envs
= dict_get(config_json
, 'envs', [])
1201 self
.privileged
= dict_get(config_json
, 'privileged', False)
1202 self
.bind_mounts
= dict_get(config_json
, 'bind_mounts', [])
1203 self
.ports
= dict_get(config_json
, 'ports', [])
1204 self
.dirs
= dict_get(config_json
, 'dirs', [])
1205 self
.files
= dict_get(config_json
, 'files', {})
1208 def init(cls
, ctx
: CephadmContext
,
1209 fsid
: str, daemon_id
: Union
[int, str]) -> 'CustomContainer':
1210 return cls(fsid
, daemon_id
,
1211 get_parm(ctx
.config_json
), ctx
.image
)
1213 def create_daemon_dirs(self
, data_dir
: str, uid
: int, gid
: int) -> None:
1215 Create dirs/files below the container data directory.
1217 logger
.info('Creating custom container configuration '
1218 'dirs/files in {} ...'.format(data_dir
))
1220 if not os
.path
.isdir(data_dir
):
1221 raise OSError('data_dir is not a directory: %s' % data_dir
)
1223 for dir_path
in self
.dirs
:
1224 logger
.info('Creating directory: {}'.format(dir_path
))
1225 dir_path
= os
.path
.join(data_dir
, dir_path
.strip('/'))
1226 makedirs(dir_path
, uid
, gid
, 0o755)
1228 for file_path
in self
.files
:
1229 logger
.info('Creating file: {}'.format(file_path
))
1230 content
= dict_get_join(self
.files
, file_path
)
1231 file_path
= os
.path
.join(data_dir
, file_path
.strip('/'))
1232 with
open(file_path
, 'w', encoding
='utf-8') as f
:
1233 os
.fchown(f
.fileno(), uid
, gid
)
1234 os
.fchmod(f
.fileno(), 0o600)
1237 def get_daemon_args(self
) -> List
[str]:
1240 def get_container_args(self
) -> List
[str]:
1243 def get_container_envs(self
) -> List
[str]:
1246 def get_container_mounts(self
, data_dir
: str) -> Dict
[str, str]:
1248 Get the volume mounts. Relative source paths will be located below
1249 `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
1259 /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
1263 for source
, destination
in self
.volume_mounts
.items():
1264 source
= os
.path
.join(data_dir
, source
)
1265 mounts
[source
] = destination
1268 def get_container_binds(self
, data_dir
: str) -> List
[List
[str]]:
1270 Get the bind mounts. Relative `source=...` paths will be located below
1271 `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
1276 'source=lib/modules',
1277 'destination=/lib/modules',
1283 'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
1287 binds
= self
.bind_mounts
.copy()
1289 for index
, value
in enumerate(bind
):
1290 match
= re
.match(r
'^source=(.+)$', value
)
1292 bind
[index
] = 'source={}'.format(os
.path
.join(
1293 data_dir
, match
.group(1)))
1296 ##################################
def touch(file_path: str, uid: Optional[int] = None, gid: Optional[int] = None) -> None:
    """
    Create *file_path* (or update its mtime), optionally setting ownership.

    :param file_path: path of the file to touch
    :param uid: owner uid; ownership is only changed when both uid and gid
        are provided
    :param gid: owner gid
    """
    Path(file_path).touch()
    # Guard explicitly against None so that a legitimate uid/gid of 0 (root)
    # still triggers the chown, and so chown is never called with None.
    if uid is not None and gid is not None:
        os.chown(file_path, uid, gid)
1305 ##################################
def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
    """
    Helper function to get a key from a dictionary.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :param default: The default value in case the key does not
        exist. Default is `None`.
    :param require: Set to `True` if the key is required. An
        exception will be raised if the key does not exist in
        the given dictionary.
    :return: Returns the value of the given key.
    :raises: :exc:`self.Error` if the given key does not exist
        and `require` is set to `True`.
    """
    # Idiom fix: `key not in d` instead of `key not in d.keys()` — the
    # keys() view is redundant for membership tests.
    if require and key not in d:
        raise Error('{} missing from dict'.format(key))
    return d.get(key, default)  # type: ignore
1326 ##################################
def dict_get_join(d: Dict, key: str) -> Any:
    """
    Helper function to get the value of a given key from a dictionary.
    `List` values will be converted to a string by joining them with a
    line break.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :return: Returns the value of the given key. If it was a `list`, it
        will be joining with a line break.
    """
    value = d.get(key)
    if isinstance(value, list):
        return '\n'.join(str(item) for item in value)
    return value
1344 ##################################
def get_supported_daemons():
    # type: () -> List[str]
    """Return every daemon type this script knows how to deploy."""
    supported_daemons = list(Ceph.daemons)
    supported_daemons.extend(Monitoring.components)
    # Single-daemon services each expose their type via `daemon_type`.
    for service in (NFSGanesha, CephIscsi, CustomContainer, HAproxy,
                    Keepalived, CephadmAgent, SNMPGateway):
        supported_daemons.append(service.daemon_type)
    # No daemon type may be registered twice.
    assert len(supported_daemons) == len(set(supported_daemons))
    return supported_daemons
1361 ##################################
class PortOccupiedError(Error):
    """Raised by attempt_bind() when the requested TCP port is already in use."""
    pass
def attempt_bind(ctx, s, address, port):
    # type: (CephadmContext, socket.socket, str, int) -> None
    """Try binding socket *s* to (address, port) to probe availability.

    :raises PortOccupiedError: the port is already bound (EADDRINUSE)
    :raises Error: any other unexpected failure
    The socket is always closed before returning.
    """
    try:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((address, port))
    except OSError as e:
        if e.errno == errno.EADDRINUSE:
            msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
            logger.warning(msg)
            raise PortOccupiedError(msg)
        raise e
    except Exception as e:
        raise Error(e)
    finally:
        s.close()
1386 def port_in_use(ctx
, port_num
):
1387 # type: (CephadmContext, int) -> bool
1388 """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
1389 logger
.info('Verifying port %d ...' % port_num
)
1391 def _port_in_use(af
: socket
.AddressFamily
, address
: str) -> bool:
1393 s
= socket
.socket(af
, socket
.SOCK_STREAM
)
1394 attempt_bind(ctx
, s
, address
, port_num
)
1395 except PortOccupiedError
:
1397 except OSError as e
:
1398 if e
.errno
in (errno
.EAFNOSUPPORT
, errno
.EADDRNOTAVAIL
):
1399 # Ignore EAFNOSUPPORT and EADDRNOTAVAIL as two interfaces are
1400 # being tested here and one might be intentionally be disabled.
1401 # In that case no error should be raised.
1406 return any(_port_in_use(af
, address
) for af
, address
in (
1407 (socket
.AF_INET
, '0.0.0.0'),
1408 (socket
.AF_INET6
, '::')
1412 def check_ip_port(ctx
, ep
):
1413 # type: (CephadmContext, EndPoint) -> None
1414 if not ctx
.skip_ping_check
:
1415 logger
.info(f
'Verifying IP {ep.ip} port {ep.port} ...')
1417 s
= socket
.socket(socket
.AF_INET6
, socket
.SOCK_STREAM
)
1418 ip
= unwrap_ipv6(ep
.ip
)
1420 s
= socket
.socket(socket
.AF_INET
, socket
.SOCK_STREAM
)
1422 attempt_bind(ctx
, s
, ip
, ep
.port
)
1424 ##################################
1427 # this is an abbreviated version of
1428 # https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py
1429 # that drops all of the compatibility (this is Unix/Linux only).
class Timeout(TimeoutError):
    """
    Raised when the lock could not be acquired in *timeout*
    seconds.
    """

    def __init__(self, lock_file: str) -> None:
        #: The path of the file lock.
        self.lock_file = lock_file

    def __str__(self) -> str:
        return "The file lock '{}' could not be acquired.".format(self.lock_file)
1450 class _Acquire_ReturnProxy(object):
1451 def __init__(self
, lock
: 'FileLock') -> None:
1455 def __enter__(self
) -> 'FileLock':
1458 def __exit__(self
, exc_type
: Any
, exc_value
: Any
, traceback
: Any
) -> None:
1463 class FileLock(object):
1464 def __init__(self
, ctx
: CephadmContext
, name
: str, timeout
: int = -1) -> None:
1465 if not os
.path
.exists(LOCK_DIR
):
1466 os
.mkdir(LOCK_DIR
, 0o700)
1467 self
._lock
_file
= os
.path
.join(LOCK_DIR
, name
+ '.lock')
1470 # The file descriptor for the *_lock_file* as it is returned by the
1471 # os.open() function.
1472 # This file lock is only NOT None, if the object currently holds the
1474 self
._lock
_file
_fd
: Optional
[int] = None
1475 self
.timeout
= timeout
1476 # The lock counter is used for implementing the nested locking
1477 # mechanism. Whenever the lock is acquired, the counter is increased and
1478 # the lock is only released, when this value is 0 again.
1479 self
._lock
_counter
= 0
1483 def is_locked(self
) -> bool:
1484 return self
._lock
_file
_fd
is not None
1486 def acquire(self
, timeout
: Optional
[int] = None, poll_intervall
: float = 0.05) -> _Acquire_ReturnProxy
:
1488 Acquires the file lock or fails with a :exc:`Timeout` error.
1489 .. code-block:: python
1490 # You can use this method in the context manager (recommended)
1491 with lock.acquire():
1493 # Or use an equivalent try-finally construct:
1500 The maximum time waited for the file lock.
1501 If ``timeout < 0``, there is no timeout and this method will
1502 block until the lock could be acquired.
1503 If ``timeout`` is None, the default :attr:`~timeout` is used.
1504 :arg float poll_intervall:
1505 We check once in *poll_intervall* seconds if we can acquire the
1508 if the lock could not be acquired in *timeout* seconds.
1509 .. versionchanged:: 2.0.0
1510 This method returns now a *proxy* object instead of *self*,
1511 so that it can be used in a with statement without side effects.
1514 # Use the default timeout, if no timeout is provided.
1516 timeout
= self
.timeout
1518 # Increment the number right at the beginning.
1519 # We can still undo it, if something fails.
1520 self
._lock
_counter
+= 1
1523 lock_filename
= self
._lock
_file
1524 start_time
= time
.time()
1527 if not self
.is_locked
:
1528 logger
.log(QUIET_LOG_LEVEL
, 'Acquiring lock %s on %s', lock_id
,
1533 logger
.log(QUIET_LOG_LEVEL
, 'Lock %s acquired on %s', lock_id
,
1536 elif timeout
>= 0 and time
.time() - start_time
> timeout
:
1537 logger
.warning('Timeout acquiring lock %s on %s', lock_id
,
1539 raise Timeout(self
._lock
_file
)
1543 'Lock %s not acquired on %s, waiting %s seconds ...',
1544 lock_id
, lock_filename
, poll_intervall
1546 time
.sleep(poll_intervall
)
1548 # Something did go wrong, so decrement the counter.
1549 self
._lock
_counter
= max(0, self
._lock
_counter
- 1)
1552 return _Acquire_ReturnProxy(lock
=self
)
1554 def release(self
, force
: bool = False) -> None:
1556 Releases the file lock.
1557 Please note, that the lock is only completly released, if the lock
1559 Also note, that the lock file itself is not automatically deleted.
1561 If true, the lock counter is ignored and the lock is released in
1565 self
._lock
_counter
-= 1
1567 if self
._lock
_counter
== 0 or force
:
1568 # lock_id = id(self)
1569 # lock_filename = self._lock_file
1571 # Can't log in shutdown:
1572 # File "/usr/lib64/python3.9/logging/__init__.py", line 1175, in _open
1573 # NameError: name 'open' is not defined
1574 # logger.debug('Releasing lock %s on %s', lock_id, lock_filename)
1576 self
._lock
_counter
= 0
1577 # logger.debug('Lock %s released on %s', lock_id, lock_filename)
1581 def __enter__(self
) -> 'FileLock':
1585 def __exit__(self
, exc_type
: Any
, exc_value
: Any
, traceback
: Any
) -> None:
1589 def __del__(self
) -> None:
1590 self
.release(force
=True)
1593 def _acquire(self
) -> None:
1594 open_mode
= os
.O_RDWR | os
.O_CREAT | os
.O_TRUNC
1595 fd
= os
.open(self
._lock
_file
, open_mode
)
1598 fcntl
.flock(fd
, fcntl
.LOCK_EX | fcntl
.LOCK_NB
)
1599 except (IOError, OSError):
1602 self
._lock
_file
_fd
= fd
1605 def _release(self
) -> None:
1606 # Do not remove the lockfile:
1608 # https://github.com/benediktschmitt/py-filelock/issues/31
1609 # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
1610 fd
= self
._lock
_file
_fd
1611 self
._lock
_file
_fd
= None
1612 fcntl
.flock(fd
, fcntl
.LOCK_UN
) # type: ignore
1613 os
.close(fd
) # type: ignore
1617 ##################################
1618 # Popen wrappers, lifted from ceph-volume
1620 class CallVerbosity(Enum
):
1623 # Normal Operation: <log-level-when-no-errors>, Errors: <log-level-when-error>
1625 # NOTE: QUIET log level is custom level only used when --verbose is passed
1628 # Normal Operation: None, Errors: None
1630 # Normal Operation: QUIET, Error: QUIET
1632 # Normal Operation: DEBUG, Error: DEBUG
1634 # Normal Operation: QUIET, Error: INFO
1635 QUIET_UNLESS_ERROR
= 3
1636 # Normal Operation: DEBUG, Error: INFO
1637 VERBOSE_ON_FAILURE
= 4
1638 # Normal Operation: INFO, Error: INFO
1641 def success_log_level(self
) -> int:
1642 _verbosity_level_to_log_level
= {
1644 self
.QUIET
: QUIET_LOG_LEVEL
,
1645 self
.DEBUG
: logging
.DEBUG
,
1646 self
.QUIET_UNLESS_ERROR
: QUIET_LOG_LEVEL
,
1647 self
.VERBOSE_ON_FAILURE
: logging
.DEBUG
,
1648 self
.VERBOSE
: logging
.INFO
1650 return _verbosity_level_to_log_level
[self
] # type: ignore
1652 def error_log_level(self
) -> int:
1653 _verbosity_level_to_log_level
= {
1655 self
.QUIET
: QUIET_LOG_LEVEL
,
1656 self
.DEBUG
: logging
.DEBUG
,
1657 self
.QUIET_UNLESS_ERROR
: logging
.INFO
,
1658 self
.VERBOSE_ON_FAILURE
: logging
.INFO
,
1659 self
.VERBOSE
: logging
.INFO
1661 return _verbosity_level_to_log_level
[self
] # type: ignore
1664 if sys
.version_info
< (3, 8):
1668 from asyncio
import events
1670 class ThreadedChildWatcher(asyncio
.AbstractChildWatcher
):
1671 """Threaded child watcher implementation.
1672 The watcher uses a thread per process
1673 for waiting for the process finish.
1674 It doesn't require subscription on POSIX signal
1675 but a thread creation is not free.
1676 The watcher has O(1) complexity, its performance doesn't depend
1677 on amount of spawn processes.
1680 def __init__(self
) -> None:
1681 self
._pid
_counter
= itertools
.count(0)
1682 self
._threads
: Dict
[Any
, Any
] = {}
1684 def is_active(self
) -> bool:
1687 def close(self
) -> None:
1688 self
._join
_threads
()
1690 def _join_threads(self
) -> None:
1691 """Internal: Join all non-daemon threads"""
1692 threads
= [thread
for thread
in list(self
._threads
.values())
1693 if thread
.is_alive() and not thread
.daemon
]
1694 for thread
in threads
:
1697 def __enter__(self
) -> Any
:
1700 def __exit__(self
, exc_type
: Any
, exc_val
: Any
, exc_tb
: Any
) -> None:
1703 def __del__(self
, _warn
: Any
= warnings
.warn
) -> None:
1704 threads
= [thread
for thread
in list(self
._threads
.values())
1705 if thread
.is_alive()]
1707 _warn(f
'{self.__class__} has registered but not finished child processes',
1711 def add_child_handler(self
, pid
: Any
, callback
: Any
, *args
: Any
) -> None:
1712 loop
= events
.get_event_loop()
1713 thread
= threading
.Thread(target
=self
._do
_waitpid
,
1714 name
=f
'waitpid-{next(self._pid_counter)}',
1715 args
=(loop
, pid
, callback
, args
),
1717 self
._threads
[pid
] = thread
1720 def remove_child_handler(self
, pid
: Any
) -> bool:
1721 # asyncio never calls remove_child_handler() !!!
1722 # The method is no-op but is implemented because
1723 # abstract base classe requires it
1726 def attach_loop(self
, loop
: Any
) -> None:
1729 def _do_waitpid(self
, loop
: Any
, expected_pid
: Any
, callback
: Any
, args
: Any
) -> None:
1730 assert expected_pid
> 0
1733 pid
, status
= os
.waitpid(expected_pid
, 0)
1734 except ChildProcessError
:
1735 # The child process is already reaped
1736 # (may happen if waitpid() is called elsewhere).
1740 'Unknown child process pid %d, will report returncode 255',
1743 if os
.WIFEXITED(status
):
1744 returncode
= os
.WEXITSTATUS(status
)
1745 elif os
.WIFSIGNALED(status
):
1746 returncode
= -os
.WTERMSIG(status
)
1748 raise ValueError(f
'unknown wait status {status}')
1749 if loop
.get_debug():
1750 logger
.debug('process %s exited with returncode %s',
1751 expected_pid
, returncode
)
1753 if loop
.is_closed():
1754 logger
.warning('Loop %r that handles pid %r is closed', loop
, pid
)
1756 loop
.call_soon_threadsafe(callback
, pid
, returncode
, *args
)
1758 self
._threads
.pop(expected_pid
)
1760 # unlike SafeChildWatcher which handles SIGCHLD in the main thread,
1761 # ThreadedChildWatcher runs in a separated thread, hence allows us to
1762 # run create_subprocess_exec() in non-main thread, see
1763 # https://bugs.python.org/issue35621
1764 asyncio
.set_child_watcher(ThreadedChildWatcher())
1768 from asyncio
import run
as async_run
# type: ignore[attr-defined]
1770 def async_run(coro
): # type: ignore
1771 loop
= asyncio
.new_event_loop()
1773 asyncio
.set_event_loop(loop
)
1774 return loop
.run_until_complete(coro
)
1777 loop
.run_until_complete(loop
.shutdown_asyncgens())
1779 asyncio
.set_event_loop(None)
1783 def call(ctx
: CephadmContext
,
1785 desc
: Optional
[str] = None,
1786 verbosity
: CallVerbosity
= CallVerbosity
.VERBOSE_ON_FAILURE
,
1787 timeout
: Optional
[int] = DEFAULT_TIMEOUT
,
1788 **kwargs
: Any
) -> Tuple
[str, str, int]:
1790 Wrap subprocess.Popen to
1792 - log stdout/stderr to a logger,
1794 - cleanly return out, err, returncode
1796 :param timeout: timeout in seconds
1799 prefix
= command
[0] if desc
is None else desc
1802 timeout
= timeout
or ctx
.timeout
1804 async def tee(reader
: asyncio
.StreamReader
) -> str:
1805 collected
= StringIO()
1806 async for line
in reader
:
1807 message
= line
.decode('utf-8')
1808 collected
.write(message
)
1809 return collected
.getvalue()
1811 async def run_with_timeout() -> Tuple
[str, str, int]:
1812 process
= await asyncio
.create_subprocess_exec(
1814 stdout
=asyncio
.subprocess
.PIPE
,
1815 stderr
=asyncio
.subprocess
.PIPE
,
1816 env
=os
.environ
.copy())
1817 assert process
.stdout
1818 assert process
.stderr
1820 stdout
, stderr
= await asyncio
.gather(tee(process
.stdout
),
1821 tee(process
.stderr
))
1822 returncode
= await asyncio
.wait_for(process
.wait(), timeout
)
1823 except asyncio
.TimeoutError
:
1824 logger
.info(prefix
+ f
'timeout after {timeout} seconds')
1827 return stdout
, stderr
, returncode
1829 stdout
, stderr
, returncode
= async_run(run_with_timeout())
1830 log_level
= verbosity
.success_log_level()
1832 log_level
= verbosity
.error_log_level()
1833 logger
.log(log_level
, f
'Non-zero exit code {returncode} from {" ".join(command)}')
1834 for line
in stdout
.splitlines():
1835 logger
.log(log_level
, prefix
+ 'stdout ' + line
)
1836 for line
in stderr
.splitlines():
1837 logger
.log(log_level
, prefix
+ 'stderr ' + line
)
1838 return stdout
, stderr
, returncode
1842 ctx
: CephadmContext
,
1844 desc
: Optional
[str] = None,
1845 verbosity
: CallVerbosity
= CallVerbosity
.VERBOSE_ON_FAILURE
,
1846 timeout
: Optional
[int] = DEFAULT_TIMEOUT
,
1847 **kwargs
: Any
) -> Tuple
[str, str, int]:
1848 out
, err
, ret
= call(ctx
, command
, desc
, verbosity
, timeout
, **kwargs
)
1850 for s
in (out
, err
):
1851 if s
.strip() and len(s
.splitlines()) <= 2: # readable message?
1852 raise RuntimeError(f
'Failed command: {" ".join(command)}: {s}')
1853 raise RuntimeError('Failed command: %s' % ' '.join(command
))
1854 return out
, err
, ret
def call_timeout(ctx, command, timeout):
    # type: (CephadmContext, List[str], int) -> int
    """Run *command* with a hard timeout and return its exit code.

    :raises TimeoutExpired: when the command does not finish in time
    """
    logger.debug('Running command (timeout=%s): %s'
                 % (timeout, ' '.join(command)))

    def raise_timeout(command, timeout):
        # type: (List[str], int) -> NoReturn
        msg = 'Command `%s` timed out after %s seconds' % (command, timeout)
        logger.debug(msg)
        raise TimeoutExpired(msg)

    try:
        return subprocess.call(command, timeout=timeout, env=os.environ.copy())
    except subprocess.TimeoutExpired:
        raise_timeout(command, timeout)
1873 ##################################
def json_loads_retry(cli_func: Callable[[], str]) -> Any:
    """Call *cli_func* and JSON-decode its output, retrying on bad JSON.

    Retries after 1, 4 and 4 seconds; the final attempt lets the
    JSONDecodeError propagate.
    """
    for delay in (1, 4, 4):
        try:
            return json.loads(cli_func())
        except json.JSONDecodeError:
            logger.debug('Invalid JSON. Retrying in %s seconds...' % delay)
            time.sleep(delay)
    return json.loads(cli_func())
1886 def is_available(ctx
, what
, func
):
1887 # type: (CephadmContext, str, Callable[[], bool]) -> None
1889 Wait for a service to become available
1891 :param what: the name of the service
1892 :param func: the callable object that determines availability
1895 logger
.info('Waiting for %s...' % what
)
1899 logger
.info('%s is available'
1903 raise Error('%s not available after %s tries'
1906 logger
.info('%s not available, waiting (%s/%s)...'
1907 % (what
, num
, retry
))
1913 def read_config(fn
):
1914 # type: (Optional[str]) -> ConfigParser
1922 # type: (str) -> str
1923 p
= os
.path
.expanduser(p
)
1924 return os
.path
.abspath(p
)
def get_file_timestamp(fn):
    # type: (str) -> Optional[str]
    """Return fn's mtime as a UTC string rendered with DATEFMT, or None
    when the file cannot be stat'ed."""
    try:
        mt = os.path.getmtime(fn)
        return datetime.datetime.fromtimestamp(
            mt, tz=datetime.timezone.utc
        ).strftime(DATEFMT)
    except Exception:
        return None
1938 def try_convert_datetime(s
):
1939 # type: (str) -> Optional[str]
1940 # This is super irritating because
1941 # 1) podman and docker use different formats
1942 # 2) python's strptime can't parse either one
1945 # docker 18.09.7: 2020-03-03T09:21:43.636153304Z
1946 # podman 1.7.0: 2020-03-03T15:52:30.136257504-06:00
1947 # 2020-03-03 15:52:30.136257504 -0600 CST
1948 # (In the podman case, there is a different string format for
1949 # 'inspect' and 'inspect --format {{.Created}}'!!)
1951 # In *all* cases, the 9 digit second precision is too much for
1952 # python's strptime. Shorten it to 6 digits.
1953 p
= re
.compile(r
'(\.[\d]{6})[\d]*')
1956 # replace trailing Z with -0000, since (on python 3.6.8) it won't parse
1957 if s
and s
[-1] == 'Z':
1958 s
= s
[:-1] + '-0000'
1960 # cut off the redundant 'CST' part that strptime can't parse, if
1963 s
= ' '.join(v
[0:3])
1965 # try parsing with several format strings
1967 '%Y-%m-%dT%H:%M:%S.%f%z',
1968 '%Y-%m-%d %H:%M:%S.%f %z',
1972 # return timestamp normalized to UTC, rendered as DATEFMT.
1973 return datetime
.datetime
.strptime(s
, f
).astimezone(tz
=datetime
.timezone
.utc
).strftime(DATEFMT
)
1979 def _parse_podman_version(version_str
):
1980 # type: (str) -> Tuple[int, ...]
1981 def to_int(val
: str, org_e
: Optional
[Exception] = None) -> int:
1982 if not val
and org_e
:
1986 except ValueError as e
:
1987 return to_int(val
[0:-1], org_e
or e
)
1989 return tuple(map(to_int
, version_str
.split('.')))
def get_hostname():
    # type: () -> str
    """Return this host's (possibly fully-qualified) hostname."""
    return socket.gethostname()
def get_short_hostname():
    # type: () -> str
    """Return the hostname truncated at the first dot."""
    return get_hostname().partition('.')[0]
def get_fqdn():
    # type: () -> str
    """Return the FQDN, falling back to the bare hostname if unresolvable."""
    return socket.getfqdn() or socket.gethostname()
def get_arch():
    # type: () -> str
    """Return the machine hardware name, e.g. 'x86_64'."""
    return platform.uname().machine
def generate_service_id():
    # type: () -> str
    """Return '<short-hostname>.<6 random lowercase letters>'."""
    suffix = ''.join(random.choice(string.ascii_lowercase) for _ in range(6))
    return get_short_hostname() + '.' + suffix
def generate_password():
    # type: () -> str
    """Return a 10-character password of lowercase letters and digits.

    Uses random.SystemRandom (OS entropy) instead of the default
    Mersenne-Twister PRNG, since generated passwords are
    security-sensitive and the default PRNG is predictable.
    """
    rng = random.SystemRandom()
    alphabet = string.ascii_lowercase + string.digits
    return ''.join(rng.choice(alphabet) for _ in range(10))
def normalize_container_id(i):
    # type: (str) -> str
    """Strip a leading 'sha256:' from a container/image id.

    docker adds the sha256: prefix, but AFAICS both docker (18.09.7 in
    bionic at least) and podman always use sha256, so leave off the
    prefix for consistency.
    """
    prefix = 'sha256:'
    return i[len(prefix):] if i.startswith(prefix) else i
def make_fsid():
    # type: () -> str
    """Generate a new cluster fsid (a UUID string)."""
    return str(uuid.uuid1())
def is_fsid(s):
    # type: (str) -> bool
    """Return True when *s* parses as a UUID."""
    try:
        uuid.UUID(s)
    except ValueError:
        return False
    return True
def validate_fsid(func: FuncT) -> FuncT:
    """Decorator: raise Error when ctx.fsid is set but not a valid UUID."""
    @wraps(func)
    def _validate_fsid(ctx: CephadmContext) -> Any:
        if 'fsid' in ctx and ctx.fsid:
            if not is_fsid(ctx.fsid):
                raise Error('not an fsid: %s' % ctx.fsid)
        return func(ctx)

    return cast(FuncT, _validate_fsid)
2060 def infer_fsid(func
: FuncT
) -> FuncT
:
2062 If we only find a single fsid in /var/lib/ceph/*, use that
2066 def _infer_fsid(ctx
: CephadmContext
) -> Any
:
2067 if 'fsid' in ctx
and ctx
.fsid
:
2068 logger
.debug('Using specified fsid: %s' % ctx
.fsid
)
2073 cp
= read_config(ctx
.config
)
2074 if cp
.has_option('global', 'fsid'):
2075 fsids
.add(cp
.get('global', 'fsid'))
2077 daemon_list
= list_daemons(ctx
, detail
=False)
2078 for daemon
in daemon_list
:
2079 if not is_fsid(daemon
['fsid']):
2082 elif 'name' not in ctx
or not ctx
.name
:
2083 # ctx.name not specified
2084 fsids
.add(daemon
['fsid'])
2085 elif daemon
['name'] == ctx
.name
:
2086 # ctx.name is a match
2087 fsids
.add(daemon
['fsid'])
2088 fsids
= sorted(fsids
)
2091 # some commands do not always require an fsid
2093 elif len(fsids
) == 1:
2094 logger
.info('Inferring fsid %s' % fsids
[0])
2097 raise Error('Cannot infer an fsid, one must be specified (using --fsid): %s' % fsids
)
2100 return cast(FuncT
, _infer_fsid
)
2103 def infer_config(func
: FuncT
) -> FuncT
:
2105 Infer the clusater configuration using the followign priority order:
2106 1- if the user has provided custom conf file (-c option) use it
2107 2- otherwise if daemon --name has been provided use daemon conf
2108 3- otherwise find the mon daemon conf file and use it (if v1)
2109 4- otherwise if {ctx.data_dir}/{fsid}/{CEPH_CONF_DIR} dir exists use it
2110 5- finally: fallback to the default file /etc/ceph/ceph.conf
2113 def _infer_config(ctx
: CephadmContext
) -> Any
:
2115 def config_path(daemon_type
: str, daemon_name
: str) -> str:
2116 data_dir
= get_data_dir(ctx
.fsid
, ctx
.data_dir
, daemon_type
, daemon_name
)
2117 return os
.path
.join(data_dir
, 'config')
2119 def get_mon_daemon_name(fsid
: str) -> Optional
[str]:
2120 daemon_list
= list_daemons(ctx
, detail
=False)
2121 for daemon
in daemon_list
:
2123 daemon
.get('name', '').startswith('mon.')
2124 and daemon
.get('fsid', '') == fsid
2125 and daemon
.get('style', '') == 'cephadm:v1'
2126 and os
.path
.exists(config_path('mon', daemon
['name'].split('.', 1)[1]))
2128 return daemon
['name']
2131 ctx
.config
= ctx
.config
if 'config' in ctx
else None
2132 # check if user has provided conf by using -c option
2133 if ctx
.config
and (ctx
.config
!= CEPH_DEFAULT_CONF
):
2134 logger
.debug(f
'Using specified config: {ctx.config}')
2137 if 'fsid' in ctx
and ctx
.fsid
:
2138 name
= ctx
.name
if ('name' in ctx
and ctx
.name
) else get_mon_daemon_name(ctx
.fsid
)
2139 if name
is not None:
2140 # daemon name has been specified (or inffered from mon), let's use its conf
2141 ctx
.config
= config_path(name
.split('.', 1)[0], name
.split('.', 1)[1])
2143 # no daemon, in case the cluster has a config dir then use it
2144 ceph_conf
= f
'{ctx.data_dir}/{ctx.fsid}/{CEPH_CONF_DIR}/{CEPH_CONF}'
2145 if os
.path
.exists(ceph_conf
):
2146 ctx
.config
= ceph_conf
2149 logger
.info(f
'Inferring config {ctx.config}')
2150 elif os
.path
.exists(CEPH_DEFAULT_CONF
):
2151 logger
.debug(f
'Using default config {CEPH_DEFAULT_CONF}')
2152 ctx
.config
= CEPH_DEFAULT_CONF
2155 return cast(FuncT
, _infer_config
)
2158 def _get_default_image(ctx
: CephadmContext
) -> str:
2159 if DEFAULT_IMAGE_IS_MASTER
:
2160 warn
= """This is a development version of cephadm.
2161 For information regarding the latest stable release:
2162 https://docs.ceph.com/docs/{}/cephadm/install
2163 """.format(LATEST_STABLE_RELEASE
)
2164 for line
in warn
.splitlines():
2165 logger
.warning('{}{}{}'.format(termcolor
.yellow
, line
, termcolor
.end
))
2166 return DEFAULT_IMAGE
2169 def infer_image(func
: FuncT
) -> FuncT
:
2171 Use the most recent ceph image
2174 def _infer_image(ctx
: CephadmContext
) -> Any
:
2176 ctx
.image
= os
.environ
.get('CEPHADM_IMAGE')
2178 ctx
.image
= infer_local_ceph_image(ctx
, ctx
.container_engine
.path
)
2180 ctx
.image
= _get_default_image(ctx
)
2183 return cast(FuncT
, _infer_image
)
2186 def default_image(func
: FuncT
) -> FuncT
:
2188 def _default_image(ctx
: CephadmContext
) -> Any
:
2190 if 'name' in ctx
and ctx
.name
:
2191 type_
= ctx
.name
.split('.', 1)[0]
2192 if type_
in Monitoring
.components
:
2193 ctx
.image
= Monitoring
.components
[type_
]['image']
2194 if type_
== 'haproxy':
2195 ctx
.image
= HAproxy
.default_image
2196 if type_
== 'keepalived':
2197 ctx
.image
= Keepalived
.default_image
2198 if type_
== SNMPGateway
.daemon_type
:
2199 ctx
.image
= SNMPGateway
.default_image
2201 ctx
.image
= os
.environ
.get('CEPHADM_IMAGE')
2203 ctx
.image
= _get_default_image(ctx
)
2207 return cast(FuncT
, _default_image
)
2210 def get_container_info(ctx
: CephadmContext
, daemon_filter
: str, by_name
: bool) -> Optional
[ContainerInfo
]:
2212 :param ctx: Cephadm context
2213 :param daemon_filter: daemon name or type
2214 :param by_name: must be set to True if daemon name is provided
2215 :return: Container information or None
2217 def daemon_name_or_type(daemon
: Dict
[str, str]) -> str:
2218 return daemon
['name'] if by_name
else daemon
['name'].split('.', 1)[0]
2220 if by_name
and '.' not in daemon_filter
:
2221 logger
.warning(f
'Trying to get container info using invalid daemon name {daemon_filter}')
2223 daemons
= list_daemons(ctx
, detail
=False)
2224 matching_daemons
= [d
for d
in daemons
if daemon_name_or_type(d
) == daemon_filter
and d
['fsid'] == ctx
.fsid
]
2225 if matching_daemons
:
2226 d_type
, d_id
= matching_daemons
[0]['name'].split('.', 1)
2227 out
, _
, code
= get_container_stats(ctx
, ctx
.container_engine
.path
, ctx
.fsid
, d_type
, d_id
)
2229 (container_id
, image_name
, image_id
, start
, version
) = out
.strip().split(',')
2230 return ContainerInfo(container_id
, image_name
, image_id
, start
, version
)
2234 def infer_local_ceph_image(ctx
: CephadmContext
, container_path
: str) -> Optional
[str]:
2236 Infer the local ceph image based on the following priority criteria:
2237 1- the image specified by --image arg (if provided).
2238 2- the same image as the daemon container specified by --name arg (if provided).
2239 3- image used by any ceph container running on the host. In this case we use daemon types.
2240 4- if no container is found then we use the most ceph recent image on the host.
2242 Note: any selected container must have the same fsid inferred previously.
2244 :return: The most recent local ceph image (already pulled)
2246 # '|' special character is used to separate the output fields into:
2247 # - Repository@digest
2250 # - Image creation date
2251 out
, _
, _
= call_throws(ctx
,
2252 [container_path
, 'images',
2253 '--filter', 'label=ceph=True',
2254 '--filter', 'dangling=false',
2255 '--format', '{{.Repository}}@{{.Digest}}|{{.ID}}|{{.Tag}}|{{.CreatedAt}}'])
2257 container_info
= None
2258 daemon_name
= ctx
.name
if ('name' in ctx
and ctx
.name
and '.' in ctx
.name
) else None
2259 daemons_ls
= [daemon_name
] if daemon_name
is not None else Ceph
.daemons
# daemon types: 'mon', 'mgr', etc
2260 for daemon
in daemons_ls
:
2261 container_info
= get_container_info(ctx
, daemon
, daemon_name
is not None)
2262 if container_info
is not None:
2263 logger
.debug(f
"Using container info for daemon '{daemon}'")
2266 for image
in out
.splitlines():
2267 if image
and not image
.isspace():
2268 (digest
, image_id
, tag
, created_date
) = image
.lstrip().split('|')
2269 if container_info
is not None and image_id
not in container_info
.image_id
:
2271 if digest
and not digest
.endswith('@'):
2272 logger
.info(f
"Using ceph image with id '{image_id}' and tag '{tag}' created on {created_date}\n{digest}")
def write_tmp(s, uid, gid):
    # type: (str, int, int) -> IO[str]
    """Write *s* to a NamedTemporaryFile owned by uid:gid and return the
    open handle (the file is removed when the handle is closed)."""
    tmp_f = tempfile.NamedTemporaryFile(mode='w',
                                        prefix='ceph-tmp')
    os.fchown(tmp_f.fileno(), uid, gid)
    tmp_f.write(s)
    tmp_f.flush()

    return tmp_f
def makedirs(dir, uid, gid, mode):
    # type: (str, int, int, int) -> None
    """mkdir -p with explicit ownership; mode is re-applied even when the
    directory already exists."""
    if not os.path.exists(dir):
        os.makedirs(dir, mode=mode)
    else:
        os.chmod(dir, mode)
    os.chown(dir, uid, gid)
    os.chmod(dir, mode)  # the above is masked by umask...
def get_data_dir(fsid, data_dir, t, n):
    # type: (str, str, str, Union[int, str]) -> str
    """Return the daemon data directory: <data_dir>/<fsid>/<type>.<id>."""
    return os.path.join(data_dir, fsid, '%s.%s' % (t, n))
def get_log_dir(fsid, log_dir):
    # type: (str, str) -> str
    """Return the per-cluster log directory: <log_dir>/<fsid>."""
    return os.path.join(log_dir, fsid)
def make_data_dir_base(fsid, data_dir, uid, gid):
    # type: (str, str, int, int) -> str
    """Create <data_dir>/<fsid> plus its crash/ and crash/posted/ subdirs
    and return the base path."""
    data_dir_base = os.path.join(data_dir, fsid)
    for sub in ('', 'crash', os.path.join('crash', 'posted')):
        makedirs(os.path.join(data_dir_base, sub), uid, gid, DATA_DIR_MODE)
    return data_dir_base
def make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=None, gid=None):
    # type: (CephadmContext, str, str, Union[int, str], Optional[int], Optional[int]) -> str
    """Create and return the data directory for one daemon instance.

    uid/gid default to the cluster's extracted ownership when omitted.
    """
    if uid is None or gid is None:
        uid, gid = extract_uid_gid(ctx)
    make_data_dir_base(fsid, ctx.data_dir, uid, gid)
    data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    makedirs(data_dir, uid, gid, DATA_DIR_MODE)
    return data_dir
def make_log_dir(ctx, fsid, uid=None, gid=None):
    # type: (CephadmContext, str, Optional[int], Optional[int]) -> str
    """Create (if needed) and return the per-cluster log directory.

    uid/gid default to the cluster's extracted ownership when omitted.
    """
    if uid is None or gid is None:
        uid, gid = extract_uid_gid(ctx)
    log_dir = get_log_dir(fsid, ctx.log_dir)
    makedirs(log_dir, uid, gid, LOG_DIR_MODE)
    return log_dir
def make_var_run(ctx, fsid, uid, gid):
    # type: (CephadmContext, str, int, int) -> None
    """Create /var/run/ceph/<fsid> (mode 0770, owned uid:gid) via install(1)."""
    cmd = ['install', '-d', '-m0770',
           '-o', str(uid), '-g', str(gid),
           '/var/run/ceph/%s' % fsid]
    call_throws(ctx, cmd)
def copy_tree(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy a directory tree from src to dst
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_dir in src:
        # When dst is an existing directory, copy *into* it.
        dst_dir = dst
        if os.path.isdir(dst):
            dst_dir = os.path.join(dst, os.path.basename(src_dir))

        logger.debug('copy directory `%s` -> `%s`' % (src_dir, dst_dir))
        shutil.rmtree(dst_dir, ignore_errors=True)
        shutil.copytree(src_dir, dst_dir)  # dirs_exist_ok needs python 3.8

        # Fix ownership over the whole copied tree.
        for dirpath, dirnames, filenames in os.walk(dst_dir):
            logger.debug('chown %s:%s `%s`' % (uid, gid, dirpath))
            os.chown(dirpath, uid, gid)
            for filename in filenames:
                logger.debug('chown %s:%s `%s`' % (uid, gid, filename))
                os.chown(os.path.join(dirpath, filename), uid, gid)
def copy_files(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy a files from src to dst
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_file in src:
        # When dst is an existing directory, copy *into* it.
        dst_file = dst
        if os.path.isdir(dst):
            dst_file = os.path.join(dst, os.path.basename(src_file))

        logger.debug('copy file `%s` -> `%s`' % (src_file, dst_file))
        shutil.copyfile(src_file, dst_file)

        logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
        os.chown(dst_file, uid, gid)
def move_files(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """
    Move files from src to dst
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_file in src:
        dst_file = dst
        if os.path.isdir(dst):
            dst_file = os.path.join(dst, os.path.basename(src_file))

        if os.path.islink(src_file):
            # shutil.move() in py2 does not handle symlinks correctly
            src_rl = os.readlink(src_file)
            logger.debug("symlink '%s' -> '%s'" % (dst_file, src_rl))
            os.symlink(src_rl, dst_file)
            os.unlink(src_file)
        else:
            logger.debug("move file '%s' -> '%s'" % (src_file, dst_file))
            shutil.move(src_file, dst_file)
            logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
            os.chown(dst_file, uid, gid)
def recursive_chown(path: str, uid: int, gid: int) -> None:
    """Change ownership of *path* and everything beneath it to uid:gid."""
    for dirpath, _dirnames, filenames in os.walk(path):
        os.chown(dirpath, uid, gid)
        for fname in filenames:
            os.chown(os.path.join(dirpath, fname), uid, gid)
# copied from distutils
def find_executable(executable: str, path: Optional[str] = None) -> Optional[str]:
    """Tries to find 'executable' in the directories listed in 'path'.

    A string listing directories separated by 'os.pathsep'; defaults to
    os.environ['PATH'].  Returns the complete filename or None if not found.
    """
    _, ext = os.path.splitext(executable)
    # on Windows an executable is expected to carry the .exe suffix
    if (sys.platform == 'win32') and (ext != '.exe'):
        executable = executable + '.exe'

    # absolute/relative path given directly
    if os.path.isfile(executable):
        return executable

    if path is None:
        path = os.environ.get('PATH', None)
        if path is None:
            try:
                path = os.confstr('CS_PATH')
            except (AttributeError, ValueError):
                # os.confstr() or CS_PATH is not available
                path = os.defpath
        # bpo-35755: Don't use os.defpath if the PATH environment variable is
        # set to an empty string

    # PATH='' doesn't match, whereas PATH=':' looks in the current directory
    if not path:
        return None

    for candidate_dir in path.split(os.pathsep):
        candidate = os.path.join(candidate_dir, executable)
        if os.path.isfile(candidate):
            # the file exists, we have a shot at spawn working
            return candidate
    return None
def find_program(filename):
    # type: (str) -> str
    """Locate *filename* on PATH; raise ValueError when it is not found.

    The declared return type is str, so the resolved path is returned
    (the visible original fell through without returning it).
    """
    name = find_executable(filename)
    if name is None:
        raise ValueError('%s not found' % filename)
    return name
def find_container_engine(ctx: CephadmContext) -> Optional[ContainerEngine]:
    """Return the first working container engine, or None.

    NOTE(review): most of this body was lost in the mangled source; the
    preference-loop reconstruction below should be confirmed against the
    original file.
    """
    if ctx.docker:
        return Docker()
    else:
        for i in CONTAINER_PREFERENCE:
            try:
                return i()
            except Exception:
                pass
    return None
def check_container_engine(ctx: CephadmContext) -> ContainerEngine:
    """Validate the detected container engine and return it.

    Raises Error if no engine binary was found, or if podman is older
    than MIN_PODMAN_VERSION.  The declared return type is ContainerEngine,
    so the validated engine is returned.
    """
    engine = ctx.container_engine
    if not isinstance(engine, CONTAINER_PREFERENCE):
        # See https://github.com/python/mypy/issues/8993
        exes: List[str] = [i.EXE for i in CONTAINER_PREFERENCE]  # type: ignore
        raise Error('No container engine binary found ({}). Try run `apt/dnf/yum/zypper install <container engine>`'.format(' or '.join(exes)))
    elif isinstance(engine, Podman):
        engine.get_version(ctx)
        if engine.version < MIN_PODMAN_VERSION:
            raise Error('podman version %d.%d.%d or later is required' % MIN_PODMAN_VERSION)
    return engine
def get_unit_name(fsid, daemon_type, daemon_id=None):
    # type: (str, str, Optional[Union[int, str]]) -> str
    """Build the systemd unit name for a daemon.

    Accepts either a bare daemon type or a type plus id.
    """
    if daemon_id is None:
        return 'ceph-%s@%s' % (fsid, daemon_type)
    return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
def get_unit_name_by_daemon_name(ctx: CephadmContext, fsid: str, name: str) -> str:
    """Resolve a daemon name (type.id) to its systemd unit name."""
    daemon = get_daemon_description(ctx, fsid, name)
    try:
        return daemon['systemd_unit']
    except KeyError:
        raise Error('Failed to get unit name for {}'.format(daemon))
def check_unit(ctx, unit_name):
    # type: (CephadmContext, str) -> Tuple[bool, str, bool]
    """Query systemd for a unit's enabled/active/installed state.

    Returns (enabled, state, installed) where state is one of
    'running', 'stopped', 'error', 'unknown'.
    """
    # NOTE: we ignore the exit code here because systemctl outputs
    # various exit codes based on the state of the service, but the
    # string result is more explicit (and sufficient).
    enabled = False
    installed = False
    try:
        out, err, code = call(ctx, ['systemctl', 'is-enabled', unit_name],
                              verbosity=CallVerbosity.QUIET)
        if code == 0:
            enabled = True
            installed = True
        elif 'disabled' in out:
            installed = True
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        enabled = False
        installed = False

    state = 'unknown'
    try:
        out, err, code = call(ctx, ['systemctl', 'is-active', unit_name],
                              verbosity=CallVerbosity.QUIET)
        out = out.strip()
        if out in ['active']:
            state = 'running'
        elif out in ['inactive']:
            state = 'stopped'
        elif out in ['failed', 'auto-restart']:
            state = 'error'
        else:
            state = 'unknown'
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        state = 'unknown'
    return (enabled, state, installed)
def check_units(ctx, units, enabler=None):
    # type: (CephadmContext, List[str], Optional[Packager]) -> bool
    """Return True if any of *units* is enabled and running.

    When an enabler is supplied, installed-but-disabled units are enabled.
    """
    for u in units:
        (enabled, state, installed) = check_unit(ctx, u)
        if enabled and state == 'running':
            logger.info('Unit %s is enabled and running' % u)
            return True
        if enabler is not None:
            if installed:
                logger.info('Enabling unit %s' % u)
                enabler.enable_service(u)
    return False
def is_container_running(ctx: CephadmContext, c: 'CephContainer') -> bool:
    """Return True when the container for ctx.name is currently running."""
    if ctx.name.split('.', 1)[0] in ['agent', 'cephadm-exporter']:
        # these are non-containerized daemon types
        return False
    return bool(get_running_container_name(ctx, c))
def get_running_container_name(ctx: CephadmContext, c: 'CephContainer') -> Optional[str]:
    """Return the first of the container's names that is running, else None."""
    # check both the current and the legacy container name
    for name in [c.cname, c.old_cname]:
        out, err, ret = call(ctx, [
            ctx.container_engine.path, 'container', 'inspect',
            '--format', '{{.State.Status}}', name
        ], verbosity=CallVerbosity.QUIET)
        if out.strip() == 'running':
            return name
    return None
def get_legacy_config_fsid(cluster, legacy_dir=None):
    # type: (str, Optional[str]) -> Optional[str]
    """Read the fsid from a legacy /etc/ceph/<cluster>.conf, if present."""
    config_file = '/etc/ceph/%s.conf' % cluster
    if legacy_dir is not None:
        # legacy_dir is a chroot-style prefix prepended to the absolute path
        config_file = os.path.abspath(legacy_dir + config_file)

    if os.path.exists(config_file):
        config = read_config(config_file)
        if config.has_section('global') and config.has_option('global', 'fsid'):
            return config.get('global', 'fsid')
    return None
def get_legacy_daemon_fsid(ctx, cluster,
                           daemon_type, daemon_id, legacy_dir=None):
    # type: (CephadmContext, str, str, Union[int, str], Optional[str]) -> Optional[str]
    """Determine the fsid of a legacy (pre-cephadm) daemon.

    OSDs record their fsid in a per-daemon ceph_fsid file; anything else
    falls back to the cluster config file.
    """
    fsid = None
    if daemon_type == 'osd':
        try:
            fsid_file = os.path.join(ctx.data_dir,
                                     daemon_type,
                                     'ceph-%s' % daemon_id,
                                     'ceph_fsid')
            if legacy_dir is not None:
                fsid_file = os.path.abspath(legacy_dir + fsid_file)
            with open(fsid_file, 'r') as f:
                fsid = f.read().strip()
        except IOError:
            # best-effort: fall through to the config-file lookup
            pass
    if not fsid:
        fsid = get_legacy_config_fsid(cluster, legacy_dir=legacy_dir)
    return fsid
def should_log_to_journald(ctx: CephadmContext) -> bool:
    """Decide whether daemons should log to journald.

    An explicit ctx.log_to_journald wins; otherwise journald is used when
    running under a sufficiently recent podman.
    """
    if ctx.log_to_journald is not None:
        return ctx.log_to_journald
    engine = ctx.container_engine
    return isinstance(engine, Podman) and engine.version >= CGROUPS_SPLIT_PODMAN_VERSION
def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
    # type: (CephadmContext, str, str, Union[int, str]) -> List[str]
    """Build the per-daemon-type command-line argument list.

    NOTE(review): several argument lines were lost in the mangled source;
    this reconstruction should be confirmed against the original file.
    """
    r = list()  # type: List[str]

    if daemon_type in Ceph.daemons and daemon_type not in ['crash', 'ceph-exporter']:
        r += [
            '--setuser', 'ceph',
            '--setgroup', 'ceph',
            '--default-log-to-file=false',
        ]
        log_to_journald = should_log_to_journald(ctx)
        if log_to_journald:
            r += [
                '--default-log-to-journald=true',
                '--default-log-to-stderr=false',
            ]
        else:
            r += [
                '--default-log-to-stderr=true',
                '--default-log-stderr-prefix=debug ',
            ]
        if daemon_type == 'mon':
            r += [
                '--default-mon-cluster-log-to-file=false',
            ]
            if log_to_journald:
                r += [
                    '--default-mon-cluster-log-to-journald=true',
                    '--default-mon-cluster-log-to-stderr=false',
                ]
            else:
                r += ['--default-mon-cluster-log-to-stderr=true']
    elif daemon_type in Monitoring.components:
        metadata = Monitoring.components[daemon_type]
        r += metadata.get('args', list())
        # set ip and port to bind to for nodeexporter,alertmanager,prometheus
        if daemon_type not in ['grafana', 'loki', 'promtail']:
            ip = ''
            port = Monitoring.port_map[daemon_type][0]
            if 'meta_json' in ctx and ctx.meta_json:
                meta = json.loads(ctx.meta_json) or {}
                if 'ip' in meta and meta['ip']:
                    ip = meta['ip']
                if 'ports' in meta and meta['ports']:
                    port = meta['ports'][0]
            r += [f'--web.listen-address={ip}:{port}']
            if daemon_type == 'prometheus':
                config = get_parm(ctx.config_json)
                retention_time = config.get('retention_time', '15d')
                retention_size = config.get('retention_size', '0')  # default to disabled
                r += [f'--storage.tsdb.retention.time={retention_time}']
                r += [f'--storage.tsdb.retention.size={retention_size}']
                scheme = 'http'
                host = get_fqdn()
                r += [f'--web.external-url={scheme}://{host}:{port}']
        if daemon_type == 'alertmanager':
            config = get_parm(ctx.config_json)
            peers = config.get('peers', list())  # type: ignore
            for peer in peers:
                r += ['--cluster.peer={}'.format(peer)]
            # some alertmanager, by default, look elsewhere for a config
            r += ['--config.file=/etc/alertmanager/alertmanager.yml']
        if daemon_type == 'promtail':
            r += ['--config.expand-env']
        if daemon_type == 'node-exporter':
            r += ['--path.procfs=/host/proc',
                  '--path.sysfs=/host/sys',
                  '--path.rootfs=/rootfs']
    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
        r += nfs_ganesha.get_daemon_args()
    elif daemon_type == CephExporter.daemon_type:
        ceph_exporter = CephExporter.init(ctx, fsid, daemon_id)
        r.extend(ceph_exporter.get_daemon_args())
    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, fsid, daemon_id)
        r += haproxy.get_daemon_args()
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        r.extend(cc.get_daemon_args())
    elif daemon_type == SNMPGateway.daemon_type:
        sc = SNMPGateway.init(ctx, fsid, daemon_id)
        r.extend(sc.get_daemon_args())

    return r
def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid,
                       config=None, keyring=None):
    # type: (CephadmContext, str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None
    """Create data/log directories and config/keyring files for a daemon.

    NOTE(review): reconstructed from a mangled source; the per-component
    directory layout should be confirmed against the original file.
    """
    data_dir = make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=uid, gid=gid)

    if daemon_type in Ceph.daemons:
        make_log_dir(ctx, fsid, uid=uid, gid=gid)

    if config:
        config_path = os.path.join(data_dir, 'config')
        with open(config_path, 'w') as f:
            # restrict before writing any secrets
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config)

    if keyring:
        keyring_path = os.path.join(data_dir, 'keyring')
        with open(keyring_path, 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write(keyring)

    if daemon_type in Monitoring.components.keys():
        config_json: Dict[str, Any] = dict()
        if 'config_json' in ctx:
            config_json = get_parm(ctx.config_json)

        # Set up directories specific to the monitoring component
        config_dir = ''
        data_dir_root = ''
        if daemon_type == 'prometheus':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/prometheus'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'alerting'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
            recursive_chown(os.path.join(data_dir_root, 'etc'), uid, gid)
            recursive_chown(os.path.join(data_dir_root, 'data'), uid, gid)
        elif daemon_type == 'grafana':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/grafana'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
            touch(os.path.join(data_dir_root, 'data', 'grafana.db'), uid, gid)
        elif daemon_type == 'alertmanager':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/alertmanager'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755)
        elif daemon_type == 'promtail':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/promtail'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
        elif daemon_type == 'loki':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/loki'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)

        # populate the config directory for the component from the config-json
        if 'files' in config_json:
            for fname in config_json['files']:
                content = dict_get_join(config_json['files'], fname)
                if os.path.isabs(fname):
                    fpath = os.path.join(data_dir_root, fname.lstrip(os.path.sep))
                else:
                    fpath = os.path.join(data_dir_root, config_dir, fname)
                with open(fpath, 'w', encoding='utf-8') as f:
                    os.fchown(f.fileno(), uid, gid)
                    os.fchmod(f.fileno(), 0o600)
                    f.write(content)

    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
        nfs_ganesha.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CephIscsi.daemon_type:
        ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
        ceph_iscsi.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, fsid, daemon_id)
        haproxy.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == Keepalived.daemon_type:
        keepalived = Keepalived.init(ctx, fsid, daemon_id)
        keepalived.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        cc.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == SNMPGateway.daemon_type:
        sg = SNMPGateway.init(ctx, fsid, daemon_id)
        sg.create_daemon_conf()

    _write_custom_conf_files(ctx, daemon_type, str(daemon_id), fsid, uid, gid)
def _write_custom_conf_files(ctx: CephadmContext, daemon_type: str, daemon_id: str, fsid: str, uid: int, gid: int) -> None:
    # mostly making this its own function to make unit testing easier
    """Write any custom config files from the config-json into the daemon's
    custom_config_files directory (mode 0600, owned by uid:gid)."""
    if 'config_json' not in ctx or not ctx.config_json:
        return
    config_json = get_custom_config_files(ctx.config_json)
    custom_config_dir = os.path.join(ctx.data_dir, fsid, 'custom_config_files', f'{daemon_type}.{daemon_id}')
    if not os.path.exists(custom_config_dir):
        makedirs(custom_config_dir, uid, gid, 0o755)
    mandatory_keys = ['mount_path', 'content']
    for ccf in config_json['custom_config_files']:
        # entries missing either key are silently skipped
        if all(k in ccf for k in mandatory_keys):
            file_path = os.path.join(custom_config_dir, os.path.basename(ccf['mount_path']))
            with open(file_path, 'w+', encoding='utf-8') as f:
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)
                f.write(ccf['content'])
def get_parm(option: str) -> Dict[str, str]:
    """Parse a config-json option and return it as a dict.

    The declared return type is Dict, so the parsed dict is returned.
    """
    js = _get_config_json(option)
    # custom_config_files is a special field that may be in the config
    # dict. It is used for mounting custom config files into daemon's containers
    # and should be accessed through the "get_custom_config_files" function.
    # For get_parm we need to discard it.
    js.pop('custom_config_files', None)
    return js
def get_custom_config_files(option: str) -> Dict[str, List[Dict[str, str]]]:
    """Extract only the custom_config_files field from a config-json option.

    Always returns a dict with a 'custom_config_files' key (possibly an
    empty list); the declared return type requires the value be returned.
    """
    js = _get_config_json(option)
    res: Dict[str, List[Dict[str, str]]] = {'custom_config_files': []}
    if 'custom_config_files' in js:
        res['custom_config_files'] = js['custom_config_files']
    return res
def _get_config_json(option: str) -> Dict[str, Any]:
    """Load JSON from '-' (stdin, cached), an inline '{...}' string, or a file.

    NOTE(review): the stdin-caching scaffolding was lost in the mangled
    source; this reconstruction should be confirmed against the original.
    """
    if not option:
        return dict()

    global cached_stdin
    if option == '-':
        # stdin can only be consumed once, so remember what we read
        if cached_stdin is not None:
            j = cached_stdin
        else:
            j = sys.stdin.read()
            cached_stdin = j
    else:
        # inline json string
        if option[0] == '{' and option[-1] == '}':
            j = option
        # json file
        elif os.path.exists(option):
            with open(option, 'r') as f:
                j = f.read()
        else:
            raise Error('Config file {} not found'.format(option))

    try:
        js = json.loads(j)
    except ValueError as e:
        raise Error('Invalid JSON in {}: {}'.format(option, e))
    else:
        return js
def get_config_and_keyring(ctx):
    # type: (CephadmContext) -> Tuple[Optional[str], Optional[str]]
    """Resolve the daemon config and keyring from config-json, files or ctx.key.

    Precedence: config_json (if it supplies both), then ctx.config /
    ctx.key / ctx.keyring.
    """
    config = None
    keyring = None

    if 'config_json' in ctx and ctx.config_json:
        d = get_parm(ctx.config_json)
        config = d.get('config')
        keyring = d.get('keyring')
        if config and keyring:
            return config, keyring

    if 'config' in ctx and ctx.config:
        try:
            with open(ctx.config, 'r') as f:
                config = f.read()
        except FileNotFoundError as e:
            raise Error(e)

    if 'key' in ctx and ctx.key:
        keyring = '[%s]\n\tkey = %s\n' % (ctx.name, ctx.key)
    elif 'keyring' in ctx and ctx.keyring:
        try:
            with open(ctx.keyring, 'r') as f:
                keyring = f.read()
        except FileNotFoundError as e:
            raise Error(e)

    return config, keyring
def get_container_binds(ctx, fsid, daemon_type, daemon_id):
    # type: (CephadmContext, str, str, Union[int, str, None]) -> List[List[str]]
    """Return the list of bind-mount argument lists for a daemon type."""
    binds = list()  # type: List[List[str]]

    if daemon_type == CephIscsi.daemon_type:
        binds.extend(CephIscsi.get_container_binds())
    elif daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        binds.extend(cc.get_container_binds(data_dir))

    return binds
def get_container_mounts(ctx, fsid, daemon_type, daemon_id,
                         no_config=False):
    # type: (CephadmContext, str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str]
    """Build the host-path -> container-path volume mount map for a daemon.

    NOTE(review): reconstructed from a mangled source; the exact branch
    structure should be confirmed against the original file.
    """
    mounts = dict()

    if daemon_type in Ceph.daemons:
        if fsid:
            run_path = os.path.join('/var/run/ceph', fsid)
            if os.path.exists(run_path):
                mounts[run_path] = '/var/run/ceph:z'
            log_dir = get_log_dir(fsid, ctx.log_dir)
            mounts[log_dir] = '/var/log/ceph:z'
            crash_dir = '/var/lib/ceph/%s/crash' % fsid
            if os.path.exists(crash_dir):
                mounts[crash_dir] = '/var/lib/ceph/crash:z'
            if daemon_type != 'crash' and should_log_to_journald(ctx):
                journald_sock_dir = '/run/systemd/journal'
                mounts[journald_sock_dir] = journald_sock_dir

    if daemon_type in Ceph.daemons and daemon_id:
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        if daemon_type == 'rgw':
            cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id)
        else:
            cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id)
        if daemon_type != 'crash':
            mounts[data_dir] = cdata_dir + ':z'
        if not no_config:
            mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
        if daemon_type in ['rbd-mirror', 'cephfs-mirror', 'crash', 'ceph-exporter']:
            # these do not search for their keyrings in a data directory
            mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id)

    if daemon_type in ['mon', 'osd', 'clusterless-ceph-volume']:
        mounts['/dev'] = '/dev'  # FIXME: narrow this down?
        mounts['/run/udev'] = '/run/udev'
    if daemon_type in ['osd', 'clusterless-ceph-volume']:
        mounts['/sys'] = '/sys'  # for numa.cc, pick_address, cgroups, ...
        mounts['/run/lvm'] = '/run/lvm'
        mounts['/run/lock/lvm'] = '/run/lock/lvm'
    if daemon_type == 'osd':
        # selinux-policy in the container may not match the host.
        if HostFacts(ctx).selinux_enabled:
            selinux_folder = '/var/lib/ceph/%s/selinux' % fsid
            if not os.path.exists(selinux_folder):
                os.makedirs(selinux_folder, mode=0o755)
            mounts[selinux_folder] = '/sys/fs/selinux:ro'
        mounts['/'] = '/rootfs'

    try:
        if ctx.shared_ceph_folder:  # make easy manager modules/ceph-volume development
            ceph_folder = pathify(ctx.shared_ceph_folder)
            if os.path.exists(ceph_folder):
                mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
                mounts[ceph_folder + '/src/cephadm/cephadm'] = '/usr/sbin/cephadm'
                mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
                mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
                mounts[ceph_folder + '/monitoring/ceph-mixin/dashboards_out'] = '/etc/grafana/dashboards/ceph-dashboard'
                mounts[ceph_folder + '/monitoring/ceph-mixin/prometheus_alerts.yml'] = '/etc/prometheus/ceph/ceph_default_alerts.yml'
            else:
                logger.error('{}{}{}'.format(termcolor.red,
                                             'Ceph shared source folder does not exist.',
                                             termcolor.end))
    except AttributeError:
        pass

    if daemon_type in Monitoring.components and daemon_id:
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        log_dir = get_log_dir(fsid, ctx.log_dir)
        if daemon_type == 'prometheus':
            mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z'
            mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z'
        elif daemon_type == 'loki':
            mounts[os.path.join(data_dir, 'etc/loki')] = '/etc/loki:Z'
            mounts[os.path.join(data_dir, 'data')] = '/loki:Z'
        elif daemon_type == 'promtail':
            mounts[os.path.join(data_dir, 'etc/promtail')] = '/etc/promtail:Z'
            mounts[log_dir] = '/var/log/ceph:z'
            mounts[os.path.join(data_dir, 'data')] = '/promtail:Z'
        elif daemon_type == 'node-exporter':
            mounts['/proc'] = '/host/proc:ro'
            mounts['/sys'] = '/host/sys:ro'
            mounts['/'] = '/rootfs:ro'
        elif daemon_type == 'grafana':
            mounts[os.path.join(data_dir, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
            mounts[os.path.join(data_dir, 'data/grafana.db')] = '/var/lib/grafana/grafana.db:Z'
        elif daemon_type == 'alertmanager':
            mounts[os.path.join(data_dir, 'etc/alertmanager')] = '/etc/alertmanager:Z'

    if daemon_type == NFSGanesha.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
        mounts.update(nfs_ganesha.get_container_mounts(data_dir))

    if daemon_type == HAproxy.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        mounts.update(HAproxy.get_container_mounts(data_dir))

    if daemon_type == CephIscsi.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        log_dir = get_log_dir(fsid, ctx.log_dir)
        mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir))

    if daemon_type == Keepalived.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        mounts.update(Keepalived.get_container_mounts(data_dir))

    if daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        mounts.update(cc.get_container_mounts(data_dir))

    return mounts
def get_ceph_volume_container(ctx: CephadmContext,
                              privileged: bool = True,
                              cname: str = '',
                              volume_mounts: Optional[Dict[str, str]] = None,
                              bind_mounts: Optional[List[List[str]]] = None,
                              args: Optional[List[str]] = None,
                              envs: Optional[List[str]] = None) -> 'CephContainer':
    """Build a CephContainer configured to run ceph-volume.

    Fix: the original used mutable default arguments (={} / =[]), which
    are shared across calls and were appended to (envs); None sentinels
    avoid cross-call state leakage while keeping caller behavior.
    """
    volume_mounts = {} if volume_mounts is None else volume_mounts
    args = [] if args is None else args
    envs = [] if envs is None else envs
    envs.append('CEPH_VOLUME_SKIP_RESTORECON=yes')
    envs.append('CEPH_VOLUME_DEBUG=1')

    return CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/sbin/ceph-volume',
        args=args,
        volume_mounts=volume_mounts,
        bind_mounts=bind_mounts,
        envs=envs,
        privileged=privileged,
        cname=cname,
        memory_request=ctx.memory_request,
        memory_limit=ctx.memory_limit,
    )
def set_pids_limit_unlimited(ctx: CephadmContext, container_args: List[str]) -> None:
    # set container's pids-limit to unlimited rather than default (Docker 4096 / Podman 2048)
    # Useful for daemons like iscsi where the default pids-limit limits the number of luns
    # per iscsi target or rgw where increasing the rgw_thread_pool_size to a value near
    # the default pids-limit may cause the container to crash.
    engine = ctx.container_engine
    if isinstance(engine, Podman) and engine.version >= PIDS_LIMIT_UNLIMITED_PODMAN_VERSION:
        # newer podman spells "unlimited" as -1
        container_args.append('--pids-limit=-1')
    else:
        container_args.append('--pids-limit=0')
def get_container(ctx: CephadmContext,
                  fsid: str, daemon_type: str, daemon_id: Union[int, str],
                  privileged: bool = False,
                  ptrace: bool = False,
                  container_args: Optional[List[str]] = None) -> 'CephContainer':
    """Construct the CephContainer used to run a daemon of the given type.

    NOTE(review): reconstructed from a mangled source; entrypoints and
    per-type argument handling should be confirmed against the original.
    """
    entrypoint: str = ''
    name: str = ''
    ceph_args: List[str] = []
    envs: List[str] = []
    host_network: bool = True

    if daemon_type in Ceph.daemons:
        envs.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728')
    if container_args is None:
        container_args = []
    if daemon_type in Ceph.daemons or daemon_type in Ceph.gateways:
        set_pids_limit_unlimited(ctx, container_args)
    if daemon_type in ['mon', 'osd']:
        # mon and osd need privileged in order for libudev to query devices
        privileged = True
    if daemon_type == 'rgw':
        entrypoint = '/usr/bin/radosgw'
        name = 'client.rgw.%s' % daemon_id
    elif daemon_type == 'rbd-mirror':
        entrypoint = '/usr/bin/rbd-mirror'
        name = 'client.rbd-mirror.%s' % daemon_id
    elif daemon_type == 'cephfs-mirror':
        entrypoint = '/usr/bin/cephfs-mirror'
        name = 'client.cephfs-mirror.%s' % daemon_id
    elif daemon_type == 'crash':
        entrypoint = '/usr/bin/ceph-crash'
        name = 'client.crash.%s' % daemon_id
    elif daemon_type in ['mon', 'mgr', 'mds', 'osd']:
        entrypoint = '/usr/bin/ceph-' + daemon_type
        name = '%s.%s' % (daemon_type, daemon_id)
    elif daemon_type in Monitoring.components:
        entrypoint = ''
    elif daemon_type == NFSGanesha.daemon_type:
        entrypoint = NFSGanesha.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        envs.extend(NFSGanesha.get_container_envs())
    elif daemon_type == CephExporter.daemon_type:
        entrypoint = CephExporter.entrypoint
        name = 'client.ceph-exporter.%s' % daemon_id
    elif daemon_type == HAproxy.daemon_type:
        name = '%s.%s' % (daemon_type, daemon_id)
        container_args.extend(['--user=root'])  # haproxy 2.4 defaults to a different user
    elif daemon_type == Keepalived.daemon_type:
        name = '%s.%s' % (daemon_type, daemon_id)
        envs.extend(Keepalived.get_container_envs())
        container_args.extend(['--cap-add=NET_ADMIN', '--cap-add=NET_RAW'])
    elif daemon_type == CephIscsi.daemon_type:
        entrypoint = CephIscsi.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        # So the container can modprobe iscsi_target_mod and have write perms
        # to configfs we need to make this a privileged container.
        privileged = True
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        entrypoint = cc.entrypoint
        host_network = False
        envs.extend(cc.get_container_envs())
        container_args.extend(cc.get_container_args())

    if daemon_type in Monitoring.components:
        uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
        monitoring_args = [
            '--user',
            str(uid),
            # FIXME: disable cpu/memory limits for the time being (not supported
            # by ubuntu 18.04 kernel!)
        ]
        container_args.extend(monitoring_args)
        if daemon_type == 'node-exporter':
            # in order to support setting '--path.procfs=/host/proc','--path.sysfs=/host/sys',
            # '--path.rootfs=/rootfs' for node-exporter we need to disable selinux separation
            # between the node-exporter container and the host to avoid selinux denials
            container_args.extend(['--security-opt', 'label=disable'])
    elif daemon_type == 'crash':
        ceph_args = ['-n', name]
    elif daemon_type in Ceph.daemons:
        ceph_args = ['-n', name, '-f']
    elif daemon_type == SNMPGateway.daemon_type:
        sg = SNMPGateway.init(ctx, fsid, daemon_id)
        container_args.append(
            f'--env-file={sg.conf_file_path}'
        )

    # if using podman, set -d, --conmon-pidfile & --cidfile flags
    # so service can have Type=Forking
    if isinstance(ctx.container_engine, Podman):
        runtime_dir = '/run'
        container_args.extend([
            '-d', '--log-driver', 'journald',
            '--conmon-pidfile',
            runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, daemon_id),
            '--cidfile',
            runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, daemon_id),
        ])
        if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION and not ctx.no_cgroups_split:
            container_args.append('--cgroups=split')

    return CephContainer.for_daemon(
        ctx,
        fsid=fsid,
        daemon_type=daemon_type,
        daemon_id=str(daemon_id),
        entrypoint=entrypoint,
        args=ceph_args + get_daemon_args(ctx, fsid, daemon_type, daemon_id),
        container_args=container_args,
        volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
        bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
        envs=envs,
        privileged=privileged,
        ptrace=ptrace,
        host_network=host_network,
    )
def extract_uid_gid(ctx, img='', file_path='/var/lib/ceph'):
    # type: (CephadmContext, str, Union[str, List[str]]) -> Tuple[int, int]
    """Determine the uid/gid owning *file_path* inside the container image.

    Tries each candidate path in turn by running `stat` in the image;
    raises Error if none succeeds.
    """
    if not img:
        img = ctx.image

    if isinstance(file_path, str):
        paths = [file_path]
    else:
        paths = file_path

    ex: Optional[Tuple[str, RuntimeError]] = None

    for fp in paths:
        try:
            out = CephContainer(
                ctx,
                image=img,
                entrypoint='stat',
                args=['-c', '%u %g', fp]
            ).run(verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
            uid, gid = out.split(' ')
            return int(uid), int(gid)
        except RuntimeError as e:
            # remember the last failure for the error message below
            ex = (fp, e)
    if ex:
        raise Error(f'Failed to extract uid/gid for path {ex[0]}: {ex[1]}')

    raise RuntimeError('uid/gid not found')
def deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid,
                  config=None, keyring=None,
                  osd_fsid=None,
                  reconfig=False,
                  ports=None):
    # type: (CephadmContext, str, str, Union[int, str], Optional[CephContainer], int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None
    """Deploy (or reconfigure) a daemon: dirs, config, unit files, firewall.

    NOTE(review): reconstructed from a mangled source; the mon --mkfs and
    systemd-unit handling should be confirmed against the original file.
    """
    ports = ports or []
    if any([port_in_use(ctx, port) for port in ports]):
        if daemon_type == 'mgr':
            # non-fatal for mgr when we are in mgr_standby_modules=false, but we can't
            # tell whether that is the case here.
            logger.warning(
                f"ceph-mgr TCP port(s) {','.join(map(str, ports))} already in use"
            )
        else:
            raise Error("TCP Port(s) '{}' required for {} already in use".format(','.join(map(str, ports)), daemon_type))

    data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    if reconfig and not os.path.exists(data_dir):
        raise Error('cannot reconfig, data path %s does not exist' % data_dir)
    if daemon_type == 'mon' and not os.path.exists(data_dir):
        assert config
        assert keyring
        # tmp keyring file
        tmp_keyring = write_tmp(keyring, uid, gid)

        # tmp config file
        tmp_config = write_tmp(config, uid, gid)

        # --mkfs
        create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid)
        mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', daemon_id)
        log_dir = get_log_dir(fsid, ctx.log_dir)
        CephContainer(
            ctx,
            image=ctx.image,
            entrypoint='/usr/bin/ceph-mon',
            args=[
                '--mkfs',
                '-i', str(daemon_id),
                '--fsid', fsid,
                '-c', '/tmp/config',
                '--keyring', '/tmp/keyring',
            ] + get_daemon_args(ctx, fsid, 'mon', daemon_id),
            volume_mounts={
                log_dir: '/var/log/ceph:z',
                mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id),
                tmp_keyring.name: '/tmp/keyring:z',
                tmp_config.name: '/tmp/config:z',
            },
        ).run()

        # write conf
        with open(mon_dir + '/config', 'w') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config)
    else:
        # dirs, conf, keyring
        create_daemon_dirs(
            ctx,
            fsid, daemon_type, daemon_id,
            uid, gid,
            config, keyring)

    if not reconfig:
        if daemon_type == CephadmAgent.daemon_type:
            if ctx.config_json == '-':
                config_js = get_parm('-')
            else:
                config_js = get_parm(ctx.config_json)
            assert isinstance(config_js, dict)

            cephadm_agent = CephadmAgent(ctx, fsid, daemon_id)
            cephadm_agent.deploy_daemon_unit(config_js)
        else:
            if c:
                deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id,
                                    c, osd_fsid=osd_fsid, ports=ports)
            else:
                raise RuntimeError('attempting to deploy a daemon without a container image')

    if not os.path.exists(data_dir + '/unit.created'):
        with open(data_dir + '/unit.created', 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write('mtime is time the daemon deployment was created\n')

    with open(data_dir + '/unit.configured', 'w') as f:
        f.write('mtime is time we were last configured\n')
        os.fchmod(f.fileno(), 0o600)
        os.fchown(f.fileno(), uid, gid)

    update_firewalld(ctx, daemon_type)

    # Open ports explicitly required for the daemon
    if ports:
        fw = Firewalld(ctx)
        fw.open_ports(ports + fw.external_ports.get(daemon_type, []))
        fw.apply_rules()

    if reconfig and daemon_type not in Ceph.daemons:
        # ceph daemons do not need a restart; others (presumably) do to pick
        # up the new config
        call_throws(ctx, ['systemctl', 'reset-failed',
                          get_unit_name(fsid, daemon_type, daemon_id)])
        call_throws(ctx, ['systemctl', 'restart',
                          get_unit_name(fsid, daemon_type, daemon_id)])
def _write_container_cmd_to_bash(ctx, file_obj, container, comment=None, background=False):
    # type: (CephadmContext, IO[str], CephContainer, Optional[str], Optional[bool]) -> None
    """Append the shell commands that remove any stale container and then run it.

    Writes to *file_obj* (a unit.run-style script): best-effort `rm` of the
    container under both its old and new names (and with `--storage` under
    podman), followed by the actual run command, optionally backgrounded
    with `&`.
    """
    if comment:
        # Sometimes adding a comment, especially if there are multiple containers in one
        # unit file, makes it easier to read and grok.
        file_obj.write('# ' + comment + '\n')
    # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
    file_obj.write('! ' + ' '.join(container.rm_cmd(old_cname=True)) + ' 2> /dev/null\n')
    file_obj.write('! ' + ' '.join(container.rm_cmd()) + ' 2> /dev/null\n')
    # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
    if isinstance(ctx.container_engine, Podman):
        file_obj.write(
            '! '
            + ' '.join([shlex.quote(a) for a in container.rm_cmd(storage=True)])
            + ' 2> /dev/null\n')
        file_obj.write(
            '! '
            + ' '.join([shlex.quote(a) for a in container.rm_cmd(old_cname=True, storage=True)])
            + ' 2> /dev/null\n')

    # container run command
    file_obj.write(
        ' '.join([shlex.quote(a) for a in container.run_cmd()])
        + (' &' if background else '') + '\n')
def clean_cgroup(ctx: CephadmContext, fsid: str, unit_name: str) -> None:
    """Remove leftover cgroup-v2 directories for a stopped unit.

    systemd may fail to cleanup cgroups from previous stopped unit, which
    will cause next "systemctl start" to fail.
    see https://tracker.ceph.com/issues/50998
    """
    CGROUPV2_PATH = Path('/sys/fs/cgroup')
    if not (CGROUPV2_PATH / 'system.slice').exists():
        # Only unified cgroup is affected, skip if not the case
        return

    # systemd escapes '-' in the fsid as '\x2d' inside the slice name
    slice_name = 'system-ceph\\x2d{}.slice'.format(fsid.replace('-', '\\x2d'))
    cg_path = CGROUPV2_PATH / 'system.slice' / slice_name / f'{unit_name}.service'
    if not cg_path.exists():
        return

    def cg_trim(path: Path) -> None:
        # depth-first removal: cgroup dirs can only be rmdir'ed when empty
        for p in path.iterdir():
            if p.is_dir():
                cg_trim(p)
        path.rmdir()
    try:
        cg_trim(cg_path)
    except OSError:
        logger.warning(f'Failed to trim old cgroups {cg_path}')
def deploy_daemon_units(
    ctx: CephadmContext,
    fsid: str,
    uid: int,
    gid: int,
    daemon_type: str,
    daemon_id: Union[int, str],
    c: 'CephContainer',
    enable: bool = True,
    start: bool = True,
    osd_fsid: Optional[str] = None,
    ports: Optional[List[int]] = None,
) -> None:
    """Write unit.run/stop/poststop/meta/image for a daemon and (re)start it.

    Generates the shell scripts the systemd unit executes, installs sysctl
    settings and the base targets, writes the ceph-<fsid>@.service template,
    then enables/starts the unit.

    NOTE(review): restored from extraction-mangled source; elided lines were
    filled in to match the surrounding structure — verify against upstream.
    """
    # cmd

    def add_stop_actions(f: TextIO) -> None:
        # following generated script basically checks if the container exists
        # before stopping it. Exit code will be success either if it doesn't
        # exist or if it exists and is stopped successfully.
        container_exists = f'{ctx.container_engine.path} inspect %s &>/dev/null'
        f.write(f'! {container_exists % c.old_cname} || {" ".join(c.stop_cmd(old_cname=True))} \n')
        f.write(f'! {container_exists % c.cname} || {" ".join(c.stop_cmd())} \n')

    data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    with open(data_dir + '/unit.run.new', 'w') as f, \
            open(data_dir + '/unit.meta.new', 'w') as metaf:
        f.write('set -e\n')

        if daemon_type in Ceph.daemons:
            install_path = find_program('install')
            f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))

        # pre-start cmd(s)
        if daemon_type == 'osd':
            # osds have a pre-start step
            assert osd_fsid
            simple_fn = os.path.join('/etc/ceph/osd',
                                     '%s-%s.json.adopted-by-cephadm' % (daemon_id, osd_fsid))
            if os.path.exists(simple_fn):
                f.write('# Simple OSDs need chown on startup:\n')
                for n in ['block', 'block.db', 'block.wal']:
                    p = os.path.join(data_dir, n)
                    f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
            else:
                # if ceph-volume does not support 'ceph-volume activate', we must
                # do 'ceph-volume lvm activate'.
                test_cv = get_ceph_volume_container(
                    ctx,
                    args=['activate', '--bad-option'],
                    volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                    bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                    cname='ceph-%s-%s.%s-activate-test' % (fsid, daemon_type, daemon_id),
                )
                out, err, ret = call(ctx, test_cv.run_cmd(), verbosity=CallVerbosity.SILENT)
                # bad: ceph-volume: error: unrecognized arguments: activate --bad-option
                # good: ceph-volume: error: unrecognized arguments: --bad-option
                if 'unrecognized arguments: activate' in err:
                    # older ceph-volume without top-level activate or --no-tmpfs
                    cmd = [
                        'lvm', 'activate',
                        str(daemon_id), osd_fsid,
                        '--no-systemd',
                    ]
                else:
                    cmd = [
                        'activate',
                        '--osd-id', str(daemon_id),
                        '--osd-uuid', osd_fsid,
                        '--no-systemd',
                        '--no-tmpfs',
                    ]

                prestart = get_ceph_volume_container(
                    ctx,
                    args=cmd,
                    volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                    bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                    cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
                )
                _write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate')
        elif daemon_type == CephIscsi.daemon_type:
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
            ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            _write_container_cmd_to_bash(ctx, f, tcmu_container, 'iscsi tcmu-runner container', background=True)

        _write_container_cmd_to_bash(ctx, f, c, '%s.%s' % (daemon_type, str(daemon_id)))

        # some metadata about the deploy
        meta: Dict[str, Any] = {}
        if 'meta_json' in ctx and ctx.meta_json:
            meta = json.loads(ctx.meta_json) or {}
        meta.update({
            'memory_request': int(ctx.memory_request) if ctx.memory_request else None,
            'memory_limit': int(ctx.memory_limit) if ctx.memory_limit else None,
        })
        if not meta.get('ports'):
            meta['ports'] = ports
        metaf.write(json.dumps(meta, indent=4) + '\n')

        os.fchmod(f.fileno(), 0o600)
        os.fchmod(metaf.fileno(), 0o600)
        os.rename(data_dir + '/unit.run.new',
                  data_dir + '/unit.run')
        os.rename(data_dir + '/unit.meta.new',
                  data_dir + '/unit.meta')

    # post-stop command(s)
    with open(data_dir + '/unit.poststop.new', 'w') as f:
        # this is a fallback to eventually stop any underlying container that was not stopped properly by unit.stop,
        # this could happen in very slow setups as described in the issue https://tracker.ceph.com/issues/58242.
        add_stop_actions(f)
        if daemon_type == 'osd':
            assert osd_fsid
            poststop = get_ceph_volume_container(
                ctx,
                args=[
                    'lvm', 'deactivate',
                    str(daemon_id), osd_fsid,
                ],
                volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type,
                                                    daemon_id),
            )
            _write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd')
        elif daemon_type == CephIscsi.daemon_type:
            # make sure we also stop the tcmu container
            ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            f.write('! ' + ' '.join(tcmu_container.stop_cmd()) + '\n')
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
        os.fchmod(f.fileno(), 0o600)
        os.rename(data_dir + '/unit.poststop.new',
                  data_dir + '/unit.poststop')

    # stop command(s)
    with open(data_dir + '/unit.stop.new', 'w') as f:
        add_stop_actions(f)
        os.fchmod(f.fileno(), 0o600)
        os.rename(data_dir + '/unit.stop.new',
                  data_dir + '/unit.stop')

    # record which image the unit was generated for
    with open(data_dir + '/unit.image.new', 'w') as f:
        f.write(c.image + '\n')
        os.fchmod(f.fileno(), 0o600)
        os.rename(data_dir + '/unit.image.new',
                  data_dir + '/unit.image')

    # sysctl
    install_sysctl(ctx, fsid, daemon_type)

    # systemd
    install_base_units(ctx, fsid)
    unit = get_unit_file(ctx, fsid)
    unit_file = 'ceph-%s@.service' % (fsid)
    with open(ctx.unit_dir + '/' + unit_file + '.new', 'w') as f:
        f.write(unit)
        os.rename(ctx.unit_dir + '/' + unit_file + '.new',
                  ctx.unit_dir + '/' + unit_file)
    call_throws(ctx, ['systemctl', 'daemon-reload'])

    unit_name = get_unit_name(fsid, daemon_type, daemon_id)
    call(ctx, ['systemctl', 'stop', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'reset-failed', unit_name],
         verbosity=CallVerbosity.DEBUG)
    if enable:
        call_throws(ctx, ['systemctl', 'enable', unit_name])
    if start:
        clean_cgroup(ctx, fsid, unit_name)
        call_throws(ctx, ['systemctl', 'start', unit_name])
class Firewalld(object):
    """Thin wrapper around firewall-cmd for opening/closing daemon ports.

    All operations are no-ops when firewalld is absent, disabled, or not
    running (self.available is False).
    """

    # for specifying ports we should always open when opening
    # ports for a daemon of that type. Main use case is for ports
    # that we should open when deploying the daemon type but that
    # the daemon itself may not necessarily need to bind to the port.
    # This needs to be handed differently as we don't want to fail
    # deployment if the port cannot be bound to but we still want to
    # open the port in the firewall.
    external_ports: Dict[str, List[int]] = {
        'iscsi': [3260]  # 3260 is the well known iSCSI port
    }

    def __init__(self, ctx):
        # type: (CephadmContext) -> None
        self.ctx = ctx
        self.available = self.check()

    def check(self):
        # type: () -> bool
        """Return True iff firewall-cmd exists and firewalld.service is enabled and running."""
        self.cmd = find_executable('firewall-cmd')
        if not self.cmd:
            logger.debug('firewalld does not appear to be present')
            return False
        (enabled, state, _) = check_unit(self.ctx, 'firewalld.service')
        if not enabled:
            logger.debug('firewalld.service is not enabled')
            return False
        if state != 'running':
            logger.debug('firewalld.service is not running')
            return False

        logger.info('firewalld ready')
        return True

    def enable_service_for(self, daemon_type):
        # type: (str) -> None
        """Enable the firewalld service matching a daemon type, if one is defined."""
        if not self.available:
            logger.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type)
            return

        if daemon_type == 'mon':
            svc = 'ceph-mon'
        elif daemon_type in ['mgr', 'mds', 'osd']:
            svc = 'ceph'
        elif daemon_type == NFSGanesha.daemon_type:
            svc = 'nfs'
        else:
            # no firewalld service mapped to this daemon type
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG)
        if ret:
            logger.info('Enabling firewalld service %s in current zone...' % svc)
            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-service', svc])
            if ret:
                raise RuntimeError(
                    'unable to add service %s to current zone: %s' % (svc, err))
        else:
            logger.debug('firewalld service %s is enabled in current zone' % svc)

    def open_ports(self, fw_ports):
        # type: (List[int]) -> None
        """Permanently open the given TCP ports in the current zone (idempotent)."""
        if not self.available:
            logger.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports)
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        for port in fw_ports:
            tcp_port = str(port) + '/tcp'
            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
            if ret:
                logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
                out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-port', tcp_port])
                if ret:
                    raise RuntimeError('unable to add port %s to current zone: %s' %
                                       (tcp_port, err))
            else:
                logger.debug('firewalld port %s is enabled in current zone' % tcp_port)

    def close_ports(self, fw_ports):
        # type: (List[int]) -> None
        """Permanently close the given TCP ports in the current zone (idempotent)."""
        if not self.available:
            logger.debug('Not possible to close ports <%s>. firewalld.service is not available' % fw_ports)
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        for port in fw_ports:
            tcp_port = str(port) + '/tcp'
            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
            if not ret:
                logger.info('Disabling port %s in current zone...' % tcp_port)
                out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--remove-port', tcp_port])
                if ret:
                    raise RuntimeError('unable to remove port %s from current zone: %s' %
                                       (tcp_port, err))
                logger.info(f'Port {tcp_port} disabled')
            else:
                logger.info(f'firewalld port {tcp_port} already closed')

    def apply_rules(self):
        # type: () -> None
        """Reload firewalld so permanent rule changes take effect."""
        if not self.available:
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        call_throws(self.ctx, [self.cmd, '--reload'])
def update_firewalld(ctx, daemon_type):
    # type: (CephadmContext, str) -> None
    """Enable the firewalld service for a daemon type and reload rules.

    Does nothing when the context carries a truthy skip_firewalld flag.
    """
    skip_requested = 'skip_firewalld' in ctx and ctx.skip_firewalld
    if skip_requested:
        return
    firewall = Firewalld(ctx)
    firewall.enable_service_for(daemon_type)
    firewall.apply_rules()
def install_sysctl(ctx: CephadmContext, fsid: str, daemon_type: str) -> None:
    """
    Set up sysctl settings
    """
    def _write(conf: Path, lines: List[str]) -> None:
        # prepend a provenance header and terminate with a trailing newline
        lines = [
            '# created by cephadm',
            '',
        ] + lines + ['']
        with open(conf, 'w') as f:
            f.write('\n'.join(lines))

    conf = Path(ctx.sysctl_dir).joinpath(f'90-ceph-{fsid}-{daemon_type}.conf')
    lines: Optional[List] = None

    if daemon_type == 'osd':
        lines = OSD.get_sysctl_settings()
    elif daemon_type == 'haproxy':
        lines = HAproxy.get_sysctl_settings()
    elif daemon_type == 'keepalived':
        lines = Keepalived.get_sysctl_settings()

    # apply the sysctl settings
    if lines:
        Path(ctx.sysctl_dir).mkdir(mode=0o755, exist_ok=True)
        _write(conf, lines)
        call_throws(ctx, ['sysctl', '--system'])
def migrate_sysctl_dir(ctx: CephadmContext, fsid: str) -> None:
    """
    Cephadm once used '/usr/lib/sysctl.d' for storing sysctl configuration.
    This moves it to '/etc/sysctl.d'.

    NOTE(review): restored from extraction-mangled source; the per-file
    failure accounting (file_count decrements) was elided and has been
    reconstructed — verify against upstream cephadm.
    """
    deprecated_location: str = '/usr/lib/sysctl.d'
    deprecated_confs: List[str] = glob(f'{deprecated_location}/90-ceph-{fsid}-*.conf')
    if not deprecated_confs:
        return

    file_count: int = len(deprecated_confs)
    logger.info(f'Found sysctl {file_count} files in deprecated location {deprecated_location}. Starting Migration.')
    for conf in deprecated_confs:
        try:
            shutil.move(conf, ctx.sysctl_dir)
            file_count -= 1
        except shutil.Error as err:
            if str(err).endswith('already exists'):
                # the target already has this file; drop the deprecated copy
                logger.warning(f'Destination file already exists. Deleting {conf}.')
                try:
                    os.unlink(conf)
                    file_count -= 1
                except OSError as del_err:
                    logger.warning(f'Could not remove {conf}: {del_err}.')
            else:
                logger.warning(f'Could not move {conf} from {deprecated_location} to {ctx.sysctl_dir}: {err}')

    # Log successful migration
    if file_count == 0:
        logger.info(f'Successfully migrated sysctl config to {ctx.sysctl_dir}.')
        return

    # Log partially successful / unsuccessful migration
    files_processed: int = len(deprecated_confs)
    if file_count < files_processed:
        status: str = f'partially successful (failed {file_count}/{files_processed})'
    elif file_count == files_processed:
        status = 'unsuccessful'
    logger.warning(f'Migration of sysctl configuration {status}. You may want to perform a migration manually.')
def install_base_units(ctx, fsid):
    # type: (CephadmContext, str) -> None
    """
    Set up ceph.target and ceph-$fsid.target units.

    NOTE(review): restored from extraction-mangled source; elided unit-file
    and logrotate text reconstructed — verify against upstream cephadm.
    """
    # global unit
    existed = os.path.exists(ctx.unit_dir + '/ceph.target')
    with open(ctx.unit_dir + '/ceph.target.new', 'w') as f:
        f.write('[Unit]\n'
                'Description=All Ceph clusters and services\n'
                '\n'
                '[Install]\n'
                'WantedBy=multi-user.target\n')
        os.rename(ctx.unit_dir + '/ceph.target.new',
                  ctx.unit_dir + '/ceph.target')
    if not existed:
        # we disable before enable in case a different ceph.target
        # (from the traditional package) is present; while newer
        # systemd is smart enough to disable the old
        # (/lib/systemd/...) and enable the new (/etc/systemd/...),
        # some older versions of systemd error out with EEXIST.
        call_throws(ctx, ['systemctl', 'disable', 'ceph.target'])
        call_throws(ctx, ['systemctl', 'enable', 'ceph.target'])
        call_throws(ctx, ['systemctl', 'start', 'ceph.target'])

    # cluster unit
    existed = os.path.exists(ctx.unit_dir + '/ceph-%s.target' % fsid)
    with open(ctx.unit_dir + '/ceph-%s.target.new' % fsid, 'w') as f:
        f.write(
            '[Unit]\n'
            'Description=Ceph cluster {fsid}\n'
            'PartOf=ceph.target\n'
            'Before=ceph.target\n'
            '\n'
            '[Install]\n'
            'WantedBy=multi-user.target ceph.target\n'.format(
                fsid=fsid)
        )
        os.rename(ctx.unit_dir + '/ceph-%s.target.new' % fsid,
                  ctx.unit_dir + '/ceph-%s.target' % fsid)
    if not existed:
        call_throws(ctx, ['systemctl', 'enable', 'ceph-%s.target' % fsid])
        call_throws(ctx, ['systemctl', 'start', 'ceph-%s.target' % fsid])

    # don't overwrite file in order to allow users to manipulate it
    if os.path.exists(ctx.logrotate_dir + f'/ceph-{fsid}'):
        return

    # logrotate for the cluster
    with open(ctx.logrotate_dir + '/ceph-%s' % fsid, 'w') as f:
        """
        This is a bit sloppy in that the killall/pkill will touch all ceph daemons
        in all containers, but I don't see an elegant way to send SIGHUP *just* to
        the daemons for this cluster.  (1) systemd kill -s will get the signal to
        podman, but podman will exit.  (2) podman kill will get the signal to the
        first child (bash), but that isn't the ceph daemon.  This is simpler and
        should be harmless.
        """
        f.write("""# created by cephadm
/var/log/ceph/%s/*.log {
    rotate 7
    daily
    compress
    sharedscripts
    postrotate
        killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror cephfs-mirror || pkill -1 -x 'ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror|cephfs-mirror' || true
    endscript
    missingok
    notifempty
    su root root
}
""" % fsid)
def get_unit_file(ctx, fsid):
    # type: (CephadmContext, str) -> str
    """Return the ceph-<fsid>@.service systemd unit template for this host.

    Adds podman-specific directives (forking type, pid/cid cleanup, cgroup
    delegation on new enough podman) and docker.service ordering when the
    engine is Docker.

    NOTE(review): restored from extraction-mangled source; elided template
    lines reconstructed — verify against upstream cephadm.
    """
    extra_args = ''
    if isinstance(ctx.container_engine, Podman):
        extra_args = ('ExecStartPre=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
                      'ExecStopPost=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
                      'Type=forking\n'
                      'PIDFile=%t/%n-pid\n')
        if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION:
            extra_args += 'Delegate=yes\n'

    docker = isinstance(ctx.container_engine, Docker)
    u = """# generated by cephadm
[Unit]
Description=Ceph %i for {fsid}

# According to:
#   http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
# these can be removed once ceph-mon will dynamically change network
# configuration.
After=network-online.target local-fs.target time-sync.target{docker_after}
Wants=network-online.target local-fs.target time-sync.target
{docker_requires}
PartOf=ceph-{fsid}.target
Before=ceph-{fsid}.target

[Service]
LimitNOFILE=1048576
LimitNPROC=1048576
EnvironmentFile=-/etc/environment
ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
ExecStop=-/bin/bash -c 'bash {data_dir}/{fsid}/%i/unit.stop'
ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
KillMode=none
Restart=on-failure
RestartSec=10s
TimeoutStartSec=120
TimeoutStopSec=120
StartLimitInterval=30min
StartLimitBurst=5
{extra_args}
[Install]
WantedBy=ceph-{fsid}.target
""".format(fsid=fsid,
           data_dir=ctx.data_dir,
           extra_args=extra_args,
           # if docker, we depend on docker.service
           docker_after=' docker.service' if docker else '',
           docker_requires='Requires=docker.service\n' if docker else '')

    return u
3901 ##################################
class CephContainer:
    """Builds the engine (podman/docker) command lines for one container.

    Holds everything needed to run, exec into, stop, or remove a container
    for a ceph daemon; the *_cmd methods return argv lists, run() executes.

    NOTE(review): restored from extraction-mangled source; elided attribute
    assignments and flag branches reconstructed — verify against upstream.
    """

    def __init__(self,
                 ctx: CephadmContext,
                 image: str,
                 entrypoint: str,
                 args: List[str] = [],
                 volume_mounts: Dict[str, str] = {},
                 cname: str = '',
                 container_args: List[str] = [],
                 envs: Optional[List[str]] = None,
                 privileged: bool = False,
                 ptrace: bool = False,
                 bind_mounts: Optional[List[List[str]]] = None,
                 init: Optional[bool] = None,
                 host_network: bool = True,
                 memory_request: Optional[str] = None,
                 memory_limit: Optional[str] = None,
                 ) -> None:
        self.ctx = ctx
        self.image = image
        self.entrypoint = entrypoint
        self.args = args
        self.volume_mounts = volume_mounts
        self._cname = cname
        self.container_args = container_args
        self.envs = envs
        self.privileged = privileged
        self.ptrace = ptrace
        self.bind_mounts = bind_mounts if bind_mounts else []
        self.init = init if init else ctx.container_init
        self.host_network = host_network
        self.memory_request = memory_request
        self.memory_limit = memory_limit

    @classmethod
    def for_daemon(cls,
                   ctx: CephadmContext,
                   fsid: str,
                   daemon_type: str,
                   daemon_id: str,
                   entrypoint: str,
                   args: List[str] = [],
                   volume_mounts: Dict[str, str] = {},
                   container_args: List[str] = [],
                   envs: Optional[List[str]] = None,
                   privileged: bool = False,
                   ptrace: bool = False,
                   bind_mounts: Optional[List[List[str]]] = None,
                   init: Optional[bool] = None,
                   host_network: bool = True,
                   memory_request: Optional[str] = None,
                   memory_limit: Optional[str] = None,
                   ) -> 'CephContainer':
        # alternate constructor that derives the container name from the daemon identity
        return cls(
            ctx,
            image=ctx.image,
            entrypoint=entrypoint,
            args=args,
            volume_mounts=volume_mounts,
            cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id),
            container_args=container_args,
            envs=envs,
            privileged=privileged,
            ptrace=ptrace,
            bind_mounts=bind_mounts,
            init=init,
            host_network=host_network,
            memory_request=memory_request,
            memory_limit=memory_limit,
        )

    @property
    def cname(self) -> str:
        """
        podman adds the current container name to the /etc/hosts
        file. Turns out, python's `socket.getfqdn()` differs from
        `hostname -f`, when we have the container names containing
        dots in it, like:

        # podman run --name foo.bar.baz.com ceph/ceph /bin/bash
        [root@sebastians-laptop /]# cat /etc/hosts
        127.0.0.1   localhost
        127.0.1.1   sebastians-laptop foo.bar.baz.com
        [root@sebastians-laptop /]# hostname -f
        sebastians-laptop
        [root@sebastians-laptop /]# python3 -c 'import socket; print(socket.getfqdn())'
        foo.bar.baz.com

        Fascinatingly, this doesn't happen when using dashes.
        """
        return self._cname.replace('.', '-')

    @cname.setter
    def cname(self, val: str) -> None:
        self._cname = val

    @property
    def old_cname(self) -> str:
        # the pre-sanitization (dotted) container name
        return self._cname

    def run_cmd(self) -> List[str]:
        cmd_args: List[str] = [
            str(self.ctx.container_engine.path),
            'run',
            '--rm',
            '--ipc=host',
            # some containers (ahem, haproxy) override this, but we want a fast
            # shutdown always (and, more importantly, a successful exit even if we
            # fall back to SIGKILL).
            '--stop-signal=SIGTERM',
        ]

        if isinstance(self.ctx.container_engine, Podman):
            if os.path.exists('/etc/ceph/podman-auth.json'):
                cmd_args.append('--authfile=/etc/ceph/podman-auth.json')

        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        vols: List[str] = []
        binds: List[str] = []

        if self.memory_request:
            cmd_args.extend(['-e', 'POD_MEMORY_REQUEST', str(self.memory_request)])
        if self.memory_limit:
            cmd_args.extend(['-e', 'POD_MEMORY_LIMIT', str(self.memory_limit)])
            cmd_args.extend(['--memory', str(self.memory_limit)])

        if self.host_network:
            cmd_args.append('--net=host')
        if self.entrypoint:
            cmd_args.extend(['--entrypoint', self.entrypoint])
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk'])
        if self.ptrace and not self.privileged:
            # if privileged, the SYS_PTRACE cap is already added
            # in addition, --cap-add and --privileged are mutually
            # exclusive since podman >= 2.0
            cmd_args.append('--cap-add=SYS_PTRACE')
        if self.init:
            cmd_args.append('--init')
            envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
        if self.cname:
            cmd_args.extend(['--name', self.cname])
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        binds = sum([['--mount', '{}'.format(','.join(bind))]
                     for bind in self.bind_mounts], [])

        return \
            cmd_args + self.container_args + \
            envs + vols + binds + \
            [self.image] + self.args  # type: ignore

    def shell_cmd(self, cmd: List[str]) -> List[str]:
        cmd_args: List[str] = [
            str(self.ctx.container_engine.path),
            'run',
            '--rm',
            '--ipc=host',
        ]
        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        vols: List[str] = []
        binds: List[str] = []

        if self.host_network:
            cmd_args.append('--net=host')
        if self.ctx.no_hosts:
            cmd_args.append('--no-hosts')
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk',
            ])
        if self.init:
            cmd_args.append('--init')
            envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        binds = sum([['--mount', '{}'.format(','.join(bind))]
                     for bind in self.bind_mounts], [])

        return cmd_args + self.container_args + envs + vols + binds + [
            '--entrypoint', cmd[0],
            self.image,
        ] + cmd[1:]

    def exec_cmd(self, cmd):
        # type: (List[str]) -> List[str]
        cname = get_running_container_name(self.ctx, self)
        if not cname:
            raise Error('unable to find container "{}"'.format(self.cname))
        return [
            str(self.ctx.container_engine.path),
            'exec',
        ] + self.container_args + [
            cname,
        ] + cmd

    def rm_cmd(self, old_cname: bool = False, storage: bool = False) -> List[str]:
        ret = [
            str(self.ctx.container_engine.path),
            'rm', '-f',
        ]
        if storage:
            ret.append('--storage')
        if old_cname:
            ret.append(self.old_cname)
        else:
            ret.append(self.cname)
        return ret

    def stop_cmd(self, old_cname: bool = False) -> List[str]:
        ret = [
            str(self.ctx.container_engine.path),
            'stop', self.old_cname if old_cname else self.cname,
        ]
        return ret

    def run(self, timeout=DEFAULT_TIMEOUT, verbosity=CallVerbosity.VERBOSE_ON_FAILURE):
        # type: (Optional[int], CallVerbosity) -> str
        out, _, _ = call_throws(self.ctx, self.run_cmd(),
                                desc=self.entrypoint, timeout=timeout,
                                verbosity=verbosity)
        return out
4149 #####################################
class MgrListener(Thread):
    """TLS server thread through which the mgr pushes config to the agent.

    Accepts mutually-authenticated TLS connections on the agent's listener
    port, reads a 10-byte length header then a JSON payload, and applies it
    via handle_json_payload.

    NOTE(review): restored from extraction-mangled source; elided lines
    (stop flag init, ACK/else path) reconstructed — verify against upstream.
    """

    def __init__(self, agent: 'CephadmAgent') -> None:
        self.agent = agent
        self.stop = False  # checked by run(); set by shutdown()
        super(MgrListener, self).__init__(target=self.run)

    def run(self) -> None:
        listenSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        listenSocket.bind(('0.0.0.0', int(self.agent.listener_port)))
        listenSocket.settimeout(60)
        listenSocket.listen(1)
        ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
        # require a client cert signed by our CA (mutual TLS with the mgr)
        ssl_ctx.verify_mode = ssl.CERT_REQUIRED
        ssl_ctx.load_cert_chain(self.agent.listener_cert_path, self.agent.listener_key_path)
        ssl_ctx.load_verify_locations(self.agent.ca_path)
        secureListenSocket = ssl_ctx.wrap_socket(listenSocket, server_side=True)
        while not self.stop:
            try:
                try:
                    conn, _ = secureListenSocket.accept()
                except socket.timeout:
                    # periodic wakeup so a shutdown() request is noticed
                    continue
                try:
                    # first 10 bytes carry the decimal payload length
                    length: int = int(conn.recv(10).decode())
                except Exception as e:
                    err_str = f'Failed to extract length of payload from message: {e}'
                    conn.send(err_str.encode())
                    logger.error(err_str)
                    continue
                payload = conn.recv(length).decode()
                try:
                    data: Dict[Any, Any] = json.loads(payload)
                    self.handle_json_payload(data)
                except Exception as e:
                    err_str = f'Failed to extract json payload from message: {e}'
                    conn.send(err_str.encode())
                    logger.error(err_str)
                else:
                    conn.send(b'ACK')
                    if 'config' in data:
                        self.agent.wakeup()
                    self.agent.ls_gatherer.wakeup()
                    self.agent.volume_gatherer.wakeup()
                    logger.debug(f'Got mgr message {data}')
            except Exception as e:
                logger.error(f'Mgr Listener encountered exception: {e}')

    def shutdown(self) -> None:
        # cooperative stop; run() exits after its next accept timeout
        self.stop = True

    def handle_json_payload(self, data: Dict[Any, Any]) -> None:
        """Record the mgr's counter ack and install any pushed config files."""
        self.agent.ack = int(data['counter'])
        if 'config' in data:
            logger.info('Received new config from mgr')
            config = data['config']
            for filename in config:
                if filename in self.agent.required_files:
                    file_path = os.path.join(self.agent.daemon_dir, filename)
                    # write with 0600 perms, then rename for atomic replace
                    with open(os.open(file_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
                        f.write(config[filename])
                        os.rename(file_path + '.new', file_path)
            self.agent.pull_conf_settings()
4218 class CephadmAgent():
4220 daemon_type
= 'agent'
4233 def __init__(self
, ctx
: CephadmContext
, fsid
: str, daemon_id
: Union
[int, str] = ''):
4236 self
.daemon_id
= daemon_id
4237 self
.starting_port
= 14873
4239 self
.target_port
= ''
4241 self
.daemon_dir
= os
.path
.join(ctx
.data_dir
, self
.fsid
, f
'{self.daemon_type}.{self.daemon_id}')
4242 self
.config_path
= os
.path
.join(self
.daemon_dir
, 'agent.json')
4243 self
.keyring_path
= os
.path
.join(self
.daemon_dir
, 'keyring')
4244 self
.ca_path
= os
.path
.join(self
.daemon_dir
, 'root_cert.pem')
4245 self
.listener_cert_path
= os
.path
.join(self
.daemon_dir
, 'listener.crt')
4246 self
.listener_key_path
= os
.path
.join(self
.daemon_dir
, 'listener.key')
4247 self
.listener_port
= ''
4249 self
.event
= Event()
4250 self
.mgr_listener
= MgrListener(self
)
4251 self
.ls_gatherer
= AgentGatherer(self
, lambda: self
._get
_ls
(), 'Ls')
4252 self
.volume_gatherer
= AgentGatherer(self
, lambda: self
._ceph
_volume
(enhanced
=False), 'Volume')
4253 self
.device_enhanced_scan
= False
4254 self
.recent_iteration_run_times
: List
[float] = [0.0, 0.0, 0.0]
4255 self
.recent_iteration_index
: int = 0
4256 self
.cached_ls_values
: Dict
[str, Dict
[str, str]] = {}
4258 def validate(self
, config
: Dict
[str, str] = {}) -> None:
4259 # check for the required files
4260 for fname
in self
.required_files
:
4261 if fname
not in config
:
4262 raise Error('required file missing from config: %s' % fname
)
4264 def deploy_daemon_unit(self
, config
: Dict
[str, str] = {}) -> None:
4266 raise Error('Agent needs a config')
4267 assert isinstance(config
, dict)
4268 self
.validate(config
)
4270 # Create the required config files in the daemons dir, with restricted permissions
4271 for filename
in config
:
4272 if filename
in self
.required_files
:
4273 file_path
= os
.path
.join(self
.daemon_dir
, filename
)
4274 with
open(os
.open(file_path
+ '.new', os
.O_CREAT | os
.O_WRONLY
, 0o600), 'w') as f
:
4275 f
.write(config
[filename
])
4276 os
.rename(file_path
+ '.new', file_path
)
4278 unit_run_path
= os
.path
.join(self
.daemon_dir
, 'unit.run')
4279 with
open(os
.open(unit_run_path
+ '.new', os
.O_CREAT | os
.O_WRONLY
, 0o600), 'w') as f
:
4280 f
.write(self
.unit_run())
4281 os
.rename(unit_run_path
+ '.new', unit_run_path
)
4283 meta
: Dict
[str, Any
] = {}
4284 meta_file_path
= os
.path
.join(self
.daemon_dir
, 'unit.meta')
4285 if 'meta_json' in self
.ctx
and self
.ctx
.meta_json
:
4286 meta
= json
.loads(self
.ctx
.meta_json
) or {}
4287 with
open(os
.open(meta_file_path
+ '.new', os
.O_CREAT | os
.O_WRONLY
, 0o600), 'w') as f
:
4288 f
.write(json
.dumps(meta
, indent
=4) + '\n')
4289 os
.rename(meta_file_path
+ '.new', meta_file_path
)
4291 unit_file_path
= os
.path
.join(self
.ctx
.unit_dir
, self
.unit_name())
4292 with
open(os
.open(unit_file_path
+ '.new', os
.O_CREAT | os
.O_WRONLY
, 0o600), 'w') as f
:
4293 f
.write(self
.unit_file())
4294 os
.rename(unit_file_path
+ '.new', unit_file_path
)
4296 call_throws(self
.ctx
, ['systemctl', 'daemon-reload'])
4297 call(self
.ctx
, ['systemctl', 'stop', self
.unit_name()],
4298 verbosity
=CallVerbosity
.DEBUG
)
4299 call(self
.ctx
, ['systemctl', 'reset-failed', self
.unit_name()],
4300 verbosity
=CallVerbosity
.DEBUG
)
4301 call_throws(self
.ctx
, ['systemctl', 'enable', '--now', self
.unit_name()])
4303 def unit_name(self
) -> str:
4304 return '{}.service'.format(get_unit_name(self
.fsid
, self
.daemon_type
, self
.daemon_id
))
4306 def unit_run(self
) -> str:
4307 py3
= shutil
.which('python3')
4308 binary_path
= os
.path
.realpath(sys
.argv
[0])
4309 return ('set -e\n' + f
'{py3} {binary_path} agent --fsid {self.fsid} --daemon-id {self.daemon_id} &\n')
4311 def unit_file(self
) -> str:
4312 return """#generated by cephadm
4314 Description=cephadm agent for cluster {fsid}
4316 PartOf=ceph-{fsid}.target
4317 Before=ceph-{fsid}.target
4321 ExecStart=/bin/bash {data_dir}/unit.run
4326 WantedBy=ceph-{fsid}.target
4329 data_dir
=self
.daemon_dir
4332 def shutdown(self
) -> None:
4334 if self
.mgr_listener
.is_alive():
4335 self
.mgr_listener
.shutdown()
4337 def wakeup(self
) -> None:
4340 def pull_conf_settings(self
) -> None:
4342 with
open(self
.config_path
, 'r') as f
:
4343 config
= json
.load(f
)
4344 self
.target_ip
= config
['target_ip']
4345 self
.target_port
= config
['target_port']
4346 self
.loop_interval
= int(config
['refresh_period'])
4347 self
.starting_port
= int(config
['listener_port'])
4348 self
.host
= config
['host']
4349 use_lsm
= config
['device_enhanced_scan']
4350 except Exception as e
:
4352 raise Error(f
'Failed to get agent target ip and port from config: {e}')
4355 with
open(self
.keyring_path
, 'r') as f
:
4356 self
.keyring
= f
.read()
4357 except Exception as e
:
4359 raise Error(f
'Failed to get agent keyring: {e}')
4361 assert self
.target_ip
and self
.target_port
4363 self
.device_enhanced_scan
= False
4364 if use_lsm
.lower() == 'true':
4365 self
.device_enhanced_scan
= True
4366 self
.volume_gatherer
.update_func(lambda: self
._ceph
_volume
(enhanced
=self
.device_enhanced_scan
))
4368 def run(self
) -> None:
4369 self
.pull_conf_settings()
4372 for _
in range(1001):
4373 if not port_in_use(self
.ctx
, self
.starting_port
):
4374 self
.listener_port
= str(self
.starting_port
)
4376 self
.starting_port
+= 1
4377 if not self
.listener_port
:
4378 raise Error(f
'All 1000 ports starting at {str(self.starting_port - 1001)} taken.')
4379 except Exception as e
:
4380 raise Error(f
'Failed to pick port for agent to listen on: {e}')
4382 if not self
.mgr_listener
.is_alive():
4383 self
.mgr_listener
.start()
4385 if not self
.ls_gatherer
.is_alive():
4386 self
.ls_gatherer
.start()
4388 if not self
.volume_gatherer
.is_alive():
4389 self
.volume_gatherer
.start()
4391 ssl_ctx
= ssl
.create_default_context()
4392 ssl_ctx
.check_hostname
= True
4393 ssl_ctx
.verify_mode
= ssl
.CERT_REQUIRED
4394 ssl_ctx
.load_verify_locations(self
.ca_path
)
4396 while not self
.stop
:
4397 start_time
= time
.monotonic()
4400 # part of the networks info is returned as a set which is not JSON
4401 # serializable. The set must be converted to a list
4402 networks
= list_networks(self
.ctx
)
4404 for key
in networks
.keys():
4405 for k
, v
in networks
[key
].items():
4406 networks_list
[key
] = {k
: list(v
)}
4408 data
= json
.dumps({'host': self
.host
,
4409 'ls': (self
.ls_gatherer
.data
if self
.ack
== self
.ls_gatherer
.ack
4410 and self
.ls_gatherer
.data
is not None else []),
4411 'networks': networks_list
,
4412 'facts': HostFacts(self
.ctx
).dump(),
4413 'volume': (self
.volume_gatherer
.data
if self
.ack
== self
.volume_gatherer
.ack
4414 and self
.volume_gatherer
.data
is not None else ''),
4416 'keyring': self
.keyring
,
4417 'port': self
.listener_port
})
4418 data
= data
.encode('ascii')
4420 url
= f
'https://{self.target_ip}:{self.target_port}/data'
4422 req
= Request(url
, data
, {'Content-Type': 'application/json'})
4423 send_time
= time
.monotonic()
4424 with
urlopen(req
, context
=ssl_ctx
) as response
:
4425 response_str
= response
.read()
4426 response_json
= json
.loads(response_str
)
4427 total_request_time
= datetime
.timedelta(seconds
=(time
.monotonic() - send_time
)).total_seconds()
4428 logger
.info(f
'Received mgr response: "{response_json["result"]}" {total_request_time} seconds after sending request.')
4429 except Exception as e
:
4430 logger
.error(f
'Failed to send metadata to mgr: {e}')
4432 end_time
= time
.monotonic()
4433 run_time
= datetime
.timedelta(seconds
=(end_time
- start_time
))
4434 self
.recent_iteration_run_times
[self
.recent_iteration_index
] = run_time
.total_seconds()
4435 self
.recent_iteration_index
= (self
.recent_iteration_index
+ 1) % 3
4436 run_time_average
= sum(self
.recent_iteration_run_times
, 0.0) / len([t
for t
in self
.recent_iteration_run_times
if t
])
4438 self
.event
.wait(max(self
.loop_interval
- int(run_time_average
), 0))
4441 def _ceph_volume(self
, enhanced
: bool = False) -> Tuple
[str, bool]:
4442 self
.ctx
.command
= 'inventory --format=json'.split()
4444 self
.ctx
.command
.append('--with-lsm')
4445 self
.ctx
.fsid
= self
.fsid
4447 stream
= io
.StringIO()
4448 with
redirect_stdout(stream
):
4449 command_ceph_volume(self
.ctx
)
4451 stdout
= stream
.getvalue()
4454 return (stdout
, False)
4456 raise Exception('ceph-volume returned empty value')
4458 def _daemon_ls_subset(self
) -> Dict
[str, Dict
[str, Any
]]:
4459 # gets a subset of ls info quickly. The results of this will tell us if our
4460 # cached info is still good or if we need to run the full ls again.
4461 # for legacy containers, we just grab the full info. For cephadmv1 containers,
4462 # we only grab enabled, state, mem_usage and container id. If container id has
4463 # not changed for any daemon, we assume our cached info is good.
4464 daemons
: Dict
[str, Dict
[str, Any
]] = {}
4465 data_dir
= self
.ctx
.data_dir
4466 seen_memusage
= {} # type: Dict[str, int]
4467 out
, err
, code
= call(
4469 [self
.ctx
.container_engine
.path
, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
4470 verbosity
=CallVerbosity
.DEBUG
4472 seen_memusage_cid_len
, seen_memusage
= _parse_mem_usage(code
, out
)
4473 # we need a mapping from container names to ids. Later we will convert daemon
4474 # names to container names to get daemons container id to see if it has changed
4475 out
, err
, code
= call(
4477 [self
.ctx
.container_engine
.path
, 'ps', '--format', '{{.ID}},{{.Names}}', '--no-trunc'],
4478 verbosity
=CallVerbosity
.DEBUG
4480 name_id_mapping
: Dict
[str, str] = self
._parse
_container
_id
_name
(code
, out
)
4481 for i
in os
.listdir(data_dir
):
4482 if i
in ['mon', 'osd', 'mds', 'mgr']:
4484 for j
in os
.listdir(os
.path
.join(data_dir
, i
)):
4487 (cluster
, daemon_id
) = j
.split('-', 1)
4488 legacy_unit_name
= 'ceph-%s@%s' % (daemon_type
, daemon_id
)
4489 (enabled
, state
, _
) = check_unit(self
.ctx
, legacy_unit_name
)
4490 daemons
[f
'{daemon_type}.{daemon_id}'] = {
4492 'name': '%s.%s' % (daemon_type
, daemon_id
),
4493 'fsid': self
.ctx
.fsid
if self
.ctx
.fsid
is not None else 'unknown',
4494 'systemd_unit': legacy_unit_name
,
4495 'enabled': 'true' if enabled
else 'false',
4499 fsid
= str(i
) # convince mypy that fsid is a str here
4500 for j
in os
.listdir(os
.path
.join(data_dir
, i
)):
4501 if '.' in j
and os
.path
.isdir(os
.path
.join(data_dir
, fsid
, j
)):
4502 (daemon_type
, daemon_id
) = j
.split('.', 1)
4503 unit_name
= get_unit_name(fsid
, daemon_type
, daemon_id
)
4504 (enabled
, state
, _
) = check_unit(self
.ctx
, unit_name
)
4506 'style': 'cephadm:v1',
4507 'systemd_unit': unit_name
,
4508 'enabled': 'true' if enabled
else 'false',
4511 c
= CephContainer
.for_daemon(self
.ctx
, self
.ctx
.fsid
, daemon_type
, daemon_id
, 'bash')
4512 container_id
: Optional
[str] = None
4513 for name
in (c
.cname
, c
.old_cname
):
4514 if name
in name_id_mapping
:
4515 container_id
= name_id_mapping
[name
]
4517 daemons
[j
]['container_id'] = container_id
4519 daemons
[j
]['memory_usage'] = seen_memusage
.get(container_id
[0:seen_memusage_cid_len
])
4522 def _parse_container_id_name(self
, code
: int, out
: str) -> Dict
[str, str]:
4523 # map container names to ids from ps output
4524 name_id_mapping
= {} # type: Dict[str, str]
4526 for line
in out
.splitlines():
4527 id, name
= line
.split(',')
4528 name_id_mapping
[name
] = id
4529 return name_id_mapping
4531 def _get_ls(self
) -> Tuple
[List
[Dict
[str, str]], bool]:
4532 if not self
.cached_ls_values
:
4533 logger
.info('No cached ls output. Running full daemon ls')
4534 ls
= list_daemons(self
.ctx
)
4536 self
.cached_ls_values
[d
['name']] = d
4539 ls_subset
= self
._daemon
_ls
_subset
()
4540 need_full_ls
= False
4541 state_change
= False
4542 if set(self
.cached_ls_values
.keys()) != set(ls_subset
.keys()):
4543 # case for a new daemon in ls or an old daemon no longer appearing.
4544 # If that happens we need a full ls
4545 logger
.info('Change detected in state of daemons. Running full daemon ls')
4546 ls
= list_daemons(self
.ctx
)
4548 self
.cached_ls_values
[d
['name']] = d
4550 for daemon
, info
in self
.cached_ls_values
.items():
4551 if info
['style'] == 'legacy':
4552 # for legacy containers, ls_subset just grabs all the info
4553 self
.cached_ls_values
[daemon
] = ls_subset
[daemon
]
4555 if info
['container_id'] != ls_subset
[daemon
]['container_id']:
4556 # case for container id having changed. We need full ls as
4557 # info we didn't grab like version and start time could have changed
4561 # want to know if a daemons state change because in those cases we want
4562 # to report back quicker
4564 self
.cached_ls_values
[daemon
]['enabled'] != ls_subset
[daemon
]['enabled']
4565 or self
.cached_ls_values
[daemon
]['state'] != ls_subset
[daemon
]['state']
4568 # if we reach here, container id matched. Update the few values we do track
4569 # from ls subset: state, enabled, memory_usage.
4570 self
.cached_ls_values
[daemon
]['enabled'] = ls_subset
[daemon
]['enabled']
4571 self
.cached_ls_values
[daemon
]['state'] = ls_subset
[daemon
]['state']
4572 if 'memory_usage' in ls_subset
[daemon
]:
4573 self
.cached_ls_values
[daemon
]['memory_usage'] = ls_subset
[daemon
]['memory_usage']
4575 logger
.info('Change detected in state of daemons. Running full daemon ls')
4576 ls
= list_daemons(self
.ctx
)
4578 self
.cached_ls_values
[d
['name']] = d
4581 ls
= [info
for daemon
, info
in self
.cached_ls_values
.items()]
4582 return (ls
, state_change
)
4585 class AgentGatherer(Thread
):
4586 def __init__(self
, agent
: 'CephadmAgent', func
: Callable
, gatherer_type
: str = 'Unnamed', initial_ack
: int = 0) -> None:
4589 self
.gatherer_type
= gatherer_type
4590 self
.ack
= initial_ack
4591 self
.event
= Event()
4592 self
.data
: Any
= None
4594 self
.recent_iteration_run_times
: List
[float] = [0.0, 0.0, 0.0]
4595 self
.recent_iteration_index
: int = 0
4596 super(AgentGatherer
, self
).__init
__(target
=self
.run
)
4598 def run(self
) -> None:
4599 while not self
.stop
:
4601 start_time
= time
.monotonic()
4603 ack
= self
.agent
.ack
4606 self
.data
, change
= self
.func()
4607 except Exception as e
:
4608 logger
.error(f
'{self.gatherer_type} Gatherer encountered exception gathering data: {e}')
4610 if ack
!= self
.ack
or change
:
4614 end_time
= time
.monotonic()
4615 run_time
= datetime
.timedelta(seconds
=(end_time
- start_time
))
4616 self
.recent_iteration_run_times
[self
.recent_iteration_index
] = run_time
.total_seconds()
4617 self
.recent_iteration_index
= (self
.recent_iteration_index
+ 1) % 3
4618 run_time_average
= sum(self
.recent_iteration_run_times
, 0.0) / len([t
for t
in self
.recent_iteration_run_times
if t
])
4620 self
.event
.wait(max(self
.agent
.loop_interval
- int(run_time_average
), 0))
4622 except Exception as e
:
4623 logger
.error(f
'{self.gatherer_type} Gatherer encountered exception: {e}')
4625 def shutdown(self
) -> None:
4628 def wakeup(self
) -> None:
4631 def update_func(self
, func
: Callable
) -> None:
4635 def command_agent(ctx
: CephadmContext
) -> None:
4636 agent
= CephadmAgent(ctx
, ctx
.fsid
, ctx
.daemon_id
)
4638 if not os
.path
.isdir(agent
.daemon_dir
):
4639 raise Error(f
'Agent daemon directory {agent.daemon_dir} does not exist. Perhaps agent was never deployed?')
4644 ##################################
4648 def command_version(ctx
):
4649 # type: (CephadmContext) -> int
4650 c
= CephContainer(ctx
, ctx
.image
, 'ceph', ['--version'])
4651 out
, err
, ret
= call(ctx
, c
.run_cmd(), desc
=c
.entrypoint
)
4656 ##################################
4660 def command_pull(ctx
):
4661 # type: (CephadmContext) -> int
4664 _pull_image(ctx
, ctx
.image
, ctx
.insecure
)
4665 except UnauthorizedRegistryError
:
4666 err_str
= 'Failed to pull container image. Check that host(s) are logged into the registry'
4667 logger
.debug(f
'Pulling image for `command_pull` failed: {err_str}')
4668 raise Error(err_str
)
4669 return command_inspect_image(ctx
)
4672 def _pull_image(ctx
, image
, insecure
=False):
4673 # type: (CephadmContext, str, bool) -> None
4674 logger
.info('Pulling container image %s...' % image
)
4677 'error creating read-write layer with ID',
4678 'net/http: TLS handshake timeout',
4679 'Digest did not match, expected',
4682 cmd
= [ctx
.container_engine
.path
, 'pull', image
]
4683 if isinstance(ctx
.container_engine
, Podman
):
4685 cmd
.append('--tls-verify=false')
4687 if os
.path
.exists('/etc/ceph/podman-auth.json'):
4688 cmd
.append('--authfile=/etc/ceph/podman-auth.json')
4689 cmd_str
= ' '.join(cmd
)
4691 for sleep_secs
in [1, 4, 25]:
4692 out
, err
, ret
= call(ctx
, cmd
, verbosity
=CallVerbosity
.QUIET_UNLESS_ERROR
)
4696 if 'unauthorized' in err
:
4697 raise UnauthorizedRegistryError()
4699 if not any(pattern
in err
for pattern
in ignorelist
):
4700 raise Error('Failed command: %s' % cmd_str
)
4702 logger
.info('`%s` failed transiently. Retrying. waiting %s seconds...' % (cmd_str
, sleep_secs
))
4703 time
.sleep(sleep_secs
)
4705 raise Error('Failed command: %s: maximum retries reached' % cmd_str
)
4707 ##################################
4711 def command_inspect_image(ctx
):
4712 # type: (CephadmContext) -> int
4713 out
, err
, ret
= call_throws(ctx
, [
4714 ctx
.container_engine
.path
, 'inspect',
4715 '--format', '{{.ID}},{{.RepoDigests}}',
4719 info_from
= get_image_info_from_inspect(out
.strip(), ctx
.image
)
4721 ver
= CephContainer(ctx
, ctx
.image
, 'ceph', ['--version']).run().strip()
4722 info_from
['ceph_version'] = ver
4724 print(json
.dumps(info_from
, indent
=4, sort_keys
=True))
def normalize_image_digest(digest: str) -> str:
    """Qualify well-known short image names with DEFAULT_REGISTRY.

    NOTE: the doctests previously called this function with a second
    registry argument that the signature does not accept; they are fixed
    to match the actual one-argument interface (DEFAULT_REGISTRY is
    'docker.io').

    Normal case:
    >>> normalize_image_digest('ceph/ceph')
    'docker.io/ceph/ceph'

    No change:
    >>> normalize_image_digest('quay.ceph.io/ceph/ceph')
    'quay.ceph.io/ceph/ceph'

    >>> normalize_image_digest('docker.io/ubuntu')
    'docker.io/ubuntu'

    >>> normalize_image_digest('localhost/ceph')
    'localhost/ceph'
    """
    # NOTE(review): interior of this list was lost in extraction; these are
    # the conventional ceph shortnames — confirm against upstream.
    known_shortnames = [
        'ceph/ceph',
        'ceph/daemon',
        'ceph/daemon-base',
    ]
    for image in known_shortnames:
        if digest.startswith(image):
            return f'{DEFAULT_REGISTRY}/{digest}'
    return digest
def get_image_info_from_inspect(out, image):
    # type: (str, str) -> Dict[str, Union[str, List[str]]]
    """Parse ``<engine> inspect --format {{.ID}},{{.RepoDigests}}`` output.

    :param out: raw inspect output of the form ``image_id,[digest ...]``
    :param image: image name, used only in the error message
    :raises Error: when the inspect output is empty
    """
    # Bug fix: the emptiness check must run BEFORE the split; on empty
    # input the tuple unpacking raised ValueError before Error was raised.
    if not out:
        raise Error('inspect {}: empty result'.format(image))
    image_id, digests = out.split(',', 1)
    r = {
        'image_id': normalize_container_id(image_id)
    }  # type: Dict[str, Union[str, List[str]]]
    if digests:
        # digests is rendered as '[d1 d2 ...]' — strip brackets, split on space
        r['repo_digests'] = list(map(normalize_image_digest, digests[1:-1].split(' ')))
    return r
4767 ##################################
def check_subnet(subnets: str) -> Tuple[int, List[int], str]:
    """Determine whether the given string is a valid subnet

    :param subnets: subnet string, a single definition or comma separated list of CIDR subnets
    :returns: return code, IP version list of the subnets and msg describing any errors validation errors
    """
    rc = 0
    versions: Set[int] = set()
    errors: List[str] = []

    for candidate in (s.strip() for s in subnets.split(',')):
        # ensure the format of the string is as expected address/netmask
        if re.search(r'\/\d+$', candidate) is None:
            rc = 1
            errors.append(f'{candidate} is not in CIDR format (address/netmask)')
            continue
        try:
            versions.add(ipaddress.ip_network(candidate).version)
        except ValueError as exc:
            rc = 1
            errors.append(f'{candidate} invalid: {str(exc)}')

    return rc, list(versions), ', '.join(errors)
def unwrap_ipv6(address):
    # type: (str) -> str
    """Strip surrounding square brackets from a bracketed IPv6 address."""
    is_bracketed = address.startswith('[') and address.endswith(']')
    return address[1:-1] if is_bracketed else address
def wrap_ipv6(address):
    # type: (str) -> str
    """Wrap an IPv6 address in square brackets; return others unchanged.

    We cannot assume it's already wrapped or even an IPv6 address: if it is
    already wrapped (or is a hostname) the parse fails with ValueError and
    the input is returned as-is.
    """
    try:
        parsed = ipaddress.ip_address(address)
    except ValueError:
        return address
    return f'[{address}]' if parsed.version == 6 else address
def is_ipv6(address):
    # type: (str) -> bool
    """Return True when *address* (optionally bracketed) is a valid IPv6 address."""
    candidate = unwrap_ipv6(address)
    try:
        return ipaddress.ip_address(candidate).version == 6
    except ValueError:
        logger.warning('Address: {} is not a valid IP address'.format(candidate))
        return False
def ip_in_subnets(ip_addr: str, subnets: str) -> bool:
    """Determine if the ip_addr belongs to any of the subnets list.

    :param ip_addr: IP address (IPv6 may be bracketed)
    :param subnets: comma-separated list of CIDR subnets
    """
    # Hoisted out of the loop: the unwrapped/parsed address does not
    # depend on the subnet being tested.
    unwrapped = unwrap_ipv6(ip_addr) if is_ipv6(ip_addr) else ip_addr
    addr_obj = ipaddress.ip_address(unwrapped)
    for subnet in (x.strip() for x in subnets.split(',')):
        if addr_obj in ipaddress.ip_network(subnet):
            return True
    return False
def parse_mon_addrv(addrv_arg: str) -> List[EndPoint]:
    """Parse mon-addrv param into a list of mon end points.

    :param addrv_arg: bracketed comma-separated list such as
        ``[v2:1.2.3.4:3300,v1:1.2.3.4:6789]``; every entry must carry a port.
    :raises Error: if the value is not bracketed or an entry lacks a port.
    """
    port_re = re.compile(r':(\d+)$')
    addrv_args = []
    addr_arg = addrv_arg
    # Guard against empty input: previously addr_arg[0] raised IndexError
    # instead of a user-facing Error. Also fixes 'backets' typo.
    if not addr_arg or addr_arg[0] != '[' or addr_arg[-1] != ']':
        raise Error(f'--mon-addrv value {addr_arg} must use square brackets')

    for addr in addr_arg[1:-1].split(','):
        hasport = port_re.findall(addr)
        if not hasport:
            raise Error(f'--mon-addrv value {addr_arg} must include port number')
        port_str = hasport[0]
        addr = re.sub(r'^v\d+:', '', addr)  # strip off v1: or v2: prefix
        base_ip = addr[0:-(len(port_str)) - 1]
        addrv_args.append(EndPoint(base_ip, int(port_str)))

    return addrv_args
def parse_mon_ip(mon_ip: str) -> List[EndPoint]:
    """Parse mon-ip param into a list of mon end points."""
    endpoints: List[EndPoint] = []
    port_match = re.compile(r':(\d+)$').findall(mon_ip)
    if port_match:
        # explicit port: a single endpoint
        port_str = port_match[0]
        host_part = mon_ip[0:-(len(port_str)) - 1]
        endpoints.append(EndPoint(host_part, int(port_str)))
    else:
        # No port provided: use fixed ports for ceph monitor
        endpoints.append(EndPoint(mon_ip, 3300))
        endpoints.append(EndPoint(mon_ip, 6789))
    return endpoints
def build_addrv_params(addrv: List[EndPoint]) -> str:
    """Convert mon end-points (ip:port) into the format: [v[1|2]:ip:port1]"""
    if len(addrv) > 2:
        raise Error('Detected a local mon-addrv list with more than 2 entries.')
    port_to_ver: Dict[int, str] = {6789: 'v1', 3300: 'v2'}
    addr_arg_list: List[str] = []
    for ep in addrv:
        ver = port_to_ver.get(ep.port)
        if ver is None:
            ver = 'v2'  # default mon protocol version if port is not provided
            logger.warning(f'Using msgr2 protocol for unrecognized port {ep}')
        addr_arg_list.append(f'{ver}:{ep.ip}:{ep.port}')

    return '[{0}]'.format(','.join(addr_arg_list))
4895 def get_public_net_from_cfg(ctx
: CephadmContext
) -> Optional
[str]:
4896 """Get mon public network from configuration file."""
4897 cp
= read_config(ctx
.config
)
4898 if not cp
.has_option('global', 'public_network'):
4901 # Ensure all public CIDR networks are valid
4902 public_network
= cp
.get('global', 'public_network').strip('"').strip("'")
4903 rc
, _
, err_msg
= check_subnet(public_network
)
4905 raise Error(f
'Invalid public_network {public_network} parameter: {err_msg}')
4907 # Ensure all public CIDR networks are configured locally
4908 configured_subnets
= set([x
.strip() for x
in public_network
.split(',')])
4909 local_subnets
= set([x
[0] for x
in list_networks(ctx
).items()])
4910 valid_public_net
= False
4911 for net
in configured_subnets
:
4912 if net
in local_subnets
:
4913 valid_public_net
= True
4915 logger
.warning(f
'The public CIDR network {net} (from -c conf file) is not configured locally.')
4916 if not valid_public_net
:
4917 raise Error(f
'None of the public CIDR network(s) {configured_subnets} (from -c conf file) is configured locally.')
4919 # Ensure public_network is compatible with the provided mon-ip (or mon-addrv)
4921 if not ip_in_subnets(ctx
.mon_ip
, public_network
):
4922 raise Error(f
'The provided --mon-ip {ctx.mon_ip} does not belong to any public_network(s) {public_network}')
4924 addrv_args
= parse_mon_addrv(ctx
.mon_addrv
)
4925 for addrv
in addrv_args
:
4926 if not ip_in_subnets(addrv
.ip
, public_network
):
4927 raise Error(f
'The provided --mon-addrv {addrv.ip} ip does not belong to any public_network(s) {public_network}')
4929 logger
.debug(f
'Using mon public network from configuration file {public_network}')
4930 return public_network
4933 def infer_mon_network(ctx
: CephadmContext
, mon_eps
: List
[EndPoint
]) -> Optional
[str]:
4934 """Infer mon public network from local network."""
4935 # Make sure IP is configured locally, and then figure out the CIDR network
4937 for net
, ifaces
in list_networks(ctx
).items():
4938 # build local_ips list for the specified network
4939 local_ips
: List
[str] = []
4940 for _
, ls
in ifaces
.items():
4941 local_ips
.extend([ipaddress
.ip_address(ip
) for ip
in ls
])
4943 # check if any of mon ips belong to this net
4944 for mon_ep
in mon_eps
:
4946 if ipaddress
.ip_address(unwrap_ipv6(mon_ep
.ip
)) in local_ips
:
4947 mon_networks
.append(net
)
4948 logger
.info(f
'Mon IP `{mon_ep.ip}` is in CIDR network `{net}`')
4949 except ValueError as e
:
4950 logger
.warning(f
'Cannot infer CIDR network for mon IP `{mon_ep.ip}` : {e}')
4952 if not mon_networks
:
4953 raise Error('Cannot infer CIDR network. Pass --skip-mon-network to configure it later')
4955 logger
.debug(f
'Inferred mon public CIDR from local network configuration {mon_networks}')
4957 mon_networks
= list(set(mon_networks
)) # remove duplicates
4958 return ','.join(mon_networks
)
4961 def prepare_mon_addresses(ctx
: CephadmContext
) -> Tuple
[str, bool, Optional
[str]]:
4962 """Get mon public network configuration."""
4964 addrv_args
: List
[EndPoint
] = []
4965 mon_addrv
: str = '' # i.e: [v2:192.168.100.1:3300,v1:192.168.100.1:6789]
4968 ipv6
= is_ipv6(ctx
.mon_ip
)
4970 ctx
.mon_ip
= wrap_ipv6(ctx
.mon_ip
)
4971 addrv_args
= parse_mon_ip(ctx
.mon_ip
)
4972 mon_addrv
= build_addrv_params(addrv_args
)
4974 ipv6
= ctx
.mon_addrv
.count('[') > 1
4975 addrv_args
= parse_mon_addrv(ctx
.mon_addrv
)
4976 mon_addrv
= ctx
.mon_addrv
4978 raise Error('must specify --mon-ip or --mon-addrv')
4981 for end_point
in addrv_args
:
4982 check_ip_port(ctx
, end_point
)
4984 logger
.debug(f
'Base mon IP(s) is {addrv_args}, mon addrv is {mon_addrv}')
4986 if not ctx
.skip_mon_network
:
4987 mon_network
= get_public_net_from_cfg(ctx
) or infer_mon_network(ctx
, addrv_args
)
4989 return (mon_addrv
, ipv6
, mon_network
)
def prepare_cluster_network(ctx: CephadmContext) -> Tuple[str, bool]:
    """Validate and return the cluster (replication) network, if any.

    The cluster network may not exist on this node, so all we can do is
    validate that the address given is a valid ipv4 or ipv6 subnet.

    :returns: (cluster_network, ipv6_cluster_network) tuple
    :raises Error: if the configured cluster network is not a valid subnet
    """
    ipv6_cluster_network = False
    cp = read_config(ctx.config)
    # CLI flag wins; fall back to the conf file's global/cluster_network
    cluster_network = ctx.cluster_network
    if cluster_network is None and cp.has_option('global', 'cluster_network'):
        cluster_network = cp.get('global', 'cluster_network').strip('"').strip("'")

    if cluster_network:
        # fixed local-name typo: 'cluser_nets' -> 'cluster_nets'
        cluster_nets = set([x.strip() for x in cluster_network.split(',')])
        local_subnets = set([x[0] for x in list_networks(ctx).items()])
        for net in cluster_nets:
            if net not in local_subnets:
                logger.warning(f'The cluster CIDR network {net} is not configured locally.')

        rc, versions, err_msg = check_subnet(cluster_network)
        if rc:
            raise Error(f'Invalid --cluster-network parameter: {err_msg}')
        # simpler than 'True if 6 in versions else False'
        ipv6_cluster_network = 6 in versions
    else:
        logger.info('Internal network (--cluster-network) has not '
                    'been provided, OSD replication will default to '
                    'the public_network')

    return cluster_network, ipv6_cluster_network
5020 def create_initial_keys(
5021 ctx
: CephadmContext
,
5024 ) -> Tuple
[str, str, str, Any
, Any
]: # type: ignore
5028 # create some initial keys
5029 logger
.info('Creating initial keys...')
5030 mon_key
= CephContainer(
5033 entrypoint
='/usr/bin/ceph-authtool',
5034 args
=['--gen-print-key'],
5036 admin_key
= CephContainer(
5039 entrypoint
='/usr/bin/ceph-authtool',
5040 args
=['--gen-print-key'],
5042 mgr_key
= CephContainer(
5045 entrypoint
='/usr/bin/ceph-authtool',
5046 args
=['--gen-print-key'],
5049 keyring
= ('[mon.]\n'
5051 '\tcaps mon = allow *\n'
5054 '\tcaps mon = allow *\n'
5055 '\tcaps mds = allow *\n'
5056 '\tcaps mgr = allow *\n'
5057 '\tcaps osd = allow *\n'
5060 '\tcaps mon = profile mgr\n'
5061 '\tcaps mds = allow *\n'
5062 '\tcaps osd = allow *\n'
5063 % (mon_key
, admin_key
, mgr_id
, mgr_key
))
5065 admin_keyring
= write_tmp('[client.admin]\n'
5066 '\tkey = ' + admin_key
+ '\n',
5070 bootstrap_keyring
= write_tmp(keyring
, uid
, gid
)
5071 return (mon_key
, mgr_key
, admin_key
,
5072 bootstrap_keyring
, admin_keyring
)
5075 def create_initial_monmap(
5076 ctx
: CephadmContext
,
5079 mon_id
: str, mon_addr
: str
5081 logger
.info('Creating initial monmap...')
5082 monmap
= write_tmp('', 0, 0)
5083 out
= CephContainer(
5086 entrypoint
='/usr/bin/monmaptool',
5091 '--addv', mon_id
, mon_addr
,
5095 monmap
.name
: '/tmp/monmap:z',
5098 logger
.debug(f
'monmaptool for {mon_id} {mon_addr} on {out}')
5100 # pass monmap file to ceph user for use by ceph-mon --mkfs below
5101 os
.fchown(monmap
.fileno(), uid
, gid
)
5105 def prepare_create_mon(
5106 ctx
: CephadmContext
,
5108 fsid
: str, mon_id
: str,
5109 bootstrap_keyring_path
: str,
5111 ) -> Tuple
[str, str]:
5112 logger
.info('Creating mon...')
5113 create_daemon_dirs(ctx
, fsid
, 'mon', mon_id
, uid
, gid
)
5114 mon_dir
= get_data_dir(fsid
, ctx
.data_dir
, 'mon', mon_id
)
5115 log_dir
= get_log_dir(fsid
, ctx
.log_dir
)
5116 out
= CephContainer(
5119 entrypoint
='/usr/bin/ceph-mon',
5125 '--monmap', '/tmp/monmap',
5126 '--keyring', '/tmp/keyring',
5127 ] + get_daemon_args(ctx
, fsid
, 'mon', mon_id
),
5129 log_dir
: '/var/log/ceph:z',
5130 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id
),
5131 bootstrap_keyring_path
: '/tmp/keyring:z',
5132 monmap_path
: '/tmp/monmap:z',
5135 logger
.debug(f
'create mon.{mon_id} on {out}')
5136 return (mon_dir
, log_dir
)
5140 ctx
: CephadmContext
,
5142 fsid
: str, mon_id
: str
5144 mon_c
= get_container(ctx
, fsid
, 'mon', mon_id
)
5145 ctx
.meta_json
= json
.dumps({'service_name': 'mon'})
5146 deploy_daemon(ctx
, fsid
, 'mon', mon_id
, mon_c
, uid
, gid
,
5147 config
=None, keyring
=None)
5151 ctx
: CephadmContext
,
5152 mon_id
: str, mon_dir
: str,
5153 admin_keyring_path
: str, config_path
: str
5155 logger
.info('Waiting for mon to start...')
5159 entrypoint
='/usr/bin/ceph',
5163 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id
),
5164 admin_keyring_path
: '/etc/ceph/ceph.client.admin.keyring:z',
5165 config_path
: '/etc/ceph/ceph.conf:z',
5169 # wait for the service to become available
5170 def is_mon_available():
5172 timeout
= ctx
.timeout
if ctx
.timeout
else 60 # seconds
5173 out
, err
, ret
= call(ctx
, c
.run_cmd(),
5176 verbosity
=CallVerbosity
.QUIET_UNLESS_ERROR
)
5179 is_available(ctx
, 'mon', is_mon_available
)
5183 ctx
: CephadmContext
,
5185 fsid
: str, mgr_id
: str, mgr_key
: str,
5186 config
: str, clifunc
: Callable
5188 logger
.info('Creating mgr...')
5189 mgr_keyring
= '[mgr.%s]\n\tkey = %s\n' % (mgr_id
, mgr_key
)
5190 mgr_c
= get_container(ctx
, fsid
, 'mgr', mgr_id
)
5191 # Note:the default port used by the Prometheus node exporter is opened in fw
5192 ctx
.meta_json
= json
.dumps({'service_name': 'mgr'})
5193 deploy_daemon(ctx
, fsid
, 'mgr', mgr_id
, mgr_c
, uid
, gid
,
5194 config
=config
, keyring
=mgr_keyring
, ports
=[9283])
5196 # wait for the service to become available
5197 logger
.info('Waiting for mgr to start...')
5199 def is_mgr_available():
5201 timeout
= ctx
.timeout
if ctx
.timeout
else 60 # seconds
5203 out
= clifunc(['status', '-f', 'json-pretty'],
5205 verbosity
=CallVerbosity
.QUIET_UNLESS_ERROR
)
5207 return j
.get('mgrmap', {}).get('available', False)
5208 except Exception as e
:
5209 logger
.debug('status failed: %s' % e
)
5211 is_available(ctx
, 'mgr', is_mgr_available
)
5215 ctx
: CephadmContext
,
5216 cli
: Callable
, wait_for_mgr_restart
: Callable
5219 cli(['cephadm', 'set-user', ctx
.ssh_user
])
5222 logger
.info('Using provided ssh config...')
5224 pathify(ctx
.ssh_config
.name
): '/tmp/cephadm-ssh-config:z',
5226 cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts
=mounts
)
5228 if ctx
.ssh_private_key
and ctx
.ssh_public_key
:
5229 logger
.info('Using provided ssh keys...')
5231 pathify(ctx
.ssh_private_key
.name
): '/tmp/cephadm-ssh-key:z',
5232 pathify(ctx
.ssh_public_key
.name
): '/tmp/cephadm-ssh-key.pub:z'
5234 cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts
=mounts
)
5235 cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts
=mounts
)
5236 ssh_pub
= cli(['cephadm', 'get-pub-key'])
5238 logger
.info('Generating ssh key...')
5239 cli(['cephadm', 'generate-key'])
5240 ssh_pub
= cli(['cephadm', 'get-pub-key'])
5241 with
open(ctx
.output_pub_ssh_key
, 'w') as f
:
5243 logger
.info('Wrote public SSH key to %s' % ctx
.output_pub_ssh_key
)
5245 authorize_ssh_key(ssh_pub
, ctx
.ssh_user
)
5247 host
= get_hostname()
5248 logger
.info('Adding host %s...' % host
)
5250 args
= ['orch', 'host', 'add', host
]
5252 args
.append(unwrap_ipv6(ctx
.mon_ip
))
5254 addrv_args
= parse_mon_addrv(ctx
.mon_addrv
)
5255 args
.append(unwrap_ipv6(addrv_args
[0].ip
))
5257 except RuntimeError as e
:
5258 raise Error('Failed to add host <%s>: %s' % (host
, e
))
5260 for t
in ['mon', 'mgr']:
5261 if not ctx
.orphan_initial_daemons
:
5262 logger
.info('Deploying %s service with default placement...' % t
)
5263 cli(['orch', 'apply', t
])
5265 logger
.info('Deploying unmanaged %s service...' % t
)
5266 cli(['orch', 'apply', t
, '--unmanaged'])
5268 if not ctx
.orphan_initial_daemons
:
5269 logger
.info('Deploying crash service with default placement...')
5270 cli(['orch', 'apply', 'crash'])
5272 if not ctx
.skip_monitoring_stack
:
5273 for t
in ['ceph-exporter', 'prometheus', 'grafana', 'node-exporter', 'alertmanager']:
5274 logger
.info('Deploying %s service with default placement...' % t
)
5275 cli(['orch', 'apply', t
])
5277 if ctx
.with_centralized_logging
:
5278 for t
in ['loki', 'promtail']:
5279 logger
.info('Deploying %s service with default placement...' % t
)
5280 cli(['orch', 'apply', t
])
def enable_cephadm_mgr_module(
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:
    """Enable the cephadm mgr module and select it as the orchestrator backend.

    :param cli: callable that runs a ceph CLI command given as a list of args.
    :param wait_for_mgr_restart: callable that blocks until the mgr has come
        back after the module enable.
    """
    logger.info('Enabling cephadm module...')
    cli(['mgr', 'module', 'enable', 'cephadm'])
    # enabling a module restarts the mgr; wait before issuing further commands
    wait_for_mgr_restart()
    logger.info('Setting orchestrator backend to cephadm...')
    cli(['orch', 'set', 'backend', 'cephadm'])
def prepare_dashboard(
    ctx: CephadmContext,
    # [elided in excerpt: additional parameter line(s) and close of signature]
    cli: Callable, wait_for_mgr_restart: Callable
    # Enable and configure the mgr dashboard module during bootstrap:
    # SSL port/cert, initial admin user, and firewall opening.

    # Configure SSL port (cephadm only allows to configure dashboard SSL port)
    # if the user does not want to use SSL he can change this setting once the cluster is up
    cli(['config', 'set', 'mgr', 'mgr/dashboard/ssl_server_port', str(ctx.ssl_dashboard_port)])

    # configuring dashboard parameters
    logger.info('Enabling the dashboard module...')
    cli(['mgr', 'module', 'enable', 'dashboard'])
    # enabling a module respawns the mgr; wait before issuing dashboard commands
    wait_for_mgr_restart()

    # dashboard crt and key
    if ctx.dashboard_key and ctx.dashboard_crt:
        logger.info('Using provided dashboard certificate...')
        # mount user-supplied cert/key into the container (':z' = SELinux relabel)
        # [elided in excerpt: opener of the mounts mapping]
        pathify(ctx.dashboard_crt.name): '/tmp/dashboard.crt:z',
        pathify(ctx.dashboard_key.name): '/tmp/dashboard.key:z'
        cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts)
        cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts)
        # [elided in excerpt: alternate branch header]
        logger.info('Generating a dashboard self-signed certificate...')
        cli(['dashboard', 'create-self-signed-cert'])

    logger.info('Creating initial admin user...')
    # fall back to a generated password if none was supplied on the command line
    password = ctx.initial_dashboard_password or generate_password()
    # NOTE(review): uid/gid used below come from elided signature lines — confirm
    tmp_password_file = write_tmp(password, uid, gid)
    cmd = ['dashboard', 'ac-user-create', ctx.initial_dashboard_user, '-i', '/tmp/dashboard.pw', 'administrator', '--force-password']
    if not ctx.dashboard_password_noupdate:
        # require the user to change the bootstrap password at first login
        cmd.append('--pwd-update-required')
    cli(cmd, extra_mounts={pathify(tmp_password_file.name): '/tmp/dashboard.pw:z'})
    logger.info('Fetching dashboard port number...')
    out = cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
    # [elided in excerpt: parsing of 'out' into the port value]

    # Open dashboard port
    if not ('skip_firewalld' in ctx and ctx.skip_firewalld):
        # [elided in excerpt: firewall helper construction]
        fw.open_ports([port])

    # final banner with connection details for the freshly created dashboard
    logger.info('Ceph Dashboard is now available at:\n\n'
                '\t URL: https://%s:%s/\n'
                '\tPassword: %s\n' % (
                    ctx.initial_dashboard_user,
                    # [elided in excerpt: remaining format arguments]
def prepare_bootstrap_config(
    ctx: CephadmContext,
    fsid: str, mon_addr: str, image: str
    # [elided in excerpt: close of signature]
    # Assemble the initial ceph.conf used to bring up the first mon/mgr,
    # starting from any user-supplied config file.

    cp = read_config(ctx.config)
    if not cp.has_section('global'):
        cp.add_section('global')
    cp.set('global', 'fsid', fsid)
    cp.set('global', 'mon_host', mon_addr)
    cp.set('global', 'container_image', image)

    if not cp.has_section('mon'):
        cp.add_section('mon')
    # only set the option when the user did not already set it (in either
    # underscore or space spelling — ceph accepts both)
    # [elided in excerpt: condition opener]
        not cp.has_option('mon', 'auth_allow_insecure_global_id_reclaim')
        and not cp.has_option('mon', 'auth allow insecure global id reclaim')
        cp.set('mon', 'auth_allow_insecure_global_id_reclaim', 'false')

    if ctx.single_host_defaults:
        logger.info('Adjusting default settings to suit single-host cluster...')
        # replicate across osds, not hosts
        # [elided in excerpt: condition opener]
        not cp.has_option('global', 'osd_crush_chooseleaf_type')
        and not cp.has_option('global', 'osd crush chooseleaf type')
        cp.set('global', 'osd_crush_chooseleaf_type', '0')
        # [elided in excerpt: condition opener]
        not cp.has_option('global', 'osd_pool_default_size')
        and not cp.has_option('global', 'osd pool default size')
        cp.set('global', 'osd_pool_default_size', '2')
        # disable mgr standby modules (so we can colocate multiple mgrs on one host)
        if not cp.has_section('mgr'):
            cp.add_section('mgr')
        # [elided in excerpt: condition opener]
        not cp.has_option('mgr', 'mgr_standby_modules')
        and not cp.has_option('mgr', 'mgr standby modules')
        cp.set('mgr', 'mgr_standby_modules', 'false')
    # [elided in excerpt: guard line(s) preceding the log settings]
    # route cluster logging to files instead of stderr/journald
    cp.set('global', 'log_to_file', 'true')
    cp.set('global', 'log_to_stderr', 'false')
    cp.set('global', 'log_to_journald', 'false')
    cp.set('global', 'mon_cluster_log_to_file', 'true')
    cp.set('global', 'mon_cluster_log_to_stderr', 'false')
    cp.set('global', 'mon_cluster_log_to_journald', 'false')

    # serialize the assembled config to a string
    # [elided in excerpt: StringIO buffer creation and cp.write(...)]
    config = cpf.getvalue()

    # log into the custom registry (if configured) before images are pulled
    if ctx.registry_json or ctx.registry_url:
        command_registry_login(ctx)

    # [elided in excerpt: return of the serialized config]
def finish_bootstrap_config(
    ctx: CephadmContext,
    # [elided in excerpt: parameter line(s), e.g. fsid/config/cli]
    mon_id: str, mon_dir: str,
    mon_network: Optional[str], ipv6: bool,
    # [elided in excerpt]
    cluster_network: Optional[str], ipv6_cluster_network: bool
    # [elided in excerpt: close of signature]
    # Minimize and persist the bootstrap ceph.conf, set network options,
    # and write the final config to ctx.output_config.
    if not ctx.no_minimize_config:
        logger.info('Assimilating anything we can from ceph.conf...')
        # [elided in excerpt: cli(...) call opener]
            'config', 'assimilate-conf',
            '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
            # mount the mon data dir so the container can read/write the config
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
        logger.info('Generating new minimal ceph.conf...')
        # [elided in excerpt: cli(...) call opener]
            'config', 'generate-minimal-conf',
            '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
        # re-read our minimized config
        with open(mon_dir + '/config', 'r') as f:
            # [elided in excerpt: read of the minimized config]
        logger.info('Restarting the monitor...')
        # [elided in excerpt: systemd restart call opener]
            get_unit_name(fsid, 'mon', mon_id)
    elif 'image' in ctx and ctx.image:
        # we still want to assimilate the given container image if provided
        cli(['config', 'set', 'global', 'container_image', f'{ctx.image}'])

    # [elided in excerpt: guard line]
        logger.info(f'Setting mon public_network to {mon_network}')
        cli(['config', 'set', 'mon', 'public_network', mon_network])

    # [elided in excerpt: guard line]
        logger.info(f'Setting cluster_network to {cluster_network}')
        cli(['config', 'set', 'global', 'cluster_network', cluster_network])

    if ipv6 or ipv6_cluster_network:
        logger.info('Enabling IPv6 (ms_bind_ipv6) binding')
        cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])

    # persist the (minimized) config where the caller asked for it
    with open(ctx.output_config, 'w') as f:
        # [elided in excerpt: write of the config contents]
    logger.info('Wrote config to %s' % ctx.output_config)
def _extract_host_info_from_applied_spec(f: Iterable[str]) -> List[Dict[str, str]]:
    # overall goal of this function is to go through an applied spec and find
    # the hostname (and addr is provided) for each host spec in the applied spec.
    # Generally, we should be able to just pass the spec to the mgr module where
    # proper yaml parsing can happen, but for host specs in particular we want to
    # be able to distribute ssh keys, which requires finding the hostname (and addr
    # if possible) for each potential host spec in the applied spec.

    # split the multi-document YAML stream into per-document line lists
    specs: List[List[str]] = []
    current_spec: List[str] = []
    # [elided in excerpt: loop header over the lines of f]
        # '---' separates YAML documents; flush the spec collected so far
        if re.search(r'^---\s+', line):
            specs.append(current_spec)
            # [elided in excerpt: reset of current_spec]
        # [elided in excerpt: alternate branch]
            current_spec.append(line)
    # flush the final (unterminated) document
    specs.append(current_spec)

    # keep only documents whose service_type identifies a host spec
    host_specs: List[List[str]] = []
    # [elided in excerpt: loop header(s) over specs/lines]
        if 'service_type' in line:
            # [elided in excerpt: try opener]
                # NOTE(review): 'type' shadows the builtin here (visible token)
                _, type = line.split(':')
                # [elided in excerpt: strip/comparison of the parsed type]
                host_specs.append(spec)
            except ValueError as e:
                spec_str = '\n'.join(spec)
                logger.error(f'Failed to pull service_type from spec:\n{spec_str}. Got error: {e}')
        # [elided in excerpt: alternate branch]
            spec_str = '\n'.join(spec)
            logger.error(f'Failed to find service_type within spec:\n{spec_str}')

    # [elided in excerpt: host_dicts initialization]
    for s in host_specs:
        host_dict = _extract_host_info_from_spec(s)
        # if host_dict is empty here, we failed to pull the hostname
        # for the host from the spec. This should have already been logged
        # so at this point we just don't want to include it in our output
        # [elided in excerpt: emptiness guard]
            host_dicts.append(host_dict)
    # [elided in excerpt: return of host_dicts]
def _extract_host_info_from_spec(host_spec: List[str]) -> Dict[str, str]:
    # note:for our purposes here, we only really want the hostname
    # and address of the host from each of these specs in order to
    # be able to distribute ssh keys. We will later apply the spec
    # through the mgr module where proper yaml parsing can be done
    # The returned dicts from this function should only contain
    # one or two entries, one (required) for hostname, one (optional) for addr
    # hostname: <hostname>
    # if we fail to find the hostname, an empty dict is returned

    host_dict = {}  # type: Dict[str, str]
    for line in host_spec:
        for field in ['hostname', 'addr']:
            # [elided in excerpt: field-presence guard and try opener]
                # 'field: value' — keep only the value part
                _, field_value = line.split(':')
                field_value = field_value.strip()
                host_dict[field] = field_value
            except ValueError as e:
                spec_str = '\n'.join(host_spec)
                logger.error(f'Error trying to pull {field} from host spec:\n{spec_str}. Got error: {e}')

    # hostname is mandatory; log (and presumably bail) when it is missing
    if 'hostname' not in host_dict:
        spec_str = '\n'.join(host_spec)
        logger.error(f'Could not find hostname in host spec:\n{spec_str}')
        # [elided in excerpt: early return / return of host_dict]
def _distribute_ssh_keys(ctx: CephadmContext, host_info: Dict[str, str], bootstrap_hostname: str) -> int:
    # copy ssh key to hosts in host spec (used for apply spec)
    # Returns an int status (per the signature); the bootstrap host itself is skipped.
    ssh_key = CEPH_DEFAULT_PUBKEY
    if ctx.ssh_public_key:
        # prefer a user-supplied public key over the default location
        ssh_key = ctx.ssh_public_key.name

    if bootstrap_hostname != host_info['hostname']:
        # prefer the explicit addr when the spec provided one
        if 'addr' in host_info:
            addr = host_info['addr']
        # [elided in excerpt: alternate branch]
            addr = host_info['hostname']
        # run ssh-copy-id as the configured ssh user; '-f' forces installation,
        # StrictHostKeyChecking=no avoids an interactive prompt on first contact
        out, err, code = call(ctx, ['sudo', '-u', ctx.ssh_user, 'ssh-copy-id', '-f', '-i', ssh_key, '-o StrictHostKeyChecking=no', '%s@%s' % (ctx.ssh_user, addr)])
        # [elided in excerpt: exit-code guard]
            logger.error('\nCopying ssh key to host %s at address %s failed!\n' % (host_info['hostname'], addr))
            # [elided in excerpt: error return]
        # [elided in excerpt: alternate branch]
            logger.info('Added ssh key to host %s at address %s' % (host_info['hostname'], addr))
    # [elided in excerpt: success return]
def save_cluster_config(ctx: CephadmContext, uid: int, gid: int, fsid: str) -> None:
    """Save cluster configuration to the per fsid directory """
    def copy_file(src: str, dst: str) -> None:
        # small helper so each copy below stays a one-liner
        # [elided in excerpt: line(s) inside helper]
        shutil.copyfile(src, dst)

    conf_dir = f'{ctx.data_dir}/{fsid}/{CEPH_CONF_DIR}'
    makedirs(conf_dir, uid, gid, DATA_DIR_MODE)
    if os.path.exists(conf_dir):
        logger.info(f'Saving cluster configuration to {conf_dir} directory')
        copy_file(ctx.output_config, os.path.join(conf_dir, CEPH_CONF))
        copy_file(ctx.output_keyring, os.path.join(conf_dir, CEPH_KEYRING))
        # ctx.output_pub_ssh_key may not exist if user has provided custom ssh keys
        if (os.path.exists(ctx.output_pub_ssh_key)):
            copy_file(ctx.output_pub_ssh_key, os.path.join(conf_dir, CEPH_PUBKEY))
    # [elided in excerpt: alternate branch]
        logger.warning(f'Cannot create cluster configuration directory {conf_dir}')
def command_bootstrap(ctx):
    # type: (CephadmContext) -> int
    # Bootstrap a new Ceph cluster on this host: create mon+mgr, enable the
    # cephadm orchestrator, optionally the dashboard, and write out
    # config/keyring/ssh key files. Returns ctx.error_code.
    # [elided in excerpt: initial setup line(s)]

    # default output paths when not given on the command line
    if not ctx.output_config:
        ctx.output_config = os.path.join(ctx.output_dir, CEPH_CONF)
    if not ctx.output_keyring:
        ctx.output_keyring = os.path.join(ctx.output_dir, CEPH_KEYRING)
    if not ctx.output_pub_ssh_key:
        ctx.output_pub_ssh_key = os.path.join(ctx.output_dir, CEPH_PUBKEY)

    # keys must be supplied as a pair (XOR check via bool comparison)
    if bool(ctx.ssh_private_key) is not bool(ctx.ssh_public_key):
        raise Error('--ssh-private-key and --ssh-public-key must be provided together or not at all.')

    # [elided in excerpt: guard around the user-specified fsid checks]
    data_dir_base = os.path.join(ctx.data_dir, ctx.fsid)
    if os.path.exists(data_dir_base):
        raise Error(f"A cluster with the same fsid '{ctx.fsid}' already exists.")
    # [elided in excerpt: alternate branch]
        logger.warning('Specifying an fsid for your cluster offers no advantages and may increase the likelihood of fsid conflicts.')

    # verify output files
    for f in [ctx.output_config, ctx.output_keyring,
              ctx.output_pub_ssh_key]:
        if not ctx.allow_overwrite:
            if os.path.exists(f):
                raise Error('%s already exists; delete or pass '
                            '--allow-overwrite to overwrite' % f)
        dirname = os.path.dirname(f)
        if dirname and not os.path.exists(dirname):
            fname = os.path.basename(f)
            logger.info(f'Creating directory {dirname} for {fname}')
            # [elided in excerpt: try opener]
                # use makedirs to create intermediate missing dirs
                os.makedirs(dirname, 0o755)
            except PermissionError:
                raise Error(f'Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.')

    (user_conf, _) = get_config_and_keyring(ctx)

    # passwordless ssh must work for non-root ssh users before we proceed
    if ctx.ssh_user != 'root':
        check_ssh_connectivity(ctx)

    if not ctx.skip_prepare_host:
        command_prepare_host(ctx)
    # [elided in excerpt: alternate branch]
        logger.info('Skip prepare_host')

    # [elided in excerpt]
    fsid = ctx.fsid or make_fsid()
    if not is_fsid(fsid):
        raise Error('not an fsid: %s' % fsid)
    logger.info('Cluster fsid: %s' % fsid)

    hostname = get_hostname()
    if '.' in hostname and not ctx.allow_fqdn_hostname:
        raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0]))
    mon_id = ctx.mon_id or get_short_hostname()
    mgr_id = ctx.mgr_id or generate_service_id()

    lock = FileLock(ctx, fsid)
    # [elided in excerpt: lock usage]

    (addr_arg, ipv6, mon_network) = prepare_mon_addresses(ctx)
    cluster_network, ipv6_cluster_network = prepare_cluster_network(ctx)

    config = prepare_bootstrap_config(ctx, fsid, addr_arg, ctx.image)

    if not ctx.skip_pull:
        # [elided in excerpt: try opener]
            _pull_image(ctx, ctx.image)
        except UnauthorizedRegistryError:
            err_str = 'Failed to pull container image. Check that correct registry credentials are provided in bootstrap by --registry-url, --registry-username, --registry-password, or supply --registry-json with credentials'
            logger.debug(f'Pulling image for bootstrap on {hostname} failed: {err_str}')
            raise Error(err_str)

    image_ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
    logger.info(f'Ceph version: {image_ver}')

    if not ctx.allow_mismatched_release:
        # 'ceph --version' output: release codename is the 5th token
        image_release = image_ver.split()[4]
        if image_release not in \
                [DEFAULT_IMAGE_RELEASE, LATEST_STABLE_RELEASE]:
            # [elided in excerpt: raise opener]
                f'Container release {image_release} != cephadm release {DEFAULT_IMAGE_RELEASE};'
                ' please use matching version of cephadm (pass --allow-mismatched-release to continue anyway)'

    logger.info('Extracting ceph user uid/gid from container image...')
    (uid, gid) = extract_uid_gid(ctx)

    # create some initial keys
    (mon_key, mgr_key, admin_key, bootstrap_keyring, admin_keyring) = create_initial_keys(ctx, uid, gid, mgr_id)

    monmap = create_initial_monmap(ctx, uid, gid, fsid, mon_id, addr_arg)
    (mon_dir, log_dir) = prepare_create_mon(ctx, uid, gid, fsid, mon_id,
                                            bootstrap_keyring.name, monmap.name)

    # write the bootstrap config into the mon data dir, owned by the ceph user
    with open(mon_dir + '/config', 'w') as f:
        os.fchown(f.fileno(), uid, gid)
        os.fchmod(f.fileno(), 0o600)
        # [elided in excerpt: write of the config contents]

    make_var_run(ctx, fsid, uid, gid)
    create_mon(ctx, uid, gid, fsid, mon_id)

    # config to issue various CLI commands
    tmp_config = write_tmp(config, uid, gid)

    # a CLI helper to reduce our typing
    def cli(cmd, extra_mounts={}, timeout=DEFAULT_TIMEOUT, verbosity=CallVerbosity.VERBOSE_ON_FAILURE):
        # type: (List[str], Dict[str, str], Optional[int], CallVerbosity) -> str
        # NOTE(review): mutable default ({}) is shared across calls; safe only
        # because it is never mutated here — verify before changing.
        # [elided in excerpt: opener of the default mounts mapping]
            log_dir: '/var/log/ceph:z',
            admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
            tmp_config.name: '/etc/ceph/ceph.conf:z',
        for k, v in extra_mounts.items():
            # [elided in excerpt: merge of extra mounts]
        timeout = timeout or ctx.timeout
        return CephContainer(
            entrypoint='/usr/bin/ceph',
            volume_mounts=mounts,
        ).run(timeout=timeout, verbosity=verbosity)

    wait_for_mon(ctx, mon_id, mon_dir, admin_keyring.name, tmp_config.name)

    finish_bootstrap_config(ctx, fsid, config, mon_id, mon_dir,
                            mon_network, ipv6, cli,
                            cluster_network, ipv6_cluster_network)

    # write the admin keyring (mode 0600 — it holds the admin secret)
    with open(ctx.output_keyring, 'w') as f:
        os.fchmod(f.fileno(), 0o600)
        f.write('[client.admin]\n'
                '\tkey = ' + admin_key + '\n')
    logger.info('Wrote keyring to %s' % ctx.output_keyring)

    create_mgr(ctx, uid, gid, fsid, mgr_id, mgr_key, config, cli)

    # [elided in excerpt: guard line]
        # user given config settings were already assimilated earlier
        # but if the given settings contained any attributes in
        # the mgr (e.g. mgr/cephadm/container_image_prometheus)
        # they don't seem to be stored if there isn't a mgr yet.
        # Since re-assimilating the same conf settings should be
        # idempotent we can just do it again here.
        with tempfile.NamedTemporaryFile(buffering=0) as tmp:
            tmp.write(user_conf.encode('utf-8'))
            cli(['config', 'assimilate-conf',
                 '-i', '/var/lib/ceph/user.conf'],
                {tmp.name: '/var/lib/ceph/user.conf:z'})

    # wait for mgr to restart (after enabling a module)
    def wait_for_mgr_restart() -> None:
        # first get latest mgrmap epoch from the mon. try newer 'mgr
        # stat' command first, then fall back to 'mgr dump' if
        # [elided in excerpt: rest of comment / try opener]
            j = json_loads_retry(lambda: cli(['mgr', 'stat'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR))
        # [elided in excerpt: fallback branch header]
            j = json_loads_retry(lambda: cli(['mgr', 'dump'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR))
        # [elided in excerpt: extraction of the epoch from j]

        # wait for mgr to have it
        logger.info('Waiting for the mgr to restart...')

        def mgr_has_latest_epoch():
            # [elided in excerpt: type comment / try opener]
                out = cli(['tell', 'mgr', 'mgr_status'])
                # [elided in excerpt: json parse of out]
                return j['mgrmap_epoch'] >= epoch
            except Exception as e:
                logger.debug('tell mgr mgr_status failed: %s' % e)
                # [elided in excerpt: failure return]
        is_available(ctx, 'mgr epoch %d' % epoch, mgr_has_latest_epoch)

    enable_cephadm_mgr_module(cli, wait_for_mgr_restart)

    # [elided in excerpt]
    if not ctx.skip_ssh:
        prepare_ssh(ctx, cli, wait_for_mgr_restart)

    # store registry credentials so the orchestrator can log in on other hosts
    if ctx.registry_url and ctx.registry_username and ctx.registry_password:
        registry_credentials = {'url': ctx.registry_url, 'username': ctx.registry_username, 'password': ctx.registry_password}
        cli(['config-key', 'set', 'mgr/cephadm/registry_credentials', json.dumps(registry_credentials)])

    cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(ctx.container_init), '--force'])

    if not ctx.skip_dashboard:
        prepare_dashboard(ctx, uid, gid, cli, wait_for_mgr_restart)

    if ctx.output_config == CEPH_DEFAULT_CONF and not ctx.skip_admin_label and not ctx.no_minimize_config:
        logger.info('Enabling client.admin keyring and conf on hosts with "admin" label')
        # [elided in excerpt: try opener]
        cli(['orch', 'client-keyring', 'set', 'client.admin', 'label:_admin'])
        cli(['orch', 'host', 'label', 'add', get_hostname(), '_admin'])
        # [elided in excerpt: exception branch header]
        logger.info('Unable to set up "admin" label; assuming older version of Ceph')

    # [elided in excerpt: guard around the apply-spec handling]
        logger.info('Applying %s to cluster' % ctx.apply_spec)
        # copy ssh key to hosts in spec file
        with open(ctx.apply_spec) as f:
            host_dicts = _extract_host_info_from_applied_spec(f)
            for h in host_dicts:
                _distribute_ssh_keys(ctx, h, hostname)

        # mount the spec read-only and apply it through the orchestrator
        mounts[pathify(ctx.apply_spec)] = '/tmp/spec.yml:ro'
        # [elided in excerpt: try opener]
        out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
        # [elided in excerpt: exception branch header]
        ctx.error_code = -errno.EINVAL
        logger.info('\nApplying %s to cluster failed!\n' % ctx.apply_spec)

    save_cluster_config(ctx, uid, gid, fsid)

    # enable autotune for osd_memory_target
    logger.info('Enabling autotune for osd_memory_target')
    cli(['config', 'set', 'osd', 'osd_memory_target_autotune', 'true'])

    # Notify the Dashboard to show the 'Expand cluster' page on first log in.
    cli(['config-key', 'set', 'mgr/dashboard/cluster/status', 'INSTALLED'])

    logger.info('You can access the Ceph CLI as following in case of multi-cluster or non-default config:\n\n'
                '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
                    # [elided in excerpt: leading format arguments]
                    ctx.output_keyring))

    logger.info('Or, if you are only running a single cluster on this host:\n\n\tsudo %s shell \n' % (sys.argv[0]))

    logger.info('Please consider enabling telemetry to help improve Ceph:\n\n'
                '\tceph telemetry on\n\n'
                'For more information see:\n\n'
                '\thttps://docs.ceph.com/docs/master/mgr/telemetry/\n')
    logger.info('Bootstrap complete.')
    return ctx.error_code
5835 ##################################
def command_registry_login(ctx: CephadmContext) -> int:
    # Log into a custom container registry using either a JSON credentials
    # file (--registry-json) or explicit url/username/password options.
    if ctx.registry_json:
        logger.info('Pulling custom registry login info from %s.' % ctx.registry_json)
        d = get_parm(ctx.registry_json)
        if d.get('url') and d.get('username') and d.get('password'):
            # copy the parsed values onto ctx so later code sees them uniformly
            ctx.registry_url = d.get('url')
            ctx.registry_username = d.get('username')
            ctx.registry_password = d.get('password')
            registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
        # [elided in excerpt: alternate branch]
            raise Error('json provided for custom registry login did not include all necessary fields. '
                        'Please setup json file as\n'
                        ' "url": "REGISTRY_URL",\n'
                        ' "username": "REGISTRY_USERNAME",\n'
                        ' "password": "REGISTRY_PASSWORD"\n'
    elif ctx.registry_url and ctx.registry_username and ctx.registry_password:
        registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
    # [elided in excerpt: alternate branch]
        raise Error('Invalid custom registry arguments received. To login to a custom registry include '
                    '--registry-url, --registry-username and --registry-password '
                    'options or --registry-json option')
    # [elided in excerpt: return value]
def registry_login(ctx: CephadmContext, url: Optional[str], username: Optional[str], password: Optional[str]) -> None:
    # Perform the actual container-engine 'login' against the registry.
    logger.info('Logging into custom registry.')
    # [elided in excerpt: try opener]
        engine = ctx.container_engine
        # NOTE(review): password is passed via '-p' on the command line, which
        # can be visible in the process list — pre-existing behavior.
        cmd = [engine.path, 'login',
               '-u', username, '-p', password,
               # [elided in excerpt: registry url argument]
        if isinstance(engine, Podman):
            # use a dedicated authfile so credentials land in a known location
            cmd.append('--authfile=/etc/ceph/podman-auth.json')
        out, _, _ = call_throws(ctx, cmd)
        if isinstance(engine, Podman):
            # tighten permissions on the credentials file
            os.chmod('/etc/ceph/podman-auth.json', 0o600)
    # [elided in excerpt: exception branch header]
        raise Error('Failed to login to custom registry @ %s as %s with given password' % (ctx.registry_url, ctx.registry_username))
5879 ##################################
def extract_uid_gid_monitoring(ctx, daemon_type):
    # type: (CephadmContext, str) -> Tuple[int, int]
    # Determine the uid/gid a monitoring daemon runs as by inspecting a
    # well-known path inside its container image (except node-exporter,
    # which uses the static 'nobody' ids).
    if daemon_type == 'prometheus':
        uid, gid = extract_uid_gid(ctx, file_path='/etc/prometheus')
    elif daemon_type == 'node-exporter':
        # 65534 == nobody/nogroup
        uid, gid = 65534, 65534
    elif daemon_type == 'grafana':
        uid, gid = extract_uid_gid(ctx, file_path='/var/lib/grafana')
    elif daemon_type == 'loki':
        uid, gid = extract_uid_gid(ctx, file_path='/etc/loki')
    elif daemon_type == 'promtail':
        uid, gid = extract_uid_gid(ctx, file_path='/etc/promtail')
    elif daemon_type == 'alertmanager':
        # try both paths; some alertmanager images only ship one of them
        uid, gid = extract_uid_gid(ctx, file_path=['/etc/alertmanager', '/etc/prometheus'])
    # [elided in excerpt: alternate branch]
        raise Error('{} not implemented yet'.format(daemon_type))
    # [elided in excerpt: return of uid, gid]
def get_deployment_container(ctx: CephadmContext,
                             fsid: str, daemon_type: str, daemon_id: Union[int, str],
                             privileged: bool = False,
                             ptrace: bool = False,
                             container_args: Optional[List[str]] = None) -> 'CephContainer':
    # wrapper for get_container specifically for containers made during the `cephadm deploy`
    # command. Adds some extra things such as extra container args and custom config files
    c = get_container(ctx, fsid, daemon_type, daemon_id, privileged, ptrace, container_args)
    # 'x in ctx' guards let this work when ctx lacks the optional attributes
    if 'extra_container_args' in ctx and ctx.extra_container_args:
        c.container_args.extend(ctx.extra_container_args)
    if 'extra_entrypoint_args' in ctx and ctx.extra_entrypoint_args:
        c.args.extend(ctx.extra_entrypoint_args)
    if 'config_json' in ctx and ctx.config_json:
        conf_files = get_custom_config_files(ctx.config_json)
        mandatory_keys = ['mount_path', 'content']
        for conf in conf_files['custom_config_files']:
            # only mount entries that carry both required keys
            if all(k in conf for k in mandatory_keys):
                mount_path = conf['mount_path']
                file_path = os.path.join(
                    # [elided in excerpt: leading path components]
                    'custom_config_files',
                    f'{daemon_type}.{daemon_id}',
                    os.path.basename(mount_path)
                c.volume_mounts[file_path] = mount_path
    # [elided in excerpt: return of the container object]
def command_deploy(ctx):
    # type: (CephadmContext) -> None
    # Deploy (or reconfigure/redeploy) one daemon of the given
    # '<daemon_type>.<daemon_id>' name, dispatching on daemon type.
    daemon_type, daemon_id = ctx.name.split('.', 1)

    lock = FileLock(ctx, ctx.fsid)
    # [elided in excerpt: lock usage]

    if daemon_type not in get_supported_daemons():
        raise Error('daemon type %s not recognized' % daemon_type)

    # [elided in excerpt]
    # decide between deploy / redeploy / reconfig based on current unit state
    unit_name = get_unit_name(ctx.fsid, daemon_type, daemon_id)
    (_, state, _) = check_unit(ctx, unit_name)
    if state == 'running' or is_container_running(ctx, CephContainer.for_daemon(ctx, ctx.fsid, daemon_type, daemon_id, 'bash')):
        # [elided in excerpt: branch structure around the three messages]
        logger.info('%s daemon %s ...' % ('Reconfig', ctx.name))
        # [elided in excerpt]
        logger.info('%s daemon %s ...' % ('Redeploy', ctx.name))
    # [elided in excerpt]
        logger.info('%s daemon %s ...' % ('Deploy', ctx.name))

    # Migrate sysctl conf files from /usr/lib to /etc
    migrate_sysctl_dir(ctx, ctx.fsid)

    # Get and check ports explicitly required to be opened
    daemon_ports = []  # type: List[int]

    # only check port in use if not reconfig or redeploy since service
    # we are redeploying/reconfiguring will already be using the port
    if not ctx.reconfig and not redeploy:
        # [elided in excerpt: guard line]
        daemon_ports = list(map(int, ctx.tcp_ports.split()))

    if daemon_type in Ceph.daemons:
        config, keyring = get_config_and_keyring(ctx)
        uid, gid = extract_uid_gid(ctx)
        make_var_run(ctx, ctx.fsid, uid, gid)

        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id,
                                     ptrace=ctx.allow_ptrace)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      osd_fsid=ctx.osd_fsid,
                      reconfig=ctx.reconfig,
                      # [elided in excerpt: trailing arguments]

    elif daemon_type in Monitoring.components:
        # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
        # make sure provided config-json is sufficient
        config = get_parm(ctx.config_json)  # type: ignore
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())
        required_args = Monitoring.components[daemon_type].get('config-json-args', list())
        # [elided in excerpt]
        if not config or not all(c in config.get('files', {}).keys() for c in required_files):  # type: ignore
            raise Error('{} deployment requires config-json which must '
                        'contain file content for {}'.format(daemon_type.capitalize(), ', '.join(required_files)))
        if not config or not all(c in config.keys() for c in required_args):  # type: ignore
            raise Error('{} deployment requires config-json which must '
                        'contain arg for {}'.format(daemon_type.capitalize(), ', '.join(required_args)))

        uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=ctx.reconfig,
                      # [elided in excerpt: trailing arguments]

    elif daemon_type == NFSGanesha.daemon_type:
        if not ctx.reconfig and not redeploy and not daemon_ports:
            daemon_ports = list(NFSGanesha.port_map.values())

        config, keyring = get_config_and_keyring(ctx)
        # TODO: extract ganesha uid/gid (997, 994) ?
        uid, gid = extract_uid_gid(ctx)
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=ctx.reconfig,
                      # [elided in excerpt: trailing arguments]

    elif daemon_type == CephIscsi.daemon_type:
        config, keyring = get_config_and_keyring(ctx)
        uid, gid = extract_uid_gid(ctx)
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=ctx.reconfig,
                      # [elided in excerpt: trailing arguments]

    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, ctx.fsid, daemon_id)
        uid, gid = haproxy.extract_uid_gid_haproxy()
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=ctx.reconfig,
                      # [elided in excerpt: trailing arguments]

    elif daemon_type == Keepalived.daemon_type:
        keepalived = Keepalived.init(ctx, ctx.fsid, daemon_id)
        uid, gid = keepalived.extract_uid_gid_keepalived()
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=ctx.reconfig,
                      # [elided in excerpt: trailing arguments]

    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, ctx.fsid, daemon_id)
        if not ctx.reconfig and not redeploy:
            daemon_ports.extend(cc.ports)
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id,
                                     privileged=cc.privileged,
                                     ptrace=ctx.allow_ptrace)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c,
                      uid=cc.uid, gid=cc.gid, config=None,
                      keyring=None, reconfig=ctx.reconfig,
                      # [elided in excerpt: trailing arguments]

    elif daemon_type == CephadmAgent.daemon_type:
        # get current user gid and uid
        # [elided in excerpt: uid/gid lookup]
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, None,
                      uid, gid, ports=daemon_ports)

    elif daemon_type == SNMPGateway.daemon_type:
        sc = SNMPGateway.init(ctx, ctx.fsid, daemon_id)
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c,
                      # [elided in excerpt: trailing arguments]

    # [elided in excerpt: fallback branch header]
        raise Error('daemon type {} not implemented in command_deploy function'
                    .format(daemon_type))
6070 ##################################
def command_run(ctx):
    # type: (CephadmContext) -> int
    """Run the named daemon's container in the foreground; return its exit code."""
    daemon_type, daemon_id = ctx.name.split('.', 1)
    ctr = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
    return call_timeout(ctx, ctr.run_cmd(), ctx.timeout)
6081 ##################################
def command_shell(ctx):
    # type: (CephadmContext) -> int
    # Open an interactive shell in a ceph container with the cluster's
    # config/keyring and any requested extra mounts available inside.
    cp = read_config(ctx.config)
    if cp.has_option('global', 'fsid') and \
            cp.get('global', 'fsid') != ctx.fsid:
        raise Error('fsid does not match ceph.conf')
    # [elided in excerpt: branch structure deriving daemon_type/daemon_id]
    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    # [elided in excerpt]
    daemon_type = ctx.name
    # [elided in excerpt]
    daemon_type = 'osd'  # get the most mounts

    if ctx.fsid and daemon_type in Ceph.daemons:
        make_log_dir(ctx, ctx.fsid)

    if daemon_id and not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    # in case a dedicated keyring for the specified fsid is found we us it.
    # Otherwise, use /etc/ceph files by default, if present. We do this instead of
    # making these defaults in the arg parser because we don't want an error
    # if they don't exist.
    # [elided in excerpt: guard line(s)]
    keyring_file = f'{ctx.data_dir}/{ctx.fsid}/{CEPH_CONF_DIR}/{CEPH_KEYRING}'
    if os.path.exists(keyring_file):
        ctx.keyring = keyring_file
    elif os.path.exists(CEPH_DEFAULT_KEYRING):
        ctx.keyring = CEPH_DEFAULT_KEYRING

    container_args: List[str] = ['-i']
    mounts = get_container_mounts(ctx, ctx.fsid, daemon_type, daemon_id,
                                  no_config=True if ctx.config else False)
    binds = get_container_binds(ctx, ctx.fsid, daemon_type, daemon_id)
    # [elided in excerpt: guard line]
    mounts[pathify(ctx.config)] = '/etc/ceph/ceph.conf:z'
    # [elided in excerpt: guard line]
    mounts[pathify(ctx.keyring)] = '/etc/ceph/ceph.keyring:z'
    # [elided in excerpt: guard line]
    # user-requested --mount entries: 'src[:dst[:options]]'
    for _mount in ctx.mount:
        split_src_dst = _mount.split(':')
        mount = pathify(split_src_dst[0])
        filename = os.path.basename(split_src_dst[0])
        if len(split_src_dst) > 1:
            dst = split_src_dst[1]
            if len(split_src_dst) == 3:
                # keep any mount options appended after the destination
                dst = '{}:{}'.format(dst, split_src_dst[2])
            # [elided in excerpt]
        # [elided in excerpt: alternate branch — default destination under /mnt]
            mounts[mount] = '/mnt/{}'.format(filename)
    # [elided in excerpt]
    command = ctx.command
    # [elided in excerpt: interactive-shell setup]
        '-e', 'PS1=%s' % CUSTOM_PS1,
    # [elided in excerpt]
    # give the in-container root a persistent home under the cluster data dir
    home = os.path.join(ctx.data_dir, ctx.fsid, 'home')
    if not os.path.exists(home):
        logger.debug('Creating root home at %s' % home)
        makedirs(home, 0, 0, 0o660)
    if os.path.exists('/etc/skel'):
        # seed shell dotfiles from the host skeleton
        for f in os.listdir('/etc/skel'):
            if f.startswith('.bash'):
                shutil.copyfile(os.path.join('/etc/skel', f),
                                os.path.join(home, f))
    mounts[home] = '/root'

    for i in ctx.volume:
        a, b = i.split(':', 1)
        # [elided in excerpt: volume registration]
    # [elided in excerpt: CephContainer construction opener]
        entrypoint='doesnotmatter',
        container_args=container_args,
        volume_mounts=mounts,
    # [elided in excerpt]
    command = c.shell_cmd(command)

    return call_timeout(ctx, command, ctx.timeout)
6181 ##################################
def command_enter(ctx):
    # type: (CephadmContext) -> int
    # Exec an interactive shell (or given command) inside an already-running
    # daemon container.
    # [elided in excerpt: fsid guard line]
    raise Error('must pass --fsid to specify cluster')
    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    container_args = ['-i']  # type: List[str]
    # [elided in excerpt]
    command = ctx.command
    # [elided in excerpt: interactive-shell setup]
        '-e', 'PS1=%s' % CUSTOM_PS1,
    # [elided in excerpt: CephContainer construction opener]
        entrypoint='doesnotmatter',
        container_args=container_args,
        # target the existing container by its canonical name
        cname='ceph-%s-%s.%s' % (ctx.fsid, daemon_type, daemon_id),
    # [elided in excerpt]
    command = c.exec_cmd(command)
    return call_timeout(ctx, command, ctx.timeout)
6210 ##################################
def command_ceph_volume(ctx):
    # type: (CephadmContext) -> None
    # Run ceph-volume inside a container with the cluster config/keyring
    # mounted in (used for OSD preparation and inspection).
    cp = read_config(ctx.config)
    if cp.has_option('global', 'fsid') and \
            cp.get('global', 'fsid') != ctx.fsid:
        raise Error('fsid does not match ceph.conf')
    # [elided in excerpt: guard line]
    make_log_dir(ctx, ctx.fsid)

    lock = FileLock(ctx, ctx.fsid)
    # [elided in excerpt: lock usage]

    (uid, gid) = (0, 0)  # ceph-volume runs as root
    mounts = get_container_mounts(ctx, ctx.fsid, 'osd', None)
    # [elided in excerpt: guard line(s)]
    (config, keyring) = get_config_and_keyring(ctx)
    # [elided in excerpt: guard line]
    # write config/keyring to temp files and bind-mount them into the container
    tmp_config = write_tmp(config, uid, gid)
    mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'
    # [elided in excerpt: guard line]
    tmp_keyring = write_tmp(keyring, uid, gid)
    mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'

    c = get_ceph_volume_container(
        volume_mounts=mounts,
    # [elided in excerpt: remaining constructor arguments]
    out, err, code = call_throws(ctx, c.run_cmd(), verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
6258 ##################################
def command_unit(ctx):
    # type: (CephadmContext) -> int
    # Pass a systemctl subcommand (start/stop/restart/...) through to the
    # daemon's systemd unit.
    # [elided in excerpt: fsid guard line]
    raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)
    # [elided in excerpt: call wrapper opener]
        ['systemctl', ctx.command, unit_name],
        verbosity=CallVerbosity.VERBOSE,
6277 ##################################
def command_logs(ctx):
    # type: (CephadmContext) -> None
    # Show a daemon's logs by delegating to journalctl for its systemd unit.
    # [elided in excerpt: fsid guard line]
    raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    cmd = [find_program('journalctl')]
    cmd.extend(['-u', unit_name])
    # [elided in excerpt: guard line]
    # forward any extra user-provided journalctl arguments
    cmd.extend(ctx.command)

    # call this directly, without our wrapper, so that we get an unmolested
    # stdout with logger prefixing.
    logger.debug('Running command: %s' % ' '.join(cmd))
    subprocess.call(cmd, env=os.environ.copy())  # type: ignore
6298 ##################################
def list_networks(ctx):
    # type: (CephadmContext) -> Dict[str,Dict[str, Set[str]]]
    """Return {network -> {interface -> set(ips)}} for IPv4 and IPv6.

    # sadly, 18.04's iproute2 4.15.0-2ubun doesn't support the -j flag,
    # so we'll need to use a regex to parse 'ip' command output.
    #
    # out, _, _ = call_throws(['ip', '-j', 'route', 'ls'])
    # j = json.loads(out)
    """
    res = _list_ipv4_networks(ctx)
    res.update(_list_ipv6_networks(ctx))
    return res
def _list_ipv4_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]:
    """Run `ip route ls` and parse the IPv4 networks visible on this host."""
    execstr: Optional[str] = find_executable('ip')
    if not execstr:
        raise FileNotFoundError("unable to find 'ip' command")
    out, _, _ = call_throws(ctx, [execstr, 'route', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
    return _parse_ipv4_route(out)
6323 def _parse_ipv4_route(out
: str) -> Dict
[str, Dict
[str, Set
[str]]]:
6324 r
= {} # type: Dict[str, Dict[str, Set[str]]]
6325 p
= re
.compile(r
'^(\S+) (?:via \S+)? ?dev (\S+) (.*)scope link (.*)src (\S+)')
6326 for line
in out
.splitlines():
6331 if '/' not in net
: # aggregate /32 mask for single host sub-networks
6337 if iface
not in r
[net
]:
6338 r
[net
][iface
] = set()
6339 r
[net
][iface
].add(ip
)
def _list_ipv6_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]:
    """Run `ip -6 route ls` + `ip -6 addr ls` and parse IPv6 networks."""
    execstr: Optional[str] = find_executable('ip')
    if not execstr:
        raise FileNotFoundError("unable to find 'ip' command")
    routes, _, _ = call_throws(ctx, [execstr, '-6', 'route', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
    ips, _, _ = call_throws(ctx, [execstr, '-6', 'addr', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
    return _parse_ipv6_route(routes, ips)
6352 def _parse_ipv6_route(routes
: str, ips
: str) -> Dict
[str, Dict
[str, Set
[str]]]:
6353 r
= {} # type: Dict[str, Dict[str, Set[str]]]
6354 route_p
= re
.compile(r
'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$')
6355 ip_p
= re
.compile(r
'^\s+inet6 (\S+)/(.*)scope (.*)$')
6356 iface_p
= re
.compile(r
'^(\d+): (\S+): (.*)$')
6357 for line
in routes
.splitlines():
6358 m
= route_p
.findall(line
)
6359 if not m
or m
[0][0].lower() == 'default':
6362 if '/' not in net
: # aggregate /128 mask for single host sub-networks
6365 if iface
== 'lo': # skip loopback devices
6369 if iface
not in r
[net
]:
6370 r
[net
][iface
] = set()
6373 for line
in ips
.splitlines():
6374 m
= ip_p
.findall(line
)
6376 m
= iface_p
.findall(line
)
6378 # drop @... suffix, if present
6379 iface
= m
[0][1].split('@')[0]
6382 # find the network it belongs to
6383 net
= [n
for n
in r
.keys()
6384 if ipaddress
.ip_address(ip
) in ipaddress
.ip_network(n
)]
6385 if net
and iface
in r
[net
[0]]:
6387 r
[net
[0]][iface
].add(ip
)
def command_list_networks(ctx):
    # type: (CephadmContext) -> None
    """Print the host's networks as JSON (sets serialized as lists)."""
    r = list_networks(ctx)

    def serialize_sets(obj: Any) -> Any:
        # json cannot encode set(); render as a list
        return list(obj) if isinstance(obj, set) else obj

    print(json.dumps(r, indent=4, default=serialize_sets))
6401 ##################################
def command_ls(ctx):
    # type: (CephadmContext) -> None
    """Print all daemons found on this host as JSON."""
    ls = list_daemons(ctx, detail=not ctx.no_detail,
                      legacy_dir=ctx.legacy_dir)
    print(json.dumps(ls, indent=4))
def with_units_to_int(v: str) -> int:
    """Convert a size string with optional binary-unit suffix ('2KiB',
    '1.5G', '100B') into a byte count."""
    if v.endswith('iB'):
        v = v[:-2]
    elif v.endswith('B'):
        v = v[:-1]
    mult = 1
    if v[-1].upper() == 'K':
        mult = 1024
        v = v[:-1]
    elif v[-1].upper() == 'M':
        mult = 1024 * 1024
        v = v[:-1]
    elif v[-1].upper() == 'G':
        mult = 1024 * 1024 * 1024
        v = v[:-1]
    elif v[-1].upper() == 'T':
        mult = 1024 * 1024 * 1024 * 1024
        v = v[:-1]
    return int(float(v) * mult)
def list_daemons(ctx, detail=True, legacy_dir=None):
    # type: (CephadmContext, bool, Optional[str]) -> List[Dict[str, str]]
    """Enumerate ceph daemons on this host, both legacy (ceph-<type>@<id>)
    and cephadm-managed (under <data_dir>/<fsid>/<name>).

    With detail=True, also queries the container engine for per-daemon
    container id, image, version, and resource usage.

    NOTE(review): reconstructed from a mangled extraction — verify against
    the upstream cephadm source before relying on exact statement order.
    """
    host_version: Optional[str] = None
    ls = []
    container_path = ctx.container_engine.path

    data_dir = ctx.data_dir
    if legacy_dir is not None:
        data_dir = os.path.abspath(legacy_dir + data_dir)

    # keep track of ceph versions we see
    seen_versions = {}  # type: Dict[str, Optional[str]]

    # keep track of image digests
    seen_digests = {}  # type: Dict[str, List[str]]

    # keep track of memory and cpu usage we've seen
    seen_memusage = {}  # type: Dict[str, int]
    seen_cpuperc = {}  # type: Dict[str, str]
    out, err, code = call(
        ctx,
        [container_path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
        verbosity=CallVerbosity.QUIET
    )
    seen_memusage_cid_len, seen_memusage = _parse_mem_usage(code, out)

    out, err, code = call(
        ctx,
        [container_path, 'stats', '--format', '{{.ID}},{{.CPUPerc}}', '--no-stream'],
        verbosity=CallVerbosity.QUIET
    )
    seen_cpuperc_cid_len, seen_cpuperc = _parse_cpu_perc(code, out)

    # /var/lib/ceph
    if os.path.exists(data_dir):
        for i in os.listdir(data_dir):
            if i in ['mon', 'osd', 'mds', 'mgr']:
                daemon_type = i
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '-' not in j:
                        continue
                    (cluster, daemon_id) = j.split('-', 1)
                    fsid = get_legacy_daemon_fsid(ctx,
                                                  cluster, daemon_type, daemon_id,
                                                  legacy_dir=legacy_dir)
                    legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
                    val: Dict[str, Any] = {
                        'style': 'legacy',
                        'name': '%s.%s' % (daemon_type, daemon_id),
                        'fsid': fsid if fsid is not None else 'unknown',
                        'systemd_unit': legacy_unit_name,
                    }
                    if detail:
                        (val['enabled'], val['state'], _) = check_unit(ctx, legacy_unit_name)
                        if not host_version:
                            try:
                                out, err, code = call(ctx,
                                                      ['ceph', '-v'],
                                                      verbosity=CallVerbosity.QUIET)
                                if not code and out.startswith('ceph version '):
                                    host_version = out.split(' ')[2]
                            except Exception:
                                pass
                        val['host_version'] = host_version
                    ls.append(val)
            elif is_fsid(i):
                fsid = str(i)  # convince mypy that fsid is a str here
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)):
                        name = j
                        (daemon_type, daemon_id) = j.split('.', 1)
                        unit_name = get_unit_name(fsid,
                                                  daemon_type,
                                                  daemon_id)
                        val = {
                            'style': 'cephadm:v1',
                            'name': name,
                            'fsid': fsid,
                            'systemd_unit': unit_name,
                        }
                        if detail:
                            # get container id
                            (val['enabled'], val['state'], _) = check_unit(ctx, unit_name)
                            container_id = None
                            image_name = None
                            image_id = None
                            image_digests = None
                            version = None
                            start_stamp = None

                            out, err, code = get_container_stats(ctx, container_path, fsid, daemon_type, daemon_id)
                            if not code:
                                (container_id, image_name, image_id, start,
                                 version) = out.strip().split(',')
                                image_id = normalize_container_id(image_id)
                                daemon_type = name.split('.', 1)[0]
                                start_stamp = try_convert_datetime(start)

                                # collect digests for this image id
                                image_digests = seen_digests.get(image_id)
                                if not image_digests:
                                    out, err, code = call(
                                        ctx,
                                        [
                                            container_path, 'image', 'inspect', image_id,
                                            '--format', '{{.RepoDigests}}',
                                        ],
                                        verbosity=CallVerbosity.QUIET)
                                    if not code:
                                        image_digests = list(set(map(
                                            normalize_image_digest,
                                            out.strip()[1:-1].split(' '))))
                                        seen_digests[image_id] = image_digests

                                # identify software version inside the container (if we can)
                                if not version or '.' not in version:
                                    version = seen_versions.get(image_id, None)
                                if daemon_type == NFSGanesha.daemon_type:
                                    version = NFSGanesha.get_version(ctx, container_id)
                                if daemon_type == CephIscsi.daemon_type:
                                    version = CephIscsi.get_version(ctx, container_id)
                                if not version:
                                    if daemon_type in Ceph.daemons:
                                        out, err, code = call(ctx,
                                                              [container_path, 'exec', container_id,
                                                               'ceph', '-v'],
                                                              verbosity=CallVerbosity.QUIET)
                                        if not code and \
                                           out.startswith('ceph version '):
                                            version = out.split(' ')[2]
                                            seen_versions[image_id] = version
                                    elif daemon_type == 'grafana':
                                        out, err, code = call(ctx,
                                                              [container_path, 'exec', container_id,
                                                               'grafana-server', '-v'],
                                                              verbosity=CallVerbosity.QUIET)
                                        if not code and \
                                           out.startswith('Version '):
                                            version = out.split(' ')[1]
                                            seen_versions[image_id] = version
                                    elif daemon_type in ['prometheus',
                                                         'alertmanager',
                                                         'node-exporter',
                                                         'loki',
                                                         'promtail']:
                                        version = Monitoring.get_version(ctx, container_id, daemon_type)
                                        seen_versions[image_id] = version
                                    elif daemon_type == 'haproxy':
                                        out, err, code = call(ctx,
                                                              [container_path, 'exec', container_id,
                                                               'haproxy', '-v'],
                                                              verbosity=CallVerbosity.QUIET)
                                        if not code and \
                                           out.startswith('HA-Proxy version '):
                                            version = out.split(' ')[2]
                                            seen_versions[image_id] = version
                                    elif daemon_type == 'keepalived':
                                        out, err, code = call(ctx,
                                                              [container_path, 'exec', container_id,
                                                               'keepalived', '--version'],
                                                              verbosity=CallVerbosity.QUIET)
                                        # keepalived prints its version on stderr
                                        if not code and \
                                           err.startswith('Keepalived '):
                                            version = err.split(' ')[1]
                                            if version[0] == 'v':
                                                version = version[1:]
                                            seen_versions[image_id] = version
                                    elif daemon_type == CustomContainer.daemon_type:
                                        # Because a custom container can contain
                                        # everything, we do not know which command
                                        # to execute to get the version.
                                        pass
                                    elif daemon_type == SNMPGateway.daemon_type:
                                        version = SNMPGateway.get_version(ctx, fsid, daemon_id)
                                        seen_versions[image_id] = version
                                    else:
                                        logger.warning('version for unknown daemon type %s' % daemon_type)
                            else:
                                # container not running; fall back to the
                                # image recorded at deploy time
                                vfile = os.path.join(data_dir, fsid, j, 'unit.image')  # type: ignore
                                try:
                                    with open(vfile, 'r') as f:
                                        image_name = f.read().strip() or None
                                except IOError:
                                    pass

                            # unit.meta?
                            mfile = os.path.join(data_dir, fsid, j, 'unit.meta')  # type: ignore
                            try:
                                with open(mfile, 'r') as f:
                                    meta = json.loads(f.read())
                                    val.update(meta)
                            except IOError:
                                pass

                            val['container_id'] = container_id
                            val['container_image_name'] = image_name
                            val['container_image_id'] = image_id
                            val['container_image_digests'] = image_digests
                            if container_id:
                                val['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len])
                                val['cpu_percentage'] = seen_cpuperc.get(container_id[0:seen_cpuperc_cid_len])
                            val['version'] = version
                            val['started'] = start_stamp
                            val['created'] = get_file_timestamp(
                                os.path.join(data_dir, fsid, j, 'unit.created')
                            )
                            val['deployed'] = get_file_timestamp(
                                os.path.join(data_dir, fsid, j, 'unit.image'))
                            val['configured'] = get_file_timestamp(
                                os.path.join(data_dir, fsid, j, 'unit.configured'))
                        ls.append(val)

    return ls
6649 def _parse_mem_usage(code
: int, out
: str) -> Tuple
[int, Dict
[str, int]]:
6650 # keep track of memory usage we've seen
6651 seen_memusage
= {} # type: Dict[str, int]
6652 seen_memusage_cid_len
= 0
6654 for line
in out
.splitlines():
6655 (cid
, usage
) = line
.split(',')
6656 (used
, limit
) = usage
.split(' / ')
6658 seen_memusage
[cid
] = with_units_to_int(used
)
6659 if not seen_memusage_cid_len
:
6660 seen_memusage_cid_len
= len(cid
)
6662 logger
.info('unable to parse memory usage line\n>{}'.format(line
))
6664 return seen_memusage_cid_len
, seen_memusage
6667 def _parse_cpu_perc(code
: int, out
: str) -> Tuple
[int, Dict
[str, str]]:
6669 seen_cpuperc_cid_len
= 0
6671 for line
in out
.splitlines():
6672 (cid
, cpuperc
) = line
.split(',')
6674 seen_cpuperc
[cid
] = cpuperc
6675 if not seen_cpuperc_cid_len
:
6676 seen_cpuperc_cid_len
= len(cid
)
6678 logger
.info('unable to parse cpu percentage line\n>{}'.format(line
))
6680 return seen_cpuperc_cid_len
, seen_cpuperc
def get_daemon_description(ctx, fsid, name, detail=False, legacy_dir=None):
    # type: (CephadmContext, str, str, bool, Optional[str]) -> Dict[str, str]
    """Return the list_daemons() entry matching (fsid, name).

    Raises Error if no such daemon is found on this host.
    """
    for d in list_daemons(ctx, detail=detail, legacy_dir=legacy_dir):
        if d['fsid'] != fsid:
            continue
        if d['name'] != name:
            continue
        return d
    raise Error('Daemon not found: {}. See `cephadm ls`'.format(name))
def get_container_stats(ctx: CephadmContext, container_path: str, fsid: str, daemon_type: str, daemon_id: str) -> Tuple[str, str, int]:
    """Inspect the daemon's container (trying both current and legacy
    container names) and return the raw (out, err, code) of the first
    successful `<engine> inspect` call.
    """
    c = CephContainer.for_daemon(ctx, fsid, daemon_type, daemon_id, 'bash')
    out, err, code = '', '', -1
    for name in (c.cname, c.old_cname):
        cmd = [
            container_path, 'inspect',
            '--format', '{{.Id}},{{.Config.Image}},{{.Image}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}',
            name
        ]
        out, err, code = call(ctx, cmd, verbosity=CallVerbosity.QUIET)
        if not code:
            break
    return out, err, code
6709 ##################################
def command_adopt(ctx):
    # type: (CephadmContext) -> None
    """Adopt a legacy-deployed daemon (named --name, style --style) into
    cephadm management, dispatching to the per-daemon-type adopt routine.
    """
    if not ctx.skip_pull:
        try:
            _pull_image(ctx, ctx.image)
        except UnauthorizedRegistryError:
            err_str = 'Failed to pull container image. Host may not be logged into container registry. Try `cephadm registry-login --registry-url <url> --registry-username <username> --registry-password <password>` or supply login info via a json file with `cephadm registry-login --registry-json <file>`'
            logger.debug(f'Pulling image for `command_adopt` failed: {err_str}')
            raise Error(err_str)

    (daemon_type, daemon_id) = ctx.name.split('.', 1)

    # legacy check
    if ctx.style != 'legacy':
        raise Error('adoption of style %s not implemented' % ctx.style)

    # lock
    fsid = get_legacy_daemon_fsid(ctx,
                                  ctx.cluster,
                                  daemon_type,
                                  daemon_id,
                                  legacy_dir=ctx.legacy_dir)
    if not fsid:
        raise Error('could not detect legacy fsid; set fsid in ceph.conf')
    lock = FileLock(ctx, fsid)
    lock.acquire()

    # call correct adoption
    if daemon_type in Ceph.daemons:
        command_adopt_ceph(ctx, daemon_type, daemon_id, fsid)
    elif daemon_type == 'prometheus':
        command_adopt_prometheus(ctx, daemon_id, fsid)
    elif daemon_type == 'grafana':
        command_adopt_grafana(ctx, daemon_id, fsid)
    elif daemon_type == 'node-exporter':
        raise Error('adoption of node-exporter not implemented')
    elif daemon_type == 'alertmanager':
        command_adopt_alertmanager(ctx, daemon_id, fsid)
    else:
        raise Error('daemon type %s not recognized' % daemon_type)
class AdoptOsd(object):
    """Helper for locating a legacy OSD's fsid/objectstore type and for
    renaming its cluster tags during adoption."""

    def __init__(self, ctx, osd_data_dir, osd_id):
        # type: (CephadmContext, str, str) -> None
        self.ctx = ctx
        self.osd_data_dir = osd_data_dir
        self.osd_id = osd_id

    def check_online_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Read fsid and objectstore type from a mounted (online) OSD data dir."""
        osd_fsid, osd_type = None, None

        path = os.path.join(self.osd_data_dir, 'fsid')
        try:
            with open(path, 'r') as f:
                osd_fsid = f.read().strip()
            logger.info('Found online OSD at %s' % path)
        except IOError:
            logger.info('Unable to read OSD fsid from %s' % path)
        if os.path.exists(os.path.join(self.osd_data_dir, 'type')):
            with open(os.path.join(self.osd_data_dir, 'type')) as f:
                osd_type = f.read().strip()
        else:
            logger.info('"type" file missing for OSD data dir')

        return osd_fsid, osd_type

    def check_offline_lvm_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Look the OSD up via `ceph-volume lvm list` when it is not mounted."""
        osd_fsid, osd_type = None, None

        c = get_ceph_volume_container(
            self.ctx,
            args=['lvm', 'list', '--format=json'],
        )
        out, err, code = call_throws(self.ctx, c.run_cmd())
        if not code:
            try:
                js = json.loads(out)
                if self.osd_id in js:
                    logger.info('Found offline LVM OSD {}'.format(self.osd_id))
                    osd_fsid = js[self.osd_id][0]['tags']['ceph.osd_fsid']
                    for device in js[self.osd_id]:
                        if device['tags']['ceph.type'] == 'block':
                            osd_type = 'bluestore'
                            break
                        if device['tags']['ceph.type'] == 'data':
                            osd_type = 'filestore'
                            break
            except ValueError as e:
                logger.info('Invalid JSON in ceph-volume lvm list: {}'.format(e))

        return osd_fsid, osd_type

    def check_offline_simple_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Look the OSD up via a ceph-volume 'simple' scan json under /etc/ceph/osd."""
        osd_fsid, osd_type = None, None

        osd_file = glob('/etc/ceph/osd/{}-[a-f0-9-]*.json'.format(self.osd_id))
        if len(osd_file) == 1:
            with open(osd_file[0], 'r') as f:
                try:
                    js = json.loads(f.read())
                    logger.info('Found offline simple OSD {}'.format(self.osd_id))
                    osd_fsid = js['fsid']
                    osd_type = js['type']
                    if osd_type != 'filestore':
                        # need this to be mounted for the adopt to work, as it
                        # needs to move files from this directory
                        call_throws(self.ctx, ['mount', js['data']['path'], self.osd_data_dir])
                except ValueError as e:
                    logger.info('Invalid JSON in {}: {}'.format(osd_file, e))

        return osd_fsid, osd_type

    def change_cluster_name(self) -> None:
        """Rewrite the LVM ceph.cluster_name tag from the legacy cluster
        name to 'ceph' (best-effort; failures are logged, not raised)."""
        logger.info('Attempting to convert osd cluster name to ceph . . .')
        c = get_ceph_volume_container(
            self.ctx,
            args=['lvm', 'list', '{}'.format(self.osd_id), '--format=json'],
        )
        out, err, code = call_throws(self.ctx, c.run_cmd())
        if code:
            raise Exception(f'Failed to get list of LVs: {err}\nceph-volume failed with rc {code}')
        try:
            js = json.loads(out)
            if not js:
                raise RuntimeError(f'Failed to find osd.{self.osd_id}')
            device: Optional[Dict[Any, Any]] = None
            for d in js[self.osd_id]:
                if d['type'] == 'block':
                    device = d
                    break
            if not device:
                raise RuntimeError(f'Failed to find block device for osd.{self.osd_id}')
            vg = device['vg_name']
            out, err, code = call_throws(self.ctx, ['lvchange', '--deltag', f'ceph.cluster_name={self.ctx.cluster}', vg])
            if code:
                raise RuntimeError(f"Can't delete tag ceph.cluster_name={self.ctx.cluster} on osd.{self.osd_id}.\nlvchange failed with rc {code}")
            out, err, code = call_throws(self.ctx, ['lvchange', '--addtag', 'ceph.cluster_name=ceph', vg])
            if code:
                raise RuntimeError(f"Can't add tag ceph.cluster_name=ceph on osd.{self.osd_id}.\nlvchange failed with rc {code}")
            logger.info('Successfully converted osd cluster name')
        except (Exception, RuntimeError) as e:
            logger.info(f'Failed to convert osd cluster name: {e}')
def command_adopt_ceph(ctx, daemon_type, daemon_id, fsid):
    # type: (CephadmContext, str, str, str) -> None
    """Adopt a legacy ceph daemon (mon/mgr/osd/...): stop its old unit, move
    data/config/logs into the cephadm layout, and deploy new systemd units.

    NOTE(review): reconstructed from a mangled extraction — verify against
    the upstream cephadm source before relying on exact statement order.
    """
    (uid, gid) = extract_uid_gid(ctx)

    data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
                    (daemon_type, ctx.cluster, daemon_id))
    data_dir_src = os.path.abspath(ctx.legacy_dir + data_dir_src)

    if not os.path.exists(data_dir_src):
        raise Error("{}.{} data directory '{}' does not exist. "
                    'Incorrect ID specified, or daemon already adopted?'.format(
                        daemon_type, daemon_id, data_dir_src))

    osd_fsid = None
    if daemon_type == 'osd':
        adopt_osd = AdoptOsd(ctx, data_dir_src, daemon_id)
        osd_fsid, osd_type = adopt_osd.check_online_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
        if not osd_fsid:
            raise Error('Unable to find OSD {}'.format(daemon_id))
        elif ctx.cluster != 'ceph':
            adopt_osd.change_cluster_name()
        logger.info('objectstore_type is %s' % osd_type)
        assert osd_type
        if osd_type == 'filestore':
            raise Error('FileStore is not supported by cephadm')

    # NOTE: implicit assumption here that the units correspond to the
    # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
    # CLUSTER field.
    unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
    (enabled, state, _) = check_unit(ctx, unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'disable', unit_name])

    # data
    logger.info('Moving data...')
    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)
    move_files(ctx, glob(os.path.join(data_dir_src, '*')),
               data_dir_dst,
               uid=uid, gid=gid)
    logger.debug('Remove dir `%s`' % (data_dir_src))
    if os.path.ismount(data_dir_src):
        call_throws(ctx, ['umount', data_dir_src])
    os.rmdir(data_dir_src)

    logger.info('Chowning content...')
    call_throws(ctx, ['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])

    if daemon_type == 'mon':
        # rename *.ldb -> *.sst, in case they are coming from ubuntu
        store = os.path.join(data_dir_dst, 'store.db')
        num_renamed = 0
        if os.path.exists(store):
            for oldf in os.listdir(store):
                if oldf.endswith('.ldb'):
                    newf = oldf.replace('.ldb', '.sst')
                    oldp = os.path.join(store, oldf)
                    newp = os.path.join(store, newf)
                    logger.debug('Renaming %s -> %s' % (oldp, newp))
                    os.rename(oldp, newp)
                    num_renamed += 1
        if num_renamed:
            logger.info('Renamed %d leveldb *.ldb files to *.sst',
                        num_renamed)
    if daemon_type == 'osd':
        for n in ['block', 'block.db', 'block.wal']:
            p = os.path.join(data_dir_dst, n)
            if os.path.exists(p):
                logger.info('Chowning %s...' % p)
                os.chown(p, uid, gid)
        # disable the ceph-volume 'simple' mode files on the host
        simple_fn = os.path.join('/etc/ceph/osd',
                                 '%s-%s.json' % (daemon_id, osd_fsid))
        if os.path.exists(simple_fn):
            new_fn = simple_fn + '.adopted-by-cephadm'
            logger.info('Renaming %s -> %s', simple_fn, new_fn)
            os.rename(simple_fn, new_fn)
            logger.info('Disabling host unit ceph-volume@ simple unit...')
            call(ctx, ['systemctl', 'disable',
                       'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
        else:
            # assume this is an 'lvm' c-v for now, but don't error
            # out if it's not.
            logger.info('Disabling host unit ceph-volume@ lvm unit...')
            call(ctx, ['systemctl', 'disable',
                       'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])

    # config
    config_src = '/etc/ceph/%s.conf' % (ctx.cluster)
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'config')
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # logs
    logger.info('Moving logs...')
    log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
                   (ctx.cluster, daemon_type, daemon_id))
    log_dir_src = os.path.abspath(ctx.legacy_dir + log_dir_src)
    log_dir_dst = make_log_dir(ctx, fsid, uid=uid, gid=gid)
    move_files(ctx, glob(log_dir_src),
               log_dir_dst,
               uid=uid, gid=gid)

    logger.info('Creating new units...')
    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True,  # unconditionally enable the new unit
                        start=(state == 'running' or ctx.force_start),
                        osd_fsid=osd_fsid)
    update_firewalld(ctx, daemon_type)
def command_adopt_prometheus(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a host-installed prometheus: stop the old unit, copy its config
    and metrics data into the cephadm layout, and deploy the daemon."""
    daemon_type = 'prometheus'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)

    _stop_and_disable(ctx, 'prometheus')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = '/etc/prometheus/prometheus.yml'
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/prometheus')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # data
    data_src = '/var/lib/prometheus/metrics/'
    data_src = os.path.abspath(ctx.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)
def command_adopt_grafana(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a host-installed grafana: stop the old unit, copy config,
    provisioning, ssl certs, and data into the cephadm layout, and deploy."""
    daemon_type = 'grafana'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)

    _stop_and_disable(ctx, 'grafana-server')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = '/etc/grafana/grafana.ini'
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/grafana')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    prov_src = '/etc/grafana/provisioning/'
    prov_src = os.path.abspath(ctx.legacy_dir + prov_src)
    prov_dst = os.path.join(data_dir_dst, 'etc/grafana')
    copy_tree(ctx, [prov_src], prov_dst, uid=uid, gid=gid)

    # ssl
    cert = '/etc/grafana/grafana.crt'
    key = '/etc/grafana/grafana.key'
    if os.path.exists(cert) and os.path.exists(key):
        cert_src = '/etc/grafana/grafana.crt'
        cert_src = os.path.abspath(ctx.legacy_dir + cert_src)
        makedirs(os.path.join(data_dir_dst, 'etc/grafana/certs'), uid, gid, 0o755)
        cert_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_file')
        copy_files(ctx, [cert_src], cert_dst, uid=uid, gid=gid)

        key_src = '/etc/grafana/grafana.key'
        key_src = os.path.abspath(ctx.legacy_dir + key_src)
        key_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_key')
        copy_files(ctx, [key_src], key_dst, uid=uid, gid=gid)

        # point the adopted grafana.ini at the relocated cert paths
        _adjust_grafana_ini(os.path.join(config_dst, 'grafana.ini'))
    else:
        logger.debug('Skipping ssl, missing cert {} or key {}'.format(cert, key))

    # data - possible custom dashboards/plugins
    data_src = '/var/lib/grafana/'
    data_src = os.path.abspath(ctx.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)
def command_adopt_alertmanager(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a host-installed alertmanager: stop the old unit, copy its
    config and data into the cephadm layout, and deploy the daemon."""
    daemon_type = 'alertmanager'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)

    _stop_and_disable(ctx, 'prometheus-alertmanager')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = '/etc/prometheus/alertmanager.yml'
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/alertmanager')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # data
    data_src = '/var/lib/prometheus/alertmanager/'
    data_src = os.path.abspath(ctx.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'etc/alertmanager/data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)
7098 def _adjust_grafana_ini(filename
):
7099 # type: (str) -> None
7101 # Update cert_file, cert_key pathnames in server section
7102 # ConfigParser does not preserve comments
7104 with
open(filename
, 'r') as grafana_ini
:
7105 lines
= grafana_ini
.readlines()
7106 with
open('{}.new'.format(filename
), 'w') as grafana_ini
:
7107 server_section
= False
7109 if line
.startswith('['):
7110 server_section
= False
7111 if line
.startswith('[server]'):
7112 server_section
= True
7114 line
= re
.sub(r
'^cert_file.*',
7115 'cert_file = /etc/grafana/certs/cert_file', line
)
7116 line
= re
.sub(r
'^cert_key.*',
7117 'cert_key = /etc/grafana/certs/cert_key', line
)
7118 grafana_ini
.write(line
)
7119 os
.rename('{}.new'.format(filename
), filename
)
7120 except OSError as err
:
7121 raise Error('Cannot update {}: {}'.format(filename
, err
))
def _stop_and_disable(ctx, unit_name):
    # type: (CephadmContext, str) -> None
    """Stop the given systemd unit if running, and disable it if enabled."""
    (enabled, state, _) = check_unit(ctx, unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'disable', unit_name])
7135 ##################################
def command_rm_daemon(ctx):
    # type: (CephadmContext) -> None
    """Remove a single daemon: stop/disable its unit, then delete (or, for
    data-bearing daemons, back up) its data dir and close its fw ports."""
    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    if daemon_type in ['mon', 'osd'] and not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    call(ctx, ['systemctl', 'stop', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'reset-failed', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'disable', unit_name],
         verbosity=CallVerbosity.DEBUG)
    data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_id)
    if daemon_type in ['mon', 'osd', 'prometheus'] and \
       not ctx.force_delete_data:
        # rename it out of the way -- do not delete
        backup_dir = os.path.join(ctx.data_dir, ctx.fsid, 'removed')
        if not os.path.exists(backup_dir):
            makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
        dirname = '%s.%s_%s' % (daemon_type, daemon_id,
                                datetime.datetime.utcnow().strftime(DATEFMT))
        os.rename(data_dir,
                  os.path.join(backup_dir, dirname))
    else:
        call_throws(ctx, ['rm', '-rf', data_dir])

    if 'tcp_ports' in ctx and ctx.tcp_ports is not None:
        ports: List[int] = [int(p) for p in ctx.tcp_ports.split()]
        try:
            fw = Firewalld(ctx)
            fw.close_ports(ports)
            fw.apply_rules()
        except RuntimeError as e:
            # in case we cannot close the ports we will remove
            # the daemon but keep them open.
            logger.warning(f' Error when trying to close ports: {e}')
7182 ##################################
def _zap(ctx: CephadmContext, what: str) -> None:
    """Destroy the given device/LV via `ceph-volume lvm zap --destroy`."""
    mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
    c = get_ceph_volume_container(ctx,
                                  args=['lvm', 'zap', '--destroy', what],
                                  volume_mounts=mounts,
                                  envs=ctx.env)
    logger.info(f'Zapping {what}...')
    out, err, code = call_throws(ctx, c.run_cmd())
def _zap_osds(ctx: CephadmContext) -> None:
    # assume fsid lock already held
    """Zap every device whose LVs all belong to this cluster's fsid; warn
    (without zapping) when only some LVs on a device match."""
    # list
    mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
    c = get_ceph_volume_container(ctx,
                                  args=['inventory', '--format', 'json'],
                                  volume_mounts=mounts,
                                  envs=ctx.env)
    out, err, code = call_throws(ctx, c.run_cmd())
    if code:
        raise Error('failed to list osd inventory')
    try:
        ls = json.loads(out)
    except ValueError as e:
        raise Error(f'Invalid JSON in ceph-volume inventory: {e}')

    for i in ls:
        matches = [lv.get('cluster_fsid') == ctx.fsid and i.get('ceph_device') for lv in i.get('lvs', [])]
        if any(matches) and all(matches):
            _zap(ctx, i.get('path'))
        elif any(matches):
            lv_names = [lv['name'] for lv in i.get('lvs', [])]
            # TODO: we need to map the lv_names back to device paths (the vg
            # id isn't part of the output here!)
            logger.warning(f'Not zapping LVs (not implemented): {lv_names}')
def command_zap_osds(ctx: CephadmContext) -> None:
    """Zap all OSD devices belonging to this cluster (requires --force)."""
    if not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    _zap_osds(ctx)
7234 ##################################
def get_ceph_cluster_count(ctx: CephadmContext) -> int:
    """Count clusters on this host: entries under the data dir whose name
    is a valid fsid."""
    return len([c for c in os.listdir(ctx.data_dir) if is_fsid(c)])
def command_rm_cluster(ctx):
    # type: (CephadmContext) -> None
    """Entry point for ``cephadm rm-cluster``: remove all traces of a cluster.

    Stops/disables systemd units, removes unit files, data, logs, logrotate
    and sysctl config for ``ctx.fsid``; when it was the last cluster on the
    host, also removes the shared ceph.target plumbing and /etc/ceph files.

    NOTE(review): reassembled from garbled fragments; ``continue`` statements,
    the ``valid_fsid`` guard around /etc/ceph removal and the ``os.remove`` /
    ``p.unlink`` calls were restored — verify against upstream history.
    """
    if not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    def disable_systemd_service(unit_name: str) -> None:
        # stop + reset-failed + disable; all best-effort (DEBUG verbosity)
        call(ctx, ['systemctl', 'stop', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'reset-failed', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'disable', unit_name],
             verbosity=CallVerbosity.DEBUG)

    # stop + disable individual daemon units
    for d in list_daemons(ctx, detail=False):
        if d['fsid'] != ctx.fsid:
            continue
        if d['style'] != 'cephadm:v1':
            continue
        disable_systemd_service(get_unit_name(ctx.fsid, d['name']))

    # cluster units
    for unit_name in ['ceph-%s.target' % ctx.fsid]:
        disable_systemd_service(unit_name)

    # systemd escapes '-' as '\x2d' in slice names
    slice_name = 'system-ceph\\x2d{}.slice'.format(ctx.fsid.replace('-', '\\x2d'))
    call(ctx, ['systemctl', 'stop', slice_name],
         verbosity=CallVerbosity.DEBUG)

    # rm units
    call_throws(ctx, ['rm', '-f', ctx.unit_dir
                      + '/ceph-%s@.service' % ctx.fsid])
    call_throws(ctx, ['rm', '-f', ctx.unit_dir
                      + '/ceph-%s.target' % ctx.fsid])
    call_throws(ctx, ['rm', '-rf',
                      ctx.unit_dir + '/ceph-%s.target.wants' % ctx.fsid])

    # rm data
    call_throws(ctx, ['rm', '-rf', ctx.data_dir + '/' + ctx.fsid])

    if not ctx.keep_logs:
        # rm logs
        call_throws(ctx, ['rm', '-rf', ctx.log_dir + '/' + ctx.fsid])
        call_throws(ctx, ['rm', '-rf', ctx.log_dir
                          + '/*.wants/ceph-%s@*' % ctx.fsid])

    # rm logrotate config
    call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/ceph-%s' % ctx.fsid])

    # if last cluster on host remove shared files
    if get_ceph_cluster_count(ctx) == 0:
        disable_systemd_service('ceph.target')

        # rm shared ceph target files
        call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/multi-user.target.wants/ceph.target'])
        call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/ceph.target'])

        # rm cephadm logrotate config
        call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/cephadm'])

        if not ctx.keep_logs:
            # remove all cephadm logs
            for fname in glob(f'{ctx.log_dir}/cephadm.log*'):
                os.remove(fname)

    # rm sysctl settings
    sysctl_dirs: List[Path] = [Path(ctx.sysctl_dir), Path('/usr/lib/sysctl.d')]

    for sysctl_dir in sysctl_dirs:
        for p in sysctl_dir.glob(f'90-ceph-{ctx.fsid}-*.conf'):
            p.unlink()

    # cleanup remaining ceph directories
    ceph_dirs = [f'/run/ceph/{ctx.fsid}', f'/tmp/var/lib/ceph/{ctx.fsid}', f'/var/run/ceph/{ctx.fsid}']
    for dd in ceph_dirs:
        shutil.rmtree(dd, ignore_errors=True)

    # clean up config, keyring, and pub key files
    files = [CEPH_DEFAULT_CONF, CEPH_DEFAULT_PUBKEY, CEPH_DEFAULT_KEYRING]
    if os.path.exists(files[0]):
        valid_fsid = False
        with open(files[0]) as f:
            if ctx.fsid in f.read():
                valid_fsid = True
        if valid_fsid:
            # rm configuration files on /etc/ceph
            for n in range(0, len(files)):
                if os.path.exists(files[n]):
                    os.remove(files[n])
7338 ##################################
def check_time_sync(ctx, enabler=None):
    # type: (CephadmContext, Optional[Packager]) -> bool
    """Return True when any known time-sync systemd unit is active.

    :param enabler: optional Packager used by check_units to enable a unit.

    NOTE(review): reassembled from garbled fragments; the ``units = [...]``
    header and both ``return`` statements were restored.
    """
    units = [
        'chrony.service',  # 18.04 (at least)
        'chronyd.service',  # el / opensuse
        'systemd-timesyncd.service',
        'ntpd.service',  # el7 (at least)
        'ntp.service',  # 18.04 (at least)
        'ntpsec.service',  # 20.04 (at least) / buster
        'openntpd.service',  # ubuntu / debian
    ]
    if not check_units(ctx, units, enabler):
        logger.warning('No time sync service is running; checked for %s' % units)
        return False
    return True
def command_check_host(ctx: CephadmContext) -> None:
    """Entry point for ``cephadm check-host``: verify host prerequisites.

    Checks for a container engine, required binaries, active time sync and
    (optionally) the expected hostname; raises Error listing every failure.

    NOTE(review): reassembled from garbled fragments; the try/except
    scaffolding and ``errors = []`` init were restored.
    """
    errors = []
    commands = ['systemctl', 'lvcreate']

    try:
        engine = check_container_engine(ctx)
        logger.info(f'{engine} is present')
    except Error as e:
        errors.append(str(e))

    for command in commands:
        try:
            find_program(command)
            logger.info('%s is present' % command)
        except ValueError:
            errors.append('%s binary does not appear to be installed' % command)

    # check for configured+running chronyd or ntp
    if not check_time_sync(ctx):
        errors.append('No time synchronization is active')

    if 'expect_hostname' in ctx and ctx.expect_hostname:
        if get_hostname().lower() != ctx.expect_hostname.lower():
            errors.append('hostname "%s" does not match expected hostname "%s"' % (
                get_hostname(), ctx.expect_hostname))
        else:
            logger.info('Hostname "%s" matches what is expected.',
                        ctx.expect_hostname)

    if errors:
        raise Error('\nERROR: '.join(errors))

    logger.info('Host looks OK')
7392 ##################################
def get_ssh_vars(ssh_user: str) -> Tuple[int, int, str]:
    """Resolve uid, gid and ~/.ssh directory for the given ssh user.

    :raises Error: when the user does not exist in the passwd database.

    NOTE(review): the ``try/except KeyError`` around ``pwd.getpwnam`` was
    dropped by garbling but is implied by the surviving ``raise Error`` line.
    """
    try:
        s_pwd = pwd.getpwnam(ssh_user)
    except KeyError:
        raise Error('Cannot find uid/gid for ssh-user: %s' % (ssh_user))

    ssh_uid = s_pwd.pw_uid
    ssh_gid = s_pwd.pw_gid
    ssh_dir = os.path.join(s_pwd.pw_dir, '.ssh')
    return ssh_uid, ssh_gid, ssh_dir
def authorize_ssh_key(ssh_pub_key: str, ssh_user: str) -> bool:
    """Authorize the public key for the provided ssh user.

    :returns: True when the key was appended, False when already present.
    :raises Error: when the key is empty/whitespace.

    NOTE(review): reassembled from garbled fragments; the ``return`` paths,
    the ``add_newline`` bookkeeping and the final ``f.write`` were restored.
    """

    def key_in_file(path: str, key: str) -> bool:
        # True when an exact (stripped) copy of key is already a line of path
        if not os.path.exists(path):
            return False
        with open(path) as f:
            lines = f.readlines()
            for line in lines:
                if line.strip() == key.strip():
                    return True
        return False

    logger.info(f'Adding key to {ssh_user}@localhost authorized_keys...')
    if ssh_pub_key is None or ssh_pub_key.isspace():
        raise Error('Trying to authorize an empty ssh key')

    ssh_pub_key = ssh_pub_key.strip()
    ssh_uid, ssh_gid, ssh_dir = get_ssh_vars(ssh_user)
    if not os.path.exists(ssh_dir):
        makedirs(ssh_dir, ssh_uid, ssh_gid, 0o700)

    auth_keys_file = '%s/authorized_keys' % ssh_dir
    if key_in_file(auth_keys_file, ssh_pub_key):
        logger.info(f'key already in {ssh_user}@localhost authorized_keys...')
        return False

    # remember whether the existing file ends without a newline
    add_newline = False
    if os.path.exists(auth_keys_file):
        with open(auth_keys_file, 'r') as f:
            f.seek(0, os.SEEK_END)
            if f.tell() > 0:
                f.seek(f.tell() - 1, os.SEEK_SET)  # go to last char
                if f.read() != '\n':
                    add_newline = True

    with open(auth_keys_file, 'a') as f:
        os.fchown(f.fileno(), ssh_uid, ssh_gid)  # just in case we created it
        os.fchmod(f.fileno(), 0o600)  # just in case we created it
        if add_newline:
            f.write('\n')
        f.write(ssh_pub_key + '\n')

    return True
def revoke_ssh_key(key: str, ssh_user: str) -> None:
    """Revoke the public key authorization for the ssh user.

    Rewrites authorized_keys into a temp file without the matching line and
    atomically moves it into place; warns when the key was not found.

    NOTE(review): reassembled from garbled fragments; the ``deleted`` flag,
    the copy loop and the final if/else were restored.
    """
    ssh_uid, ssh_gid, ssh_dir = get_ssh_vars(ssh_user)
    auth_keys_file = '%s/authorized_keys' % ssh_dir
    deleted = False
    if os.path.exists(auth_keys_file):
        with open(auth_keys_file, 'r') as f:
            lines = f.readlines()
        _, filename = tempfile.mkstemp()
        with open(filename, 'w') as f:
            os.fchown(f.fileno(), ssh_uid, ssh_gid)
            os.fchmod(f.fileno(), 0o600)  # secure access to the keys file
            for line in lines:
                if line.strip() == key.strip():
                    deleted = True
                else:
                    f.write(line)

    if deleted:
        shutil.move(filename, auth_keys_file)
    else:
        logger.warning('Cannot find the ssh key to be deleted')
def check_ssh_connectivity(ctx: CephadmContext) -> None:
    """Verify passwordless-sudo ssh to localhost works for ctx.ssh_user.

    Uses the user-supplied keys when given, otherwise generates throwaway
    keys; temporarily authorizes the key, attempts an ssh, revokes the key
    again if it was newly added, and raises Error with guidance on failure.

    NOTE(review): reassembled from garbled fragments; branch/return lines and
    the remote command were restored — the exact remote command ('sudo echo')
    is inferred, TODO confirm against upstream.
    """

    def cmd_is_available(cmd: str) -> bool:
        if shutil.which(cmd) is None:
            logger.warning(f'Command not found: {cmd}')
            return False
        return True

    if not cmd_is_available('ssh') or not cmd_is_available('ssh-keygen'):
        logger.warning('Cannot check ssh connectivity. Skipping...')
        return

    logger.info('Verifying ssh connectivity ...')
    if ctx.ssh_private_key and ctx.ssh_public_key:
        # let's use the keys provided by the user
        ssh_priv_key_path = pathify(ctx.ssh_private_key.name)
        ssh_pub_key_path = pathify(ctx.ssh_public_key.name)
    else:
        # no custom keys, let's generate some random keys just for this check
        ssh_priv_key_path = f'/tmp/ssh_key_{uuid.uuid1()}'
        ssh_pub_key_path = f'{ssh_priv_key_path}.pub'
        ssh_key_gen_cmd = ['ssh-keygen', '-q', '-t', 'rsa', '-N', '', '-C', '', '-f', ssh_priv_key_path]
        _, _, code = call(ctx, ssh_key_gen_cmd)
        if code != 0:
            logger.warning('Cannot generate keys to check ssh connectivity.')
            return

    with open(ssh_pub_key_path, 'r') as f:
        key = f.read().strip()
    new_key = authorize_ssh_key(key, ctx.ssh_user)
    ssh_cfg_file_arg = ['-F', pathify(ctx.ssh_config.name)] if ctx.ssh_config else []
    _, _, code = call(ctx, ['ssh', '-o StrictHostKeyChecking=no',
                            *ssh_cfg_file_arg, '-i', ssh_priv_key_path,
                            '-o PasswordAuthentication=no',
                            f'{ctx.ssh_user}@{get_hostname()}',
                            'sudo echo'])

    # we only remove the key if it's a new one. In case the user has provided
    # some already existing key then we don't alter authorized_keys file
    if new_key:
        revoke_ssh_key(key, ctx.ssh_user)

    pub_key_msg = '- The public key file configured by --ssh-public-key is valid\n' if ctx.ssh_public_key else ''
    prv_key_msg = '- The private key file configured by --ssh-private-key is valid\n' if ctx.ssh_private_key else ''
    ssh_cfg_msg = '- The ssh configuration file configured by --ssh-config is valid\n' if ctx.ssh_config else ''
    err_msg = f"""
** Please verify your user's ssh configuration and make sure:
- User {ctx.ssh_user} must have passwordless sudo access
{pub_key_msg}{prv_key_msg}{ssh_cfg_msg}
"""
    if code != 0:
        raise Error(err_msg)
def command_prepare_host(ctx: CephadmContext) -> None:
    """Entry point for ``cephadm prepare-host``: install missing prerequisites.

    Installs podman/docker, lvm2 and chrony as needed (lazily creating a
    Packager), optionally adjusts the hostname, then re-runs check-host.

    NOTE(review): reassembled from garbled fragments; the try/except around
    the engine check and the lazy ``if not pkg`` guards were restored.
    """
    logger.info('Verifying podman|docker is present...')
    pkg = None
    try:
        check_container_engine(ctx)
    except Error as e:
        logger.warning(str(e))
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install_podman()

    logger.info('Verifying lvm2 is present...')
    if not find_executable('lvcreate'):
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install(['lvm2'])

    logger.info('Verifying time synchronization is in place...')
    if not check_time_sync(ctx):
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install(['chrony'])
        # check again, and this time try to enable
        # the service
        check_time_sync(ctx, enabler=pkg)

    if 'expect_hostname' in ctx and ctx.expect_hostname and ctx.expect_hostname != get_hostname():
        logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), ctx.expect_hostname))
        call_throws(ctx, ['hostname', ctx.expect_hostname])
        with open('/etc/hostname', 'w') as f:
            f.write(ctx.expect_hostname + '\n')

    logger.info('Repeating the final host check...')
    command_check_host(ctx)
7566 ##################################
class CustomValidation(argparse.Action):
    """argparse action validating daemon names of the form <type>.<id>.

    NOTE(review): reassembled from garbled fragments; the ``except ValueError``
    around the split was restored.
    """

    def _check_name(self, values: str) -> None:
        # daemon name must split into a known daemon type and an id
        try:
            (daemon_type, daemon_id) = values.split('.', 1)
        except ValueError:
            raise argparse.ArgumentError(self,
                                         'must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com')

        daemons = get_supported_daemons()
        if daemon_type not in daemons:
            raise argparse.ArgumentError(self,
                                         'name must declare the type of daemon e.g. '
                                         '{}'.format(', '.join(daemons)))

    def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace, values: Union[str, Sequence[Any], None],
                 option_string: Optional[str] = None) -> None:
        assert isinstance(values, str)
        if self.dest == 'name':
            self._check_name(values)
        setattr(namespace, self.dest, values)
7591 ##################################
def get_distro():
    # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
    """Parse /etc/os-release into (distro id, version id, codename).

    Values are lower-cased; missing fields come back as None.

    NOTE(review): the ``def`` line, ``distro = None`` init, the
    ``continue``/quote-stripping lines and the ``ID`` branch were dropped by
    garbling and restored here — verify against upstream history.
    """
    distro = None
    distro_version = None
    distro_codename = None
    with open('/etc/os-release', 'r') as f:
        for line in f.readlines():
            line = line.strip()
            if '=' not in line or line.startswith('#'):
                continue
            (var, val) = line.split('=', 1)
            if val[0] == '"' and val[-1] == '"':
                val = val[1:-1]  # strip surrounding quotes
            if var == 'ID':
                distro = val.lower()
            elif var == 'VERSION_ID':
                distro_version = val.lower()
            elif var == 'VERSION_CODENAME':
                distro_codename = val.lower()
    return distro, distro_version, distro_codename
class Packager(object):
    """Base class for distro package managers used to add Ceph repos.

    Exactly one of (stable, version, branch[, commit]) selects the repo
    source; subclasses implement add_repo/rm_repo/install.

    NOTE(review): reassembled from garbled fragments; the ``assert`` keyword,
    ``self.ctx = ctx`` and several else-branches were restored. The shaman
    URL's ``arch`` argument is inferred (upstream may use a get_arch()
    helper) — TODO confirm.
    """

    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str] = None, version: Optional[str] = None,
                 branch: Optional[str] = None, commit: Optional[str] = None):
        # exactly one selection mode (or none at all) may be given
        assert \
            (stable and not version and not branch and not commit) or \
            (not stable and version and not branch and not commit) or \
            (not stable and not version and branch) or \
            (not stable and not version and not branch and not commit)
        self.ctx = ctx
        self.stable = stable
        self.version = version
        self.branch = branch
        self.commit = commit

    def validate(self) -> None:
        """Validate parameters before writing any state to disk."""
        pass

    def add_repo(self) -> None:
        raise NotImplementedError

    def rm_repo(self) -> None:
        raise NotImplementedError

    def install(self, ls: List[str]) -> None:
        raise NotImplementedError

    def install_podman(self) -> None:
        raise NotImplementedError

    def query_shaman(self, distro: str, distro_version: Any, branch: Optional[str], commit: Optional[str]) -> str:
        """Fetch dev-build repo file contents via shaman -> chacra."""
        logger.info('Fetching repo metadata from shaman and chacra...')
        shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
            distro=distro,
            distro_version=distro_version,
            branch=branch,
            sha1=commit or 'latest',
            arch=platform.uname().machine,  # TODO confirm: upstream may use a get_arch() helper
        )
        try:
            shaman_response = urlopen(shaman_url)
        except HTTPError as err:
            logger.error('repository not found in shaman (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, shaman_url))
        chacra_url = ''
        try:
            chacra_url = shaman_response.geturl()
            chacra_response = urlopen(chacra_url)
        except HTTPError as err:
            logger.error('repository not found in chacra (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, chacra_url))
        return chacra_response.read().decode('utf-8')

    def repo_gpgkey(self) -> Tuple[str, str]:
        """Return (gpg key url, key name) for the selected repo source."""
        if self.ctx.gpg_url:
            return self.ctx.gpg_url, 'manual'
        if self.stable or self.version:
            return 'https://download.ceph.com/keys/release.gpg', 'release'
        else:
            return 'https://download.ceph.com/keys/autobuild.gpg', 'autobuild'

    def enable_service(self, service: str) -> None:
        """
        Start and enable the service (typically using systemd).
        """
        call_throws(self.ctx, ['systemctl', 'enable', '--now', service])
class Apt(Packager):
    """Debian/Ubuntu packager: manages /etc/apt ceph repo and installs via apt-get.

    NOTE(review): reassembled from garbled fragments; the DISTRO_NAMES body,
    file writes and if/elif/else branches were restored — verify against
    upstream history.
    """

    # assumes the upstream mapping of os-release IDs — TODO confirm
    DISTRO_NAMES = {
        'ubuntu': 'ubuntu',
        'debian': 'debian',
    }

    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
                 distro: Optional[str], distro_version: Optional[str], distro_codename: Optional[str]) -> None:
        super(Apt, self).__init__(ctx, stable=stable, version=version,
                                  branch=branch, commit=commit)
        assert distro
        self.ctx = ctx
        self.distro = self.DISTRO_NAMES[distro]
        self.distro_codename = distro_codename
        self.distro_version = distro_version

    def repo_path(self) -> str:
        return '/etc/apt/sources.list.d/ceph.list'

    def add_repo(self) -> None:
        """Install the repo GPG key and write the ceph apt source file."""
        url, name = self.repo_gpgkey()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read()
        with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'wb') as f:
            f.write(key)

        if self.version:
            content = 'deb %s/debian-%s/ %s main\n' % (
                self.ctx.repo_url, self.version, self.distro_codename)
        elif self.stable:
            content = 'deb %s/debian-%s/ %s main\n' % (
                self.ctx.repo_url, self.stable, self.distro_codename)
        else:
            content = self.query_shaman(self.distro, self.distro_codename, self.branch,
                                        self.commit)

        logger.info('Installing repo file at %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        self.update()

    def rm_repo(self) -> None:
        """Remove any installed ceph GPG keys and the apt source file."""
        for name in ['autobuild', 'release', 'manual']:
            p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
            if os.path.exists(p):
                logger.info('Removing repo GPG key %s...' % p)
                os.unlink(p)
        if os.path.exists(self.repo_path()):
            logger.info('Removing repo at %s...' % self.repo_path())
            os.unlink(self.repo_path())

        if self.distro == 'ubuntu':
            self.rm_kubic_repo()

    def install(self, ls: List[str]) -> None:
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, ['apt-get', 'install', '-y'] + ls)

    def update(self) -> None:
        logger.info('Updating package list...')
        call_throws(self.ctx, ['apt-get', 'update'])

    def install_podman(self) -> None:
        # podman is not in older Ubuntu archives; try the kubic repo first,
        # fall back to docker.io on failure
        if self.distro == 'ubuntu':
            logger.info('Setting up repo for podman...')
            self.add_kubic_repo()
            self.update()

        logger.info('Attempting podman install...')
        try:
            self.install(['podman'])
        except Error:
            logger.info('Podman did not work. Falling back to docker...')
            self.install(['docker.io'])

    def kubic_repo_url(self) -> str:
        return 'https://download.opensuse.org/repositories/devel:/kubic:/' \
               'libcontainers:/stable/xUbuntu_%s/' % self.distro_version

    def kubic_repo_path(self) -> str:
        return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list'

    def kubric_repo_gpgkey_url(self) -> str:
        return '%s/Release.key' % self.kubic_repo_url()

    def kubric_repo_gpgkey_path(self) -> str:
        return '/etc/apt/trusted.gpg.d/kubic.release.gpg'

    def add_kubic_repo(self) -> None:
        """Install the kubic (podman) repo GPG key and apt source file."""
        url = self.kubric_repo_gpgkey_url()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        tmp_key = write_tmp(key, 0, 0)
        keyring = self.kubric_repo_gpgkey_path()
        call_throws(self.ctx, ['apt-key', '--keyring', keyring, 'add', tmp_key.name])

        logger.info('Installing repo file at %s...' % self.kubic_repo_path())
        content = 'deb %s /\n' % self.kubic_repo_url()
        with open(self.kubic_repo_path(), 'w') as f:
            f.write(content)

    def rm_kubic_repo(self) -> None:
        keyring = self.kubric_repo_gpgkey_path()
        if os.path.exists(keyring):
            logger.info('Removing repo GPG key %s...' % keyring)
            os.unlink(keyring)

        p = self.kubic_repo_path()
        if os.path.exists(p):
            logger.info('Removing repo at %s...' % p)
            os.unlink(p)
class YumDnf(Packager):
    """RPM-family packager (yum/dnf/tdnf) for el/fedora/mariner distros.

    NOTE(review): reassembled from garbled fragments; the tool-selection
    branches, ``tmpl = (`` assignment and write calls were restored.
    """

    DISTRO_NAMES = {
        'centos': ('centos', 'el'),
        'rhel': ('centos', 'el'),
        'scientific': ('centos', 'el'),
        'rocky': ('centos', 'el'),
        'almalinux': ('centos', 'el'),
        'ol': ('centos', 'el'),
        'fedora': ('fedora', 'fc'),
        'mariner': ('mariner', 'cm'),
    }

    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
                 distro: Optional[str], distro_version: Optional[str]) -> None:
        super(YumDnf, self).__init__(ctx, stable=stable, version=version,
                                     branch=branch, commit=commit)
        assert distro
        assert distro_version
        self.ctx = ctx
        self.major = int(distro_version.split('.')[0])
        self.distro_normalized = self.DISTRO_NAMES[distro][0]
        self.distro_code = self.DISTRO_NAMES[distro][1] + str(self.major)
        # newer Fedora/EL use dnf, mariner uses tdnf, everything else yum
        if (self.distro_code == 'fc' and self.major >= 30) or \
           (self.distro_code == 'el' and self.major >= 8):
            self.tool = 'dnf'
        elif (self.distro_code == 'cm'):
            self.tool = 'tdnf'
        else:
            self.tool = 'yum'

    def custom_repo(self, **kw: Any) -> str:
        """
        Repo files need special care in that a whole line should not be present
        if there is no value for it. Because we were using `format()` we could
        not conditionally add a line for a repo file. So the end result would
        contain a key with a missing value (say if we were passing `None`),
        which breaks. This function allows us to conditionally add lines,
        preserving an order and be more careful.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self) -> str:
        return '/etc/yum.repos.d/ceph.repo'

    def repo_baseurl(self) -> str:
        assert self.stable or self.version
        if self.version:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.version,
                                     self.distro_code)
        else:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.stable,
                                     self.distro_code)

    def validate(self) -> None:
        """Reject unsupported distro/release combos; probe the repo URL."""
        if self.distro_code.startswith('fc'):
            raise Error('Ceph team does not build Fedora specific packages and therefore cannot add repos for this distro')
        if self.distro_code == 'el7':
            if self.stable and self.stable >= 'pacific':
                raise Error('Ceph does not support pacific or later for this version of this linux distro and therefore cannot add a repo for it')
            if self.version and self.version.split('.')[0] >= '16':
                raise Error('Ceph does not support 16.y.z or later for this version of this linux distro and therefore cannot add a repo for it')

        if self.stable or self.version:
            # we know that yum & dnf require there to be a
            # $base_url/$arch/repodata/repomd.xml so we can test if this URL
            # is gettable in order to validate the inputs
            test_url = self.repo_baseurl() + '/noarch/repodata/repomd.xml'
            try:
                urlopen(test_url)
            except HTTPError as err:
                logger.error('unable to fetch repo metadata: %r', err)
                raise Error('failed to fetch repository metadata. please check'
                            ' the provided parameters are correct and try again')

    def add_repo(self) -> None:
        """Write /etc/yum.repos.d/ceph.repo and enable EPEL on EL distros."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro_normalized, self.major,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        if self.distro_code.startswith('el'):
            logger.info('Enabling EPEL...')
            call_throws(self.ctx, [self.tool, 'install', '-y', 'epel-release'])

    def rm_repo(self) -> None:
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls: List[str]) -> None:
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, [self.tool, 'install', '-y'] + ls)

    def install_podman(self) -> None:
        self.install(['podman'])
class Zypper(Packager):
    """openSUSE/SLES packager using zypper.

    NOTE(review): reassembled from garbled fragments. Also fixes a visible
    defect: ``repo_baseurl`` used ``self.stable`` in BOTH branches even when
    ``self.version`` was set; the version branch now uses ``self.version``,
    mirroring ``YumDnf.repo_baseurl``.
    """

    DISTRO_NAMES = [
        'sles',  # assumed from upstream — TODO confirm
        'opensuse-tumbleweed',
        'opensuse-leap',
    ]

    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
                 distro: Optional[str], distro_version: Optional[str]) -> None:
        super(Zypper, self).__init__(ctx, stable=stable, version=version,
                                     branch=branch, commit=commit)
        assert distro is not None
        self.ctx = ctx
        self.tool = 'zypper'
        self.distro = 'opensuse'
        self.distro_version = '15.1'
        if 'tumbleweed' not in distro and distro_version is not None:
            self.distro_version = distro_version

    def custom_repo(self, **kw: Any) -> str:
        """
        See YumDnf for format explanation.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self) -> str:
        return '/etc/zypp/repos.d/ceph.repo'

    def repo_baseurl(self) -> str:
        assert self.stable or self.version
        if self.version:
            # bug fix: was self.stable here, which is None when a version
            # was requested
            return '%s/rpm-%s/%s' % (self.ctx.repo_url,
                                     self.version, self.distro)
        else:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url,
                                     self.stable, self.distro)

    def add_repo(self) -> None:
        """Write /etc/zypp/repos.d/ceph.repo for the selected source."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro, self.distro_version,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self) -> None:
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls: List[str]) -> None:
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, [self.tool, 'in', '-y'] + ls)

    def install_podman(self) -> None:
        self.install(['podman'])
def create_packager(ctx: CephadmContext,
                    stable: Optional[str] = None, version: Optional[str] = None,
                    branch: Optional[str] = None, commit: Optional[str] = None) -> Packager:
    """Return the Packager subclass matching the host's distro.

    :raises Error: when the detected distro is not supported.
    """
    distro, distro_version, distro_codename = get_distro()
    if distro in YumDnf.DISTRO_NAMES:
        return YumDnf(ctx, stable=stable, version=version,
                      branch=branch, commit=commit,
                      distro=distro, distro_version=distro_version)
    elif distro in Apt.DISTRO_NAMES:
        return Apt(ctx, stable=stable, version=version,
                   branch=branch, commit=commit,
                   distro=distro, distro_version=distro_version,
                   distro_codename=distro_codename)
    elif distro in Zypper.DISTRO_NAMES:
        return Zypper(ctx, stable=stable, version=version,
                      branch=branch, commit=commit,
                      distro=distro, distro_version=distro_version)
    raise Error('Distro %s version %s not supported' % (distro, distro_version))
def command_add_repo(ctx: CephadmContext) -> None:
    """Entry point for ``cephadm add-repo``: configure a Ceph package repo.

    Exactly one of --release/--version/--dev/--dev-commit must be supplied.

    NOTE(review): reassembled from garbled fragments; the try/except around
    the version split and the ``pkg.add_repo()`` call were restored.
    """
    if ctx.version and ctx.release:
        raise Error('you can specify either --release or --version but not both')
    if not ctx.version and not ctx.release and not ctx.dev and not ctx.dev_commit:
        raise Error('please supply a --release, --version, --dev or --dev-commit argument')
    if ctx.version:
        try:
            (x, y, z) = ctx.version.split('.')
        except Exception:
            raise Error('version must be in the form x.y.z (e.g., 15.2.0)')
    if ctx.release:
        # Pacific =/= pacific in this case, set to undercase to avoid confusion
        ctx.release = ctx.release.lower()

    pkg = create_packager(ctx, stable=ctx.release,
                          version=ctx.version,
                          branch=ctx.dev,
                          commit=ctx.dev_commit)
    pkg.validate()
    pkg.add_repo()
    logger.info('Completed adding repo.')
def command_rm_repo(ctx: CephadmContext) -> None:
    """Entry point for ``cephadm rm-repo``: remove the configured Ceph repo.

    NOTE(review): the ``pkg.rm_repo()`` call was dropped by garbling and is
    restored here — it is the method every Packager subclass implements for
    exactly this purpose.
    """
    pkg = create_packager(ctx)
    pkg.rm_repo()
def command_install(ctx: CephadmContext) -> None:
    """Entry point for ``cephadm install``: install the requested packages."""
    pkg = create_packager(ctx)
    pkg.install(ctx.packages)
def command_rescan_disks(ctx: CephadmContext) -> str:
    """Entry point for ``cephadm rescan-disks``: ask SCSI HBAs to rescan.

    Skips incompatible adapters (unknown / usb-storage), probes the rest
    concurrently, and returns a human-readable summary string.

    NOTE(review): reassembled from garbled fragments; the list inits,
    ``start = time.time()``, ``continue`` and the if/else around failures
    were restored.
    """

    def probe_hba(scan_path: str) -> None:
        """Tell the adapter to rescan"""
        with open(scan_path, 'w') as f:
            f.write('- - -')

    cmd = ctx.func.__name__.replace('command_', '')
    logger.info(f'{cmd}: starting')
    start = time.time()

    all_scan_files = glob('/sys/class/scsi_host/*/scan')
    scan_files = []
    skipped = []
    for scan_path in all_scan_files:
        adapter_name = os.path.basename(os.path.dirname(scan_path))
        proc_name = read_file([os.path.join(os.path.dirname(scan_path), 'proc_name')])
        if proc_name in ['unknown', 'usb-storage']:
            skipped.append(os.path.basename(scan_path))
            logger.info(f'{cmd}: rescan skipping incompatible host adapter {adapter_name} : {proc_name}')
            continue

        scan_files.append(scan_path)

    if not scan_files:
        logger.info(f'{cmd}: no compatible HBAs found')
        return 'Ok. No compatible HBAs found'

    responses = async_run(concurrent_tasks(probe_hba, scan_files))
    failures = [r for r in responses if r]

    logger.info(f'{cmd}: Complete. {len(scan_files)} adapters rescanned, {len(failures)} failures, {len(skipped)} skipped')

    elapsed = time.time() - start
    if failures:
        plural = 's' if len(failures) > 1 else ''
        if len(failures) == len(scan_files):
            return f'Failed. All {len(scan_files)} rescan requests failed'
        else:
            return f'Partial. {len(scan_files) - len(failures)} successful, {len(failures)} failure{plural} against: {", ".join(failures)}'

    return f'Ok. {len(all_scan_files)} adapters detected: {len(scan_files)} rescanned, {len(skipped)} skipped, {len(failures)} failed ({elapsed:.2f}s)'
8169 ##################################
def get_ipv4_address(ifname):
    # type: (str) -> str
    """Return 'addr/prefixlen' for the interface's IPv4 address, or ''.

    Uses SIOCGIFADDR / SIOCGIFNETMASK ioctls on an AF_INET datagram socket.

    NOTE(review): the ``fcntl.ioctl`` call inside ``_extract`` and the
    OSError fallback were dropped by garbling and restored here; the
    ``[20:24]`` slice is the address field of the returned ifreq struct.
    """
    import fcntl  # local import: top-of-file import not visible in this chunk

    def _extract(sock: socket.socket, offset: int) -> str:
        return socket.inet_ntop(
            socket.AF_INET,
            fcntl.ioctl(
                sock.fileno(),
                offset,
                struct.pack('256s', bytes(ifname[:15], 'utf-8'))
            )[20:24])

    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        addr = _extract(s, 35093)  # '0x8915' = SIOCGIFADDR
        dq_mask = _extract(s, 35099)  # 0x891b = SIOCGIFNETMASK
    except OSError:
        # interface does not have an ipv4 address
        return ''

    # convert dotted-quad netmask to a prefix length by counting set bits
    dec_mask = sum([bin(int(i)).count('1')
                    for i in dq_mask.split('.')])
    return '{}/{}'.format(addr, dec_mask)
def get_ipv6_address(ifname):
    # type: (str) -> str
    """Return 'addr/prefixlen' for the interface's IPv6 address, or ''.

    NOTE(review): reassembled from garbled fragments; ``ipv6_raw = field[0]``
    and the final fallthrough ``return ''`` were restored.
    """
    if not os.path.exists('/proc/net/if_inet6'):
        return ''

    raw = read_file(['/proc/net/if_inet6'])
    data = raw.splitlines()
    # based on docs @ https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/ch11s04.html
    # field 0 is ipv6, field 2 is scope
    for iface_setting in data:
        field = iface_setting.split()
        if field[-1] == ifname:
            ipv6_raw = field[0]
            # insert ':' every 4 hex chars to build a parseable address
            ipv6_fmtd = ':'.join([ipv6_raw[_p:_p + 4] for _p in range(0, len(field[0]), 4)])
            # apply naming rules using ipaddress module
            ipv6 = ipaddress.ip_address(ipv6_fmtd)
            return '{}/{}'.format(str(ipv6), int('0x{}'.format(field[2]), 16))
    return ''
def bytes_to_human(num, mode='decimal'):
    # type: (float, str) -> str
    """Convert a bytes value into its human-readable form.

    :param num: number, in bytes, to convert
    :param mode: Either decimal (default) or binary to determine divisor
    :returns: string representing the bytes value in a more readable format
    """
    unit_list = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
    divisor = 1000.0
    yotta = 'YB'

    if mode == 'binary':
        unit_list = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
        divisor = 1024.0
        yotta = 'YiB'

    for unit in unit_list:
        if abs(num) < divisor:
            return '%3.1f%s' % (num, unit)
        num /= divisor

    # beyond the largest listed unit, fall back to yotta
    return '%.1f%s' % (num, yotta)
def read_file(path_list, file_name=''):
    # type: (List[str], str) -> str
    """Returns the content of the first file found within the `path_list`

    :param path_list: list of file paths to search
    :param file_name: optional file_name to be applied to a file path
    :returns: content of the file or 'Unknown'
    """
    for path in path_list:
        if file_name:
            file_path = os.path.join(path, file_name)
        else:
            file_path = path
        if os.path.exists(file_path):
            with open(file_path, 'r') as f:
                try:
                    content = f.read().strip()
                except OSError:
                    # sysfs may populate the file, but for devices like
                    # virtio reads can fail
                    return 'Unknown'
                else:
                    return content
    return 'Unknown'
8265 ##################################
8269 _dmi_path_list
= ['/sys/class/dmi/id']
8270 _nic_path_list
= ['/sys/class/net']
8271 _apparmor_path_list
= ['/etc/apparmor']
8272 _disk_vendor_workarounds
= {
8273 '0x1af4': 'Virtio Block Device'
8275 _excluded_block_devices
= ('sr', 'zram', 'dm-')
8277 def __init__(self
, ctx
: CephadmContext
):
8278 self
.ctx
: CephadmContext
= ctx
8279 self
.cpu_model
: str = 'Unknown'
8280 self
.sysctl_options
: Dict
[str, str] = self
._populate
_sysctl
_options
()
8281 self
.cpu_count
: int = 0
8282 self
.cpu_cores
: int = 0
8283 self
.cpu_threads
: int = 0
8284 self
.interfaces
: Dict
[str, Any
] = {}
8286 self
._meminfo
: List
[str] = read_file(['/proc/meminfo']).splitlines()
8288 self
._process
_nics
()
8289 self
.arch
: str = platform
.processor()
8290 self
.kernel
: str = platform
.release()
8292 def _populate_sysctl_options(self
) -> Dict
[str, str]:
8294 out
, _
, _
= call_throws(self
.ctx
, ['sysctl', '-a'], verbosity
=CallVerbosity
.QUIET_UNLESS_ERROR
)
8296 for line
in out
.splitlines():
8297 option
, value
= line
.split('=')
8298 sysctl_options
[option
.strip()] = value
.strip()
8299 return sysctl_options
def _get_cpuinfo(self):
    # type: () -> None
    """Determine cpu information via /proc/cpuinfo"""
    raw = read_file(['/proc/cpuinfo'])
    output = raw.splitlines()
    # distinct 'physical id' values == number of physical CPU packages
    cpu_set = set()

    for line in output:
        field = [f.strip() for f in line.split(':')]
        if 'model name' in line:
            self.cpu_model = field[1]
        if 'physical id' in line:
            cpu_set.add(field[1])
        if 'siblings' in line:
            self.cpu_threads = int(field[1].strip())
        if 'cpu cores' in line:
            self.cpu_cores = int(field[1].strip())

    self.cpu_count = len(cpu_set)
def _get_block_devs(self):
    # type: () -> List[str]
    """Determine the list of block devices by looking at /sys/block"""
    excluded = HostFacts._excluded_block_devices
    devices = os.listdir('/sys/block')
    return [device for device in devices if not device.startswith(excluded)]
def _get_devs_by_type(self, rota='0'):
    # type: (str) -> List[str]
    """Filter block devices by a given rotational attribute (0=flash, 1=spinner)"""
    devs = list()
    for blk_dev in self._get_block_devs():
        rot_path = '/sys/block/{}/queue/rotational'.format(blk_dev)
        rot_value = read_file([rot_path])
        if rot_value == rota:
            devs.append(blk_dev)
    return devs
@property
def operating_system(self):
    # type: () -> str
    """Determine OS version"""
    raw_info = read_file(['/etc/os-release'])
    os_release = raw_info.splitlines()
    rel_str = 'Unknown'
    rel_dict = dict()

    for line in os_release:
        if '=' in line:
            # split on the first '=' only; values may contain '='
            var_name, var_value = line.split('=', 1)
            rel_dict[var_name] = var_value.strip('"')

    # Would normally use PRETTY_NAME, but NAME and VERSION are more
    # consistent across distributions
    if all(_v in rel_dict for _v in ['NAME', 'VERSION']):
        rel_str = '{} {}'.format(rel_dict['NAME'], rel_dict['VERSION'])
    return rel_str
@property
def hostname(self):
    # type: () -> str
    """Return the hostname"""
    return platform.node()
@property
def subscribed(self):
    # type: () -> str
    """Highlevel check to see if the host is subscribed to receive updates/support"""
    def _red_hat():
        # type: () -> str
        # RHEL hosts place entitlement certificates here when registered;
        # a registered host holds at least a cert + key pair.
        entitlements_dir = '/etc/pki/entitlement'
        if os.path.exists(entitlements_dir):
            pems = glob('{}/*.pem'.format(entitlements_dir))
            if len(pems) >= 2:
                return 'Yes'
        return 'No'

    os_name = self.operating_system
    if os_name.upper().startswith('RED HAT'):
        return _red_hat()

    # no subscription model known for other distributions
    return 'Unknown'
@property
def hdd_count(self):
    # type: () -> int
    """Return a count of HDDs (spinners)"""
    return len(self._get_devs_by_type(rota='1'))
def _get_capacity(self, dev):
    # type: (str) -> int
    """Determine the size of a given device

    The kernel always bases device size calculations based on a 512 byte
    sector. For more information see
    https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/include/linux/types.h?h=v5.15.63#n120
    """
    sysfs_size = os.path.join('/sys/block', dev, 'size')
    sector_count = int(read_file([sysfs_size]))
    return sector_count * 512
def _get_capacity_by_type(self, rota='0'):
    # type: (str) -> int
    """Return the total capacity of a category of device (flash or hdd)"""
    devs = self._get_devs_by_type(rota=rota)
    capacity = 0
    for dev in devs:
        capacity += self._get_capacity(dev)
    return capacity
def _dev_list(self, dev_list):
    # type: (List[str]) -> List[Dict[str, object]]
    """Return a 'pretty' name list for each device in the `dev_list`"""
    disk_list = list()

    for dev in dev_list:
        disk_model = read_file(['/sys/block/{}/device/model'.format(dev)]).strip()
        disk_rev = read_file(['/sys/block/{}/device/rev'.format(dev)]).strip()
        disk_wwid = read_file(['/sys/block/{}/device/wwid'.format(dev)]).strip()
        vendor = read_file(['/sys/block/{}/device/vendor'.format(dev)]).strip()
        # translate opaque virtual vendor ids (e.g. virtio) to readable names
        disk_vendor = HostFacts._disk_vendor_workarounds.get(vendor, vendor)
        disk_size_bytes = self._get_capacity(dev)
        disk = {
            'description': '{} {} ({})'.format(disk_vendor, disk_model, bytes_to_human(disk_size_bytes)),
            'vendor': disk_vendor,
            'model': disk_model,
            'rev': disk_rev,
            'wwid': disk_wwid,
            'dev_name': dev,
            'disk_size_bytes': disk_size_bytes,
        }
        disk_list.append(disk)
    return disk_list
@property
def hdd_list(self):
    # type: () -> List[Dict[str, object]]
    """Return a list of devices that are HDDs (spinners)"""
    devs = self._get_devs_by_type(rota='1')
    return self._dev_list(devs)
@property
def flash_list(self):
    # type: () -> List[Dict[str, object]]
    """Return a list of devices that are flash based (SSD, NVMe)"""
    devs = self._get_devs_by_type(rota='0')
    return self._dev_list(devs)
@property
def hdd_capacity_bytes(self):
    # type: () -> int
    """Return the total capacity for all HDD devices (bytes)"""
    return self._get_capacity_by_type(rota='1')
@property
def hdd_capacity(self):
    # type: () -> str
    """Return the total capacity for all HDD devices (human readable format)"""
    return bytes_to_human(self.hdd_capacity_bytes)
@property
def cpu_load(self):
    # type: () -> Dict[str, float]
    """Return the cpu load average data for the host"""
    raw = read_file(['/proc/loadavg']).strip()
    data = raw.split()
    return {
        '1min': float(data[0]),
        '5min': float(data[1]),
        '15min': float(data[2]),
    }
@property
def flash_count(self):
    # type: () -> int
    """Return the number of flash devices in the system (SSD, NVMe)"""
    return len(self._get_devs_by_type(rota='0'))
@property
def flash_capacity_bytes(self):
    # type: () -> int
    """Return the total capacity for all flash devices (bytes)"""
    return self._get_capacity_by_type(rota='0')
@property
def flash_capacity(self):
    # type: () -> str
    """Return the total capacity for all Flash devices (human readable format)"""
    return bytes_to_human(self.flash_capacity_bytes)
def _process_nics(self):
    # type: () -> None
    """Look at the NIC devices and extract network related metadata"""
    # from https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
    hw_lookup = {
        '1': 'ethernet',
        '32': 'infiniband',
        '772': 'loopback',
    }

    for nic_path in HostFacts._nic_path_list:
        if not os.path.exists(nic_path):
            continue
        for iface in os.listdir(nic_path):

            if os.path.exists(os.path.join(nic_path, iface, 'bridge')):
                nic_type = 'bridge'
            elif os.path.exists(os.path.join(nic_path, iface, 'bonding')):
                nic_type = 'bonding'
            else:
                nic_type = hw_lookup.get(read_file([os.path.join(nic_path, iface, 'type')]), 'Unknown')

            if nic_type == 'loopback':  # skip loopback devices
                continue

            lower_devs_list = [os.path.basename(link.replace('lower_', '')) for link in glob(os.path.join(nic_path, iface, 'lower_*'))]
            upper_devs_list = [os.path.basename(link.replace('upper_', '')) for link in glob(os.path.join(nic_path, iface, 'upper_*'))]

            try:
                mtu = int(read_file([os.path.join(nic_path, iface, 'mtu')]))
            except ValueError:
                # read_file returns 'Unknown' when the read fails
                mtu = 0

            operstate = read_file([os.path.join(nic_path, iface, 'operstate')])
            try:
                speed = int(read_file([os.path.join(nic_path, iface, 'speed')]))
            except (OSError, ValueError):
                # OSError : device doesn't support the ethtool get_link_ksettings
                # ValueError : raised when the read fails, and returns Unknown
                #
                # Either way, we show a -1 when speed isn't available
                speed = -1

            dev_link = os.path.join(nic_path, iface, 'device')
            if os.path.exists(dev_link):
                iftype = 'physical'
                driver_path = os.path.join(dev_link, 'driver')
                if os.path.exists(driver_path):
                    driver = os.path.basename(os.path.realpath(driver_path))
                else:
                    driver = 'Unknown'
            else:
                iftype = 'logical'
                driver = ''

            self.interfaces[iface] = {
                'mtu': mtu,
                'upper_devs_list': upper_devs_list,
                'lower_devs_list': lower_devs_list,
                'operstate': operstate,
                'iftype': iftype,
                'nic_type': nic_type,
                'driver': driver,
                'speed': speed,
                'ipv4_address': get_ipv4_address(iface),
                'ipv6_address': get_ipv6_address(iface),
            }
@property
def nic_count(self):
    # type: () -> int
    """Return a total count of all physical NICs detected in the host"""
    phys_devs = []
    for iface in self.interfaces:
        if self.interfaces[iface]['iftype'] == 'physical':
            phys_devs.append(iface)
    return len(phys_devs)
def _get_mem_data(self, field_name):
    # type: (str) -> int
    """Return the kB value of `field_name` from the cached /proc/meminfo."""
    for line in self._meminfo:
        if line.startswith(field_name):
            # meminfo lines look like 'MemTotal:       16330856 kB'
            _d = line.split()
            return int(_d[1])
    # field not present in this kernel's meminfo
    return 0
@property
def memory_total_kb(self):
    # type: () -> int
    """Determine the memory installed (kb)"""
    return self._get_mem_data('MemTotal')
@property
def memory_free_kb(self):
    # type: () -> int
    """Determine the memory free (not cache, immediately usable)"""
    return self._get_mem_data('MemFree')
@property
def memory_available_kb(self):
    # type: () -> int
    """Determine the memory available to new applications without swapping"""
    return self._get_mem_data('MemAvailable')
@property
def vendor(self):
    # type: () -> str
    """Determine server vendor from DMI data in sysfs"""
    return read_file(HostFacts._dmi_path_list, 'sys_vendor')
@property
def model(self):
    # type: () -> str
    """Determine server model information from DMI data in sysfs"""
    family = read_file(HostFacts._dmi_path_list, 'product_family')
    product = read_file(HostFacts._dmi_path_list, 'product_name')
    if family == 'Unknown' and product:
        return '{}'.format(product)

    return '{} ({})'.format(family, product)
@property
def bios_version(self):
    # type: () -> str
    """Determine server BIOS version from  DMI data in sysfs"""
    return read_file(HostFacts._dmi_path_list, 'bios_version')
@property
def bios_date(self):
    # type: () -> str
    """Determine server BIOS date from  DMI data in sysfs"""
    return read_file(HostFacts._dmi_path_list, 'bios_date')
@property
def timestamp(self):
    # type: () -> float
    """Return the current time as Epoch seconds"""
    return time.time()
@property
def system_uptime(self):
    # type: () -> float
    """Return the system uptime (in secs)"""
    raw_time = read_file(['/proc/uptime'])
    # /proc/uptime is '<uptime> <idle-time>'; only the first field is needed
    up_secs, _ = raw_time.split()
    return float(up_secs)
@property
def kernel_security(self):
    # type: () -> Dict[str, str]
    """Determine the security features enabled in the kernel - SELinux, AppArmor"""
    def _fetch_selinux() -> Dict[str, str]:
        """Get the selinux status"""
        security = {}
        try:
            out, err, code = call(self.ctx, ['sestatus'],
                                  verbosity=CallVerbosity.QUIET)
            security['type'] = 'SELinux'
            status, mode, policy = '', '', ''
            for line in out.split('\n'):
                if line.startswith('SELinux status:'):
                    k, v = line.split(':')
                    status = v.strip()
                elif line.startswith('Current mode:'):
                    k, v = line.split(':')
                    mode = v.strip()
                elif line.startswith('Loaded policy name:'):
                    k, v = line.split(':')
                    policy = v.strip()
            if status == 'disabled':
                security['description'] = 'SELinux: Disabled'
            else:
                security['description'] = 'SELinux: Enabled({}, {})'.format(mode, policy)
        except Exception as e:
            logger.info('unable to get selinux status: %s' % e)
        return security

    def _fetch_apparmor() -> Dict[str, str]:
        """Read the apparmor profiles directly, returning an overview of AppArmor status"""
        security = {}
        for apparmor_path in HostFacts._apparmor_path_list:
            if os.path.exists(apparmor_path):
                security['type'] = 'AppArmor'
                security['description'] = 'AppArmor: Enabled'
                try:
                    profiles = read_file(['/sys/kernel/security/apparmor/profiles'])
                    if len(profiles) == 0:
                        return {}
                except OSError:
                    pass
                else:
                    summary = {}  # type: Dict[str, int]
                    for line in profiles.split('\n'):
                        if not line:
                            continue
                        # profile names may contain spaces; the mode is
                        # always the last space-separated token, e.g.
                        # '/usr/bin/foo (enforce)'
                        item, mode = line.rsplit(' ', 1)
                        mode = mode.strip('()')
                        # count each profile per mode (start at 1, not 0)
                        summary[mode] = summary.get(mode, 0) + 1
                    summary_str = ','.join(['{} {}'.format(v, k) for k, v in summary.items()])
                    security = {**security, **summary}  # type: ignore
                    security['description'] += '({})'.format(summary_str)

                return security
        return {}

    ret = {}
    if os.path.exists('/sys/kernel/security/lsm'):
        lsm = read_file(['/sys/kernel/security/lsm']).strip()
        if 'selinux' in lsm:
            ret = _fetch_selinux()
        elif 'apparmor' in lsm:
            ret = _fetch_apparmor()
        else:
            return {
                'type': 'Unknown',
                'description': 'Linux Security Module framework is active, but is not using SELinux or AppArmor'
            }

    if ret:
        return ret

    return {
        'type': 'None',
        'description': 'Linux Security Module framework is not available'
    }
@property
def selinux_enabled(self) -> bool:
    """True when SELinux is the active LSM and not in disabled state."""
    return (self.kernel_security['type'] == 'SELinux') and \
        (self.kernel_security['description'] != 'SELinux: Disabled')
@property
def kernel_parameters(self):
    # type: () -> Dict[str, str]
    """Get kernel parameters required/used in Ceph clusters"""

    k_param = {}
    out, _, _ = call_throws(self.ctx, ['sysctl', '-a'], verbosity=CallVerbosity.SILENT)
    if out:
        param_list = out.split('\n')
        param_dict = {param.split(' = ')[0]: param.split(' = ')[-1] for param in param_list}

        # return only desired parameters
        if 'net.ipv4.ip_nonlocal_bind' in param_dict:
            k_param['net.ipv4.ip_nonlocal_bind'] = param_dict['net.ipv4.ip_nonlocal_bind']

    return k_param
8742 def _process_net_data(tcp_file
: str, protocol
: str = 'tcp') -> List
[int]:
8743 listening_ports
= []
8744 # Connections state documentation
8745 # tcp - https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/net/tcp_states.h
8746 # udp - uses 07 (TCP_CLOSE or UNCONN, since udp is stateless. test with netcat -ul <port>)
8752 if protocol
not in listening_state
.keys():
8755 if os
.path
.exists(tcp_file
):
8756 with
open(tcp_file
) as f
:
8757 tcp_data
= f
.readlines()[1:]
8759 for con
in tcp_data
:
8760 con_info
= con
.strip().split()
8761 if con_info
[3] == listening_state
[protocol
]:
8762 local_port
= int(con_info
[1].split(':')[1], 16)
8763 listening_ports
.append(local_port
)
8765 return listening_ports
@property
def tcp_ports_used(self) -> List[int]:
    """Return IPv4 TCP ports currently in LISTEN state."""
    return HostFacts._process_net_data('/proc/net/tcp')
@property
def tcp6_ports_used(self) -> List[int]:
    """Return IPv6 TCP ports currently in LISTEN state."""
    return HostFacts._process_net_data('/proc/net/tcp6')
@property
def udp_ports_used(self) -> List[int]:
    """Return IPv4 UDP ports currently bound (unconnected state)."""
    return HostFacts._process_net_data('/proc/net/udp', 'udp')
@property
def udp6_ports_used(self) -> List[int]:
    """Return IPv6 UDP ports currently bound (unconnected state)."""
    return HostFacts._process_net_data('/proc/net/udp6', 'udp')
def dump(self):
    # type: () -> str
    """Return the attributes of this HostFacts object as json"""
    # only public attributes/properties with JSON-serializable values
    data = {
        k: getattr(self, k) for k in dir(self)
        if not k.startswith('_')
        and isinstance(getattr(self, k), (float, int, str, list, dict, tuple))
    }
    return json.dumps(data, indent=2, sort_keys=True)
8793 ##################################
def command_gather_facts(ctx: CephadmContext) -> None:
    """gather_facts is intended to provide host related metadata to the caller"""
    host = HostFacts(ctx)
    print(host.dump())
8802 ##################################
def systemd_target_state(ctx: CephadmContext, target_name: str, subsystem: str = 'ceph') -> bool:
    """Return True when `target_name` is wanted by the subsystem's systemd target.

    A unit enabled against '<subsystem>.target' has a symlink in
    '<unit_dir>/<subsystem>.target.wants/'.
    """
    return os.path.exists(
        os.path.join(
            ctx.unit_dir,
            f'{subsystem}.target.wants',
            target_name
        )
    )
def target_exists(ctx: CephadmContext) -> bool:
    """Check whether the global ceph.target unit file is present on this host."""
    unit_path = ctx.unit_dir + '/ceph.target'
    return os.path.exists(unit_path)
def command_maintenance(ctx: CephadmContext) -> str:
    """Enter or exit host maintenance mode by stopping/starting the cluster target.

    :returns: a human readable 'success/failed/skipped - ...' status string
    :raises Error: when --fsid was not supplied
    """
    if not ctx.fsid:
        raise Error('failed - must pass --fsid to specify cluster')

    target = f'ceph-{ctx.fsid}.target'

    if ctx.maintenance_action.lower() == 'enter':
        logger.info('Requested to place host into maintenance')
        if systemd_target_state(ctx, target):
            _out, _err, code = call(ctx,
                                    ['systemctl', 'disable', target],
                                    verbosity=CallVerbosity.DEBUG)
            if code:
                logger.error(f'Failed to disable the {target} target')
                return 'failed - to disable the target'
            else:
                # stopping a target waits by default
                _out, _err, code = call(ctx,
                                        ['systemctl', 'stop', target],
                                        verbosity=CallVerbosity.DEBUG)
                if code:
                    logger.error(f'Failed to stop the {target} target')
                    return 'failed - to disable the target'
                else:
                    return f'success - systemd target {target} disabled'
        else:
            return 'skipped - target already disabled'

    else:
        logger.info('Requested to exit maintenance state')
        # if we've never deployed a daemon on this host there will be no systemd
        # target to disable so attempting a disable will fail. We still need to
        # return success here or host will be permanently stuck in maintenance mode
        # as no daemons can be deployed so no systemd target will ever exist to disable.
        if not target_exists(ctx):
            return 'skipped - systemd target not present on this host. Host removed from maintenance mode.'
        # exit maintenance request
        if not systemd_target_state(ctx, target):
            _out, _err, code = call(ctx,
                                    ['systemctl', 'enable', target],
                                    verbosity=CallVerbosity.DEBUG)
            if code:
                logger.error(f'Failed to enable the {target} target')
                return 'failed - unable to enable the target'
            else:
                # starting a target waits by default
                _out, _err, code = call(ctx,
                                        ['systemctl', 'start', target],
                                        verbosity=CallVerbosity.DEBUG)
                if code:
                    logger.error(f'Failed to start the {target} target')
                    return 'failed - unable to start the target'
                else:
                    return f'success - systemd target {target} enabled and started'
        return f'success - systemd target {target} enabled and started'
8878 ##################################
8882 # type: () -> argparse.ArgumentParser
8883 parser
= argparse
.ArgumentParser(
8884 description
='Bootstrap Ceph daemons with systemd and containers.',
8885 formatter_class
=argparse
.ArgumentDefaultsHelpFormatter
)
8886 parser
.add_argument(
8888 help='container image. Can also be set via the "CEPHADM_IMAGE" '
8890 parser
.add_argument(
8892 action
='store_true',
8893 help='use docker instead of podman')
8894 parser
.add_argument(
8897 help='base directory for daemon data')
8898 parser
.add_argument(
8901 help='base directory for daemon logs')
8902 parser
.add_argument(
8904 default
=LOGROTATE_DIR
,
8905 help='location of logrotate configuration files')
8906 parser
.add_argument(
8909 help='location of sysctl configuration files')
8910 parser
.add_argument(
8913 help='base directory for systemd units')
8914 parser
.add_argument(
8916 action
='store_true',
8917 help='Show debug-level log messages')
8918 parser
.add_argument(
8921 default
=DEFAULT_TIMEOUT
,
8922 help='timeout in seconds')
8923 parser
.add_argument(
8926 default
=DEFAULT_RETRY
,
8927 help='max number of retries')
8928 parser
.add_argument(
8932 help='set environment variable')
8933 parser
.add_argument(
8934 '--no-container-init',
8935 action
='store_true',
8936 default
=not CONTAINER_INIT
,
8937 help='Do not run podman/docker with `--init`')
8938 parser
.add_argument(
8939 '--no-cgroups-split',
8940 action
='store_true',
8942 help='Do not run containers with --cgroups=split (currently only relevant when using podman)')
8944 subparsers
= parser
.add_subparsers(help='sub-command')
8946 parser_version
= subparsers
.add_parser(
8947 'version', help='get ceph version from container')
8948 parser_version
.set_defaults(func
=command_version
)
8950 parser_pull
= subparsers
.add_parser(
8951 'pull', help='pull the default container image')
8952 parser_pull
.set_defaults(func
=command_pull
)
8953 parser_pull
.add_argument(
8955 action
='store_true',
8956 help=argparse
.SUPPRESS
,
8959 parser_inspect_image
= subparsers
.add_parser(
8960 'inspect-image', help='inspect local container image')
8961 parser_inspect_image
.set_defaults(func
=command_inspect_image
)
8963 parser_ls
= subparsers
.add_parser(
8964 'ls', help='list daemon instances on this host')
8965 parser_ls
.set_defaults(func
=command_ls
)
8966 parser_ls
.add_argument(
8968 action
='store_true',
8969 help='Do not include daemon status')
8970 parser_ls
.add_argument(
8973 help='base directory for legacy daemon data')
8975 parser_list_networks
= subparsers
.add_parser(
8976 'list-networks', help='list IP networks')
8977 parser_list_networks
.set_defaults(func
=command_list_networks
)
8979 parser_adopt
= subparsers
.add_parser(
8980 'adopt', help='adopt daemon deployed with a different tool')
8981 parser_adopt
.set_defaults(func
=command_adopt
)
8982 parser_adopt
.add_argument(
8985 help='daemon name (type.id)')
8986 parser_adopt
.add_argument(
8989 help='deployment style (legacy, ...)')
8990 parser_adopt
.add_argument(
8993 help='cluster name')
8994 parser_adopt
.add_argument(
8997 help='base directory for legacy daemon data')
8998 parser_adopt
.add_argument(
9000 help='Additional configuration information in JSON format')
9001 parser_adopt
.add_argument(
9003 action
='store_true',
9004 help='Do not configure firewalld')
9005 parser_adopt
.add_argument(
9007 action
='store_true',
9008 help='do not pull the default image before adopting')
9009 parser_adopt
.add_argument(
9011 action
='store_true',
9012 help='start newly adoped daemon, even if it was not running previously')
9013 parser_adopt
.add_argument(
9015 action
='store_true',
9016 default
=CONTAINER_INIT
,
9017 help=argparse
.SUPPRESS
)
9019 parser_rm_daemon
= subparsers
.add_parser(
9020 'rm-daemon', help='remove daemon instance')
9021 parser_rm_daemon
.set_defaults(func
=command_rm_daemon
)
9022 parser_rm_daemon
.add_argument(
9025 action
=CustomValidation
,
9026 help='daemon name (type.id)')
9027 parser_rm_daemon
.add_argument(
9029 help='List of tcp ports to close in the host firewall')
9030 parser_rm_daemon
.add_argument(
9033 help='cluster FSID')
9034 parser_rm_daemon
.add_argument(
9036 action
='store_true',
9037 help='proceed, even though this may destroy valuable data')
9038 parser_rm_daemon
.add_argument(
9039 '--force-delete-data',
9040 action
='store_true',
9041 help='delete valuable daemon data instead of making a backup')
9043 parser_rm_cluster
= subparsers
.add_parser(
9044 'rm-cluster', help='remove all daemons for a cluster')
9045 parser_rm_cluster
.set_defaults(func
=command_rm_cluster
)
9046 parser_rm_cluster
.add_argument(
9049 help='cluster FSID')
9050 parser_rm_cluster
.add_argument(
9052 action
='store_true',
9053 help='proceed, even though this may destroy valuable data')
9054 parser_rm_cluster
.add_argument(
9056 action
='store_true',
9057 help='do not remove log files')
9058 parser_rm_cluster
.add_argument(
9060 action
='store_true',
9061 help='zap OSD devices for this cluster')
9063 parser_run
= subparsers
.add_parser(
9064 'run', help='run a ceph daemon, in a container, in the foreground')
9065 parser_run
.set_defaults(func
=command_run
)
9066 parser_run
.add_argument(
9069 help='daemon name (type.id)')
9070 parser_run
.add_argument(
9073 help='cluster FSID')
9075 parser_shell
= subparsers
.add_parser(
9076 'shell', help='run an interactive shell inside a daemon container')
9077 parser_shell
.set_defaults(func
=command_shell
)
9078 parser_shell
.add_argument(
9079 '--shared_ceph_folder',
9080 metavar
='CEPH_SOURCE_FOLDER',
9081 help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
9082 parser_shell
.add_argument(
9084 help='cluster FSID')
9085 parser_shell
.add_argument(
9087 help='daemon name (type.id)')
9088 parser_shell
.add_argument(
9090 help='ceph.conf to pass through to the container')
9091 parser_shell
.add_argument(
9093 help='ceph.keyring to pass through to the container')
9094 parser_shell
.add_argument(
9096 help=('mount a file or directory in the container. '
9097 'Support multiple mounts. '
9098 'ie: `--mount /foo /bar:/bar`. '
9099 'When no destination is passed, default is /mnt'),
9101 parser_shell
.add_argument(
9105 help='set environment variable')
9106 parser_shell
.add_argument(
9110 help='set environment variable')
9111 parser_shell
.add_argument(
9112 'command', nargs
=argparse
.REMAINDER
,
9113 help='command (optional)')
9114 parser_shell
.add_argument(
9116 action
='store_true',
9117 help='dont pass /etc/hosts through to the container')
9119 parser_enter
= subparsers
.add_parser(
9120 'enter', help='run an interactive shell inside a running daemon container')
9121 parser_enter
.set_defaults(func
=command_enter
)
9122 parser_enter
.add_argument(
9124 help='cluster FSID')
9125 parser_enter
.add_argument(
9128 help='daemon name (type.id)')
9129 parser_enter
.add_argument(
9130 'command', nargs
=argparse
.REMAINDER
,
9133 parser_ceph_volume
= subparsers
.add_parser(
9134 'ceph-volume', help='run ceph-volume inside a container')
9135 parser_ceph_volume
.set_defaults(func
=command_ceph_volume
)
9136 parser_ceph_volume
.add_argument(
9137 '--shared_ceph_folder',
9138 metavar
='CEPH_SOURCE_FOLDER',
9139 help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
9140 parser_ceph_volume
.add_argument(
9142 help='cluster FSID')
9143 parser_ceph_volume
.add_argument(
9145 help='JSON file with config and (client.bootstrap-osd) key')
9146 parser_ceph_volume
.add_argument(
9148 help='ceph conf file')
9149 parser_ceph_volume
.add_argument(
9151 help='ceph.keyring to pass through to the container')
9152 parser_ceph_volume
.add_argument(
9153 'command', nargs
=argparse
.REMAINDER
,
9156 parser_zap_osds
= subparsers
.add_parser(
9157 'zap-osds', help='zap all OSDs associated with a particular fsid')
9158 parser_zap_osds
.set_defaults(func
=command_zap_osds
)
9159 parser_zap_osds
.add_argument(
9162 help='cluster FSID')
9163 parser_zap_osds
.add_argument(
9165 action
='store_true',
9166 help='proceed, even though this may destroy valuable data')
9168 parser_unit
= subparsers
.add_parser(
9169 'unit', help="operate on the daemon's systemd unit")
9170 parser_unit
.set_defaults(func
=command_unit
)
9171 parser_unit
.add_argument(
9173 help='systemd command (start, stop, restart, enable, disable, ...)')
9174 parser_unit
.add_argument(
9176 help='cluster FSID')
9177 parser_unit
.add_argument(
9180 help='daemon name (type.id)')
9182 parser_logs
= subparsers
.add_parser(
9183 'logs', help='print journald logs for a daemon container')
9184 parser_logs
.set_defaults(func
=command_logs
)
9185 parser_logs
.add_argument(
9187 help='cluster FSID')
9188 parser_logs
.add_argument(
9191 help='daemon name (type.id)')
9192 parser_logs
.add_argument(
9193 'command', nargs
='*',
9194 help='additional journalctl args')
9196 parser_bootstrap
= subparsers
.add_parser(
9197 'bootstrap', help='bootstrap a cluster (mon + mgr daemons)')
9198 parser_bootstrap
.set_defaults(func
=command_bootstrap
)
9199 parser_bootstrap
.add_argument(
9201 help='ceph conf file to incorporate')
9202 parser_bootstrap
.add_argument(
9205 help='mon id (default: local hostname)')
9206 group
= parser_bootstrap
.add_mutually_exclusive_group()
9209 help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])')
9213 parser_bootstrap
.add_argument(
9216 help='mgr id (default: randomly generated)')
9217 parser_bootstrap
.add_argument(
9219 help='cluster FSID')
9220 parser_bootstrap
.add_argument(
9222 default
='/etc/ceph',
9223 help='directory to write config, keyring, and pub key files')
9224 parser_bootstrap
.add_argument(
9226 help='location to write keyring file with new cluster admin and mon keys')
9227 parser_bootstrap
.add_argument(
9229 help='location to write conf file to connect to new cluster')
9230 parser_bootstrap
.add_argument(
9231 '--output-pub-ssh-key',
9232 help="location to write the cluster's public SSH key")
9233 parser_bootstrap
.add_argument(
9234 '--skip-admin-label',
9235 action
='store_true',
9236 help='do not create admin label for ceph.conf and client.admin keyring distribution')
9237 parser_bootstrap
.add_argument(
9239 action
='store_true',
9240 help='skip setup of ssh key on local host')
9241 parser_bootstrap
.add_argument(
9242 '--initial-dashboard-user',
9244 help='Initial user for the dashboard')
9245 parser_bootstrap
.add_argument(
9246 '--initial-dashboard-password',
9247 help='Initial password for the initial dashboard user')
9248 parser_bootstrap
.add_argument(
9249 '--ssl-dashboard-port',
9252 help='Port number used to connect with dashboard using SSL')
9253 parser_bootstrap
.add_argument(
9255 type=argparse
.FileType('r'),
9256 help='Dashboard key')
9257 parser_bootstrap
.add_argument(
9259 type=argparse
.FileType('r'),
9260 help='Dashboard certificate')
9262 parser_bootstrap
.add_argument(
9264 type=argparse
.FileType('r'),
9266 parser_bootstrap
.add_argument(
9267 '--ssh-private-key',
9268 type=argparse
.FileType('r'),
9269 help='SSH private key')
9270 parser_bootstrap
.add_argument(
9272 type=argparse
.FileType('r'),
9273 help='SSH public key')
9274 parser_bootstrap
.add_argument(
9277 help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users')
9278 parser_bootstrap
.add_argument(
9279 '--skip-mon-network',
9280 action
='store_true',
9281 help='set mon public_network based on bootstrap mon ip')
9282 parser_bootstrap
.add_argument(
9284 action
='store_true',
9285 help='do not enable the Ceph Dashboard')
9286 parser_bootstrap
.add_argument(
9287 '--dashboard-password-noupdate',
9288 action
='store_true',
9289 help='stop forced dashboard password change')
9290 parser_bootstrap
.add_argument(
9291 '--no-minimize-config',
9292 action
='store_true',
9293 help='do not assimilate and minimize the config file')
9294 parser_bootstrap
.add_argument(
9295 '--skip-ping-check',
9296 action
='store_true',
9297 help='do not verify that mon IP is pingable')
9298 parser_bootstrap
.add_argument(
9300 action
='store_true',
9301 help='do not pull the default image before bootstrapping')
9302 parser_bootstrap
.add_argument(
9304 action
='store_true',
9305 help='Do not configure firewalld')
9306 parser_bootstrap
.add_argument(
9307 '--allow-overwrite',
9308 action
='store_true',
9309 help='allow overwrite of existing --output-* config/keyring/ssh files')
9310 parser_bootstrap
.add_argument(
9311 '--allow-fqdn-hostname',
9312 action
='store_true',
9313 help='allow hostname that is fully-qualified (contains ".")')
9314 parser_bootstrap
.add_argument(
9315 '--allow-mismatched-release',
9316 action
='store_true',
9317 help="allow bootstrap of ceph that doesn't match this version of cephadm")
9318 parser_bootstrap
.add_argument(
9319 '--skip-prepare-host',
9320 action
='store_true',
9321 help='Do not prepare host')
9322 parser_bootstrap
.add_argument(
9323 '--orphan-initial-daemons',
9324 action
='store_true',
9325 help='Set mon and mgr service to `unmanaged`, Do not create the crash service')
9326 parser_bootstrap
.add_argument(
9327 '--skip-monitoring-stack',
9328 action
='store_true',
9329 help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
9330 parser_bootstrap
.add_argument(
9331 '--with-centralized-logging',
9332 action
='store_true',
9333 help='Automatically provision centralized logging (promtail, loki)')
9334 parser_bootstrap
.add_argument(
9336 help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
9337 parser_bootstrap
.add_argument(
9338 '--shared_ceph_folder',
9339 metavar
='CEPH_SOURCE_FOLDER',
9340 help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
9342 parser_bootstrap
.add_argument(
9344 help='url for custom registry')
9345 parser_bootstrap
.add_argument(
9346 '--registry-username',
9347 help='username for custom registry')
9348 parser_bootstrap
.add_argument(
9349 '--registry-password',
9350 help='password for custom registry')
9351 parser_bootstrap
.add_argument(
9353 help='json file with custom registry login info (URL, Username, Password)')
9354 parser_bootstrap
.add_argument(
9356 action
='store_true',
9357 default
=CONTAINER_INIT
,
9358 help=argparse
.SUPPRESS
)
9359 parser_bootstrap
.add_argument(
9360 '--cluster-network',
9361 help='subnet to use for cluster replication, recovery and heartbeats (in CIDR notation network/mask)')
9362 parser_bootstrap
.add_argument(
9363 '--single-host-defaults',
9364 action
='store_true',
9365 help='adjust configuration defaults to suit a single-host cluster')
9366 parser_bootstrap
.add_argument(
9368 action
='store_true',
9369 help='configure cluster to log to traditional log files in /var/log/ceph/$fsid')
9371 parser_deploy
= subparsers
.add_parser(
9372 'deploy', help='deploy a daemon')
9373 parser_deploy
.set_defaults(func
=command_deploy
)
9374 parser_deploy
.add_argument(
9377 action
=CustomValidation
,
9378 help='daemon name (type.id)')
9379 parser_deploy
.add_argument(
9382 help='cluster FSID')
9383 parser_deploy
.add_argument(
9385 help='config file for new daemon')
9386 parser_deploy
.add_argument(
9388 help='Additional configuration information in JSON format')
9389 parser_deploy
.add_argument(
9391 help='keyring for new daemon')
9392 parser_deploy
.add_argument(
9394 help='key for new daemon')
9395 parser_deploy
.add_argument(
9397 help='OSD uuid, if creating an OSD container')
9398 parser_deploy
.add_argument(
9400 action
='store_true',
9401 help='Do not configure firewalld')
9402 parser_deploy
.add_argument(
9404 help='List of tcp ports to open in the host firewall')
9405 parser_deploy
.add_argument(
9407 action
='store_true',
9408 help='Reconfigure a previously deployed daemon')
9409 parser_deploy
.add_argument(
9411 action
='store_true',
9412 help='Allow SYS_PTRACE on daemon container')
9413 parser_deploy
.add_argument(
9415 action
='store_true',
9416 default
=CONTAINER_INIT
,
9417 help=argparse
.SUPPRESS
)
9418 parser_deploy
.add_argument(
9420 help='Container memory request/target'
9422 parser_deploy
.add_argument(
9424 help='Container memory hard limit'
9426 parser_deploy
.add_argument(
9428 help='JSON dict of additional metadata'
9430 parser_deploy
.add_argument(
9431 '--extra-container-args',
9434 help='Additional container arguments to apply to deamon'
9436 parser_deploy
.add_argument(
9437 '--extra-entrypoint-args',
9440 help='Additional entrypoint arguments to apply to deamon'
9443 parser_check_host
= subparsers
.add_parser(
9444 'check-host', help='check host configuration')
9445 parser_check_host
.set_defaults(func
=command_check_host
)
9446 parser_check_host
.add_argument(
9447 '--expect-hostname',
9448 help='Check that hostname matches an expected value')
9450 parser_prepare_host
= subparsers
.add_parser(
9451 'prepare-host', help='prepare a host for cephadm use')
9452 parser_prepare_host
.set_defaults(func
=command_prepare_host
)
9453 parser_prepare_host
.add_argument(
9454 '--expect-hostname',
9455 help='Set hostname')
9457 parser_add_repo
= subparsers
.add_parser(
9458 'add-repo', help='configure package repository')
9459 parser_add_repo
.set_defaults(func
=command_add_repo
)
9460 parser_add_repo
.add_argument(
9462 help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE
))
9463 parser_add_repo
.add_argument(
9465 help='use specific upstream version (x.y.z)')
9466 parser_add_repo
.add_argument(
9468 help='use specified bleeding edge build from git branch or tag')
9469 parser_add_repo
.add_argument(
9471 help='use specified bleeding edge build from git commit')
9472 parser_add_repo
.add_argument(
9474 help='specify alternative GPG key location')
9475 parser_add_repo
.add_argument(
9477 default
='https://download.ceph.com',
9478 help='specify alternative repo location')
9481 parser_rm_repo
= subparsers
.add_parser(
9482 'rm-repo', help='remove package repository configuration')
9483 parser_rm_repo
.set_defaults(func
=command_rm_repo
)
9485 parser_install
= subparsers
.add_parser(
9486 'install', help='install ceph package(s)')
9487 parser_install
.set_defaults(func
=command_install
)
9488 parser_install
.add_argument(
9489 'packages', nargs
='*',
9490 default
=['cephadm'],
9493 parser_registry_login
= subparsers
.add_parser(
9494 'registry-login', help='log host into authenticated registry')
9495 parser_registry_login
.set_defaults(func
=command_registry_login
)
9496 parser_registry_login
.add_argument(
9498 help='url for custom registry')
9499 parser_registry_login
.add_argument(
9500 '--registry-username',
9501 help='username for custom registry')
9502 parser_registry_login
.add_argument(
9503 '--registry-password',
9504 help='password for custom registry')
9505 parser_registry_login
.add_argument(
9507 help='json file with custom registry login info (URL, Username, Password)')
9508 parser_registry_login
.add_argument(
9510 help='cluster FSID')
9512 parser_gather_facts
= subparsers
.add_parser(
9513 'gather-facts', help='gather and return host related information (JSON format)')
9514 parser_gather_facts
.set_defaults(func
=command_gather_facts
)
9516 parser_maintenance
= subparsers
.add_parser(
9517 'host-maintenance', help='Manage the maintenance state of a host')
9518 parser_maintenance
.add_argument(
9520 help='cluster FSID')
9521 parser_maintenance
.add_argument(
9522 'maintenance_action',
9524 choices
=['enter', 'exit'],
9525 help='Maintenance action - enter maintenance, or exit maintenance')
9526 parser_maintenance
.set_defaults(func
=command_maintenance
)
9528 parser_agent
= subparsers
.add_parser(
9529 'agent', help='start cephadm agent')
9530 parser_agent
.set_defaults(func
=command_agent
)
9531 parser_agent
.add_argument(
9534 help='cluster FSID')
9535 parser_agent
.add_argument(
9537 help='daemon id for agent')
9539 parser_disk_rescan
= subparsers
.add_parser(
9540 'disk-rescan', help='rescan all HBAs to detect new/removed devices')
9541 parser_disk_rescan
.set_defaults(func
=command_rescan_disks
)
def _parse_args(av: List[str]) -> argparse.Namespace:
    """Parse the cephadm command line *av* into an argparse Namespace.

    Besides plain parsing this normalizes the result:

    * strips a leading ``--`` separator that argparse leaves in the
      ``command`` remainder list (used by subcommands like ``shell``), and
    * reconciles the deprecated ``--container-init`` flag with
      ``--no-container-init`` so that exactly one of the two attributes
      is True afterwards.

    :param av: argument vector (without the program name)
    :return: the parsed and normalized namespace
    """
    parser = _get_parser()

    args = parser.parse_args(av)
    if 'command' in args and args.command and args.command[0] == '--':
        # drop the '--' separator argparse keeps in the remainder list
        args.command.pop(0)

    # workaround argparse to deprecate the subparser `--container-init` flag
    # container_init and no_container_init must always be mutually exclusive
    container_init_args = ('--container-init', '--no-container-init')
    if set(container_init_args).issubset(av):
        parser.error('argument %s: not allowed with argument %s' % (container_init_args))
    elif '--container-init' in av:
        args.no_container_init = not args.container_init
    else:
        args.container_init = not args.no_container_init
    assert args.container_init is not args.no_container_init

    return args
def cephadm_init_ctx(args: List[str]) -> CephadmContext:
    """Create a CephadmContext populated from the parsed arguments.

    :param args: raw argument vector to parse via :func:`_parse_args`
    :return: a fresh context carrying the parsed namespace
    """
    ctx = CephadmContext()
    ctx.set_args(_parse_args(args))
    # the return was missing; every caller relies on receiving the context
    return ctx
def cephadm_init_logging(ctx: CephadmContext, args: List[str]) -> None:
    """Configure the logging for cephadm as well as updating the system
    to have the expected log dir and logrotate configuration.

    :param ctx: cephadm context (provides ``logrotate_dir`` and ``verbose``)
    :param args: original argument vector, used to pick the interactive
        logging configuration for operations that talk to the user
    """
    logging.addLevelName(QUIET_LOG_LEVEL, 'QUIET')
    global logger
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)

    # bootstrap and rm-cluster are interactive; use the config that also
    # mirrors output to the console
    operations = ['bootstrap', 'rm-cluster']
    if any(op in args for op in operations):
        dictConfig(interactive_logging_config)
    else:
        dictConfig(logging_config)

    logger = logging.getLogger()
    logger.setLevel(QUIET_LOG_LEVEL)

    # install a logrotate policy for /var/log/ceph/cephadm.log once
    # NOTE(review): file content restored from upstream cephadm — confirm
    # against the release this file tracks
    if not os.path.exists(ctx.logrotate_dir + '/cephadm'):
        with open(ctx.logrotate_dir + '/cephadm', 'w') as f:
            f.write("""# created by cephadm
/var/log/ceph/cephadm.log {
    rotate 7
    daily
    compress
    missingok
    notifempty
    su root root
}
""")

    # with --verbose, let the console/file handlers emit everything down to
    # the custom QUIET level
    if ctx.verbose:
        for handler in logger.handlers:
            if handler.name in ['console', 'log_file', 'console_stdout']:
                handler.setLevel(QUIET_LOG_LEVEL)
    logger.debug('%s\ncephadm %s' % ('-' * 80, args))
def cephadm_require_root() -> None:
    """Exit if the process is not running as root."""
    if os.geteuid() != 0:
        sys.stderr.write('ERROR: cephadm should be run as root\n')
        # exit was missing; without it execution continued as non-root,
        # contradicting the docstring and the error message
        sys.exit(1)
def main() -> None:
    """cephadm entry point: parse args, dispatch to the selected command.

    NOTE(review): the function header and try/except skeleton were lost in
    this copy; reconstructed from upstream cephadm around the surviving
    interior lines — confirm against the tracked release.
    """
    av: List[str] = []
    av = sys.argv[1:]

    ctx = cephadm_init_ctx(av)
    if not ctx.has_function():
        sys.stderr.write('No command specified; pass -h or --help for usage\n')
        sys.exit(1)

    cephadm_require_root()
    cephadm_init_logging(ctx, av)
    try:
        # podman or docker?
        ctx.container_engine = find_container_engine(ctx)
        # commands that must work without a container engine installed
        if ctx.func not in \
                [
                    command_check_host,
                    command_prepare_host,
                    command_add_repo,
                    command_rm_repo,
                    command_install
                ]:
            check_container_engine(ctx)
        # command handler
        r = ctx.func(ctx)
    except (Error, ClusterAlreadyExists) as e:
        if ctx.verbose:
            raise
        logger.error('ERROR: %s' % e)
        sys.exit(1)
    if not r:
        r = 0
    sys.exit(r)
9652 if __name__
== '__main__':