4 import asyncio
.subprocess
11 from logging
.config
import dictConfig
26 from socketserver
import ThreadingMixIn
27 from http
.server
import BaseHTTPRequestHandler
, HTTPServer
30 from contextlib
import redirect_stdout
34 from typing
import Dict
, List
, Tuple
, Optional
, Union
, Any
, NoReturn
, Callable
, IO
39 from configparser
import ConfigParser
40 from functools
import wraps
42 from io
import StringIO
43 from threading
import Thread
, RLock
44 from urllib
.error
import HTTPError
45 from urllib
.request
import urlopen
46 from pathlib
import Path
48 # Default container images -----------------------------------------------------
49 DEFAULT_IMAGE
= 'docker.io/ceph/ceph:v16'
50 DEFAULT_IMAGE_IS_MASTER
= False
51 DEFAULT_IMAGE_RELEASE
= 'pacific'
52 DEFAULT_PROMETHEUS_IMAGE
= 'docker.io/prom/prometheus:v2.18.1'
53 DEFAULT_NODE_EXPORTER_IMAGE
= 'docker.io/prom/node-exporter:v0.18.1'
54 DEFAULT_GRAFANA_IMAGE
= 'docker.io/ceph/ceph-grafana:6.7.4'
55 DEFAULT_ALERT_MANAGER_IMAGE
= 'docker.io/prom/alertmanager:v0.20.0'
56 DEFAULT_REGISTRY
= 'docker.io' # normalize unqualified digests to this
57 # ------------------------------------------------------------------------------
59 LATEST_STABLE_RELEASE
= 'pacific'
60 DATA_DIR
= '/var/lib/ceph'
61 LOG_DIR
= '/var/log/ceph'
62 LOCK_DIR
= '/run/cephadm'
63 LOGROTATE_DIR
= '/etc/logrotate.d'
64 SYSCTL_DIR
= '/usr/lib/sysctl.d'
65 UNIT_DIR
= '/etc/systemd/system'
69 MIN_PODMAN_VERSION
= (2, 0, 2)
70 CGROUPS_SPLIT_PODMAN_VERSION
= (2, 1, 0)
71 CUSTOM_PS1
= r
'[ceph: \u@\h \W]\$ '
72 DEFAULT_TIMEOUT
= None # in seconds
74 SHELL_DEFAULT_CONF
= '/etc/ceph/ceph.conf'
75 SHELL_DEFAULT_KEYRING
= '/etc/ceph/ceph.client.admin.keyring'
76 DATEFMT
= '%Y-%m-%dT%H:%M:%S.%fZ'
78 logger
: logging
.Logger
= None # type: ignore
81 You can invoke cephadm in two ways:
83 1. The normal way, at the command line.
85 2. By piping the script to the python3 binary. In this latter case, you should
86 prepend one or more lines to the beginning of the script.
94 injected_argv = ['ls']
96 For reading stdin from the '--config-json -' argument,
98 injected_stdin = '...'
102 ##################################
109 self
.docker
: bool = False
110 self
.data_dir
: str = DATA_DIR
111 self
.log_dir
: str = LOG_DIR
112 self
.logrotate_dir
: str = LOGROTATE_DIR
113 self
.sysctl_dir
: str = SYSCTL_DIR
114 self
.unit_dir
: str = UNIT_DIR
115 self
.verbose
: bool = False
116 self
.timeout
: Optional
[int] = DEFAULT_TIMEOUT
117 self
.retry
: int = DEFAULT_RETRY
118 self
.env
: List
[str] = []
119 self
.memory_request
: Optional
[int] = None
120 self
.memory_limit
: Optional
[int] = None
122 self
.container_init
: bool = CONTAINER_INIT
123 self
.container_engine
: Optional
[ContainerEngine
] = None
125 def set_from_args(self
, args
: argparse
.Namespace
):
126 argdict
: Dict
[str, Any
] = vars(args
)
127 for k
, v
in argdict
.items():
132 class CephadmContext
:
135 self
.__dict
__['_args'] = None
136 self
.__dict
__['_conf'] = BaseConfig()
138 def set_args(self
, args
: argparse
.Namespace
) -> None:
139 self
._conf
.set_from_args(args
)
142 def has_function(self
) -> bool:
143 return 'func' in self
._args
145 def __contains__(self
, name
: str) -> bool:
146 return hasattr(self
, name
)
148 def __getattr__(self
, name
: str) -> Any
:
149 if '_conf' in self
.__dict
__ and hasattr(self
._conf
, name
):
150 return getattr(self
._conf
, name
)
151 elif '_args' in self
.__dict
__ and hasattr(self
._args
, name
):
152 return getattr(self
._args
, name
)
154 return super().__getattribute
__(name
)
156 def __setattr__(self
, name
: str, value
: Any
) -> None:
157 if hasattr(self
._conf
, name
):
158 setattr(self
._conf
, name
, value
)
159 elif hasattr(self
._args
, name
):
160 setattr(self
._args
, name
, value
)
162 super().__setattr
__(name
, value
)
165 class ContainerEngine
:
167 self
.path
= find_program(self
.EXE
)
170 def EXE(self
) -> str:
171 raise NotImplementedError()
174 class Podman(ContainerEngine
):
183 if self
._version
is None:
184 raise RuntimeError('Please call `get_version` first')
187 def get_version(self
, ctx
: CephadmContext
):
188 out
, _
, _
= call_throws(ctx
, [self
.path
, 'version', '--format', '{{.Client.Version}}'])
189 self
._version
= _parse_podman_version(out
)
192 class Docker(ContainerEngine
):
196 CONTAINER_PREFERENCE
= (Podman
, Docker
) # prefer podman to docker
199 # Log and console output config
202 'disable_existing_loggers': True,
205 'format': '%(asctime)s %(levelname)s %(message)s'
211 'class': 'logging.StreamHandler',
215 'class': 'logging.handlers.WatchedFileHandler',
216 'formatter': 'cephadm',
217 'filename': '%s/cephadm.log' % LOG_DIR
,
223 'handlers': ['console', 'log_file'],
class Error(Exception):
    """Base class for all cephadm-level errors."""


class TimeoutExpired(Error):
    """Raised when an operation does not complete within its time limit."""
242 ##################################
246 daemons
= ('mon', 'mgr', 'mds', 'osd', 'rgw', 'rbd-mirror',
247 'crash', 'cephfs-mirror')
249 ##################################
254 def get_sysctl_settings() -> List
[str]:
256 '# allow a large number of OSDs',
257 'fs.aio-max-nr = 1048576',
258 'kernel.pid_max = 4194304',
261 ##################################
264 class Monitoring(object):
265 """Define the configs for the monitoring containers"""
268 'prometheus': [9095], # Avoid default 9090, due to conflict with cockpit UI
269 'node-exporter': [9100],
271 'alertmanager': [9093, 9094],
276 'image': DEFAULT_PROMETHEUS_IMAGE
,
280 '--config.file=/etc/prometheus/prometheus.yml',
281 '--storage.tsdb.path=/prometheus',
283 'config-json-files': [
288 'image': DEFAULT_NODE_EXPORTER_IMAGE
,
292 '--no-collector.timex',
296 'image': DEFAULT_GRAFANA_IMAGE
,
300 'config-json-files': [
302 'provisioning/datasources/ceph-dashboard.yml',
308 'image': DEFAULT_ALERT_MANAGER_IMAGE
,
312 '--cluster.listen-address=:{}'.format(port_map
['alertmanager'][1]),
314 'config-json-files': [
317 'config-json-args': [
324 def get_version(ctx
, container_id
, daemon_type
):
325 # type: (CephadmContext, str, str) -> str
327 :param: daemon_type Either "prometheus", "alertmanager" or "node-exporter"
329 assert daemon_type
in ('prometheus', 'alertmanager', 'node-exporter')
330 cmd
= daemon_type
.replace('-', '_')
334 if daemon_type
== 'alertmanager':
335 for cmd
in ['alertmanager', 'prometheus-alertmanager']:
336 _
, err
, code
= call(ctx
, [
337 ctx
.container_engine
.path
, 'exec', container_id
, cmd
,
339 ], verbosity
=CallVerbosity
.DEBUG
)
342 cmd
= 'alertmanager' # reset cmd for version extraction
344 _
, err
, code
= call(ctx
, [
345 ctx
.container_engine
.path
, 'exec', container_id
, cmd
, '--version'
346 ], verbosity
=CallVerbosity
.DEBUG
)
348 err
.startswith('%s, version ' % cmd
):
349 version
= err
.split(' ')[2]
352 ##################################
def populate_files(config_dir, config_files, uid, gid):
    # type: (str, Dict, int, int) -> None
    """Write one config file per entry of *config_files* into *config_dir*.

    Each key of *config_files* is a file name; its value (flattened via
    dict_get_join) becomes the file content. Files are created with mode
    0600 and owned by uid:gid.
    """
    for fname in config_files:
        target_path = os.path.join(config_dir, fname)
        content = dict_get_join(config_files, fname)
        logger.info('Write file: %s' % (target_path))
        with open(target_path, 'w', encoding='utf-8') as fh:
            # chown/chmod via the open descriptor so we act on the file we created
            os.fchown(fh.fileno(), uid, gid)
            os.fchmod(fh.fileno(), 0o600)
            fh.write(content)
368 class NFSGanesha(object):
369 """Defines a NFS-Ganesha container"""
372 entrypoint
= '/usr/bin/ganesha.nfsd'
373 daemon_args
= ['-F', '-L', 'STDERR']
375 required_files
= ['ganesha.conf']
386 image
=DEFAULT_IMAGE
):
387 # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
390 self
.daemon_id
= daemon_id
393 # config-json options
394 self
.pool
= dict_get(config_json
, 'pool', require
=True)
395 self
.namespace
= dict_get(config_json
, 'namespace')
396 self
.userid
= dict_get(config_json
, 'userid')
397 self
.extra_args
= dict_get(config_json
, 'extra_args', [])
398 self
.files
= dict_get(config_json
, 'files', {})
399 self
.rgw
= dict_get(config_json
, 'rgw', {})
401 # validate the supplied args
405 def init(cls
, ctx
, fsid
, daemon_id
):
406 # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha
407 return cls(ctx
, fsid
, daemon_id
, get_parm(ctx
.config_json
), ctx
.image
)
409 def get_container_mounts(self
, data_dir
):
410 # type: (str) -> Dict[str, str]
412 mounts
[os
.path
.join(data_dir
, 'config')] = '/etc/ceph/ceph.conf:z'
413 mounts
[os
.path
.join(data_dir
, 'keyring')] = '/etc/ceph/keyring:z'
414 mounts
[os
.path
.join(data_dir
, 'etc/ganesha')] = '/etc/ganesha:z'
416 cluster
= self
.rgw
.get('cluster', 'ceph')
417 rgw_user
= self
.rgw
.get('user', 'admin')
418 mounts
[os
.path
.join(data_dir
, 'keyring.rgw')] = \
419 '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster
, rgw_user
)
423 def get_container_envs():
424 # type: () -> List[str]
426 'CEPH_CONF=%s' % ('/etc/ceph/ceph.conf')
431 def get_version(ctx
, container_id
):
432 # type: (CephadmContext, str) -> Optional[str]
434 out
, err
, code
= call(ctx
,
435 [ctx
.container_engine
.path
, 'exec', container_id
,
436 NFSGanesha
.entrypoint
, '-v'],
437 verbosity
=CallVerbosity
.DEBUG
)
439 match
= re
.search(r
'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out
)
441 version
= match
.group(1)
446 if not is_fsid(self
.fsid
):
447 raise Error('not an fsid: %s' % self
.fsid
)
448 if not self
.daemon_id
:
449 raise Error('invalid daemon_id: %s' % self
.daemon_id
)
451 raise Error('invalid image: %s' % self
.image
)
453 # check for the required files
454 if self
.required_files
:
455 for fname
in self
.required_files
:
456 if fname
not in self
.files
:
457 raise Error('required file missing from config-json: %s' % fname
)
459 # check for an RGW config
461 if not self
.rgw
.get('keyring'):
462 raise Error('RGW keyring is missing')
463 if not self
.rgw
.get('user'):
464 raise Error('RGW user is missing')
466 def get_daemon_name(self
):
468 return '%s.%s' % (self
.daemon_type
, self
.daemon_id
)
470 def get_container_name(self
, desc
=None):
471 # type: (Optional[str]) -> str
472 cname
= 'ceph-%s-%s' % (self
.fsid
, self
.get_daemon_name())
474 cname
= '%s-%s' % (cname
, desc
)
477 def get_daemon_args(self
):
478 # type: () -> List[str]
479 return self
.daemon_args
+ self
.extra_args
481 def create_daemon_dirs(self
, data_dir
, uid
, gid
):
482 # type: (str, int, int) -> None
483 """Create files under the container data dir"""
484 if not os
.path
.isdir(data_dir
):
485 raise OSError('data_dir is not a directory: %s' % (data_dir
))
487 logger
.info('Creating ganesha config...')
489 # create the ganesha conf dir
490 config_dir
= os
.path
.join(data_dir
, 'etc/ganesha')
491 makedirs(config_dir
, uid
, gid
, 0o755)
493 # populate files from the config-json
494 populate_files(config_dir
, self
.files
, uid
, gid
)
496 # write the RGW keyring
498 keyring_path
= os
.path
.join(data_dir
, 'keyring.rgw')
499 with
open(keyring_path
, 'w') as f
:
500 os
.fchmod(f
.fileno(), 0o600)
501 os
.fchown(f
.fileno(), uid
, gid
)
502 f
.write(self
.rgw
.get('keyring', ''))
504 ##################################
507 class CephIscsi(object):
508 """Defines a Ceph-Iscsi container"""
510 daemon_type
= 'iscsi'
511 entrypoint
= '/usr/bin/rbd-target-api'
513 required_files
= ['iscsi-gateway.cfg']
520 image
=DEFAULT_IMAGE
):
521 # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
524 self
.daemon_id
= daemon_id
527 # config-json options
528 self
.files
= dict_get(config_json
, 'files', {})
530 # validate the supplied args
534 def init(cls
, ctx
, fsid
, daemon_id
):
535 # type: (CephadmContext, str, Union[int, str]) -> CephIscsi
536 return cls(ctx
, fsid
, daemon_id
,
537 get_parm(ctx
.config_json
), ctx
.image
)
540 def get_container_mounts(data_dir
, log_dir
):
541 # type: (str, str) -> Dict[str, str]
543 mounts
[os
.path
.join(data_dir
, 'config')] = '/etc/ceph/ceph.conf:z'
544 mounts
[os
.path
.join(data_dir
, 'keyring')] = '/etc/ceph/keyring:z'
545 mounts
[os
.path
.join(data_dir
, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
546 mounts
[os
.path
.join(data_dir
, 'configfs')] = '/sys/kernel/config'
547 mounts
[log_dir
] = '/var/log/rbd-target-api:z'
548 mounts
['/dev'] = '/dev'
552 def get_container_binds():
553 # type: () -> List[List[str]]
555 lib_modules
= ['type=bind',
556 'source=/lib/modules',
557 'destination=/lib/modules',
559 binds
.append(lib_modules
)
563 def get_version(ctx
, container_id
):
564 # type: (CephadmContext, str) -> Optional[str]
566 out
, err
, code
= call(ctx
,
567 [ctx
.container_engine
.path
, 'exec', container_id
,
568 '/usr/bin/python3', '-c', "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"],
569 verbosity
=CallVerbosity
.DEBUG
)
571 version
= out
.strip()
576 if not is_fsid(self
.fsid
):
577 raise Error('not an fsid: %s' % self
.fsid
)
578 if not self
.daemon_id
:
579 raise Error('invalid daemon_id: %s' % self
.daemon_id
)
581 raise Error('invalid image: %s' % self
.image
)
583 # check for the required files
584 if self
.required_files
:
585 for fname
in self
.required_files
:
586 if fname
not in self
.files
:
587 raise Error('required file missing from config-json: %s' % fname
)
589 def get_daemon_name(self
):
591 return '%s.%s' % (self
.daemon_type
, self
.daemon_id
)
593 def get_container_name(self
, desc
=None):
594 # type: (Optional[str]) -> str
595 cname
= 'ceph-%s-%s' % (self
.fsid
, self
.get_daemon_name())
597 cname
= '%s-%s' % (cname
, desc
)
600 def create_daemon_dirs(self
, data_dir
, uid
, gid
):
601 # type: (str, int, int) -> None
602 """Create files under the container data dir"""
603 if not os
.path
.isdir(data_dir
):
604 raise OSError('data_dir is not a directory: %s' % (data_dir
))
606 logger
.info('Creating ceph-iscsi config...')
607 configfs_dir
= os
.path
.join(data_dir
, 'configfs')
608 makedirs(configfs_dir
, uid
, gid
, 0o755)
610 # populate files from the config-json
611 populate_files(data_dir
, self
.files
, uid
, gid
)
614 def configfs_mount_umount(data_dir
, mount
=True):
615 # type: (str, bool) -> List[str]
616 mount_path
= os
.path
.join(data_dir
, 'configfs')
618 cmd
= 'if ! grep -qs {0} /proc/mounts; then ' \
619 'mount -t configfs none {0}; fi'.format(mount_path
)
621 cmd
= 'if grep -qs {0} /proc/mounts; then ' \
622 'umount {0}; fi'.format(mount_path
)
625 def get_tcmu_runner_container(self
):
626 # type: () -> CephContainer
627 tcmu_container
= get_container(self
.ctx
, self
.fsid
, self
.daemon_type
, self
.daemon_id
)
628 tcmu_container
.entrypoint
= '/usr/bin/tcmu-runner'
629 tcmu_container
.cname
= self
.get_container_name(desc
='tcmu')
630 # remove extra container args for tcmu container.
631 # extra args could cause issue with forking service type
632 tcmu_container
.container_args
= []
633 return tcmu_container
635 ##################################
638 class HAproxy(object):
639 """Defines an HAproxy container"""
640 daemon_type
= 'haproxy'
641 required_files
= ['haproxy.cfg']
642 default_image
= 'haproxy'
646 fsid
: str, daemon_id
: Union
[int, str],
647 config_json
: Dict
, image
: str) -> None:
650 self
.daemon_id
= daemon_id
653 # config-json options
654 self
.files
= dict_get(config_json
, 'files', {})
659 def init(cls
, ctx
: CephadmContext
,
660 fsid
: str, daemon_id
: Union
[int, str]) -> 'HAproxy':
661 return cls(ctx
, fsid
, daemon_id
, get_parm(ctx
.config_json
),
664 def create_daemon_dirs(self
, data_dir
: str, uid
: int, gid
: int) -> None:
665 """Create files under the container data dir"""
666 if not os
.path
.isdir(data_dir
):
667 raise OSError('data_dir is not a directory: %s' % (data_dir
))
669 # create additional directories in data dir for HAproxy to use
670 if not os
.path
.isdir(os
.path
.join(data_dir
, 'haproxy')):
671 makedirs(os
.path
.join(data_dir
, 'haproxy'), uid
, gid
, DATA_DIR_MODE
)
673 data_dir
= os
.path
.join(data_dir
, 'haproxy')
674 populate_files(data_dir
, self
.files
, uid
, gid
)
676 def get_daemon_args(self
) -> List
[str]:
677 return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg']
681 if not is_fsid(self
.fsid
):
682 raise Error('not an fsid: %s' % self
.fsid
)
683 if not self
.daemon_id
:
684 raise Error('invalid daemon_id: %s' % self
.daemon_id
)
686 raise Error('invalid image: %s' % self
.image
)
688 # check for the required files
689 if self
.required_files
:
690 for fname
in self
.required_files
:
691 if fname
not in self
.files
:
692 raise Error('required file missing from config-json: %s' % fname
)
694 def get_daemon_name(self
):
696 return '%s.%s' % (self
.daemon_type
, self
.daemon_id
)
698 def get_container_name(self
, desc
=None):
699 # type: (Optional[str]) -> str
700 cname
= 'ceph-%s-%s' % (self
.fsid
, self
.get_daemon_name())
702 cname
= '%s-%s' % (cname
, desc
)
705 def extract_uid_gid_haproxy(self
):
706 # better directory for this?
707 return extract_uid_gid(self
.ctx
, file_path
='/var/lib')
710 def get_container_mounts(data_dir
: str) -> Dict
[str, str]:
712 mounts
[os
.path
.join(data_dir
, 'haproxy')] = '/var/lib/haproxy'
716 def get_sysctl_settings() -> List
[str]:
719 'net.ipv4.ip_forward = 1',
722 ##################################
725 class Keepalived(object):
726 """Defines an Keepalived container"""
727 daemon_type
= 'keepalived'
728 required_files
= ['keepalived.conf']
729 default_image
= 'arcts/keepalived'
733 fsid
: str, daemon_id
: Union
[int, str],
734 config_json
: Dict
, image
: str) -> None:
737 self
.daemon_id
= daemon_id
740 # config-json options
741 self
.files
= dict_get(config_json
, 'files', {})
746 def init(cls
, ctx
: CephadmContext
, fsid
: str,
747 daemon_id
: Union
[int, str]) -> 'Keepalived':
748 return cls(ctx
, fsid
, daemon_id
,
749 get_parm(ctx
.config_json
), ctx
.image
)
751 def create_daemon_dirs(self
, data_dir
: str, uid
: int, gid
: int) -> None:
752 """Create files under the container data dir"""
753 if not os
.path
.isdir(data_dir
):
754 raise OSError('data_dir is not a directory: %s' % (data_dir
))
756 # create additional directories in data dir for keepalived to use
757 if not os
.path
.isdir(os
.path
.join(data_dir
, 'keepalived')):
758 makedirs(os
.path
.join(data_dir
, 'keepalived'), uid
, gid
, DATA_DIR_MODE
)
760 # populate files from the config-json
761 populate_files(data_dir
, self
.files
, uid
, gid
)
765 if not is_fsid(self
.fsid
):
766 raise Error('not an fsid: %s' % self
.fsid
)
767 if not self
.daemon_id
:
768 raise Error('invalid daemon_id: %s' % self
.daemon_id
)
770 raise Error('invalid image: %s' % self
.image
)
772 # check for the required files
773 if self
.required_files
:
774 for fname
in self
.required_files
:
775 if fname
not in self
.files
:
776 raise Error('required file missing from config-json: %s' % fname
)
778 def get_daemon_name(self
):
780 return '%s.%s' % (self
.daemon_type
, self
.daemon_id
)
782 def get_container_name(self
, desc
=None):
783 # type: (Optional[str]) -> str
784 cname
= 'ceph-%s-%s' % (self
.fsid
, self
.get_daemon_name())
786 cname
= '%s-%s' % (cname
, desc
)
790 def get_container_envs():
791 # type: () -> List[str]
793 'KEEPALIVED_AUTOCONF=false',
794 'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf',
795 'KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf',
796 'KEEPALIVED_DEBUG=false'
801 def get_sysctl_settings() -> List
[str]:
803 '# IP forwarding and non-local bind',
804 'net.ipv4.ip_forward = 1',
805 'net.ipv4.ip_nonlocal_bind = 1',
808 def extract_uid_gid_keepalived(self
):
809 # better directory for this?
810 return extract_uid_gid(self
.ctx
, file_path
='/var/lib')
813 def get_container_mounts(data_dir
: str) -> Dict
[str, str]:
815 mounts
[os
.path
.join(data_dir
, 'keepalived.conf')] = '/etc/keepalived/keepalived.conf'
818 ##################################
821 class CustomContainer(object):
822 """Defines a custom container"""
823 daemon_type
= 'container'
826 fsid
: str, daemon_id
: Union
[int, str],
827 config_json
: Dict
, image
: str) -> None:
829 self
.daemon_id
= daemon_id
832 # config-json options
833 self
.entrypoint
= dict_get(config_json
, 'entrypoint')
834 self
.uid
= dict_get(config_json
, 'uid', 65534) # nobody
835 self
.gid
= dict_get(config_json
, 'gid', 65534) # nobody
836 self
.volume_mounts
= dict_get(config_json
, 'volume_mounts', {})
837 self
.args
= dict_get(config_json
, 'args', [])
838 self
.envs
= dict_get(config_json
, 'envs', [])
839 self
.privileged
= dict_get(config_json
, 'privileged', False)
840 self
.bind_mounts
= dict_get(config_json
, 'bind_mounts', [])
841 self
.ports
= dict_get(config_json
, 'ports', [])
842 self
.dirs
= dict_get(config_json
, 'dirs', [])
843 self
.files
= dict_get(config_json
, 'files', {})
846 def init(cls
, ctx
: CephadmContext
,
847 fsid
: str, daemon_id
: Union
[int, str]) -> 'CustomContainer':
848 return cls(fsid
, daemon_id
,
849 get_parm(ctx
.config_json
), ctx
.image
)
851 def create_daemon_dirs(self
, data_dir
: str, uid
: int, gid
: int) -> None:
853 Create dirs/files below the container data directory.
855 logger
.info('Creating custom container configuration '
856 'dirs/files in {} ...'.format(data_dir
))
858 if not os
.path
.isdir(data_dir
):
859 raise OSError('data_dir is not a directory: %s' % data_dir
)
861 for dir_path
in self
.dirs
:
862 logger
.info('Creating directory: {}'.format(dir_path
))
863 dir_path
= os
.path
.join(data_dir
, dir_path
.strip('/'))
864 makedirs(dir_path
, uid
, gid
, 0o755)
866 for file_path
in self
.files
:
867 logger
.info('Creating file: {}'.format(file_path
))
868 content
= dict_get_join(self
.files
, file_path
)
869 file_path
= os
.path
.join(data_dir
, file_path
.strip('/'))
870 with
open(file_path
, 'w', encoding
='utf-8') as f
:
871 os
.fchown(f
.fileno(), uid
, gid
)
872 os
.fchmod(f
.fileno(), 0o600)
875 def get_daemon_args(self
) -> List
[str]:
878 def get_container_args(self
) -> List
[str]:
881 def get_container_envs(self
) -> List
[str]:
884 def get_container_mounts(self
, data_dir
: str) -> Dict
[str, str]:
886 Get the volume mounts. Relative source paths will be located below
887 `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
897 /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
901 for source
, destination
in self
.volume_mounts
.items():
902 source
= os
.path
.join(data_dir
, source
)
903 mounts
[source
] = destination
906 def get_container_binds(self
, data_dir
: str) -> List
[List
[str]]:
908 Get the bind mounts. Relative `source=...` paths will be located below
909 `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
914 'source=lib/modules',
915 'destination=/lib/modules',
921 'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
925 binds
= self
.bind_mounts
.copy()
927 for index
, value
in enumerate(bind
):
928 match
= re
.match(r
'^source=(.+)$', value
)
930 bind
[index
] = 'source={}'.format(os
.path
.join(
931 data_dir
, match
.group(1)))
934 ##################################
def touch(file_path: str, uid: Optional[int] = None, gid: Optional[int] = None) -> None:
    """Create *file_path* (or update its mtime) and optionally chown it.

    :param file_path: path of the file to touch.
    :param uid: numeric owner to apply; None leaves ownership unchanged.
    :param gid: numeric group to apply; None leaves ownership unchanged.
    """
    Path(file_path).touch()
    # Compare against None explicitly: uid/gid 0 (root) is falsy but must
    # still be applied, and os.chown() would fail if handed a None.
    if uid is not None and gid is not None:
        os.chown(file_path, uid, gid)
943 ##################################
def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
    """
    Helper function to get a key from a dictionary.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :param default: The default value in case the key does not
        exist. Default is `None`.
    :param require: Set to `True` if the key is required. An
        exception will be raised if the key does not exist in
        the given dictionary.
    :return: Returns the value of the given key.
    :raises: :exc:`Error` if the given key does not exist
        and `require` is set to `True`.
    """
    # membership test directly on the dict; `.keys()` was redundant
    if require and key not in d:
        raise Error('{} missing from dict'.format(key))
    return d.get(key, default)  # type: ignore
964 ##################################
def dict_get_join(d: Dict, key: str) -> Any:
    """
    Helper function to get the value of a given key from a dictionary.
    `List` values will be converted to a string by joining them with a
    line break.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :return: Returns the value of the given key. If it was a `list`, it
        will be joining with a line break.
    """
    value = d.get(key)
    if isinstance(value, list):
        value = '\n'.join(map(str, value))
    return value
982 ##################################
def get_supported_daemons():
    # type: () -> List[str]
    """Return every daemon type this script knows how to deploy (no duplicates)."""
    supported_daemons = list(Ceph.daemons)
    supported_daemons.extend(Monitoring.components)
    supported_daemons.extend([
        NFSGanesha.daemon_type,
        CephIscsi.daemon_type,
        CustomContainer.daemon_type,
        CephadmDaemon.daemon_type,
        HAproxy.daemon_type,
        Keepalived.daemon_type,
    ])
    assert len(supported_daemons) == len(set(supported_daemons))
    return supported_daemons
998 ##################################
1001 class PortOccupiedError(Error
):
1005 def attempt_bind(ctx
, s
, address
, port
):
1006 # type: (CephadmContext, socket.socket, str, int) -> None
1008 s
.setsockopt(socket
.SOL_SOCKET
, socket
.SO_REUSEADDR
, 1)
1009 s
.bind((address
, port
))
1010 except OSError as e
:
1011 if e
.errno
== errno
.EADDRINUSE
:
1012 msg
= 'Cannot bind to IP %s port %d: %s' % (address
, port
, e
)
1014 raise PortOccupiedError(msg
)
1017 except Exception as e
:
1023 def port_in_use(ctx
, port_num
):
1024 # type: (CephadmContext, int) -> bool
1025 """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
1026 logger
.info('Verifying port %d ...' % port_num
)
1028 def _port_in_use(af
: socket
.AddressFamily
, address
: str) -> bool:
1030 s
= socket
.socket(af
, socket
.SOCK_STREAM
)
1031 attempt_bind(ctx
, s
, address
, port_num
)
1032 except PortOccupiedError
:
1034 except OSError as e
:
1035 if e
.errno
in (errno
.EAFNOSUPPORT
, errno
.EADDRNOTAVAIL
):
1036 # Ignore EAFNOSUPPORT and EADDRNOTAVAIL as two interfaces are
1037 # being tested here and one might be intentionally be disabled.
1038 # In that case no error should be raised.
1043 return any(_port_in_use(af
, address
) for af
, address
in (
1044 (socket
.AF_INET
, '0.0.0.0'),
1045 (socket
.AF_INET6
, '::')
1049 def check_ip_port(ctx
, ip
, port
):
1050 # type: (CephadmContext, str, int) -> None
1051 if not ctx
.skip_ping_check
:
1052 logger
.info('Verifying IP %s port %d ...' % (ip
, port
))
1054 s
= socket
.socket(socket
.AF_INET6
, socket
.SOCK_STREAM
)
1055 ip
= unwrap_ipv6(ip
)
1057 s
= socket
.socket(socket
.AF_INET
, socket
.SOCK_STREAM
)
1058 attempt_bind(ctx
, s
, ip
, port
)
1060 ##################################
1063 # this is an abbreviated version of
1064 # https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py
1065 # that drops all of the compatibility (this is Unix/Linux only).
1067 class Timeout(TimeoutError
):
1069 Raised when the lock could not be acquired in *timeout*
1073 def __init__(self
, lock_file
):
1076 #: The path of the file lock.
1077 self
.lock_file
= lock_file
1081 temp
= "The file lock '{}' could not be acquired."\
1082 .format(self
.lock_file
)
1086 class _Acquire_ReturnProxy(object):
1087 def __init__(self
, lock
):
1091 def __enter__(self
):
1094 def __exit__(self
, exc_type
, exc_value
, traceback
):
1099 class FileLock(object):
1100 def __init__(self
, ctx
: CephadmContext
, name
, timeout
=-1):
1101 if not os
.path
.exists(LOCK_DIR
):
1102 os
.mkdir(LOCK_DIR
, 0o700)
1103 self
._lock
_file
= os
.path
.join(LOCK_DIR
, name
+ '.lock')
1106 # The file descriptor for the *_lock_file* as it is returned by the
1107 # os.open() function.
1108 # This file lock is only NOT None, if the object currently holds the
1110 self
._lock
_file
_fd
: Optional
[int] = None
1111 self
.timeout
= timeout
1112 # The lock counter is used for implementing the nested locking
1113 # mechanism. Whenever the lock is acquired, the counter is increased and
1114 # the lock is only released, when this value is 0 again.
1115 self
._lock
_counter
= 0
1119 def is_locked(self
):
1120 return self
._lock
_file
_fd
is not None
1122 def acquire(self
, timeout
=None, poll_intervall
=0.05):
1124 Acquires the file lock or fails with a :exc:`Timeout` error.
1125 .. code-block:: python
1126 # You can use this method in the context manager (recommended)
1127 with lock.acquire():
1129 # Or use an equivalent try-finally construct:
1136 The maximum time waited for the file lock.
1137 If ``timeout < 0``, there is no timeout and this method will
1138 block until the lock could be acquired.
1139 If ``timeout`` is None, the default :attr:`~timeout` is used.
1140 :arg float poll_intervall:
1141 We check once in *poll_intervall* seconds if we can acquire the
1144 if the lock could not be acquired in *timeout* seconds.
1145 .. versionchanged:: 2.0.0
1146 This method returns now a *proxy* object instead of *self*,
1147 so that it can be used in a with statement without side effects.
1150 # Use the default timeout, if no timeout is provided.
1152 timeout
= self
.timeout
1154 # Increment the number right at the beginning.
1155 # We can still undo it, if something fails.
1156 self
._lock
_counter
+= 1
1159 lock_filename
= self
._lock
_file
1160 start_time
= time
.time()
1163 if not self
.is_locked
:
1164 logger
.debug('Acquiring lock %s on %s', lock_id
,
1169 logger
.debug('Lock %s acquired on %s', lock_id
,
1172 elif timeout
>= 0 and time
.time() - start_time
> timeout
:
1173 logger
.warning('Timeout acquiring lock %s on %s', lock_id
,
1175 raise Timeout(self
._lock
_file
)
1178 'Lock %s not acquired on %s, waiting %s seconds ...',
1179 lock_id
, lock_filename
, poll_intervall
1181 time
.sleep(poll_intervall
)
1183 # Something did go wrong, so decrement the counter.
1184 self
._lock
_counter
= max(0, self
._lock
_counter
- 1)
1187 return _Acquire_ReturnProxy(lock
=self
)
1189 def release(self
, force
=False):
1191 Releases the file lock.
1192 Please note, that the lock is only completly released, if the lock
1194 Also note, that the lock file itself is not automatically deleted.
1196 If true, the lock counter is ignored and the lock is released in
1200 self
._lock
_counter
-= 1
1202 if self
._lock
_counter
== 0 or force
:
1204 lock_filename
= self
._lock
_file
1206 logger
.debug('Releasing lock %s on %s', lock_id
, lock_filename
)
1208 self
._lock
_counter
= 0
1209 logger
.debug('Lock %s released on %s', lock_id
, lock_filename
)
1213 def __enter__(self
):
1217 def __exit__(self
, exc_type
, exc_value
, traceback
):
1222 self
.release(force
=True)
1226 open_mode
= os
.O_RDWR | os
.O_CREAT | os
.O_TRUNC
1227 fd
= os
.open(self
._lock
_file
, open_mode
)
1230 fcntl
.flock(fd
, fcntl
.LOCK_EX | fcntl
.LOCK_NB
)
1231 except (IOError, OSError):
1234 self
._lock
_file
_fd
= fd
1238 # Do not remove the lockfile:
1240 # https://github.com/benediktschmitt/py-filelock/issues/31
1241 # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
1242 fd
= self
._lock
_file
_fd
1243 self
._lock
_file
_fd
= None
1244 fcntl
.flock(fd
, fcntl
.LOCK_UN
) # type: ignore
1245 os
.close(fd
) # type: ignore
1249 ##################################
1250 # Popen wrappers, lifted from ceph-volume
1252 class CallVerbosity(Enum
):
1254 # log stdout/stderr to logger.debug
1256 # On a non-zero exit status, it will forcefully set
1257 # logging ON for the terminal
1258 VERBOSE_ON_FAILURE
= 2
1259 # log at info (instead of debug) level.
1263 if sys
.version_info
< (3, 8):
1267 from asyncio
import events
1269 class ThreadedChildWatcher(asyncio
.AbstractChildWatcher
):
1270 """Threaded child watcher implementation.
1271 The watcher uses a thread per process
1272 for waiting for the process finish.
1273 It doesn't require subscription on POSIX signal
1274 but a thread creation is not free.
1275 The watcher has O(1) complexity, its performance doesn't depend
1276 on amount of spawn processes.
1280 self
._pid
_counter
= itertools
.count(0)
1283 def is_active(self
):
1287 self
._join
_threads
()
1289 def _join_threads(self
):
1290 """Internal: Join all non-daemon threads"""
1291 threads
= [thread
for thread
in list(self
._threads
.values())
1292 if thread
.is_alive() and not thread
.daemon
]
1293 for thread
in threads
:
1296 def __enter__(self
):
1299 def __exit__(self
, exc_type
, exc_val
, exc_tb
):
1302 def __del__(self
, _warn
=warnings
.warn
):
1303 threads
= [thread
for thread
in list(self
._threads
.values())
1304 if thread
.is_alive()]
1306 _warn(f
'{self.__class__} has registered but not finished child processes',
1310 def add_child_handler(self
, pid
, callback
, *args
):
1311 loop
= events
.get_event_loop()
1312 thread
= threading
.Thread(target
=self
._do
_waitpid
,
1313 name
=f
'waitpid-{next(self._pid_counter)}',
1314 args
=(loop
, pid
, callback
, args
),
1316 self
._threads
[pid
] = thread
1319 def remove_child_handler(self
, pid
):
1320 # asyncio never calls remove_child_handler() !!!
1321 # The method is no-op but is implemented because
1322 # abstract base classe requires it
1325 def attach_loop(self
, loop
):
1328 def _do_waitpid(self
, loop
, expected_pid
, callback
, args
):
1329 assert expected_pid
> 0
1332 pid
, status
= os
.waitpid(expected_pid
, 0)
1333 except ChildProcessError
:
1334 # The child process is already reaped
1335 # (may happen if waitpid() is called elsewhere).
1339 'Unknown child process pid %d, will report returncode 255',
1342 if os
.WIFEXITED(status
):
1343 returncode
= os
.WEXITSTATUS(status
)
1344 elif os
.WIFSIGNALED(status
):
1345 returncode
= -os
.WTERMSIG(status
)
1347 raise ValueError(f
'unknown wait status {status}')
1348 if loop
.get_debug():
1349 logger
.debug('process %s exited with returncode %s',
1350 expected_pid
, returncode
)
1352 if loop
.is_closed():
1353 logger
.warning('Loop %r that handles pid %r is closed', loop
, pid
)
1355 loop
.call_soon_threadsafe(callback
, pid
, returncode
, *args
)
1357 self
._threads
.pop(expected_pid
)
1359 # unlike SafeChildWatcher which handles SIGCHLD in the main thread,
1360 # ThreadedChildWatcher runs in a separated thread, hence allows us to
1361 # run create_subprocess_exec() in non-main thread, see
1362 # https://bugs.python.org/issue35621
1363 asyncio
.set_child_watcher(ThreadedChildWatcher())
1367 from asyncio
import run
as async_run
# type: ignore[attr-defined]
1369 def async_run(coro
): # type: ignore
1370 loop
= asyncio
.new_event_loop()
1372 asyncio
.set_event_loop(loop
)
1373 return loop
.run_until_complete(coro
)
1376 loop
.run_until_complete(loop
.shutdown_asyncgens())
1378 asyncio
.set_event_loop(None)
1382 def call(ctx
: CephadmContext
,
1384 desc
: Optional
[str] = None,
1385 verbosity
: CallVerbosity
= CallVerbosity
.VERBOSE_ON_FAILURE
,
1386 timeout
: Optional
[int] = DEFAULT_TIMEOUT
,
1387 **kwargs
) -> Tuple
[str, str, int]:
1389 Wrap subprocess.Popen to
1391 - log stdout/stderr to a logger,
1393 - cleanly return out, err, returncode
1395 :param timeout: timeout in seconds
1398 prefix
= command
[0] if desc
is None else desc
1401 timeout
= timeout
or ctx
.timeout
1403 logger
.debug('Running command: %s' % ' '.join(command
))
1405 async def tee(reader
: asyncio
.StreamReader
) -> str:
1406 collected
= StringIO()
1407 async for line
in reader
:
1408 message
= line
.decode('utf-8')
1409 collected
.write(message
)
1410 if verbosity
== CallVerbosity
.VERBOSE
:
1411 logger
.info(prefix
+ message
.rstrip())
1412 elif verbosity
!= CallVerbosity
.SILENT
:
1413 logger
.debug(prefix
+ message
.rstrip())
1414 return collected
.getvalue()
1416 async def run_with_timeout() -> Tuple
[str, str, int]:
1417 process
= await asyncio
.create_subprocess_exec(
1419 stdout
=asyncio
.subprocess
.PIPE
,
1420 stderr
=asyncio
.subprocess
.PIPE
)
1421 assert process
.stdout
1422 assert process
.stderr
1424 stdout
, stderr
= await asyncio
.gather(tee(process
.stdout
),
1425 tee(process
.stderr
))
1426 returncode
= await asyncio
.wait_for(process
.wait(), timeout
)
1427 except asyncio
.TimeoutError
:
1428 logger
.info(prefix
+ f
'timeout after {timeout} seconds')
1431 return stdout
, stderr
, returncode
1433 stdout
, stderr
, returncode
= async_run(run_with_timeout())
1434 if returncode
!= 0 and verbosity
== CallVerbosity
.VERBOSE_ON_FAILURE
:
1435 logger
.info('Non-zero exit code %d from %s',
1436 returncode
, ' '.join(command
))
1437 for line
in stdout
.splitlines():
1438 logger
.info(prefix
+ 'stdout ' + line
)
1439 for line
in stderr
.splitlines():
1440 logger
.info(prefix
+ 'stderr ' + line
)
1441 return stdout
, stderr
, returncode
1445 ctx
: CephadmContext
,
1447 desc
: Optional
[str] = None,
1448 verbosity
: CallVerbosity
= CallVerbosity
.VERBOSE_ON_FAILURE
,
1449 timeout
: Optional
[int] = DEFAULT_TIMEOUT
,
1450 **kwargs
) -> Tuple
[str, str, int]:
1451 out
, err
, ret
= call(ctx
, command
, desc
, verbosity
, timeout
, **kwargs
)
1453 raise RuntimeError('Failed command: %s' % ' '.join(command
))
1454 return out
, err
, ret
def call_timeout(ctx, command, timeout):
    # type: (CephadmContext, List[str], int) -> int
    """Run *command* with a hard deadline and return its exit code.

    If the command does not finish within *timeout* seconds, log a
    message and raise cephadm's TimeoutExpired error (not the
    subprocess one).
    """
    logger.debug('Running command (timeout=%s): %s'
                 % (timeout, ' '.join(command)))
    try:
        return subprocess.call(command, timeout=timeout)
    except subprocess.TimeoutExpired:
        # translate the stdlib exception into the cephadm error type
        msg = 'Command `%s` timed out after %s seconds' % (command, timeout)
        logger.debug(msg)
        raise TimeoutExpired(msg)
1473 ##################################
def json_loads_retry(cli_func):
    # type: (Callable[[], str]) -> Any
    """Parse the output of *cli_func* as JSON, retrying on decode errors.

    The callable is re-invoked after back-off sleeps of 1, 4 and 4
    seconds; the final attempt lets json.JSONDecodeError propagate.
    """
    for sleep_secs in [1, 4, 4]:
        try:
            return json.loads(cli_func())
        except json.JSONDecodeError:
            logger.debug('Invalid JSON. Retrying in %s seconds...' % sleep_secs)
            time.sleep(sleep_secs)
    # last attempt: surface the decode error to the caller
    return json.loads(cli_func())
1486 def is_available(ctx
, what
, func
):
1487 # type: (CephadmContext, str, Callable[[], bool]) -> None
1489 Wait for a service to become available
1491 :param what: the name of the service
1492 :param func: the callable object that determines availability
1495 logger
.info('Waiting for %s...' % what
)
1499 logger
.info('%s is available'
1503 raise Error('%s not available after %s tries'
1506 logger
.info('%s not available, waiting (%s/%s)...'
1507 % (what
, num
, retry
))
1513 def read_config(fn
):
1514 # type: (Optional[str]) -> ConfigParser
1522 # type: (str) -> str
1523 p
= os
.path
.expanduser(p
)
1524 return os
.path
.abspath(p
)
def get_file_timestamp(fn):
    # type: (str) -> Optional[str]
    """Return *fn*'s mtime rendered in UTC as a DATEFMT string.

    Any failure (missing file, permission error, ...) yields None
    instead of raising.
    """
    try:
        mtime = os.path.getmtime(fn)
        stamp = datetime.datetime.fromtimestamp(
            mtime, tz=datetime.timezone.utc)
        return stamp.strftime(DATEFMT)
    except Exception:
        return None
):
1539 # type: (str) -> Optional[str]
1540 # This is super irritating because
1541 # 1) podman and docker use different formats
1542 # 2) python's strptime can't parse either one
1545 # docker 18.09.7: 2020-03-03T09:21:43.636153304Z
1546 # podman 1.7.0: 2020-03-03T15:52:30.136257504-06:00
1547 # 2020-03-03 15:52:30.136257504 -0600 CST
1548 # (In the podman case, there is a different string format for
1549 # 'inspect' and 'inspect --format {{.Created}}'!!)
1551 # In *all* cases, the 9 digit second precision is too much for
1552 # python's strptime. Shorten it to 6 digits.
1553 p
= re
.compile(r
'(\.[\d]{6})[\d]*')
1556 # replace trailing Z with -0000, since (on python 3.6.8) it won't parse
1557 if s
and s
[-1] == 'Z':
1558 s
= s
[:-1] + '-0000'
1560 # cut off the redundant 'CST' part that strptime can't parse, if
1563 s
= ' '.join(v
[0:3])
1565 # try parsing with several format strings
1567 '%Y-%m-%dT%H:%M:%S.%f%z',
1568 '%Y-%m-%d %H:%M:%S.%f %z',
1572 # return timestamp normalized to UTC, rendered as DATEFMT.
1573 return datetime
.datetime
.strptime(s
, f
).astimezone(tz
=datetime
.timezone
.utc
).strftime(DATEFMT
)
1579 def _parse_podman_version(version_str
):
1580 # type: (str) -> Tuple[int, ...]
1581 def to_int(val
, org_e
=None):
1582 if not val
and org_e
:
1586 except ValueError as e
:
1587 return to_int(val
[0:-1], org_e
or e
)
1589 return tuple(map(to_int
, version_str
.split('.')))
1594 return socket
.gethostname()
1599 return socket
.getfqdn() or socket
.gethostname()
1604 return platform
.uname().machine
1607 def generate_service_id():
1609 return get_hostname() + '.' + ''.join(random
.choice(string
.ascii_lowercase
)
1613 def generate_password():
1615 return ''.join(random
.choice(string
.ascii_lowercase
+ string
.digits
)
def normalize_container_id(i):
    # type: (str) -> str
    """Strip a leading 'sha256:' from a container id.

    docker adds the sha256: prefix, but AFAICS both docker (18.09.7 in
    bionic at least) and podman always use sha256, so leave off the
    prefix.
    """
    sha_prefix = 'sha256:'
    if i.startswith(sha_prefix):
        i = i[len(sha_prefix):]
    return i
1633 return str(uuid
.uuid1())
1637 # type: (str) -> bool
1645 def infer_fsid(func
):
1647 If we only find a single fsid in /var/lib/ceph/*, use that
1650 def _infer_fsid(ctx
: CephadmContext
):
1652 logger
.debug('Using specified fsid: %s' % ctx
.fsid
)
1656 daemon_list
= list_daemons(ctx
, detail
=False)
1657 for daemon
in daemon_list
:
1658 if not is_fsid(daemon
['fsid']):
1661 elif 'name' not in ctx
or not ctx
.name
:
1662 # ctx.name not specified
1663 fsids_set
.add(daemon
['fsid'])
1664 elif daemon
['name'] == ctx
.name
:
1665 # ctx.name is a match
1666 fsids_set
.add(daemon
['fsid'])
1667 fsids
= sorted(fsids_set
)
1670 # some commands do not always require an fsid
1672 elif len(fsids
) == 1:
1673 logger
.info('Inferring fsid %s' % fsids
[0])
1676 raise Error('Cannot infer an fsid, one must be specified: %s' % fsids
)
1682 def infer_config(func
):
1684 If we find a MON daemon, use the config from that container
1687 def _infer_config(ctx
: CephadmContext
):
1689 logger
.debug('Using specified config: %s' % ctx
.config
)
1695 daemon_list
= list_daemons(ctx
, detail
=False)
1696 for daemon
in daemon_list
:
1697 if daemon
['name'].startswith('mon.'):
1698 name
= daemon
['name']
1701 config
= '/var/lib/ceph/{}/{}/config'.format(ctx
.fsid
,
1704 logger
.info('Inferring config %s' % config
)
1706 elif os
.path
.exists(SHELL_DEFAULT_CONF
):
1707 logger
.debug('Using default config: %s' % SHELL_DEFAULT_CONF
)
1708 ctx
.config
= SHELL_DEFAULT_CONF
1711 return _infer_config
1714 def _get_default_image(ctx
: CephadmContext
):
1715 if DEFAULT_IMAGE_IS_MASTER
:
1716 warn
= """This is a development version of cephadm.
1717 For information regarding the latest stable release:
1718 https://docs.ceph.com/docs/{}/cephadm/install
1719 """.format(LATEST_STABLE_RELEASE
)
1720 for line
in warn
.splitlines():
1721 logger
.warning('{}{}{}'.format(termcolor
.yellow
, line
, termcolor
.end
))
1722 return DEFAULT_IMAGE
1725 def infer_image(func
):
1727 Use the most recent ceph image
1730 def _infer_image(ctx
: CephadmContext
):
1732 ctx
.image
= os
.environ
.get('CEPHADM_IMAGE')
1734 ctx
.image
= get_last_local_ceph_image(ctx
, ctx
.container_engine
.path
)
1736 ctx
.image
= _get_default_image(ctx
)
1742 def default_image(func
):
1744 def _default_image(ctx
: CephadmContext
):
1746 if 'name' in ctx
and ctx
.name
:
1747 type_
= ctx
.name
.split('.', 1)[0]
1748 if type_
in Monitoring
.components
:
1749 ctx
.image
= Monitoring
.components
[type_
]['image']
1750 if type_
== 'haproxy':
1751 ctx
.image
= HAproxy
.default_image
1752 if type_
== 'keepalived':
1753 ctx
.image
= Keepalived
.default_image
1755 ctx
.image
= os
.environ
.get('CEPHADM_IMAGE')
1757 ctx
.image
= _get_default_image(ctx
)
1761 return _default_image
1764 def get_last_local_ceph_image(ctx
: CephadmContext
, container_path
: str):
1766 :return: The most recent local ceph image (already pulled)
1768 out
, _
, _
= call_throws(ctx
,
1769 [container_path
, 'images',
1770 '--filter', 'label=ceph=True',
1771 '--filter', 'dangling=false',
1772 '--format', '{{.Repository}}@{{.Digest}}'])
1773 return _filter_last_local_ceph_image(out
)
1776 def _filter_last_local_ceph_image(out
):
1777 # type: (str) -> Optional[str]
1778 for image
in out
.splitlines():
1779 if image
and not image
.endswith('@'):
1780 logger
.info('Using recent ceph image %s' % image
)
def write_tmp(s, uid, gid):
    # type: (str, int, int) -> IO[str]
    """Write *s* to a named temporary file owned by uid:gid.

    Returns the open handle; the file is removed when the handle is
    closed, so the caller must keep it alive while the path is in use.
    """
    handle = tempfile.NamedTemporaryFile(mode='w',
                                         prefix='ceph-tmp')
    os.fchown(handle.fileno(), uid, gid)
    handle.write(s)
    handle.flush()

    return handle
def makedirs(dir, uid, gid, mode):
    # type: (str, int, int, int) -> None
    """Ensure *dir* exists with the given ownership and permissions."""
    if os.path.exists(dir):
        os.chmod(dir, mode)
    else:
        os.makedirs(dir, mode=mode)
    os.chown(dir, uid, gid)
    os.chmod(dir, mode)  # the above is masked by umask...
def get_data_dir(fsid, data_dir, t, n):
    # type: (str, str, str, Union[int, str]) -> str
    """Return a daemon's data directory: <data_dir>/<fsid>/<type>.<id>."""
    daemon_leaf = '%s.%s' % (t, n)
    return os.path.join(data_dir, fsid, daemon_leaf)
def get_log_dir(fsid, log_dir):
    # type: (str, str) -> str
    """Return the per-cluster log directory: <log_dir>/<fsid>."""
    return os.path.join(log_dir, fsid)
def make_data_dir_base(fsid, data_dir, uid, gid):
    # type: (str, str, int, int) -> str
    """Create the cluster-level data directory plus its crash subdirs.

    Returns the base path (<data_dir>/<fsid>); all directories are
    created with DATA_DIR_MODE and chowned to uid:gid.
    """
    base = os.path.join(data_dir, fsid)
    makedirs(base, uid, gid, DATA_DIR_MODE)
    makedirs(os.path.join(base, 'crash'), uid, gid, DATA_DIR_MODE)
    makedirs(os.path.join(base, 'crash', 'posted'), uid, gid,
             DATA_DIR_MODE)
    return base
1826 def make_data_dir(ctx
, fsid
, daemon_type
, daemon_id
, uid
=None, gid
=None):
1827 # type: (CephadmContext, str, str, Union[int, str], Optional[int], Optional[int]) -> str
1828 if uid
is None or gid
is None:
1829 uid
, gid
= extract_uid_gid(ctx
)
1830 make_data_dir_base(fsid
, ctx
.data_dir
, uid
, gid
)
1831 data_dir
= get_data_dir(fsid
, ctx
.data_dir
, daemon_type
, daemon_id
)
1832 makedirs(data_dir
, uid
, gid
, DATA_DIR_MODE
)
1836 def make_log_dir(ctx
, fsid
, uid
=None, gid
=None):
1837 # type: (CephadmContext, str, Optional[int], Optional[int]) -> str
1838 if uid
is None or gid
is None:
1839 uid
, gid
= extract_uid_gid(ctx
)
1840 log_dir
= get_log_dir(fsid
, ctx
.log_dir
)
1841 makedirs(log_dir
, uid
, gid
, LOG_DIR_MODE
)
1845 def make_var_run(ctx
, fsid
, uid
, gid
):
1846 # type: (CephadmContext, str, int, int) -> None
1847 call_throws(ctx
, ['install', '-d', '-m0770', '-o', str(uid
), '-g', str(gid
),
1848 '/var/run/ceph/%s' % fsid
])
1851 def copy_tree(ctx
, src
, dst
, uid
=None, gid
=None):
1852 # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
1854 Copy a directory tree from src to dst
1856 if uid
is None or gid
is None:
1857 (uid
, gid
) = extract_uid_gid(ctx
)
1861 if os
.path
.isdir(dst
):
1862 dst_dir
= os
.path
.join(dst
, os
.path
.basename(src_dir
))
1864 logger
.debug('copy directory `%s` -> `%s`' % (src_dir
, dst_dir
))
1865 shutil
.rmtree(dst_dir
, ignore_errors
=True)
1866 shutil
.copytree(src_dir
, dst_dir
) # dirs_exist_ok needs python 3.8
1868 for dirpath
, dirnames
, filenames
in os
.walk(dst_dir
):
1869 logger
.debug('chown %s:%s `%s`' % (uid
, gid
, dirpath
))
1870 os
.chown(dirpath
, uid
, gid
)
1871 for filename
in filenames
:
1872 logger
.debug('chown %s:%s `%s`' % (uid
, gid
, filename
))
1873 os
.chown(os
.path
.join(dirpath
, filename
), uid
, gid
)
1876 def copy_files(ctx
, src
, dst
, uid
=None, gid
=None):
1877 # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
1879 Copy a files from src to dst
1881 if uid
is None or gid
is None:
1882 (uid
, gid
) = extract_uid_gid(ctx
)
1884 for src_file
in src
:
1886 if os
.path
.isdir(dst
):
1887 dst_file
= os
.path
.join(dst
, os
.path
.basename(src_file
))
1889 logger
.debug('copy file `%s` -> `%s`' % (src_file
, dst_file
))
1890 shutil
.copyfile(src_file
, dst_file
)
1892 logger
.debug('chown %s:%s `%s`' % (uid
, gid
, dst_file
))
1893 os
.chown(dst_file
, uid
, gid
)
1896 def move_files(ctx
, src
, dst
, uid
=None, gid
=None):
1897 # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
1899 Move files from src to dst
1901 if uid
is None or gid
is None:
1902 (uid
, gid
) = extract_uid_gid(ctx
)
1904 for src_file
in src
:
1906 if os
.path
.isdir(dst
):
1907 dst_file
= os
.path
.join(dst
, os
.path
.basename(src_file
))
1909 if os
.path
.islink(src_file
):
1910 # shutil.move() in py2 does not handle symlinks correctly
1911 src_rl
= os
.readlink(src_file
)
1912 logger
.debug("symlink '%s' -> '%s'" % (dst_file
, src_rl
))
1913 os
.symlink(src_rl
, dst_file
)
1916 logger
.debug("move file '%s' -> '%s'" % (src_file
, dst_file
))
1917 shutil
.move(src_file
, dst_file
)
1918 logger
.debug('chown %s:%s `%s`' % (uid
, gid
, dst_file
))
1919 os
.chown(dst_file
, uid
, gid
)
1922 # copied from distutils
def find_executable(executable, path=None):
    """Tries to find 'executable' in the directories listed in 'path'.

    'path' is a string of directories separated by 'os.pathsep';
    defaults to os.environ['PATH'].  Returns the complete filename or
    None if not found.
    """
    _, ext = os.path.splitext(executable)
    if sys.platform == 'win32' and ext != '.exe':
        executable += '.exe'

    # absolute/relative path that already names a file wins outright
    if os.path.isfile(executable):
        return executable

    if path is None:
        path = os.environ.get('PATH', None)
        if path is None:
            try:
                path = os.confstr('CS_PATH')
            except (AttributeError, ValueError):
                # os.confstr() or CS_PATH is not available
                path = os.defpath
        # bpo-35755: Don't use os.defpath if the PATH environment variable is
        # set to an empty string

    # PATH='' doesn't match, whereas PATH=':' looks in the current directory
    if not path:
        return None

    for directory in path.split(os.pathsep):
        candidate = os.path.join(directory, executable)
        if os.path.isfile(candidate):
            # the file exists, we have a shot at spawn working
            return candidate
    return None
def find_program(filename):
    # type: (str) -> str
    """Resolve *filename* on PATH; raise ValueError when it is absent."""
    location = find_executable(filename)
    if location is None:
        raise ValueError('%s not found' % filename)
    return location
1967 def find_container_engine(ctx
: CephadmContext
):
1971 for i
in CONTAINER_PREFERENCE
:
1974 except Exception as e
:
1975 logger
.debug('Could not locate %s: %s' % (i
.EXE
, e
))
1979 def check_container_engine(ctx
):
1980 # type: (CephadmContext) -> None
1981 engine
= ctx
.container_engine
1982 if not isinstance(engine
, CONTAINER_PREFERENCE
):
1983 raise Error('Unable to locate any of %s' % [i
.EXE
for i
in CONTAINER_PREFERENCE
])
1984 elif isinstance(engine
, Podman
):
1985 engine
.get_version(ctx
)
1986 if engine
.version
< MIN_PODMAN_VERSION
:
1987 raise Error('podman version %d.%d.%d or later is required' % MIN_PODMAN_VERSION
)
def get_unit_name(fsid, daemon_type, daemon_id=None):
    # type: (str, str, Optional[Union[int, str]]) -> str
    """Map a daemon (type, optional id) to its systemd unit name.

    The cephadm daemon itself gets a plain unit name; all other
    daemons use the templated ceph-<fsid>@<type>[.<id>] form.
    """
    # accept either name or type + id
    if daemon_type == CephadmDaemon.daemon_type and daemon_id is not None:
        return 'ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id)
    if daemon_id is not None:
        return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
    return 'ceph-%s@%s' % (fsid, daemon_type)
def get_unit_name_by_daemon_name(ctx: CephadmContext, fsid, name):
    """Return the systemd unit recorded for daemon *name* in cluster *fsid*."""
    daemon = get_daemon_description(ctx, fsid, name)
    if 'systemd_unit' not in daemon:
        raise Error('Failed to get unit name for {}'.format(daemon))
    return daemon['systemd_unit']
2009 def check_unit(ctx
, unit_name
):
2010 # type: (CephadmContext, str) -> Tuple[bool, str, bool]
2011 # NOTE: we ignore the exit code here because systemctl outputs
2012 # various exit codes based on the state of the service, but the
2013 # string result is more explicit (and sufficient).
2017 out
, err
, code
= call(ctx
, ['systemctl', 'is-enabled', unit_name
],
2018 verbosity
=CallVerbosity
.DEBUG
)
2022 elif 'disabled' in out
:
2024 except Exception as e
:
2025 logger
.warning('unable to run systemctl: %s' % e
)
2031 out
, err
, code
= call(ctx
, ['systemctl', 'is-active', unit_name
],
2032 verbosity
=CallVerbosity
.DEBUG
)
2034 if out
in ['active']:
2036 elif out
in ['inactive']:
2038 elif out
in ['failed', 'auto-restart']:
2042 except Exception as e
:
2043 logger
.warning('unable to run systemctl: %s' % e
)
2045 return (enabled
, state
, installed
)
2048 def check_units(ctx
, units
, enabler
=None):
2049 # type: (CephadmContext, List[str], Optional[Packager]) -> bool
2051 (enabled
, state
, installed
) = check_unit(ctx
, u
)
2052 if enabled
and state
== 'running':
2053 logger
.info('Unit %s is enabled and running' % u
)
2055 if enabler
is not None:
2057 logger
.info('Enabling unit %s' % u
)
2058 enabler
.enable_service(u
)
2062 def is_container_running(ctx
: CephadmContext
, name
: str) -> bool:
2063 out
, err
, ret
= call(ctx
, [
2064 ctx
.container_engine
.path
, 'container', 'inspect',
2065 '--format', '{{.State.Status}}', name
2067 return out
== 'running'
def get_legacy_config_fsid(cluster, legacy_dir=None):
    # type: (str, Optional[str]) -> Optional[str]
    """Read the fsid of a legacy (pre-cephadm) cluster from its conf file.

    Looks at /etc/ceph/<cluster>.conf (optionally re-rooted under
    *legacy_dir*) and returns [global] fsid, or None if the file or
    option is missing.
    """
    conf_path = '/etc/ceph/%s.conf' % cluster
    if legacy_dir is not None:
        conf_path = os.path.abspath(legacy_dir + conf_path)

    if not os.path.exists(conf_path):
        return None
    config = read_config(conf_path)
    if config.has_section('global') and config.has_option('global', 'fsid'):
        return config.get('global', 'fsid')
    return None
2083 def get_legacy_daemon_fsid(ctx
, cluster
,
2084 daemon_type
, daemon_id
, legacy_dir
=None):
2085 # type: (CephadmContext, str, str, Union[int, str], Optional[str]) -> Optional[str]
2087 if daemon_type
== 'osd':
2089 fsid_file
= os
.path
.join(ctx
.data_dir
,
2091 'ceph-%s' % daemon_id
,
2093 if legacy_dir
is not None:
2094 fsid_file
= os
.path
.abspath(legacy_dir
+ fsid_file
)
2095 with
open(fsid_file
, 'r') as f
:
2096 fsid
= f
.read().strip()
2100 fsid
= get_legacy_config_fsid(cluster
, legacy_dir
=legacy_dir
)
2104 def get_daemon_args(ctx
, fsid
, daemon_type
, daemon_id
):
2105 # type: (CephadmContext, str, str, Union[int, str]) -> List[str]
2106 r
= list() # type: List[str]
2108 if daemon_type
in Ceph
.daemons
and daemon_type
!= 'crash':
2110 '--setuser', 'ceph',
2111 '--setgroup', 'ceph',
2112 '--default-log-to-file=false',
2113 '--default-log-to-stderr=true',
2114 '--default-log-stderr-prefix=debug ',
2116 if daemon_type
== 'mon':
2118 '--default-mon-cluster-log-to-file=false',
2119 '--default-mon-cluster-log-to-stderr=true',
2121 elif daemon_type
in Monitoring
.components
:
2122 metadata
= Monitoring
.components
[daemon_type
]
2123 r
+= metadata
.get('args', list())
2124 # set ip and port to bind to for nodeexporter,alertmanager,prometheus
2125 if daemon_type
!= 'grafana':
2127 port
= Monitoring
.port_map
[daemon_type
][0]
2128 if 'meta_json' in ctx
and ctx
.meta_json
:
2129 meta
= json
.loads(ctx
.meta_json
) or {}
2130 if 'ip' in meta
and meta
['ip']:
2132 if 'ports' in meta
and meta
['ports']:
2133 port
= meta
['ports'][0]
2134 r
+= [f
'--web.listen-address={ip}:{port}']
2135 if daemon_type
== 'alertmanager':
2136 config
= get_parm(ctx
.config_json
)
2137 peers
= config
.get('peers', list()) # type: ignore
2139 r
+= ['--cluster.peer={}'.format(peer
)]
2140 # some alertmanager, by default, look elsewhere for a config
2141 r
+= ['--config.file=/etc/alertmanager/alertmanager.yml']
2142 elif daemon_type
== NFSGanesha
.daemon_type
:
2143 nfs_ganesha
= NFSGanesha
.init(ctx
, fsid
, daemon_id
)
2144 r
+= nfs_ganesha
.get_daemon_args()
2145 elif daemon_type
== HAproxy
.daemon_type
:
2146 haproxy
= HAproxy
.init(ctx
, fsid
, daemon_id
)
2147 r
+= haproxy
.get_daemon_args()
2148 elif daemon_type
== CustomContainer
.daemon_type
:
2149 cc
= CustomContainer
.init(ctx
, fsid
, daemon_id
)
2150 r
.extend(cc
.get_daemon_args())
2155 def create_daemon_dirs(ctx
, fsid
, daemon_type
, daemon_id
, uid
, gid
,
2156 config
=None, keyring
=None):
2157 # type: (CephadmContext, str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None
2158 data_dir
= make_data_dir(ctx
, fsid
, daemon_type
, daemon_id
, uid
=uid
, gid
=gid
)
2159 make_log_dir(ctx
, fsid
, uid
=uid
, gid
=gid
)
2162 config_path
= os
.path
.join(data_dir
, 'config')
2163 with
open(config_path
, 'w') as f
:
2164 os
.fchown(f
.fileno(), uid
, gid
)
2165 os
.fchmod(f
.fileno(), 0o600)
2169 keyring_path
= os
.path
.join(data_dir
, 'keyring')
2170 with
open(keyring_path
, 'w') as f
:
2171 os
.fchmod(f
.fileno(), 0o600)
2172 os
.fchown(f
.fileno(), uid
, gid
)
2175 if daemon_type
in Monitoring
.components
.keys():
2176 config_json
: Dict
[str, Any
] = get_parm(ctx
.config_json
)
2178 # Set up directories specific to the monitoring component
2181 if daemon_type
== 'prometheus':
2182 data_dir_root
= get_data_dir(fsid
, ctx
.data_dir
,
2183 daemon_type
, daemon_id
)
2184 config_dir
= 'etc/prometheus'
2185 makedirs(os
.path
.join(data_dir_root
, config_dir
), uid
, gid
, 0o755)
2186 makedirs(os
.path
.join(data_dir_root
, config_dir
, 'alerting'), uid
, gid
, 0o755)
2187 makedirs(os
.path
.join(data_dir_root
, 'data'), uid
, gid
, 0o755)
2188 elif daemon_type
== 'grafana':
2189 data_dir_root
= get_data_dir(fsid
, ctx
.data_dir
,
2190 daemon_type
, daemon_id
)
2191 config_dir
= 'etc/grafana'
2192 makedirs(os
.path
.join(data_dir_root
, config_dir
), uid
, gid
, 0o755)
2193 makedirs(os
.path
.join(data_dir_root
, config_dir
, 'certs'), uid
, gid
, 0o755)
2194 makedirs(os
.path
.join(data_dir_root
, config_dir
, 'provisioning/datasources'), uid
, gid
, 0o755)
2195 makedirs(os
.path
.join(data_dir_root
, 'data'), uid
, gid
, 0o755)
2196 touch(os
.path
.join(data_dir_root
, 'data', 'grafana.db'), uid
, gid
)
2197 elif daemon_type
== 'alertmanager':
2198 data_dir_root
= get_data_dir(fsid
, ctx
.data_dir
,
2199 daemon_type
, daemon_id
)
2200 config_dir
= 'etc/alertmanager'
2201 makedirs(os
.path
.join(data_dir_root
, config_dir
), uid
, gid
, 0o755)
2202 makedirs(os
.path
.join(data_dir_root
, config_dir
, 'data'), uid
, gid
, 0o755)
2204 # populate the config directory for the component from the config-json
2205 if 'files' in config_json
:
2206 for fname
in config_json
['files']:
2207 content
= dict_get_join(config_json
['files'], fname
)
2208 if os
.path
.isabs(fname
):
2209 fpath
= os
.path
.join(data_dir_root
, fname
.lstrip(os
.path
.sep
))
2211 fpath
= os
.path
.join(data_dir_root
, config_dir
, fname
)
2212 with
open(fpath
, 'w', encoding
='utf-8') as f
:
2213 os
.fchown(f
.fileno(), uid
, gid
)
2214 os
.fchmod(f
.fileno(), 0o600)
2217 elif daemon_type
== NFSGanesha
.daemon_type
:
2218 nfs_ganesha
= NFSGanesha
.init(ctx
, fsid
, daemon_id
)
2219 nfs_ganesha
.create_daemon_dirs(data_dir
, uid
, gid
)
2221 elif daemon_type
== CephIscsi
.daemon_type
:
2222 ceph_iscsi
= CephIscsi
.init(ctx
, fsid
, daemon_id
)
2223 ceph_iscsi
.create_daemon_dirs(data_dir
, uid
, gid
)
2225 elif daemon_type
== HAproxy
.daemon_type
:
2226 haproxy
= HAproxy
.init(ctx
, fsid
, daemon_id
)
2227 haproxy
.create_daemon_dirs(data_dir
, uid
, gid
)
2229 elif daemon_type
== Keepalived
.daemon_type
:
2230 keepalived
= Keepalived
.init(ctx
, fsid
, daemon_id
)
2231 keepalived
.create_daemon_dirs(data_dir
, uid
, gid
)
2233 elif daemon_type
== CustomContainer
.daemon_type
:
2234 cc
= CustomContainer
.init(ctx
, fsid
, daemon_id
)
2235 cc
.create_daemon_dirs(data_dir
, uid
, gid
)
2238 def get_parm(option
):
2239 # type: (str) -> Dict[str, str]
2246 if cached_stdin
is not None:
2249 j
= sys
.stdin
.read()
2252 # inline json string
2253 if option
[0] == '{' and option
[-1] == '}':
2256 elif os
.path
.exists(option
):
2257 with
open(option
, 'r') as f
:
2260 raise Error('Config file {} not found'.format(option
))
2264 except ValueError as e
:
2265 raise Error('Invalid JSON in {}: {}'.format(option
, e
))
2270 def get_config_and_keyring(ctx
):
2271 # type: (CephadmContext) -> Tuple[Optional[str], Optional[str]]
2275 if 'config_json' in ctx
and ctx
.config_json
:
2276 d
= get_parm(ctx
.config_json
)
2277 config
= d
.get('config')
2278 keyring
= d
.get('keyring')
2280 if 'config' in ctx
and ctx
.config
:
2282 with
open(ctx
.config
, 'r') as f
:
2284 except FileNotFoundError
as e
:
2287 if 'key' in ctx
and ctx
.key
:
2288 keyring
= '[%s]\n\tkey = %s\n' % (ctx
.name
, ctx
.key
)
2289 elif 'keyring' in ctx
and ctx
.keyring
:
2291 with
open(ctx
.keyring
, 'r') as f
:
2293 except FileNotFoundError
as e
:
2296 return config
, keyring
2299 def get_container_binds(ctx
, fsid
, daemon_type
, daemon_id
):
2300 # type: (CephadmContext, str, str, Union[int, str, None]) -> List[List[str]]
2303 if daemon_type
== CephIscsi
.daemon_type
:
2304 binds
.extend(CephIscsi
.get_container_binds())
2305 elif daemon_type
== CustomContainer
.daemon_type
:
2307 cc
= CustomContainer
.init(ctx
, fsid
, daemon_id
)
2308 data_dir
= get_data_dir(fsid
, ctx
.data_dir
, daemon_type
, daemon_id
)
2309 binds
.extend(cc
.get_container_binds(data_dir
))
2314 def get_container_mounts(ctx
, fsid
, daemon_type
, daemon_id
,
2316 # type: (CephadmContext, str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str]
2319 if daemon_type
in Ceph
.daemons
:
2321 run_path
= os
.path
.join('/var/run/ceph', fsid
)
2322 if os
.path
.exists(run_path
):
2323 mounts
[run_path
] = '/var/run/ceph:z'
2324 log_dir
= get_log_dir(fsid
, ctx
.log_dir
)
2325 mounts
[log_dir
] = '/var/log/ceph:z'
2326 crash_dir
= '/var/lib/ceph/%s/crash' % fsid
2327 if os
.path
.exists(crash_dir
):
2328 mounts
[crash_dir
] = '/var/lib/ceph/crash:z'
2330 if daemon_type
in Ceph
.daemons
and daemon_id
:
2331 data_dir
= get_data_dir(fsid
, ctx
.data_dir
, daemon_type
, daemon_id
)
2332 if daemon_type
== 'rgw':
2333 cdata_dir
= '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id
)
2335 cdata_dir
= '/var/lib/ceph/%s/ceph-%s' % (daemon_type
, daemon_id
)
2336 if daemon_type
!= 'crash':
2337 mounts
[data_dir
] = cdata_dir
+ ':z'
2339 mounts
[data_dir
+ '/config'] = '/etc/ceph/ceph.conf:z'
2340 if daemon_type
in ['rbd-mirror', 'cephfs-mirror', 'crash']:
2341 # these do not search for their keyrings in a data directory
2342 mounts
[data_dir
+ '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type
, daemon_id
)
2344 if daemon_type
in ['mon', 'osd', 'clusterless-ceph-volume']:
2345 mounts
['/dev'] = '/dev' # FIXME: narrow this down?
2346 mounts
['/run/udev'] = '/run/udev'
2347 if daemon_type
in ['osd', 'clusterless-ceph-volume']:
2348 mounts
['/sys'] = '/sys' # for numa.cc, pick_address, cgroups, ...
2349 mounts
['/run/lvm'] = '/run/lvm'
2350 mounts
['/run/lock/lvm'] = '/run/lock/lvm'
2351 if daemon_type
== 'osd':
2352 # selinux-policy in the container may not match the host.
2353 if HostFacts(ctx
).selinux_enabled
:
2354 selinux_folder
= '/var/lib/ceph/%s/selinux' % fsid
2355 if not os
.path
.exists(selinux_folder
):
2356 os
.makedirs(selinux_folder
, mode
=0o755)
2357 mounts
[selinux_folder
] = '/sys/fs/selinux:ro'
2360 if ctx
.shared_ceph_folder
: # make easy manager modules/ceph-volume development
2361 ceph_folder
= pathify(ctx
.shared_ceph_folder
)
2362 if os
.path
.exists(ceph_folder
):
2363 mounts
[ceph_folder
+ '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
2364 mounts
[ceph_folder
+ '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
2365 mounts
[ceph_folder
+ '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
2366 mounts
[ceph_folder
+ '/monitoring/grafana/dashboards'] = '/etc/grafana/dashboards/ceph-dashboard'
2367 mounts
[ceph_folder
+ '/monitoring/prometheus/alerts'] = '/etc/prometheus/ceph'
2369 logger
.error('{}{}{}'.format(termcolor
.red
,
2370 'Ceph shared source folder does not exist.',
2372 except AttributeError:
2375 if daemon_type
in Monitoring
.components
and daemon_id
:
2376 data_dir
= get_data_dir(fsid
, ctx
.data_dir
, daemon_type
, daemon_id
)
2377 if daemon_type
== 'prometheus':
2378 mounts
[os
.path
.join(data_dir
, 'etc/prometheus')] = '/etc/prometheus:Z'
2379 mounts
[os
.path
.join(data_dir
, 'data')] = '/prometheus:Z'
2380 elif daemon_type
== 'node-exporter':
2381 mounts
['/proc'] = '/host/proc:ro'
2382 mounts
['/sys'] = '/host/sys:ro'
2383 mounts
['/'] = '/rootfs:ro'
2384 elif daemon_type
== 'grafana':
2385 mounts
[os
.path
.join(data_dir
, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
2386 mounts
[os
.path
.join(data_dir
, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
2387 mounts
[os
.path
.join(data_dir
, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
2388 mounts
[os
.path
.join(data_dir
, 'data/grafana.db')] = '/var/lib/grafana/grafana.db:Z'
2389 elif daemon_type
== 'alertmanager':
2390 mounts
[os
.path
.join(data_dir
, 'etc/alertmanager')] = '/etc/alertmanager:Z'
2392 if daemon_type
== NFSGanesha
.daemon_type
:
2394 data_dir
= get_data_dir(fsid
, ctx
.data_dir
, daemon_type
, daemon_id
)
2395 nfs_ganesha
= NFSGanesha
.init(ctx
, fsid
, daemon_id
)
2396 mounts
.update(nfs_ganesha
.get_container_mounts(data_dir
))
2398 if daemon_type
== HAproxy
.daemon_type
:
2400 data_dir
= get_data_dir(fsid
, ctx
.data_dir
, daemon_type
, daemon_id
)
2401 mounts
.update(HAproxy
.get_container_mounts(data_dir
))
2403 if daemon_type
== CephIscsi
.daemon_type
:
2405 data_dir
= get_data_dir(fsid
, ctx
.data_dir
, daemon_type
, daemon_id
)
2406 log_dir
= get_log_dir(fsid
, ctx
.log_dir
)
2407 mounts
.update(CephIscsi
.get_container_mounts(data_dir
, log_dir
))
2409 if daemon_type
== Keepalived
.daemon_type
:
2411 data_dir
= get_data_dir(fsid
, ctx
.data_dir
, daemon_type
, daemon_id
)
2412 mounts
.update(Keepalived
.get_container_mounts(data_dir
))
2414 if daemon_type
== CustomContainer
.daemon_type
:
2416 cc
= CustomContainer
.init(ctx
, fsid
, daemon_id
)
2417 data_dir
= get_data_dir(fsid
, ctx
.data_dir
, daemon_type
, daemon_id
)
2418 mounts
.update(cc
.get_container_mounts(data_dir
))
2423 def get_container(ctx
: CephadmContext
,
2424 fsid
: str, daemon_type
: str, daemon_id
: Union
[int, str],
2425 privileged
: bool = False,
2426 ptrace
: bool = False,
2427 container_args
: Optional
[List
[str]] = None) -> 'CephContainer':
2428 entrypoint
: str = ''
2430 ceph_args
: List
[str] = []
2432 'TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728',
2434 host_network
: bool = True
2436 if container_args
is None:
2438 if daemon_type
in ['mon', 'osd']:
2439 # mon and osd need privileged in order for libudev to query devices
2441 if daemon_type
== 'rgw':
2442 entrypoint
= '/usr/bin/radosgw'
2443 name
= 'client.rgw.%s' % daemon_id
2444 elif daemon_type
== 'rbd-mirror':
2445 entrypoint
= '/usr/bin/rbd-mirror'
2446 name
= 'client.rbd-mirror.%s' % daemon_id
2447 elif daemon_type
== 'cephfs-mirror':
2448 entrypoint
= '/usr/bin/cephfs-mirror'
2449 name
= 'client.cephfs-mirror.%s' % daemon_id
2450 elif daemon_type
== 'crash':
2451 entrypoint
= '/usr/bin/ceph-crash'
2452 name
= 'client.crash.%s' % daemon_id
2453 elif daemon_type
in ['mon', 'mgr', 'mds', 'osd']:
2454 entrypoint
= '/usr/bin/ceph-' + daemon_type
2455 name
= '%s.%s' % (daemon_type
, daemon_id
)
2456 elif daemon_type
in Monitoring
.components
:
2458 elif daemon_type
== NFSGanesha
.daemon_type
:
2459 entrypoint
= NFSGanesha
.entrypoint
2460 name
= '%s.%s' % (daemon_type
, daemon_id
)
2461 envs
.extend(NFSGanesha
.get_container_envs())
2462 elif daemon_type
== HAproxy
.daemon_type
:
2463 name
= '%s.%s' % (daemon_type
, daemon_id
)
2464 elif daemon_type
== Keepalived
.daemon_type
:
2465 name
= '%s.%s' % (daemon_type
, daemon_id
)
2466 envs
.extend(Keepalived
.get_container_envs())
2467 container_args
.extend(['--cap-add=NET_ADMIN', '--cap-add=NET_RAW'])
2468 elif daemon_type
== CephIscsi
.daemon_type
:
2469 entrypoint
= CephIscsi
.entrypoint
2470 name
= '%s.%s' % (daemon_type
, daemon_id
)
2471 # So the container can modprobe iscsi_target_mod and have write perms
2472 # to configfs we need to make this a privileged container.
2474 elif daemon_type
== CustomContainer
.daemon_type
:
2475 cc
= CustomContainer
.init(ctx
, fsid
, daemon_id
)
2476 entrypoint
= cc
.entrypoint
2477 host_network
= False
2478 envs
.extend(cc
.get_container_envs())
2479 container_args
.extend(cc
.get_container_args())
2481 if daemon_type
in Monitoring
.components
:
2482 uid
, gid
= extract_uid_gid_monitoring(ctx
, daemon_type
)
2486 # FIXME: disable cpu/memory limits for the time being (not supported
2487 # by ubuntu 18.04 kernel!)
2489 container_args
.extend(monitoring_args
)
2490 elif daemon_type
== 'crash':
2491 ceph_args
= ['-n', name
]
2492 elif daemon_type
in Ceph
.daemons
:
2493 ceph_args
= ['-n', name
, '-f']
2495 # if using podman, set -d, --conmon-pidfile & --cidfile flags
2496 # so service can have Type=Forking
2497 if isinstance(ctx
.container_engine
, Podman
):
2498 runtime_dir
= '/run'
2499 container_args
.extend([
2500 '-d', '--log-driver', 'journald',
2502 runtime_dir
+ '/ceph-%s@%s.%s.service-pid' % (fsid
, daemon_type
, daemon_id
),
2504 runtime_dir
+ '/ceph-%s@%s.%s.service-cid' % (fsid
, daemon_type
, daemon_id
),
2506 if ctx
.container_engine
.version
>= CGROUPS_SPLIT_PODMAN_VERSION
:
2507 container_args
.append('--cgroups=split')
2509 return CephContainer(
2512 entrypoint
=entrypoint
,
2513 args
=ceph_args
+ get_daemon_args(ctx
, fsid
, daemon_type
, daemon_id
),
2514 container_args
=container_args
,
2515 volume_mounts
=get_container_mounts(ctx
, fsid
, daemon_type
, daemon_id
),
2516 bind_mounts
=get_container_binds(ctx
, fsid
, daemon_type
, daemon_id
),
2517 cname
='ceph-%s-%s.%s' % (fsid
, daemon_type
, daemon_id
),
2519 privileged
=privileged
,
2521 host_network
=host_network
,
2525 def extract_uid_gid(ctx
, img
='', file_path
='/var/lib/ceph'):
2526 # type: (CephadmContext, str, Union[str, List[str]]) -> Tuple[int, int]
2531 if isinstance(file_path
, str):
2538 out
= CephContainer(
2542 args
=['-c', '%u %g', fp
]
2544 uid
, gid
= out
.split(' ')
2545 return int(uid
), int(gid
)
2546 except RuntimeError:
2548 raise RuntimeError('uid/gid not found')
2551 def deploy_daemon(ctx
, fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
2552 config
=None, keyring
=None,
2556 # type: (CephadmContext, str, str, Union[int, str], Optional[CephContainer], int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None
2559 if any([port_in_use(ctx
, port
) for port
in ports
]):
2560 if daemon_type
== 'mgr':
2561 # non-fatal for mgr when we are in mgr_standby_modules=false, but we can't
2562 # tell whether that is the case here.
2564 f
"ceph-mgr TCP port(s) {','.join(map(str, ports))} already in use"
2567 raise Error("TCP Port(s) '{}' required for {} already in use".format(','.join(map(str, ports
)), daemon_type
))
2569 data_dir
= get_data_dir(fsid
, ctx
.data_dir
, daemon_type
, daemon_id
)
2570 if reconfig
and not os
.path
.exists(data_dir
):
2571 raise Error('cannot reconfig, data path %s does not exist' % data_dir
)
2572 if daemon_type
== 'mon' and not os
.path
.exists(data_dir
):
2576 tmp_keyring
= write_tmp(keyring
, uid
, gid
)
2579 tmp_config
= write_tmp(config
, uid
, gid
)
2582 create_daemon_dirs(ctx
, fsid
, daemon_type
, daemon_id
, uid
, gid
)
2583 mon_dir
= get_data_dir(fsid
, ctx
.data_dir
, 'mon', daemon_id
)
2584 log_dir
= get_log_dir(fsid
, ctx
.log_dir
)
2588 entrypoint
='/usr/bin/ceph-mon',
2591 '-i', str(daemon_id
),
2593 '-c', '/tmp/config',
2594 '--keyring', '/tmp/keyring',
2595 ] + get_daemon_args(ctx
, fsid
, 'mon', daemon_id
),
2597 log_dir
: '/var/log/ceph:z',
2598 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id
),
2599 tmp_keyring
.name
: '/tmp/keyring:z',
2600 tmp_config
.name
: '/tmp/config:z',
2605 with
open(mon_dir
+ '/config', 'w') as f
:
2606 os
.fchown(f
.fileno(), uid
, gid
)
2607 os
.fchmod(f
.fileno(), 0o600)
2610 # dirs, conf, keyring
2613 fsid
, daemon_type
, daemon_id
,
2618 if daemon_type
== CephadmDaemon
.daemon_type
:
2619 port
= next(iter(ports
), None) # get first tcp port provided or None
2621 if ctx
.config_json
== '-':
2622 config_js
= get_parm('-')
2624 config_js
= get_parm(ctx
.config_json
)
2625 assert isinstance(config_js
, dict)
2627 cephadm_exporter
= CephadmDaemon(ctx
, fsid
, daemon_id
, port
)
2628 cephadm_exporter
.deploy_daemon_unit(config_js
)
2631 deploy_daemon_units(ctx
, fsid
, uid
, gid
, daemon_type
, daemon_id
,
2632 c
, osd_fsid
=osd_fsid
, ports
=ports
)
2634 raise RuntimeError('attempting to deploy a daemon without a container image')
2636 if not os
.path
.exists(data_dir
+ '/unit.created'):
2637 with
open(data_dir
+ '/unit.created', 'w') as f
:
2638 os
.fchmod(f
.fileno(), 0o600)
2639 os
.fchown(f
.fileno(), uid
, gid
)
2640 f
.write('mtime is time the daemon deployment was created\n')
2642 with
open(data_dir
+ '/unit.configured', 'w') as f
:
2643 f
.write('mtime is time we were last configured\n')
2644 os
.fchmod(f
.fileno(), 0o600)
2645 os
.fchown(f
.fileno(), uid
, gid
)
2647 update_firewalld(ctx
, daemon_type
)
2649 # Open ports explicitly required for the daemon
2652 fw
.open_ports(ports
)
2655 if reconfig
and daemon_type
not in Ceph
.daemons
:
2656 # ceph daemons do not need a restart; others (presumably) do to pick
2658 call_throws(ctx
, ['systemctl', 'reset-failed',
2659 get_unit_name(fsid
, daemon_type
, daemon_id
)])
2660 call_throws(ctx
, ['systemctl', 'restart',
2661 get_unit_name(fsid
, daemon_type
, daemon_id
)])
2664 def _write_container_cmd_to_bash(ctx
, file_obj
, container
, comment
=None, background
=False):
2665 # type: (CephadmContext, IO[str], CephContainer, Optional[str], Optional[bool]) -> None
2667 # Sometimes adding a comment, especially if there are multiple containers in one
2668 # unit file, makes it easier to read and grok.
2669 file_obj
.write('# ' + comment
+ '\n')
2670 # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
2671 file_obj
.write('! ' + ' '.join(container
.rm_cmd()) + ' 2> /dev/null\n')
2672 # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
2673 if isinstance(ctx
.container_engine
, Podman
):
2676 + ' '.join([shlex
.quote(a
) for a
in container
.rm_cmd(storage
=True)])
2677 + ' 2> /dev/null\n')
2679 # container run command
2681 ' '.join([shlex
.quote(a
) for a
in container
.run_cmd()])
2682 + (' &' if background
else '') + '\n')
2685 def deploy_daemon_units(
2686 ctx
: CephadmContext
,
2691 daemon_id
: Union
[int, str],
2693 enable
: bool = True,
2695 osd_fsid
: Optional
[str] = None,
2696 ports
: Optional
[List
[int]] = None,
2699 data_dir
= get_data_dir(fsid
, ctx
.data_dir
, daemon_type
, daemon_id
)
2700 with
open(data_dir
+ '/unit.run.new', 'w') as f
, \
2701 open(data_dir
+ '/unit.meta.new', 'w') as metaf
:
2704 if daemon_type
in Ceph
.daemons
:
2705 install_path
= find_program('install')
2706 f
.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path
=install_path
, fsid
=fsid
, uid
=uid
, gid
=gid
))
2709 if daemon_type
== 'osd':
2710 # osds have a pre-start step
2712 simple_fn
= os
.path
.join('/etc/ceph/osd',
2713 '%s-%s.json.adopted-by-cephadm' % (daemon_id
, osd_fsid
))
2714 if os
.path
.exists(simple_fn
):
2715 f
.write('# Simple OSDs need chown on startup:\n')
2716 for n
in ['block', 'block.db', 'block.wal']:
2717 p
= os
.path
.join(data_dir
, n
)
2718 f
.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p
=p
, uid
=uid
, gid
=gid
))
2720 prestart
= CephContainer(
2723 entrypoint
='/usr/sbin/ceph-volume',
2726 str(daemon_id
), osd_fsid
,
2730 volume_mounts
=get_container_mounts(ctx
, fsid
, daemon_type
, daemon_id
),
2731 bind_mounts
=get_container_binds(ctx
, fsid
, daemon_type
, daemon_id
),
2732 cname
='ceph-%s-%s.%s-activate' % (fsid
, daemon_type
, daemon_id
),
2733 memory_request
=ctx
.memory_request
,
2734 memory_limit
=ctx
.memory_limit
,
2736 _write_container_cmd_to_bash(ctx
, f
, prestart
, 'LVM OSDs use ceph-volume lvm activate')
2737 elif daemon_type
== CephIscsi
.daemon_type
:
2738 f
.write(' '.join(CephIscsi
.configfs_mount_umount(data_dir
, mount
=True)) + '\n')
2739 ceph_iscsi
= CephIscsi
.init(ctx
, fsid
, daemon_id
)
2740 tcmu_container
= ceph_iscsi
.get_tcmu_runner_container()
2741 _write_container_cmd_to_bash(ctx
, f
, tcmu_container
, 'iscsi tcmu-runnter container', background
=True)
2743 _write_container_cmd_to_bash(ctx
, f
, c
, '%s.%s' % (daemon_type
, str(daemon_id
)))
2745 # some metadata about the deploy
2746 meta
: Dict
[str, Any
] = {}
2747 if 'meta_json' in ctx
and ctx
.meta_json
:
2748 meta
= json
.loads(ctx
.meta_json
) or {}
2750 'memory_request': int(ctx
.memory_request
) if ctx
.memory_request
else None,
2751 'memory_limit': int(ctx
.memory_limit
) if ctx
.memory_limit
else None,
2753 if not meta
.get('ports'):
2754 meta
['ports'] = ports
2755 metaf
.write(json
.dumps(meta
, indent
=4) + '\n')
2757 os
.fchmod(f
.fileno(), 0o600)
2758 os
.fchmod(metaf
.fileno(), 0o600)
2759 os
.rename(data_dir
+ '/unit.run.new',
2760 data_dir
+ '/unit.run')
2761 os
.rename(data_dir
+ '/unit.meta.new',
2762 data_dir
+ '/unit.meta')
2764 # post-stop command(s)
2765 with
open(data_dir
+ '/unit.poststop.new', 'w') as f
:
2766 if daemon_type
== 'osd':
2768 poststop
= CephContainer(
2771 entrypoint
='/usr/sbin/ceph-volume',
2773 'lvm', 'deactivate',
2774 str(daemon_id
), osd_fsid
,
2777 volume_mounts
=get_container_mounts(ctx
, fsid
, daemon_type
, daemon_id
),
2778 bind_mounts
=get_container_binds(ctx
, fsid
, daemon_type
, daemon_id
),
2779 cname
='ceph-%s-%s.%s-deactivate' % (fsid
, daemon_type
,
2782 _write_container_cmd_to_bash(ctx
, f
, poststop
, 'deactivate osd')
2783 elif daemon_type
== CephIscsi
.daemon_type
:
2784 # make sure we also stop the tcmu container
2785 ceph_iscsi
= CephIscsi
.init(ctx
, fsid
, daemon_id
)
2786 tcmu_container
= ceph_iscsi
.get_tcmu_runner_container()
2787 f
.write('! ' + ' '.join(tcmu_container
.stop_cmd()) + '\n')
2788 f
.write(' '.join(CephIscsi
.configfs_mount_umount(data_dir
, mount
=False)) + '\n')
2789 os
.fchmod(f
.fileno(), 0o600)
2790 os
.rename(data_dir
+ '/unit.poststop.new',
2791 data_dir
+ '/unit.poststop')
2794 with
open(data_dir
+ '/unit.image.new', 'w') as f
:
2795 f
.write(c
.image
+ '\n')
2796 os
.fchmod(f
.fileno(), 0o600)
2797 os
.rename(data_dir
+ '/unit.image.new',
2798 data_dir
+ '/unit.image')
2801 install_sysctl(ctx
, fsid
, daemon_type
)
2804 install_base_units(ctx
, fsid
)
2805 unit
= get_unit_file(ctx
, fsid
)
2806 unit_file
= 'ceph-%s@.service' % (fsid
)
2807 with
open(ctx
.unit_dir
+ '/' + unit_file
+ '.new', 'w') as f
:
2809 os
.rename(ctx
.unit_dir
+ '/' + unit_file
+ '.new',
2810 ctx
.unit_dir
+ '/' + unit_file
)
2811 call_throws(ctx
, ['systemctl', 'daemon-reload'])
2813 unit_name
= get_unit_name(fsid
, daemon_type
, daemon_id
)
2814 call(ctx
, ['systemctl', 'stop', unit_name
],
2815 verbosity
=CallVerbosity
.DEBUG
)
2816 call(ctx
, ['systemctl', 'reset-failed', unit_name
],
2817 verbosity
=CallVerbosity
.DEBUG
)
2819 call_throws(ctx
, ['systemctl', 'enable', unit_name
])
2821 call_throws(ctx
, ['systemctl', 'start', unit_name
])
2824 class Firewalld(object):
2825 def __init__(self
, ctx
):
2826 # type: (CephadmContext) -> None
2828 self
.available
= self
.check()
2832 self
.cmd
= find_executable('firewall-cmd')
2834 logger
.debug('firewalld does not appear to be present')
2836 (enabled
, state
, _
) = check_unit(self
.ctx
, 'firewalld.service')
2838 logger
.debug('firewalld.service is not enabled')
2840 if state
!= 'running':
2841 logger
.debug('firewalld.service is not running')
2844 logger
.info('firewalld ready')
2847 def enable_service_for(self
, daemon_type
):
2848 # type: (str) -> None
2849 if not self
.available
:
2850 logger
.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type
)
2853 if daemon_type
== 'mon':
2855 elif daemon_type
in ['mgr', 'mds', 'osd']:
2857 elif daemon_type
== NFSGanesha
.daemon_type
:
2863 raise RuntimeError('command not defined')
2865 out
, err
, ret
= call(self
.ctx
, [self
.cmd
, '--permanent', '--query-service', svc
], verbosity
=CallVerbosity
.DEBUG
)
2867 logger
.info('Enabling firewalld service %s in current zone...' % svc
)
2868 out
, err
, ret
= call(self
.ctx
, [self
.cmd
, '--permanent', '--add-service', svc
])
2871 'unable to add service %s to current zone: %s' % (svc
, err
))
2873 logger
.debug('firewalld service %s is enabled in current zone' % svc
)
2875 def open_ports(self
, fw_ports
):
2876 # type: (List[int]) -> None
2877 if not self
.available
:
2878 logger
.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports
)
2882 raise RuntimeError('command not defined')
2884 for port
in fw_ports
:
2885 tcp_port
= str(port
) + '/tcp'
2886 out
, err
, ret
= call(self
.ctx
, [self
.cmd
, '--permanent', '--query-port', tcp_port
], verbosity
=CallVerbosity
.DEBUG
)
2888 logger
.info('Enabling firewalld port %s in current zone...' % tcp_port
)
2889 out
, err
, ret
= call(self
.ctx
, [self
.cmd
, '--permanent', '--add-port', tcp_port
])
2891 raise RuntimeError('unable to add port %s to current zone: %s' %
2894 logger
.debug('firewalld port %s is enabled in current zone' % tcp_port
)
2896 def close_ports(self
, fw_ports
):
2897 # type: (List[int]) -> None
2898 if not self
.available
:
2899 logger
.debug('Not possible to close ports <%s>. firewalld.service is not available' % fw_ports
)
2903 raise RuntimeError('command not defined')
2905 for port
in fw_ports
:
2906 tcp_port
= str(port
) + '/tcp'
2907 out
, err
, ret
= call(self
.ctx
, [self
.cmd
, '--permanent', '--query-port', tcp_port
], verbosity
=CallVerbosity
.DEBUG
)
2909 logger
.info('Disabling port %s in current zone...' % tcp_port
)
2910 out
, err
, ret
= call(self
.ctx
, [self
.cmd
, '--permanent', '--remove-port', tcp_port
])
2912 raise RuntimeError('unable to remove port %s from current zone: %s' %
2915 logger
.info(f
'Port {tcp_port} disabled')
2917 logger
.info(f
'firewalld port {tcp_port} already closed')
2919 def apply_rules(self
):
2921 if not self
.available
:
2925 raise RuntimeError('command not defined')
2927 call_throws(self
.ctx
, [self
.cmd
, '--reload'])
def update_firewalld(ctx, daemon_type):
    # type: (CephadmContext, str) -> None
    """Open the firewalld service entry for *daemon_type* and reload rules.

    This is a thin convenience wrapper around the Firewalld helper: it
    enables the matching firewalld service (a no-op when firewalld is not
    available) and then applies the updated permanent rules.
    """
    fw = Firewalld(ctx)
    fw.enable_service_for(daemon_type)
    fw.apply_rules()
2937 def install_sysctl(ctx
: CephadmContext
, fsid
: str, daemon_type
: str) -> None:
2939 Set up sysctl settings
2941 def _write(conf
: Path
, lines
: List
[str]) -> None:
2943 '# created by cephadm',
2948 with
open(conf
, 'w') as f
:
2949 f
.write('\n'.join(lines
))
2951 conf
= Path(ctx
.sysctl_dir
).joinpath(f
'90-ceph-{fsid}-{daemon_type}.conf')
2952 lines
: Optional
[List
] = None
2954 if daemon_type
== 'osd':
2955 lines
= OSD
.get_sysctl_settings()
2956 elif daemon_type
== 'haproxy':
2957 lines
= HAproxy
.get_sysctl_settings()
2958 elif daemon_type
== 'keepalived':
2959 lines
= Keepalived
.get_sysctl_settings()
2961 # apply the sysctl settings
2964 call_throws(ctx
, ['sysctl', '--system'])
2967 def install_base_units(ctx
, fsid
):
2968 # type: (CephadmContext, str) -> None
2970 Set up ceph.target and ceph-$fsid.target units.
2973 existed
= os
.path
.exists(ctx
.unit_dir
+ '/ceph.target')
2974 with
open(ctx
.unit_dir
+ '/ceph.target.new', 'w') as f
:
2976 'Description=All Ceph clusters and services\n'
2979 'WantedBy=multi-user.target\n')
2980 os
.rename(ctx
.unit_dir
+ '/ceph.target.new',
2981 ctx
.unit_dir
+ '/ceph.target')
2983 # we disable before enable in case a different ceph.target
2984 # (from the traditional package) is present; while newer
2985 # systemd is smart enough to disable the old
2986 # (/lib/systemd/...) and enable the new (/etc/systemd/...),
2987 # some older versions of systemd error out with EEXIST.
2988 call_throws(ctx
, ['systemctl', 'disable', 'ceph.target'])
2989 call_throws(ctx
, ['systemctl', 'enable', 'ceph.target'])
2990 call_throws(ctx
, ['systemctl', 'start', 'ceph.target'])
2993 existed
= os
.path
.exists(ctx
.unit_dir
+ '/ceph-%s.target' % fsid
)
2994 with
open(ctx
.unit_dir
+ '/ceph-%s.target.new' % fsid
, 'w') as f
:
2997 'Description=Ceph cluster {fsid}\n'
2998 'PartOf=ceph.target\n'
2999 'Before=ceph.target\n'
3002 'WantedBy=multi-user.target ceph.target\n'.format(
3005 os
.rename(ctx
.unit_dir
+ '/ceph-%s.target.new' % fsid
,
3006 ctx
.unit_dir
+ '/ceph-%s.target' % fsid
)
3008 call_throws(ctx
, ['systemctl', 'enable', 'ceph-%s.target' % fsid
])
3009 call_throws(ctx
, ['systemctl', 'start', 'ceph-%s.target' % fsid
])
3011 # logrotate for the cluster
3012 with
open(ctx
.logrotate_dir
+ '/ceph-%s' % fsid
, 'w') as f
:
3014 This is a bit sloppy in that the killall/pkill will touch all ceph daemons
3015 in all containers, but I don't see an elegant way to send SIGHUP *just* to
3016 the daemons for this cluster. (1) systemd kill -s will get the signal to
3017 podman, but podman will exit. (2) podman kill will get the signal to the
3018 first child (bash), but that isn't the ceph daemon. This is simpler and
3021 f
.write("""# created by cephadm
3022 /var/log/ceph/%s/*.log {
3028 killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror cephfs-mirror || pkill -1 -x 'ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror|cephfs-mirror' || true
3037 def get_unit_file(ctx
, fsid
):
3038 # type: (CephadmContext, str) -> str
3040 if isinstance(ctx
.container_engine
, Podman
):
3041 extra_args
= ('ExecStartPre=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
3042 'ExecStopPost=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
3044 'PIDFile=%t/%n-pid\n')
3045 if ctx
.container_engine
.version
>= CGROUPS_SPLIT_PODMAN_VERSION
:
3046 extra_args
+= 'Delegate=yes\n'
3048 docker
= isinstance(ctx
.container_engine
, Docker
)
3049 u
= """# generated by cephadm
3051 Description=Ceph %i for {fsid}
3054 # http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
3055 # these can be removed once ceph-mon will dynamically change network
3057 After=network-online.target local-fs.target time-sync.target{docker_after}
3058 Wants=network-online.target local-fs.target time-sync.target
3061 PartOf=ceph-{fsid}.target
3062 Before=ceph-{fsid}.target
3067 EnvironmentFile=-/etc/environment
3068 ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
3069 ExecStop=-{container_path} stop ceph-{fsid}-%i
3070 ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
3076 StartLimitInterval=30min
3080 WantedBy=ceph-{fsid}.target
3081 """.format(container_path
=ctx
.container_engine
.path
,
3083 data_dir
=ctx
.data_dir
,
3084 extra_args
=extra_args
,
3085 # if docker, we depend on docker.service
3086 docker_after
=' docker.service' if docker
else '',
3087 docker_requires
='Requires=docker.service\n' if docker
else '')
3091 ##################################
3094 class CephContainer
:
3096 ctx
: CephadmContext
,
3099 args
: List
[str] = [],
3100 volume_mounts
: Dict
[str, str] = {},
3102 container_args
: List
[str] = [],
3103 envs
: Optional
[List
[str]] = None,
3104 privileged
: bool = False,
3105 ptrace
: bool = False,
3106 bind_mounts
: Optional
[List
[List
[str]]] = None,
3107 init
: Optional
[bool] = None,
3108 host_network
: bool = True,
3109 memory_request
: Optional
[str] = None,
3110 memory_limit
: Optional
[str] = None,
3114 self
.entrypoint
= entrypoint
3116 self
.volume_mounts
= volume_mounts
3118 self
.container_args
= container_args
3120 self
.privileged
= privileged
3121 self
.ptrace
= ptrace
3122 self
.bind_mounts
= bind_mounts
if bind_mounts
else []
3123 self
.init
= init
if init
else ctx
.container_init
3124 self
.host_network
= host_network
3125 self
.memory_request
= memory_request
3126 self
.memory_limit
= memory_limit
3128 def run_cmd(self
) -> List
[str]:
3129 cmd_args
: List
[str] = [
3130 str(self
.ctx
.container_engine
.path
),
3134 # some containers (ahem, haproxy) override this, but we want a fast
3135 # shutdown always (and, more importantly, a successful exit even if we
3136 # fall back to SIGKILL).
3137 '--stop-signal=SIGTERM',
3140 if isinstance(self
.ctx
.container_engine
, Podman
):
3141 if os
.path
.exists('/etc/ceph/podman-auth.json'):
3142 cmd_args
.append('--authfile=/etc/ceph/podman-auth.json')
3145 '-e', 'CONTAINER_IMAGE=%s' % self
.image
,
3146 '-e', 'NODE_NAME=%s' % get_hostname(),
3148 vols
: List
[str] = []
3149 binds
: List
[str] = []
3151 if self
.memory_request
:
3152 cmd_args
.extend(['-e', 'POD_MEMORY_REQUEST', str(self
.memory_request
)])
3153 if self
.memory_limit
:
3154 cmd_args
.extend(['-e', 'POD_MEMORY_LIMIT', str(self
.memory_limit
)])
3155 cmd_args
.extend(['--memory', str(self
.memory_limit
)])
3157 if self
.host_network
:
3158 cmd_args
.append('--net=host')
3160 cmd_args
.extend(['--entrypoint', self
.entrypoint
])
3164 # let OSD etc read block devs that haven't been chowned
3165 '--group-add=disk'])
3166 if self
.ptrace
and not self
.privileged
:
3167 # if privileged, the SYS_PTRACE cap is already added
3168 # in addition, --cap-add and --privileged are mutually
3169 # exclusive since podman >= 2.0
3170 cmd_args
.append('--cap-add=SYS_PTRACE')
3172 cmd_args
.append('--init')
3173 envs
+= ['-e', 'CEPH_USE_RANDOM_NONCE=1']
3175 cmd_args
.extend(['--name', self
.cname
])
3177 for env
in self
.envs
:
3178 envs
.extend(['-e', env
])
3181 [['-v', '%s:%s' % (host_dir
, container_dir
)]
3182 for host_dir
, container_dir
in self
.volume_mounts
.items()], [])
3183 binds
= sum([['--mount', '{}'.format(','.join(bind
))]
3184 for bind
in self
.bind_mounts
], [])
3187 cmd_args
+ self
.container_args
+ \
3188 envs
+ vols
+ binds
+ \
3189 [self
.image
] + self
.args
# type: ignore
3191 def shell_cmd(self
, cmd
: List
[str]) -> List
[str]:
3192 cmd_args
: List
[str] = [
3193 str(self
.ctx
.container_engine
.path
),
3199 '-e', 'CONTAINER_IMAGE=%s' % self
.image
,
3200 '-e', 'NODE_NAME=%s' % get_hostname(),
3202 vols
: List
[str] = []
3203 binds
: List
[str] = []
3205 if self
.host_network
:
3206 cmd_args
.append('--net=host')
3207 if self
.ctx
.no_hosts
:
3208 cmd_args
.append('--no-hosts')
3212 # let OSD etc read block devs that haven't been chowned
3216 cmd_args
.append('--init')
3217 envs
+= ['-e', 'CEPH_USE_RANDOM_NONCE=1']
3219 for env
in self
.envs
:
3220 envs
.extend(['-e', env
])
3223 [['-v', '%s:%s' % (host_dir
, container_dir
)]
3224 for host_dir
, container_dir
in self
.volume_mounts
.items()], [])
3225 binds
= sum([['--mount', '{}'.format(','.join(bind
))]
3226 for bind
in self
.bind_mounts
], [])
3228 return cmd_args
+ self
.container_args
+ envs
+ vols
+ binds
+ [
3229 '--entrypoint', cmd
[0],
3233 def exec_cmd(self
, cmd
):
3234 # type: (List[str]) -> List[str]
3236 str(self
.ctx
.container_engine
.path
),
3238 ] + self
.container_args
+ [
3242 def rm_cmd(self
, storage
=False):
3243 # type: (bool) -> List[str]
3245 str(self
.ctx
.container_engine
.path
),
3249 ret
.append('--storage')
3250 ret
.append(self
.cname
)
3254 # type () -> List[str]
3256 str(self
.ctx
.container_engine
.path
),
3261 def run(self
, timeout
=DEFAULT_TIMEOUT
):
3262 # type: (Optional[int]) -> str
3263 out
, _
, _
= call_throws(self
.ctx
, self
.run_cmd(),
3264 desc
=self
.entrypoint
, timeout
=timeout
)
3267 ##################################
3271 def command_version(ctx
):
3272 # type: (CephadmContext) -> int
3273 c
= CephContainer(ctx
, ctx
.image
, 'ceph', ['--version'])
3274 out
, err
, ret
= call(ctx
, c
.run_cmd(), desc
=c
.entrypoint
)
3279 ##################################
def command_pull(ctx):
    # type: (CephadmContext) -> int
    """Pull the configured container image, then print its inspect info.

    Returns the exit code of the inspect step.
    """
    _pull_image(ctx, ctx.image)
    return command_inspect_image(ctx)
3290 def _pull_image(ctx
, image
):
3291 # type: (CephadmContext, str) -> None
3292 logger
.info('Pulling container image %s...' % image
)
3295 'error creating read-write layer with ID',
3296 'net/http: TLS handshake timeout',
3297 'Digest did not match, expected',
3300 cmd
= [ctx
.container_engine
.path
, 'pull', image
]
3301 if isinstance(ctx
.container_engine
, Podman
) and os
.path
.exists('/etc/ceph/podman-auth.json'):
3302 cmd
.append('--authfile=/etc/ceph/podman-auth.json')
3303 cmd_str
= ' '.join(cmd
)
3305 for sleep_secs
in [1, 4, 25]:
3306 out
, err
, ret
= call(ctx
, cmd
)
3310 if not any(pattern
in err
for pattern
in ignorelist
):
3311 raise RuntimeError('Failed command: %s' % cmd_str
)
3313 logger
.info('`%s` failed transiently. Retrying. waiting %s seconds...' % (cmd_str
, sleep_secs
))
3314 time
.sleep(sleep_secs
)
3316 raise RuntimeError('Failed command: %s: maximum retries reached' % cmd_str
)
3318 ##################################
3322 def command_inspect_image(ctx
):
3323 # type: (CephadmContext) -> int
3324 out
, err
, ret
= call_throws(ctx
, [
3325 ctx
.container_engine
.path
, 'inspect',
3326 '--format', '{{.ID}},{{.RepoDigests}}',
3330 info_from
= get_image_info_from_inspect(out
.strip(), ctx
.image
)
3332 ver
= CephContainer(ctx
, ctx
.image
, 'ceph', ['--version']).run().strip()
3333 info_from
['ceph_version'] = ver
3335 print(json
.dumps(info_from
, indent
=4, sort_keys
=True))
def normalize_image_digest(digest):
    # type: (str) -> str
    """Qualify an unqualified container image digest with DEFAULT_REGISTRY.

    Normal case:
      ceph/ceph -> docker.io/ceph/ceph
    Edge cases that shouldn't ever come up:
      ubuntu -> docker.io/ubuntu  (ubuntu is an alias for library/ubuntu)
    No change:
      quay.ceph.io/ceph/ceph
      docker.io/ubuntu
    """
    bits = digest.split('/')
    # A '.' in the first component means it is a registry hostname; three or
    # more components also imply an explicit registry. Otherwise the digest
    # is unqualified and gets the default registry prepended.
    if '.' not in bits[0] and len(bits) < 3:
        digest = DEFAULT_REGISTRY + '/' + digest
    # Explicit return: without it the function would implicitly return None.
    return digest
3353 def get_image_info_from_inspect(out
, image
):
3354 # type: (str, str) -> Dict[str, Union[str,List[str]]]
3355 image_id
, digests
= out
.split(',', 1)
3357 raise Error('inspect {}: empty result'.format(image
))
3359 'image_id': normalize_container_id(image_id
)
3360 } # type: Dict[str, Union[str,List[str]]]
3362 r
['repo_digests'] = list(map(normalize_image_digest
, digests
[1:-1].split(' ')))
3365 ##################################
3368 def check_subnet(subnets
: str) -> Tuple
[int, List
[int], str]:
3369 """Determine whether the given string is a valid subnet
3371 :param subnets: subnet string, a single definition or comma separated list of CIDR subnets
3372 :returns: return code, IP version list of the subnets and msg describing any errors validation errors
3378 subnet_list
= subnets
.split(',')
3379 for subnet
in subnet_list
:
3380 # ensure the format of the string is as expected address/netmask
3381 if not re
.search(r
'\/\d+$', subnet
):
3383 errors
.append(f
'{subnet} is not in CIDR format (address/netmask)')
3386 v
= ipaddress
.ip_network(subnet
).version
3388 except ValueError as e
:
3390 errors
.append(f
'{subnet} invalid: {str(e)}')
3392 return rc
, list(versions
), ', '.join(errors
)
def unwrap_ipv6(address):
    # type: (str) -> str
    """Strip enclosing square brackets from a bracketed IPv6 address.

    Anything not wrapped in brackets (IPv4 addresses, hostnames, bare IPv6
    addresses) is returned unchanged.
    """
    if address.startswith('[') and address.endswith(']'):
        return address[1:-1]
    # Not bracketed: return the input as-is rather than implicitly
    # returning None.
    return address
def wrap_ipv6(address):
    # type: (str) -> str
    """Wrap an IPv6 address in square brackets; leave anything else alone.

    We cannot assume the input is even an IP address: hostnames and
    already-wrapped addresses will not parse and make
    ipaddress.ip_address() raise ValueError, in which case the input is
    returned untouched.
    """
    try:
        if ipaddress.ip_address(address).version == 6:
            return f'[{address}]'
    except ValueError:
        # Not a parseable IP (hostname, already-bracketed address, ...);
        # fall through and return it unchanged.
        pass
    return address
def is_ipv6(address):
    # type: (str) -> bool
    """Return True if *address* is a valid IPv6 address (brackets allowed).

    Invalid input (e.g. a hostname) logs a warning and returns False
    instead of raising.
    """
    address = unwrap_ipv6(address)
    try:
        return ipaddress.ip_address(address).version == 6
    except ValueError:
        # ip_address() raises for anything that is not a literal IP;
        # warn and report "not IPv6" rather than crashing the caller.
        logger.warning('Address: {} is not a valid IP address'.format(address))
        return False
3427 def prepare_mon_addresses(
3429 ) -> Tuple
[str, bool, Optional
[str]]:
3430 r
= re
.compile(r
':(\d+)$')
3435 ipv6
= is_ipv6(ctx
.mon_ip
)
3437 ctx
.mon_ip
= wrap_ipv6(ctx
.mon_ip
)
3438 hasport
= r
.findall(ctx
.mon_ip
)
3440 port
= int(hasport
[0])
3442 addr_arg
= '[v1:%s]' % ctx
.mon_ip
3444 addr_arg
= '[v2:%s]' % ctx
.mon_ip
3446 logger
.warning('Using msgr2 protocol for unrecognized port %d' %
3448 addr_arg
= '[v2:%s]' % ctx
.mon_ip
3449 base_ip
= ctx
.mon_ip
[0:-(len(str(port
))) - 1]
3450 check_ip_port(ctx
, base_ip
, port
)
3452 base_ip
= ctx
.mon_ip
3453 addr_arg
= '[v2:%s:3300,v1:%s:6789]' % (ctx
.mon_ip
, ctx
.mon_ip
)
3454 check_ip_port(ctx
, ctx
.mon_ip
, 3300)
3455 check_ip_port(ctx
, ctx
.mon_ip
, 6789)
3457 addr_arg
= ctx
.mon_addrv
3458 if addr_arg
[0] != '[' or addr_arg
[-1] != ']':
3459 raise Error('--mon-addrv value %s must use square backets' %
3461 ipv6
= addr_arg
.count('[') > 1
3462 for addr
in addr_arg
[1:-1].split(','):
3463 hasport
= r
.findall(addr
)
3465 raise Error('--mon-addrv value %s must include port number' %
3467 port
= int(hasport
[0])
3468 # strip off v1: or v2: prefix
3469 addr
= re
.sub(r
'^\w+:', '', addr
)
3470 base_ip
= addr
[0:-(len(str(port
))) - 1]
3471 check_ip_port(ctx
, base_ip
, port
)
3473 raise Error('must specify --mon-ip or --mon-addrv')
3474 logger
.debug('Base mon IP is %s, final addrv is %s' % (base_ip
, addr_arg
))
3477 if not ctx
.skip_mon_network
:
3478 # make sure IP is configured locally, and then figure out the
3480 errmsg
= f
'Cannot infer CIDR network for mon IP `{base_ip}`'
3481 for net
, ifaces
in list_networks(ctx
).items():
3483 for iface
, ls
in ifaces
.items():
3486 if ipaddress
.ip_address(unwrap_ipv6(base_ip
)) in \
3487 [ipaddress
.ip_address(ip
) for ip
in ips
]:
3489 logger
.info(f
'Mon IP `{base_ip}` is in CIDR network `{mon_network}`')
3491 except ValueError as e
:
3492 logger
.warning(f
'{errmsg}: {e}')
3494 raise Error(f
'{errmsg}: pass --skip-mon-network to configure it later')
3496 return (addr_arg
, ipv6
, mon_network
)
def prepare_cluster_network(ctx: CephadmContext) -> Tuple[str, bool]:
    """Validate --cluster-network and report whether it involves IPv6.

    Returns (cluster_network, ipv6_cluster_network). The cluster network
    may not exist on this node, so all we can do is validate that the
    address given is a valid ipv4 or ipv6 subnet.
    """
    cluster_network = ''
    ipv6_cluster_network = False
    if ctx.cluster_network:
        rc, versions, err_msg = check_subnet(ctx.cluster_network)
        if rc:
            raise Error(f'Invalid --cluster-network parameter: {err_msg}')
        cluster_network = ctx.cluster_network
        ipv6_cluster_network = True if 6 in versions else False
    else:
        logger.info('- internal network (--cluster-network) has not '
                    'been provided, OSD replication will default to '
                    'the public_network')

    return cluster_network, ipv6_cluster_network
3518 def create_initial_keys(
3519 ctx
: CephadmContext
,
3522 ) -> Tuple
[str, str, str, Any
, Any
]: # type: ignore
3526 # create some initial keys
3527 logger
.info('Creating initial keys...')
3528 mon_key
= CephContainer(
3531 entrypoint
='/usr/bin/ceph-authtool',
3532 args
=['--gen-print-key'],
3534 admin_key
= CephContainer(
3537 entrypoint
='/usr/bin/ceph-authtool',
3538 args
=['--gen-print-key'],
3540 mgr_key
= CephContainer(
3543 entrypoint
='/usr/bin/ceph-authtool',
3544 args
=['--gen-print-key'],
3547 keyring
= ('[mon.]\n'
3549 '\tcaps mon = allow *\n'
3552 '\tcaps mon = allow *\n'
3553 '\tcaps mds = allow *\n'
3554 '\tcaps mgr = allow *\n'
3555 '\tcaps osd = allow *\n'
3558 '\tcaps mon = profile mgr\n'
3559 '\tcaps mds = allow *\n'
3560 '\tcaps osd = allow *\n'
3561 % (mon_key
, admin_key
, mgr_id
, mgr_key
))
3563 admin_keyring
= write_tmp('[client.admin]\n'
3564 '\tkey = ' + admin_key
+ '\n',
3568 bootstrap_keyring
= write_tmp(keyring
, uid
, gid
)
3569 return (mon_key
, mgr_key
, admin_key
,
3570 bootstrap_keyring
, admin_keyring
)
3573 def create_initial_monmap(
3574 ctx
: CephadmContext
,
3577 mon_id
: str, mon_addr
: str
3579 logger
.info('Creating initial monmap...')
3580 monmap
= write_tmp('', 0, 0)
3581 out
= CephContainer(
3584 entrypoint
='/usr/bin/monmaptool',
3589 '--addv', mon_id
, mon_addr
,
3593 monmap
.name
: '/tmp/monmap:z',
3596 logger
.debug(f
'monmaptool for {mon_id} {mon_addr} on {out}')
3598 # pass monmap file to ceph user for use by ceph-mon --mkfs below
3599 os
.fchown(monmap
.fileno(), uid
, gid
)
3603 def prepare_create_mon(
3604 ctx
: CephadmContext
,
3606 fsid
: str, mon_id
: str,
3607 bootstrap_keyring_path
: str,
3610 logger
.info('Creating mon...')
3611 create_daemon_dirs(ctx
, fsid
, 'mon', mon_id
, uid
, gid
)
3612 mon_dir
= get_data_dir(fsid
, ctx
.data_dir
, 'mon', mon_id
)
3613 log_dir
= get_log_dir(fsid
, ctx
.log_dir
)
3614 out
= CephContainer(
3617 entrypoint
='/usr/bin/ceph-mon',
3623 '--monmap', '/tmp/monmap',
3624 '--keyring', '/tmp/keyring',
3625 ] + get_daemon_args(ctx
, fsid
, 'mon', mon_id
),
3627 log_dir
: '/var/log/ceph:z',
3628 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id
),
3629 bootstrap_keyring_path
: '/tmp/keyring:z',
3630 monmap_path
: '/tmp/monmap:z',
3633 logger
.debug(f
'create mon.{mon_id} on {out}')
3634 return (mon_dir
, log_dir
)
3638 ctx
: CephadmContext
,
3640 fsid
: str, mon_id
: str
3642 mon_c
= get_container(ctx
, fsid
, 'mon', mon_id
)
3643 ctx
.meta_json
= json
.dumps({'service_name': 'mon'})
3644 deploy_daemon(ctx
, fsid
, 'mon', mon_id
, mon_c
, uid
, gid
,
3645 config
=None, keyring
=None)
3649 ctx
: CephadmContext
,
3650 mon_id
: str, mon_dir
: str,
3651 admin_keyring_path
: str, config_path
: str
3653 logger
.info('Waiting for mon to start...')
3657 entrypoint
='/usr/bin/ceph',
3661 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id
),
3662 admin_keyring_path
: '/etc/ceph/ceph.client.admin.keyring:z',
3663 config_path
: '/etc/ceph/ceph.conf:z',
3667 # wait for the service to become available
3668 def is_mon_available():
3670 timeout
= ctx
.timeout
if ctx
.timeout
else 60 # seconds
3671 out
, err
, ret
= call(ctx
, c
.run_cmd(),
3676 is_available(ctx
, 'mon', is_mon_available
)
3680 ctx
: CephadmContext
,
3682 fsid
: str, mgr_id
: str, mgr_key
: str,
3683 config
: str, clifunc
: Callable
3685 logger
.info('Creating mgr...')
3686 mgr_keyring
= '[mgr.%s]\n\tkey = %s\n' % (mgr_id
, mgr_key
)
3687 mgr_c
= get_container(ctx
, fsid
, 'mgr', mgr_id
)
3688 # Note:the default port used by the Prometheus node exporter is opened in fw
3689 ctx
.meta_json
= json
.dumps({'service_name': 'mgr'})
3690 deploy_daemon(ctx
, fsid
, 'mgr', mgr_id
, mgr_c
, uid
, gid
,
3691 config
=config
, keyring
=mgr_keyring
, ports
=[9283])
3693 # wait for the service to become available
3694 logger
.info('Waiting for mgr to start...')
3696 def is_mgr_available():
3698 timeout
= ctx
.timeout
if ctx
.timeout
else 60 # seconds
3700 out
= clifunc(['status', '-f', 'json-pretty'], timeout
=timeout
)
3702 return j
.get('mgrmap', {}).get('available', False)
3703 except Exception as e
:
3704 logger
.debug('status failed: %s' % e
)
3706 is_available(ctx
, 'mgr', is_mgr_available
)
3710 ctx
: CephadmContext
,
3711 cli
: Callable
, wait_for_mgr_restart
: Callable
3714 cli(['cephadm', 'set-user', ctx
.ssh_user
])
3717 logger
.info('Using provided ssh config...')
3719 pathify(ctx
.ssh_config
.name
): '/tmp/cephadm-ssh-config:z',
3721 cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts
=mounts
)
3723 if ctx
.ssh_private_key
and ctx
.ssh_public_key
:
3724 logger
.info('Using provided ssh keys...')
3726 pathify(ctx
.ssh_private_key
.name
): '/tmp/cephadm-ssh-key:z',
3727 pathify(ctx
.ssh_public_key
.name
): '/tmp/cephadm-ssh-key.pub:z'
3729 cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts
=mounts
)
3730 cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts
=mounts
)
3732 logger
.info('Generating ssh key...')
3733 cli(['cephadm', 'generate-key'])
3734 ssh_pub
= cli(['cephadm', 'get-pub-key'])
3736 with
open(ctx
.output_pub_ssh_key
, 'w') as f
:
3738 logger
.info('Wrote public SSH key to %s' % ctx
.output_pub_ssh_key
)
3740 logger
.info('Adding key to %s@localhost authorized_keys...' % ctx
.ssh_user
)
3742 s_pwd
= pwd
.getpwnam(ctx
.ssh_user
)
3744 raise Error('Cannot find uid/gid for ssh-user: %s' % (ctx
.ssh_user
))
3745 ssh_uid
= s_pwd
.pw_uid
3746 ssh_gid
= s_pwd
.pw_gid
3747 ssh_dir
= os
.path
.join(s_pwd
.pw_dir
, '.ssh')
3749 if not os
.path
.exists(ssh_dir
):
3750 makedirs(ssh_dir
, ssh_uid
, ssh_gid
, 0o700)
3752 auth_keys_file
= '%s/authorized_keys' % ssh_dir
3755 if os
.path
.exists(auth_keys_file
):
3756 with
open(auth_keys_file
, 'r') as f
:
3757 f
.seek(0, os
.SEEK_END
)
3759 f
.seek(f
.tell() - 1, os
.SEEK_SET
) # go to last char
3760 if f
.read() != '\n':
3763 with
open(auth_keys_file
, 'a') as f
:
3764 os
.fchown(f
.fileno(), ssh_uid
, ssh_gid
) # just in case we created it
3765 os
.fchmod(f
.fileno(), 0o600) # just in case we created it
3768 f
.write(ssh_pub
.strip() + '\n')
3770 host
= get_hostname()
3771 logger
.info('Adding host %s...' % host
)
3773 args
= ['orch', 'host', 'add', host
]
3775 args
.append(ctx
.mon_ip
)
3777 except RuntimeError as e
:
3778 raise Error('Failed to add host <%s>: %s' % (host
, e
))
3780 for t
in ['mon', 'mgr']:
3781 if not ctx
.orphan_initial_daemons
:
3782 logger
.info('Deploying %s service with default placement...' % t
)
3783 cli(['orch', 'apply', t
])
3785 logger
.info('Deploying unmanaged %s service...' % t
)
3786 cli(['orch', 'apply', t
, '--unmanaged'])
3788 if not ctx
.orphan_initial_daemons
:
3789 logger
.info('Deploying crash service with default placement...')
3790 cli(['orch', 'apply', 'crash'])
3792 if not ctx
.skip_monitoring_stack
:
3793 logger
.info('Enabling mgr prometheus module...')
3794 cli(['mgr', 'module', 'enable', 'prometheus'])
3795 for t
in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
3796 logger
.info('Deploying %s service with default placement...' % t
)
3797 cli(['orch', 'apply', t
])
def enable_cephadm_mgr_module(
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:
    """Enable the cephadm mgr module and make it the orchestrator backend.

    Enabling a mgr module restarts the mgr, so we block on the supplied
    wait_for_mgr_restart() callback before switching the backend over.
    """
    logger.info('Enabling cephadm module...')
    cli(['mgr', 'module', 'enable', 'cephadm'])
    wait_for_mgr_restart()
    logger.info('Setting orchestrator backend to cephadm...')
    cli(['orch', 'set', 'backend', 'cephadm'])
3811 def prepare_dashboard(
3812 ctx
: CephadmContext
,
3814 cli
: Callable
, wait_for_mgr_restart
: Callable
3817 # Configure SSL port (cephadm only allows to configure dashboard SSL port)
3818 # if the user does not want to use SSL he can change this setting once the cluster is up
3819 cli(['config', 'set', 'mgr', 'mgr/dashboard/ssl_server_port', str(ctx
.ssl_dashboard_port
)])
3821 # configuring dashboard parameters
3822 logger
.info('Enabling the dashboard module...')
3823 cli(['mgr', 'module', 'enable', 'dashboard'])
3824 wait_for_mgr_restart()
3826 # dashboard crt and key
3827 if ctx
.dashboard_key
and ctx
.dashboard_crt
:
3828 logger
.info('Using provided dashboard certificate...')
3830 pathify(ctx
.dashboard_crt
.name
): '/tmp/dashboard.crt:z',
3831 pathify(ctx
.dashboard_key
.name
): '/tmp/dashboard.key:z'
3833 cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts
=mounts
)
3834 cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts
=mounts
)
3836 logger
.info('Generating a dashboard self-signed certificate...')
3837 cli(['dashboard', 'create-self-signed-cert'])
3839 logger
.info('Creating initial admin user...')
3840 password
= ctx
.initial_dashboard_password
or generate_password()
3841 tmp_password_file
= write_tmp(password
, uid
, gid
)
3842 cmd
= ['dashboard', 'ac-user-create', ctx
.initial_dashboard_user
, '-i', '/tmp/dashboard.pw', 'administrator', '--force-password']
3843 if not ctx
.dashboard_password_noupdate
:
3844 cmd
.append('--pwd-update-required')
3845 cli(cmd
, extra_mounts
={pathify(tmp_password_file
.name
): '/tmp/dashboard.pw:z'})
3846 logger
.info('Fetching dashboard port number...')
3847 out
= cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
3850 # Open dashboard port
3852 fw
.open_ports([port
])
3855 logger
.info('Ceph Dashboard is now available at:\n\n'
3856 '\t URL: https://%s:%s/\n'
3858 '\tPassword: %s\n' % (
3860 ctx
.initial_dashboard_user
,
3864 def prepare_bootstrap_config(
3865 ctx
: CephadmContext
,
3866 fsid
: str, mon_addr
: str, image
: str
3870 cp
= read_config(ctx
.config
)
3871 if not cp
.has_section('global'):
3872 cp
.add_section('global')
3873 cp
.set('global', 'fsid', fsid
)
3874 cp
.set('global', 'mon_host', mon_addr
)
3875 cp
.set('global', 'container_image', image
)
3877 if not cp
.has_section('mon'):
3878 cp
.add_section('mon')
3880 not cp
.has_option('mon', 'auth_allow_insecure_global_id_reclaim')
3881 and not cp
.has_option('mon', 'auth allow insecure global id reclaim')
3883 cp
.set('mon', 'auth_allow_insecure_global_id_reclaim', 'false')
3885 if ctx
.single_host_defaults
:
3886 logger
.info('Adjusting default settings to suit single-host cluster...')
3887 # replicate across osds, not hosts
3889 not cp
.has_option('global', 'osd_crush_choose_leaf_type')
3890 and not cp
.has_option('global', 'osd crush choose leaf type')
3892 cp
.set('global', 'osd_crush_choose_leaf_type', '0')
3895 not cp
.has_option('global', 'osd_pool_default_size')
3896 and not cp
.has_option('global', 'osd pool default size')
3898 cp
.set('global', 'osd_pool_default_size', '2')
3899 # disable mgr standby modules (so we can colocate multiple mgrs on one host)
3900 if not cp
.has_section('mgr'):
3901 cp
.add_section('mgr')
3903 not cp
.has_option('mgr', 'mgr_standby_modules')
3904 and not cp
.has_option('mgr', 'mgr standby modules')
3906 cp
.set('mgr', 'mgr_standby_modules', 'false')
3910 config
= cpf
.getvalue()
3912 if ctx
.registry_json
or ctx
.registry_url
:
3913 command_registry_login(ctx
)
3918 def finish_bootstrap_config(
3919 ctx
: CephadmContext
,
3922 mon_id
: str, mon_dir
: str,
3923 mon_network
: Optional
[str], ipv6
: bool,
3925 cluster_network
: Optional
[str], ipv6_cluster_network
: bool
3928 if not ctx
.no_minimize_config
:
3929 logger
.info('Assimilating anything we can from ceph.conf...')
3931 'config', 'assimilate-conf',
3932 '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
3934 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
3936 logger
.info('Generating new minimal ceph.conf...')
3938 'config', 'generate-minimal-conf',
3939 '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
3941 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
3943 # re-read our minimized config
3944 with
open(mon_dir
+ '/config', 'r') as f
:
3946 logger
.info('Restarting the monitor...')
3950 get_unit_name(fsid
, 'mon', mon_id
)
3954 logger
.info(f
'Setting mon public_network to {mon_network}')
3955 cli(['config', 'set', 'mon', 'public_network', mon_network
])
3958 logger
.info(f
'Setting cluster_network to {cluster_network}')
3959 cli(['config', 'set', 'global', 'cluster_network', cluster_network
])
3961 if ipv6
or ipv6_cluster_network
:
3962 logger
.info('Enabling IPv6 (ms_bind_ipv6) binding')
3963 cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])
3965 with
open(ctx
.output_config
, 'w') as f
:
3967 logger
.info('Wrote config to %s' % ctx
.output_config
)
3972 def command_bootstrap(ctx
):
3973 # type: (CephadmContext) -> int
3975 if not ctx
.output_config
:
3976 ctx
.output_config
= os
.path
.join(ctx
.output_dir
, 'ceph.conf')
3977 if not ctx
.output_keyring
:
3978 ctx
.output_keyring
= os
.path
.join(ctx
.output_dir
,
3979 'ceph.client.admin.keyring')
3980 if not ctx
.output_pub_ssh_key
:
3981 ctx
.output_pub_ssh_key
= os
.path
.join(ctx
.output_dir
, 'ceph.pub')
3983 # verify output files
3984 for f
in [ctx
.output_config
, ctx
.output_keyring
,
3985 ctx
.output_pub_ssh_key
]:
3986 if not ctx
.allow_overwrite
:
3987 if os
.path
.exists(f
):
3988 raise Error('%s already exists; delete or pass '
3989 '--allow-overwrite to overwrite' % f
)
3990 dirname
= os
.path
.dirname(f
)
3991 if dirname
and not os
.path
.exists(dirname
):
3992 fname
= os
.path
.basename(f
)
3993 logger
.info(f
'Creating directory {dirname} for {fname}')
3995 # use makedirs to create intermediate missing dirs
3996 os
.makedirs(dirname
, 0o755)
3997 except PermissionError
:
3998 raise Error(f
'Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.')
4000 (user_conf
, _
) = get_config_and_keyring(ctx
)
4002 if not ctx
.skip_prepare_host
:
4003 command_prepare_host(ctx
)
4005 logger
.info('Skip prepare_host')
4008 fsid
= ctx
.fsid
or make_fsid()
4009 if not is_fsid(fsid
):
4010 raise Error('not an fsid: %s' % fsid
)
4011 logger
.info('Cluster fsid: %s' % fsid
)
4013 hostname
= get_hostname()
4014 if '.' in hostname
and not ctx
.allow_fqdn_hostname
:
4015 raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname
, hostname
.split('.')[0]))
4016 mon_id
= ctx
.mon_id
or hostname
4017 mgr_id
= ctx
.mgr_id
or generate_service_id()
4019 lock
= FileLock(ctx
, fsid
)
4022 (addr_arg
, ipv6
, mon_network
) = prepare_mon_addresses(ctx
)
4023 cluster_network
, ipv6_cluster_network
= prepare_cluster_network(ctx
)
4025 config
= prepare_bootstrap_config(ctx
, fsid
, addr_arg
, ctx
.image
)
4027 if not ctx
.skip_pull
:
4028 _pull_image(ctx
, ctx
.image
)
4030 image_ver
= CephContainer(ctx
, ctx
.image
, 'ceph', ['--version']).run().strip()
4031 logger
.info(f
'Ceph version: {image_ver}')
4033 if not ctx
.allow_mismatched_release
:
4034 image_release
= image_ver
.split()[4]
4035 if image_release
not in \
4036 [DEFAULT_IMAGE_RELEASE
, LATEST_STABLE_RELEASE
]:
4038 f
'Container release {image_release} != cephadm release {DEFAULT_IMAGE_RELEASE};'
4039 ' please use matching version of cephadm (pass --allow-mismatched-release to continue anyway)'
4042 logger
.info('Extracting ceph user uid/gid from container image...')
4043 (uid
, gid
) = extract_uid_gid(ctx
)
4045 # create some initial keys
4046 (mon_key
, mgr_key
, admin_key
, bootstrap_keyring
, admin_keyring
) = \
4047 create_initial_keys(ctx
, uid
, gid
, mgr_id
)
4049 monmap
= create_initial_monmap(ctx
, uid
, gid
, fsid
, mon_id
, addr_arg
)
4050 (mon_dir
, log_dir
) = \
4051 prepare_create_mon(ctx
, uid
, gid
, fsid
, mon_id
,
4052 bootstrap_keyring
.name
, monmap
.name
)
4054 with
open(mon_dir
+ '/config', 'w') as f
:
4055 os
.fchown(f
.fileno(), uid
, gid
)
4056 os
.fchmod(f
.fileno(), 0o600)
4059 make_var_run(ctx
, fsid
, uid
, gid
)
4060 create_mon(ctx
, uid
, gid
, fsid
, mon_id
)
4062 # config to issue various CLI commands
4063 tmp_config
= write_tmp(config
, uid
, gid
)
4065 # a CLI helper to reduce our typing
4066 def cli(cmd
, extra_mounts
={}, timeout
=DEFAULT_TIMEOUT
):
4067 # type: (List[str], Dict[str, str], Optional[int]) -> str
4069 log_dir
: '/var/log/ceph:z',
4070 admin_keyring
.name
: '/etc/ceph/ceph.client.admin.keyring:z',
4071 tmp_config
.name
: '/etc/ceph/ceph.conf:z',
4073 for k
, v
in extra_mounts
.items():
4075 timeout
= timeout
or ctx
.timeout
4076 return CephContainer(
4079 entrypoint
='/usr/bin/ceph',
4081 volume_mounts
=mounts
,
4082 ).run(timeout
=timeout
)
4084 wait_for_mon(ctx
, mon_id
, mon_dir
, admin_keyring
.name
, tmp_config
.name
)
4086 finish_bootstrap_config(ctx
, fsid
, config
, mon_id
, mon_dir
,
4087 mon_network
, ipv6
, cli
,
4088 cluster_network
, ipv6_cluster_network
)
4091 with
open(ctx
.output_keyring
, 'w') as f
:
4092 os
.fchmod(f
.fileno(), 0o600)
4093 f
.write('[client.admin]\n'
4094 '\tkey = ' + admin_key
+ '\n')
4095 logger
.info('Wrote keyring to %s' % ctx
.output_keyring
)
4098 create_mgr(ctx
, uid
, gid
, fsid
, mgr_id
, mgr_key
, config
, cli
)
4101 # user given config settings were already assimilated earlier
4102 # but if the given settings contained any attributes in
4103 # the mgr (e.g. mgr/cephadm/container_image_prometheus)
4104 # they don't seem to be stored if there isn't a mgr yet.
4105 # Since re-assimilating the same conf settings should be
4106 # idempotent we can just do it again here.
4107 with tempfile
.NamedTemporaryFile(buffering
=0) as tmp
:
4108 tmp
.write(user_conf
.encode('utf-8'))
4109 cli(['config', 'assimilate-conf',
4110 '-i', '/var/lib/ceph/user.conf'],
4111 {tmp
.name
: '/var/lib/ceph/user.conf:z'})
4113 # wait for mgr to restart (after enabling a module)
4114 def wait_for_mgr_restart():
4115 # first get latest mgrmap epoch from the mon. try newer 'mgr
4116 # stat' command first, then fall back to 'mgr dump' if
4119 j
= json_loads_retry(lambda: cli(['mgr', 'stat']))
4121 j
= json_loads_retry(lambda: cli(['mgr', 'dump']))
4124 # wait for mgr to have it
4125 logger
.info('Waiting for the mgr to restart...')
4127 def mgr_has_latest_epoch():
4130 out
= cli(['tell', 'mgr', 'mgr_status'])
4132 return j
['mgrmap_epoch'] >= epoch
4133 except Exception as e
:
4134 logger
.debug('tell mgr mgr_status failed: %s' % e
)
4136 is_available(ctx
, 'mgr epoch %d' % epoch
, mgr_has_latest_epoch
)
4138 enable_cephadm_mgr_module(cli
, wait_for_mgr_restart
)
4141 if not ctx
.skip_ssh
:
4142 prepare_ssh(ctx
, cli
, wait_for_mgr_restart
)
4144 if ctx
.registry_url
and ctx
.registry_username
and ctx
.registry_password
:
4145 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_url', ctx
.registry_url
, '--force'])
4146 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_username', ctx
.registry_username
, '--force'])
4147 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_password', ctx
.registry_password
, '--force'])
4149 cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(ctx
.container_init
), '--force'])
4151 if ctx
.with_exporter
:
4152 cli(['config-key', 'set', 'mgr/cephadm/exporter_enabled', 'true'])
4153 if ctx
.exporter_config
:
4154 logger
.info('Applying custom cephadm exporter settings')
4155 # validated within the parser, so we can just apply to the store
4156 with tempfile
.NamedTemporaryFile(buffering
=0) as tmp
:
4157 tmp
.write(json
.dumps(ctx
.exporter_config
).encode('utf-8'))
4159 tmp
.name
: '/tmp/exporter-config.json:z'
4161 cli(['cephadm', 'set-exporter-config', '-i', '/tmp/exporter-config.json'], extra_mounts
=mounts
)
4162 logger
.info('-> Use ceph orch apply cephadm-exporter to deploy')
4164 # generate a default SSL configuration for the exporter(s)
4165 logger
.info('Generating a default cephadm exporter configuration (self-signed)')
4166 cli(['cephadm', 'generate-exporter-config'])
4168 # deploy the service (commented out until the cephadm changes are in the ceph container build)
4169 logger
.info('Deploying cephadm exporter service with default placement...')
4170 cli(['orch', 'apply', 'cephadm-exporter'])
4172 if not ctx
.skip_dashboard
:
4173 prepare_dashboard(ctx
, uid
, gid
, cli
, wait_for_mgr_restart
)
4175 if ctx
.output_config
== '/etc/ceph/ceph.conf' and not ctx
.skip_admin_label
:
4176 logger
.info('Enabling client.admin keyring and conf on hosts with "admin" label')
4178 cli(['orch', 'client-keyring', 'set', 'client.admin', 'label:_admin'])
4179 cli(['orch', 'host', 'label', 'add', get_hostname(), '_admin'])
4181 logger
.info('Unable to set up "admin" label; assuming older version of Ceph')
4184 logger
.info('Applying %s to cluster' % ctx
.apply_spec
)
4186 with
open(ctx
.apply_spec
) as f
:
4188 if 'hostname:' in line
:
4189 line
= line
.replace('\n', '')
4190 split
= line
.split(': ')
4191 if split
[1] != hostname
:
4192 logger
.info('Adding ssh key to %s' % split
[1])
4194 ssh_key
= '/etc/ceph/ceph.pub'
4195 if ctx
.ssh_public_key
:
4196 ssh_key
= ctx
.ssh_public_key
.name
4197 out
, err
, code
= call_throws(ctx
, ['sudo', '-u', ctx
.ssh_user
, 'ssh-copy-id', '-f', '-i', ssh_key
, '-o StrictHostKeyChecking=no', '%s@%s' % (ctx
.ssh_user
, split
[1])])
4200 mounts
[pathify(ctx
.apply_spec
)] = '/tmp/spec.yml:z'
4202 out
= cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts
=mounts
)
4205 logger
.info('You can access the Ceph CLI with:\n\n'
4206 '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
4210 ctx
.output_keyring
))
4211 logger
.info('Please consider enabling telemetry to help improve Ceph:\n\n'
4212 '\tceph telemetry on\n\n'
4213 'For more information see:\n\n'
4214 '\thttps://docs.ceph.com/docs/pacific/mgr/telemetry/\n')
4215 logger
.info('Bootstrap complete.')
4218 ##################################
4221 def command_registry_login(ctx
: CephadmContext
):
4222 if ctx
.registry_json
:
4223 logger
.info('Pulling custom registry login info from %s.' % ctx
.registry_json
)
4224 d
= get_parm(ctx
.registry_json
)
4225 if d
.get('url') and d
.get('username') and d
.get('password'):
4226 ctx
.registry_url
= d
.get('url')
4227 ctx
.registry_username
= d
.get('username')
4228 ctx
.registry_password
= d
.get('password')
4229 registry_login(ctx
, ctx
.registry_url
, ctx
.registry_username
, ctx
.registry_password
)
4231 raise Error('json provided for custom registry login did not include all necessary fields. '
4232 'Please setup json file as\n'
4234 ' "url": "REGISTRY_URL",\n'
4235 ' "username": "REGISTRY_USERNAME",\n'
4236 ' "password": "REGISTRY_PASSWORD"\n'
4238 elif ctx
.registry_url
and ctx
.registry_username
and ctx
.registry_password
:
4239 registry_login(ctx
, ctx
.registry_url
, ctx
.registry_username
, ctx
.registry_password
)
4241 raise Error('Invalid custom registry arguments received. To login to a custom registry include '
4242 '--registry-url, --registry-username and --registry-password '
4243 'options or --registry-json option')
def registry_login(ctx: CephadmContext, url, username, password):
    """Log the container engine into a custom registry.

    Any failure is re-raised as a cephadm Error. For podman, credentials
    are persisted to /etc/ceph/podman-auth.json and locked down to 0600.

    NOTE(review): the password is passed via '-p' on the command line and
    is therefore briefly visible in process listings — confirm acceptable.
    """
    logger.info('Logging into custom registry.')
    try:
        engine = ctx.container_engine
        cmd = [engine.path, 'login',
               '-u', username, '-p', password,
               url]
        if isinstance(engine, Podman):
            cmd.append('--authfile=/etc/ceph/podman-auth.json')
        out, _, _ = call_throws(ctx, cmd)
        if isinstance(engine, Podman):
            os.chmod('/etc/ceph/podman-auth.json', 0o600)
    except Exception:
        raise Error('Failed to login to custom registry @ %s as %s with given password' % (ctx.registry_url, ctx.registry_username))
4262 ##################################
def extract_uid_gid_monitoring(ctx, daemon_type):
    # type: (CephadmContext, str) -> Tuple[int, int]
    """Return the (uid, gid) that a monitoring daemon's files are owned by.

    The ids come from extract_uid_gid() probing daemon-specific paths,
    except node-exporter which uses the fixed 'nobody' ids (65534).
    Raises Error for an unsupported daemon_type.
    """
    if daemon_type == 'prometheus':
        uid, gid = extract_uid_gid(ctx, file_path='/etc/prometheus')
    elif daemon_type == 'node-exporter':
        uid, gid = 65534, 65534
    elif daemon_type == 'grafana':
        uid, gid = extract_uid_gid(ctx, file_path='/var/lib/grafana')
    elif daemon_type == 'alertmanager':
        uid, gid = extract_uid_gid(ctx, file_path=['/etc/alertmanager', '/etc/prometheus'])
    else:
        raise Error('{} not implemented yet'.format(daemon_type))
    return uid, gid
4282 def command_deploy(ctx
):
4283 # type: (CephadmContext) -> None
4284 daemon_type
, daemon_id
= ctx
.name
.split('.', 1)
4286 lock
= FileLock(ctx
, ctx
.fsid
)
4289 if daemon_type
not in get_supported_daemons():
4290 raise Error('daemon type %s not recognized' % daemon_type
)
4293 unit_name
= get_unit_name(ctx
.fsid
, daemon_type
, daemon_id
)
4294 container_name
= 'ceph-%s-%s.%s' % (ctx
.fsid
, daemon_type
, daemon_id
)
4295 (_
, state
, _
) = check_unit(ctx
, unit_name
)
4296 if state
== 'running' or is_container_running(ctx
, container_name
):
4300 logger
.info('%s daemon %s ...' % ('Reconfig', ctx
.name
))
4302 logger
.info('%s daemon %s ...' % ('Redeploy', ctx
.name
))
4304 logger
.info('%s daemon %s ...' % ('Deploy', ctx
.name
))
4306 # Get and check ports explicitly required to be opened
4307 daemon_ports
= [] # type: List[int]
4309 # only check port in use if not reconfig or redeploy since service
4310 # we are redeploying/reconfiguring will already be using the port
4311 if not ctx
.reconfig
and not redeploy
:
4313 daemon_ports
= list(map(int, ctx
.tcp_ports
.split()))
4315 if daemon_type
in Ceph
.daemons
:
4316 config
, keyring
= get_config_and_keyring(ctx
)
4317 uid
, gid
= extract_uid_gid(ctx
)
4318 make_var_run(ctx
, ctx
.fsid
, uid
, gid
)
4320 c
= get_container(ctx
, ctx
.fsid
, daemon_type
, daemon_id
,
4321 ptrace
=ctx
.allow_ptrace
)
4322 deploy_daemon(ctx
, ctx
.fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
4323 config
=config
, keyring
=keyring
,
4324 osd_fsid
=ctx
.osd_fsid
,
4325 reconfig
=ctx
.reconfig
,
4328 elif daemon_type
in Monitoring
.components
:
4329 # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
4331 # make sure provided config-json is sufficient
4332 config
= get_parm(ctx
.config_json
) # type: ignore
4333 required_files
= Monitoring
.components
[daemon_type
].get('config-json-files', list())
4334 required_args
= Monitoring
.components
[daemon_type
].get('config-json-args', list())
4336 if not config
or not all(c
in config
.get('files', {}).keys() for c
in required_files
): # type: ignore
4337 raise Error('{} deployment requires config-json which must '
4338 'contain file content for {}'.format(daemon_type
.capitalize(), ', '.join(required_files
)))
4340 if not config
or not all(c
in config
.keys() for c
in required_args
): # type: ignore
4341 raise Error('{} deployment requires config-json which must '
4342 'contain arg for {}'.format(daemon_type
.capitalize(), ', '.join(required_args
)))
4344 uid
, gid
= extract_uid_gid_monitoring(ctx
, daemon_type
)
4345 c
= get_container(ctx
, ctx
.fsid
, daemon_type
, daemon_id
)
4346 deploy_daemon(ctx
, ctx
.fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
4347 reconfig
=ctx
.reconfig
,
4350 elif daemon_type
== NFSGanesha
.daemon_type
:
4351 if not ctx
.reconfig
and not redeploy
and not daemon_ports
:
4352 daemon_ports
= list(NFSGanesha
.port_map
.values())
4354 config
, keyring
= get_config_and_keyring(ctx
)
4355 # TODO: extract ganesha uid/gid (997, 994) ?
4356 uid
, gid
= extract_uid_gid(ctx
)
4357 c
= get_container(ctx
, ctx
.fsid
, daemon_type
, daemon_id
)
4358 deploy_daemon(ctx
, ctx
.fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
4359 config
=config
, keyring
=keyring
,
4360 reconfig
=ctx
.reconfig
,
4363 elif daemon_type
== CephIscsi
.daemon_type
:
4364 config
, keyring
= get_config_and_keyring(ctx
)
4365 uid
, gid
= extract_uid_gid(ctx
)
4366 c
= get_container(ctx
, ctx
.fsid
, daemon_type
, daemon_id
)
4367 deploy_daemon(ctx
, ctx
.fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
4368 config
=config
, keyring
=keyring
,
4369 reconfig
=ctx
.reconfig
,
4372 elif daemon_type
== HAproxy
.daemon_type
:
4373 haproxy
= HAproxy
.init(ctx
, ctx
.fsid
, daemon_id
)
4374 uid
, gid
= haproxy
.extract_uid_gid_haproxy()
4375 c
= get_container(ctx
, ctx
.fsid
, daemon_type
, daemon_id
)
4376 deploy_daemon(ctx
, ctx
.fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
4377 reconfig
=ctx
.reconfig
,
4380 elif daemon_type
== Keepalived
.daemon_type
:
4381 keepalived
= Keepalived
.init(ctx
, ctx
.fsid
, daemon_id
)
4382 uid
, gid
= keepalived
.extract_uid_gid_keepalived()
4383 c
= get_container(ctx
, ctx
.fsid
, daemon_type
, daemon_id
)
4384 deploy_daemon(ctx
, ctx
.fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
4385 reconfig
=ctx
.reconfig
,
4388 elif daemon_type
== CustomContainer
.daemon_type
:
4389 cc
= CustomContainer
.init(ctx
, ctx
.fsid
, daemon_id
)
4390 if not ctx
.reconfig
and not redeploy
:
4391 daemon_ports
.extend(cc
.ports
)
4392 c
= get_container(ctx
, ctx
.fsid
, daemon_type
, daemon_id
,
4393 privileged
=cc
.privileged
,
4394 ptrace
=ctx
.allow_ptrace
)
4395 deploy_daemon(ctx
, ctx
.fsid
, daemon_type
, daemon_id
, c
,
4396 uid
=cc
.uid
, gid
=cc
.gid
, config
=None,
4397 keyring
=None, reconfig
=ctx
.reconfig
,
4400 elif daemon_type
== CephadmDaemon
.daemon_type
:
4401 # get current user gid and uid
4404 config_js
= get_parm(ctx
.config_json
) # type: Dict[str, str]
4405 if not daemon_ports
:
4406 logger
.info('cephadm-exporter will use default port ({})'.format(CephadmDaemon
.default_port
))
4407 daemon_ports
= [CephadmDaemon
.default_port
]
4409 CephadmDaemon
.validate_config(config_js
)
4411 deploy_daemon(ctx
, ctx
.fsid
, daemon_type
, daemon_id
, None,
4412 uid
, gid
, ports
=daemon_ports
)
4415 raise Error('daemon type {} not implemented in command_deploy function'
4416 .format(daemon_type
))
4418 ##################################
def command_run(ctx):
    # type: (CephadmContext) -> int
    """Run the named daemon's container in the foreground; return its exit code."""
    daemon_type, daemon_id = ctx.name.split('.', 1)
    ctr = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
    return call_timeout(ctx, ctr.run_cmd(), ctx.timeout)
4429 ##################################
def fsid_conf_mismatch(ctx):
    # type: (CephadmContext) -> bool
    """Return True when ctx.fsid disagrees with the 'fsid = ' line in the conf."""
    (config, _) = get_config_and_keyring(ctx)
    if config:
        for c in config.split('\n'):
            if 'fsid = ' in c.strip():
                if 'fsid = ' + ctx.fsid != c.strip():
                    return True
    return False
def command_shell(ctx):
    # type: (CephadmContext) -> int
    """Launch an interactive container (or run ctx.command in one) with the
    cluster config, keyring and daemon mounts available.

    NOTE(review): several lines were elided by the extraction; the missing
    branches were reconstructed — confirm against upstream cephadm.
    """
    if fsid_conf_mismatch(ctx):
        raise Error('fsid does not match ceph conf')

    if ctx.fsid:
        make_log_dir(ctx, ctx.fsid)
    if ctx.name:
        if '.' in ctx.name:
            (daemon_type, daemon_id) = ctx.name.split('.', 1)
        else:
            daemon_type = ctx.name
            daemon_id = None
    else:
        daemon_type = 'osd'  # get the most mounts
        daemon_id = None

    if daemon_id and not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    # use /etc/ceph files by default, if present. we do this instead of
    # making these defaults in the arg parser because we don't want an error
    # if they don't exist.
    if not ctx.keyring and os.path.exists(SHELL_DEFAULT_KEYRING):
        ctx.keyring = SHELL_DEFAULT_KEYRING

    container_args: List[str] = ['-i']
    mounts = get_container_mounts(ctx, ctx.fsid, daemon_type, daemon_id,
                                  no_config=True if ctx.config else False)
    binds = get_container_binds(ctx, ctx.fsid, daemon_type, daemon_id)
    if ctx.config:
        mounts[pathify(ctx.config)] = '/etc/ceph/ceph.conf:z'
    if ctx.keyring:
        mounts[pathify(ctx.keyring)] = '/etc/ceph/ceph.keyring:z'
    if ctx.mount:
        for _mount in ctx.mount:
            split_src_dst = _mount.split(':')
            mount = pathify(split_src_dst[0])
            filename = os.path.basename(split_src_dst[0])
            if len(split_src_dst) > 1:
                # an explicit destination; 3 fields means a :z/:Z style flag
                dst = split_src_dst[1] + ':z' if len(split_src_dst) == 3 else split_src_dst[1]
                mounts[mount] = dst
            else:
                mounts[mount] = '/mnt/{}:z'.format(filename)
    if ctx.command:
        command = ctx.command
    else:
        command = ['bash']
        container_args += [
            '-t',
            '-e', 'LANG=C',
            '-e', 'PS1=%s' % CUSTOM_PS1,
        ]
        if ctx.fsid:
            home = os.path.join(ctx.data_dir, ctx.fsid, 'home')
            if not os.path.exists(home):
                logger.debug('Creating root home at %s' % home)
                makedirs(home, 0, 0, 0o660)
                if os.path.exists('/etc/skel'):
                    # seed the shell home dir so the interactive shell is usable
                    for f in os.listdir('/etc/skel'):
                        if f.startswith('.bash'):
                            shutil.copyfile(os.path.join('/etc/skel', f),
                                            os.path.join(home, f))
            mounts[home] = '/root'

    for i in ctx.volume:
        a, b = i.split(':', 1)
        mounts[a] = b

    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='doesnotmatter',
        args=[],
        container_args=container_args,
        volume_mounts=mounts,
        bind_mounts=binds,
        envs=ctx.env,
        privileged=True)
    command = c.shell_cmd(command)

    return call_timeout(ctx, command, ctx.timeout)
4529 ##################################
def command_enter(ctx):
    # type: (CephadmContext) -> int
    """Exec a command (or an interactive shell) inside a running daemon container.

    NOTE(review): elided guard/else lines reconstructed — confirm upstream.
    """
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')
    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    container_args = ['-i']  # type: List[str]
    if ctx.command:
        command = ctx.command
    else:
        command = ['sh']
        container_args += [
            '-t',
            '-e', 'LANG=C',
            '-e', 'PS1=%s' % CUSTOM_PS1,
        ]
    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='doesnotmatter',
        container_args=container_args,
        cname='ceph-%s-%s.%s' % (ctx.fsid, daemon_type, daemon_id),
    )
    command = c.exec_cmd(command)
    return call_timeout(ctx, command, ctx.timeout)
4558 ##################################
def command_ceph_volume(ctx):
    # type: (CephadmContext) -> None
    """Run ceph-volume inside a privileged container with OSD mounts.

    NOTE(review): elided guard lines reconstructed — confirm upstream.
    """
    if ctx.fsid:
        make_log_dir(ctx, ctx.fsid)

        lock = FileLock(ctx, ctx.fsid)
        lock.acquire()

    (uid, gid) = (0, 0)  # ceph-volume runs as root
    mounts = get_container_mounts(ctx, ctx.fsid, 'osd', None)

    tmp_config = None
    tmp_keyring = None

    (config, keyring) = get_config_and_keyring(ctx)

    if config:
        # tmp config file, mounted into the expected location
        tmp_config = write_tmp(config, uid, gid)
        mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'
    if keyring:
        # tmp keyring file for the bootstrap-osd credentials
        tmp_keyring = write_tmp(keyring, uid, gid)
        mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'

    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/sbin/ceph-volume',
        envs=ctx.env,
        args=ctx.command,
        privileged=True,
        volume_mounts=mounts,
    )

    out, err, code = call_throws(ctx, c.run_cmd())
    if not code:
        print(out)
4603 ##################################
def command_unit(ctx):
    # type: (CephadmContext) -> None
    """Pass ctx.command (start/stop/...) to systemctl for the named daemon's unit.

    NOTE(review): elided guard and call lines reconstructed — confirm upstream.
    """
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    call_throws(ctx,
                ['systemctl', ctx.command, unit_name],
                verbosity=CallVerbosity.VERBOSE,
                desc='')
4622 ##################################
def command_logs(ctx):
    # type: (CephadmContext) -> None
    """Tail/show journald logs for the named daemon's systemd unit.

    NOTE(review): elided guard lines reconstructed — confirm upstream.
    """
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    cmd = [find_program('journalctl')]
    cmd.extend(['-u', unit_name])
    if ctx.command:
        cmd.extend(ctx.command)

    # call this directly, without our wrapper, so that we get an unmolested
    # stdout with logger prefixing.
    logger.debug('Running command: %s' % ' '.join(cmd))
    subprocess.call(cmd)  # type: ignore
4643 ##################################
def list_networks(ctx):
    # type: (CephadmContext) -> Dict[str,Dict[str,List[str]]]
    """Return {network -> {iface -> [ips]}} for IPv4 and IPv6 routed networks.

    NOTE(review): elided return statement reconstructed — confirm upstream.
    """
    # sadly, 18.04's iproute2 4.15.0-2ubun doesn't support the -j flag,
    # so we'll need to use a regex to parse 'ip' command output.
    #
    # out, _, _ = call_throws(['ip', '-j', 'route', 'ls'])
    # j = json.loads(out)

    res = _list_ipv4_networks(ctx)
    res.update(_list_ipv6_networks(ctx))
    return res
def _list_ipv4_networks(ctx: CephadmContext):
    """Parse `ip route ls` output into {network -> {iface -> [ips]}}.

    NOTE(review): elided 'if not execstr' guard reconstructed — confirm upstream.
    """
    execstr: Optional[str] = find_executable('ip')
    if not execstr:
        raise FileNotFoundError("unable to find 'ip' command")
    out, _, _ = call_throws(ctx, [execstr, 'route', 'ls'])
    return _parse_ipv4_route(out)
4669 def _parse_ipv4_route(out
):
4670 r
= {} # type: Dict[str,Dict[str,List[str]]]
4671 p
= re
.compile(r
'^(\S+) dev (\S+) (.*)scope link (.*)src (\S+)')
4672 for line
in out
.splitlines():
4681 if iface
not in r
[net
]:
4683 r
[net
][iface
].append(ip
)
def _list_ipv6_networks(ctx: CephadmContext):
    """Parse `ip -6 route ls` and `ip -6 addr ls` into {network -> {iface -> [ips]}}.

    NOTE(review): elided 'if not execstr' guard reconstructed — confirm upstream.
    """
    execstr: Optional[str] = find_executable('ip')
    if not execstr:
        raise FileNotFoundError("unable to find 'ip' command")
    routes, _, _ = call_throws(ctx, [execstr, '-6', 'route', 'ls'])
    ips, _, _ = call_throws(ctx, [execstr, '-6', 'addr', 'ls'])
    return _parse_ipv6_route(routes, ips)
4696 def _parse_ipv6_route(routes
, ips
):
4697 r
= {} # type: Dict[str,Dict[str,List[str]]]
4698 route_p
= re
.compile(r
'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$')
4699 ip_p
= re
.compile(r
'^\s+inet6 (\S+)/(.*)scope (.*)$')
4700 iface_p
= re
.compile(r
'^(\d+): (\S+): (.*)$')
4701 for line
in routes
.splitlines():
4702 m
= route_p
.findall(line
)
4703 if not m
or m
[0][0].lower() == 'default':
4706 if '/' not in net
: # only consider networks with a mask
4711 if iface
not in r
[net
]:
4715 for line
in ips
.splitlines():
4716 m
= ip_p
.findall(line
)
4718 m
= iface_p
.findall(line
)
4720 # drop @... suffix, if present
4721 iface
= m
[0][1].split('@')[0]
4724 # find the network it belongs to
4725 net
= [n
for n
in r
.keys()
4726 if ipaddress
.ip_address(ip
) in ipaddress
.ip_network(n
)]
4729 r
[net
[0]][iface
].append(ip
)
def command_list_networks(ctx):
    # type: (CephadmContext) -> None
    """Print this host's routed networks and their addresses as JSON."""
    networks = list_networks(ctx)
    print(json.dumps(networks, indent=4))
4739 ##################################
def command_ls(ctx):
    # type: (CephadmContext) -> None
    """Print a JSON listing of every daemon cephadm knows about on this host."""
    daemons = list_daemons(ctx,
                           detail=not ctx.no_detail,
                           legacy_dir=ctx.legacy_dir)
    print(json.dumps(daemons, indent=4))
def with_units_to_int(v: str) -> int:
    """Convert a size string like '1.5GiB', '512MB' or '100B' to bytes.

    Suffixes K/M/G/T are binary multiples (1024-based); a bare number is
    returned as-is.  NOTE(review): elided suffix-stripping lines
    reconstructed — confirm against upstream.
    """
    if v.endswith('iB'):
        v = v[:-2]
    elif v.endswith('B'):
        v = v[:-1]
    mult = 1
    if v[-1].upper() == 'K':
        mult = 1024
        v = v[:-1]
    elif v[-1].upper() == 'M':
        mult = 1024 * 1024
        v = v[:-1]
    elif v[-1].upper() == 'G':
        mult = 1024 * 1024 * 1024
        v = v[:-1]
    elif v[-1].upper() == 'T':
        mult = 1024 * 1024 * 1024 * 1024
        v = v[:-1]
    return int(float(v) * mult)
def list_daemons(ctx, detail=True, legacy_dir=None):
    # type: (CephadmContext, bool, Optional[str]) -> List[Dict[str, str]]
    """Enumerate legacy and cephadm-managed daemons found under the data dir.

    Returns a list of dicts describing each daemon; with detail=True each
    entry also carries unit state, container/image info, versions, memory
    usage and unit file timestamps.
    NOTE(review): many lines were elided by the extraction; guards, loop
    bodies and the version-probe else-chain were reconstructed — confirm
    against upstream cephadm before relying on exact behavior.
    """
    host_version: Optional[str] = None
    ls = []
    container_path = ctx.container_engine.path

    data_dir = ctx.data_dir
    if legacy_dir is not None:
        data_dir = os.path.abspath(legacy_dir + data_dir)

    # keep track of ceph versions we see
    seen_versions = {}  # type: Dict[str, Optional[str]]

    # keep track of image digests
    seen_digests = {}  # type: Dict[str, List[str]]

    # keep track of memory usage we've seen
    seen_memusage = {}  # type: Dict[str, int]
    out, err, code = call(
        ctx,
        [container_path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
        verbosity=CallVerbosity.DEBUG
    )
    seen_memusage_cid_len = 0
    if not code:
        for line in out.splitlines():
            (cid, usage) = line.split(',')
            (used, limit) = usage.split(' / ')
            try:
                seen_memusage[cid] = with_units_to_int(used)
                if not seen_memusage_cid_len:
                    seen_memusage_cid_len = len(cid)
            except ValueError:
                logger.info('unable to parse memory usage line\n>{}'.format(line))

    # /var/lib/ceph
    if os.path.exists(data_dir):
        for i in os.listdir(data_dir):
            if i in ['mon', 'osd', 'mds', 'mgr']:
                daemon_type = i
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '-' not in j:
                        continue
                    (cluster, daemon_id) = j.split('-', 1)
                    fsid = get_legacy_daemon_fsid(ctx,
                                                  cluster, daemon_type, daemon_id,
                                                  legacy_dir=legacy_dir)
                    legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
                    val: Dict[str, Any] = {
                        'style': 'legacy',
                        'name': '%s.%s' % (daemon_type, daemon_id),
                        'fsid': fsid if fsid is not None else 'unknown',
                        'systemd_unit': legacy_unit_name,
                    }
                    if detail:
                        (val['enabled'], val['state'], _) = \
                            check_unit(ctx, legacy_unit_name)
                        if not host_version:
                            try:
                                out, err, code = call(ctx,
                                                      ['ceph', '-v'],
                                                      verbosity=CallVerbosity.DEBUG)
                                if not code and out.startswith('ceph version '):
                                    host_version = out.split(' ')[2]
                            except Exception:
                                pass
                        val['host_version'] = host_version
                    ls.append(val)
            elif is_fsid(i):
                fsid = str(i)  # convince mypy that fsid is a str here
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)):
                        name = j
                        (daemon_type, daemon_id) = j.split('.', 1)
                        unit_name = get_unit_name(fsid,
                                                  daemon_type,
                                                  daemon_id)
                        val = {
                            'style': 'cephadm:v1',
                            'name': name,
                            'fsid': fsid,
                            'systemd_unit': unit_name,
                        }
                        if detail:
                            # get container id
                            (val['enabled'], val['state'], _) = \
                                check_unit(ctx, unit_name)
                            container_id = None
                            image_name = None
                            image_id = None
                            image_digests = None
                            version = None
                            start_stamp = None

                            cmd = [
                                container_path, 'inspect',
                                '--format', '{{.Id}},{{.Config.Image}},{{.Image}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}',
                                'ceph-%s-%s' % (fsid, j)
                            ]
                            out, err, code = call(ctx, cmd, verbosity=CallVerbosity.DEBUG)
                            if not code:
                                (container_id, image_name, image_id, start,
                                 version) = out.strip().split(',')
                                image_id = normalize_container_id(image_id)
                                daemon_type = name.split('.', 1)[0]
                                start_stamp = try_convert_datetime(start)

                                # collect digests for this image id
                                image_digests = seen_digests.get(image_id)
                                if not image_digests:
                                    out, err, code = call(
                                        ctx,
                                        [
                                            container_path, 'image', 'inspect', image_id,
                                            '--format', '{{.RepoDigests}}',
                                        ],
                                        verbosity=CallVerbosity.DEBUG)
                                    if not code:
                                        image_digests = list(set(map(
                                            normalize_image_digest,
                                            out.strip()[1:-1].split(' '))))
                                        seen_digests[image_id] = image_digests

                                # identify software version inside the container (if we can)
                                if not version or '.' not in version:
                                    version = seen_versions.get(image_id, None)
                                if daemon_type == NFSGanesha.daemon_type:
                                    version = NFSGanesha.get_version(ctx, container_id)
                                if daemon_type == CephIscsi.daemon_type:
                                    version = CephIscsi.get_version(ctx, container_id)
                                elif not version:
                                    if daemon_type in Ceph.daemons:
                                        out, err, code = call(ctx,
                                                              [container_path, 'exec', container_id,
                                                               'ceph', '-v'],
                                                              verbosity=CallVerbosity.DEBUG)
                                        if not code and \
                                           out.startswith('ceph version '):
                                            version = out.split(' ')[2]
                                            seen_versions[image_id] = version
                                    elif daemon_type == 'grafana':
                                        out, err, code = call(ctx,
                                                              [container_path, 'exec', container_id,
                                                               'grafana-server', '-v'],
                                                              verbosity=CallVerbosity.DEBUG)
                                        if not code and \
                                           out.startswith('Version '):
                                            version = out.split(' ')[1]
                                            seen_versions[image_id] = version
                                    elif daemon_type in ['prometheus',
                                                         'alertmanager',
                                                         'node-exporter']:
                                        version = Monitoring.get_version(ctx, container_id, daemon_type)
                                        seen_versions[image_id] = version
                                    elif daemon_type == 'haproxy':
                                        out, err, code = call(ctx,
                                                              [container_path, 'exec', container_id,
                                                               'haproxy', '-v'],
                                                              verbosity=CallVerbosity.DEBUG)
                                        if not code and \
                                           out.startswith('HA-Proxy version '):
                                            version = out.split(' ')[2]
                                            seen_versions[image_id] = version
                                    elif daemon_type == 'keepalived':
                                        out, err, code = call(ctx,
                                                              [container_path, 'exec', container_id,
                                                               'keepalived', '--version'],
                                                              verbosity=CallVerbosity.DEBUG)
                                        if not code and \
                                           err.startswith('Keepalived '):
                                            version = err.split(' ')[1]
                                            if version[0] == 'v':
                                                version = version[1:]
                                            seen_versions[image_id] = version
                                    elif daemon_type == CustomContainer.daemon_type:
                                        # Because a custom container can contain
                                        # everything, we do not know which command
                                        # to execute to get the version.
                                        pass
                                    else:
                                        logger.warning('version for unknown daemon type %s' % daemon_type)
                            else:
                                vfile = os.path.join(data_dir, fsid, j, 'unit.image')  # type: ignore
                                try:
                                    with open(vfile, 'r') as f:
                                        image_name = f.read().strip() or None
                                except IOError:
                                    pass

                            # unit.meta?
                            mfile = os.path.join(data_dir, fsid, j, 'unit.meta')  # type: ignore
                            try:
                                with open(mfile, 'r') as f:
                                    meta = json.loads(f.read())
                                    val.update(meta)
                            except IOError:
                                pass

                            val['container_id'] = container_id
                            val['container_image_name'] = image_name
                            val['container_image_id'] = image_id
                            val['container_image_digests'] = image_digests
                            if container_id:
                                val['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len])
                            val['version'] = version
                            val['started'] = start_stamp
                            val['created'] = get_file_timestamp(
                                os.path.join(data_dir, fsid, j, 'unit.created')
                            )
                            val['deployed'] = get_file_timestamp(
                                os.path.join(data_dir, fsid, j, 'unit.image'))
                            val['configured'] = get_file_timestamp(
                                os.path.join(data_dir, fsid, j, 'unit.configured'))

                        ls.append(val)

    return ls
def get_daemon_description(ctx, fsid, name, detail=False, legacy_dir=None):
    # type: (CephadmContext, str, str, bool, Optional[str]) -> Dict[str, str]
    """Return the list_daemons() entry matching (fsid, name), or raise Error.

    NOTE(review): elided continue/return lines reconstructed — confirm.
    """
    for d in list_daemons(ctx, detail=detail, legacy_dir=legacy_dir):
        if d['fsid'] != fsid:
            continue
        if d['name'] != name:
            continue
        return d
    raise Error('Daemon not found: {}. See `cephadm ls`'.format(name))
4999 ##################################
def command_adopt(ctx):
    # type: (CephadmContext) -> None
    """Adopt a legacy-deployed daemon into cephadm management.

    NOTE(review): elided guard/else lines reconstructed — confirm upstream.
    """
    if not ctx.skip_pull:
        _pull_image(ctx, ctx.image)

    (daemon_type, daemon_id) = ctx.name.split('.', 1)

    # legacy check
    if ctx.style != 'legacy':
        raise Error('adoption of style %s not implemented' % ctx.style)

    # lock
    fsid = get_legacy_daemon_fsid(ctx,
                                  ctx.cluster,
                                  daemon_type,
                                  daemon_id,
                                  legacy_dir=ctx.legacy_dir)
    if not fsid:
        raise Error('could not detect legacy fsid; set fsid in ceph.conf')
    lock = FileLock(ctx, fsid)
    lock.acquire()

    # call correct adoption
    if daemon_type in Ceph.daemons:
        command_adopt_ceph(ctx, daemon_type, daemon_id, fsid)
    elif daemon_type == 'prometheus':
        command_adopt_prometheus(ctx, daemon_id, fsid)
    elif daemon_type == 'grafana':
        command_adopt_grafana(ctx, daemon_id, fsid)
    elif daemon_type == 'node-exporter':
        raise Error('adoption of node-exporter not implemented')
    elif daemon_type == 'alertmanager':
        command_adopt_alertmanager(ctx, daemon_id, fsid)
    else:
        raise Error('daemon type %s not recognized' % daemon_type)
class AdoptOsd(object):
    """Probe a legacy OSD data dir to discover its fsid and objectstore type.

    NOTE(review): elided try/except and break lines reconstructed — confirm
    against upstream cephadm.
    """

    def __init__(self, ctx, osd_data_dir, osd_id):
        # type: (CephadmContext, str, str) -> None
        self.ctx = ctx
        self.osd_data_dir = osd_data_dir
        self.osd_id = osd_id

    def check_online_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Read fsid/type directly from a mounted (online) OSD data dir."""
        osd_fsid, osd_type = None, None

        path = os.path.join(self.osd_data_dir, 'fsid')
        try:
            with open(path, 'r') as f:
                osd_fsid = f.read().strip()
            logger.info('Found online OSD at %s' % path)
        except IOError:
            logger.info('Unable to read OSD fsid from %s' % path)
        if os.path.exists(os.path.join(self.osd_data_dir, 'type')):
            with open(os.path.join(self.osd_data_dir, 'type')) as f:
                osd_type = f.read().strip()
        else:
            logger.info('"type" file missing for OSD data dir')

        return osd_fsid, osd_type

    def check_offline_lvm_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Ask ceph-volume (in a container) whether this is an offline LVM OSD."""
        osd_fsid, osd_type = None, None

        c = CephContainer(
            self.ctx,
            image=self.ctx.image,
            entrypoint='/usr/sbin/ceph-volume',
            args=['lvm', 'list', '--format=json'],
            privileged=True
        )
        out, err, code = call_throws(self.ctx, c.run_cmd())
        if not code:
            try:
                js = json.loads(out)
                if self.osd_id in js:
                    logger.info('Found offline LVM OSD {}'.format(self.osd_id))
                    osd_fsid = js[self.osd_id][0]['tags']['ceph.osd_fsid']
                    for device in js[self.osd_id]:
                        if device['tags']['ceph.type'] == 'block':
                            osd_type = 'bluestore'
                            break
                        if device['tags']['ceph.type'] == 'data':
                            osd_type = 'filestore'
                            break
            except ValueError as e:
                logger.info('Invalid JSON in ceph-volume lvm list: {}'.format(e))

        return osd_fsid, osd_type

    def check_offline_simple_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Look for a ceph-volume 'simple' JSON descriptor for this OSD."""
        osd_fsid, osd_type = None, None

        osd_file = glob('/etc/ceph/osd/{}-[a-f0-9-]*.json'.format(self.osd_id))
        if len(osd_file) == 1:
            with open(osd_file[0], 'r') as f:
                try:
                    js = json.loads(f.read())
                    logger.info('Found offline simple OSD {}'.format(self.osd_id))
                    osd_fsid = js['fsid']
                    osd_type = js['type']
                    if osd_type != 'filestore':
                        # need this to be mounted for the adopt to work, as it
                        # needs to move files from this directory
                        call_throws(self.ctx, ['mount', js['data']['path'], self.osd_data_dir])
                except ValueError as e:
                    logger.info('Invalid JSON in {}: {}'.format(osd_file, e))

        return osd_fsid, osd_type
def command_adopt_ceph(ctx, daemon_type, daemon_id, fsid):
    # type: (CephadmContext, str, str, str) -> None
    """Move a legacy ceph daemon's data/logs under cephadm and deploy new units.

    NOTE(review): many lines were elided by the extraction (guards, counters,
    else-branches); reconstructed — confirm against upstream cephadm.
    """
    (uid, gid) = extract_uid_gid(ctx)

    data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
                    (daemon_type, ctx.cluster, daemon_id))
    data_dir_src = os.path.abspath(ctx.legacy_dir + data_dir_src)

    if not os.path.exists(data_dir_src):
        raise Error("{}.{} data directory '{}' does not exist. "
                    'Incorrect ID specified, or daemon already adopted?'.format(
                        daemon_type, daemon_id, data_dir_src))

    osd_fsid = None
    if daemon_type == 'osd':
        # try each discovery strategy until one yields an fsid
        adopt_osd = AdoptOsd(ctx, data_dir_src, daemon_id)
        osd_fsid, osd_type = adopt_osd.check_online_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
        if not osd_fsid:
            raise Error('Unable to find OSD {}'.format(daemon_id))
        logger.info('objectstore_type is %s' % osd_type)
        if osd_type == 'filestore':
            raise Error('FileStore is not supported by cephadm')

    # NOTE: implicit assumption here that the units correspond to the
    # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
    # CLUSTER field.
    unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
    (enabled, state, _) = check_unit(ctx, unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'disable', unit_name])

    # data
    logger.info('Moving data...')
    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)
    move_files(ctx, glob(os.path.join(data_dir_src, '*')),
               data_dir_dst,
               uid=uid, gid=gid)
    logger.debug('Remove dir `%s`' % (data_dir_src))
    if os.path.ismount(data_dir_src):
        call_throws(ctx, ['umount', data_dir_src])
    os.rmdir(data_dir_src)

    logger.info('Chowning content...')
    call_throws(ctx, ['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])

    if daemon_type == 'mon':
        # rename *.ldb -> *.sst, in case they are coming from ubuntu
        store = os.path.join(data_dir_dst, 'store.db')
        num_renamed = 0
        if os.path.exists(store):
            for oldf in os.listdir(store):
                if oldf.endswith('.ldb'):
                    newf = oldf.replace('.ldb', '.sst')
                    oldp = os.path.join(store, oldf)
                    newp = os.path.join(store, newf)
                    logger.debug('Renaming %s -> %s' % (oldp, newp))
                    os.rename(oldp, newp)
                    num_renamed += 1
        if num_renamed:
            logger.info('Renamed %d leveldb *.ldb files to *.sst',
                        num_renamed)
    if daemon_type == 'osd':
        for n in ['block', 'block.db', 'block.wal']:
            p = os.path.join(data_dir_dst, n)
            if os.path.exists(p):
                logger.info('Chowning %s...' % p)
                os.chown(p, uid, gid)
        # disable the ceph-volume 'simple' mode files on the host
        simple_fn = os.path.join('/etc/ceph/osd',
                                 '%s-%s.json' % (daemon_id, osd_fsid))
        if os.path.exists(simple_fn):
            new_fn = simple_fn + '.adopted-by-cephadm'
            logger.info('Renaming %s -> %s', simple_fn, new_fn)
            os.rename(simple_fn, new_fn)
            logger.info('Disabling host unit ceph-volume@ simple unit...')
            call(ctx, ['systemctl', 'disable',
                       'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
        else:
            # assume this is an 'lvm' c-v for now, but don't error
            # out if it's not.
            logger.info('Disabling host unit ceph-volume@ lvm unit...')
            call(ctx, ['systemctl', 'disable',
                       'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])

    # config
    config_src = '/etc/ceph/%s.conf' % (ctx.cluster)
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'config')
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # logs
    logger.info('Moving logs...')
    log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
                   (ctx.cluster, daemon_type, daemon_id))
    log_dir_src = os.path.abspath(ctx.legacy_dir + log_dir_src)
    log_dir_dst = make_log_dir(ctx, fsid, uid=uid, gid=gid)
    move_files(ctx, glob(log_dir_src),
               log_dir_dst,
               uid=uid, gid=gid)

    logger.info('Creating new units...')
    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True,  # unconditionally enable the new unit
                        start=(state == 'running' or ctx.force_start),
                        osd_fsid=osd_fsid)
    update_firewalld(ctx, daemon_type)
def command_adopt_prometheus(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy prometheus instance: copy config/data, deploy under cephadm.

    NOTE(review): elided section-comment/keyword lines reconstructed — confirm.
    """
    daemon_type = 'prometheus'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)

    _stop_and_disable(ctx, 'prometheus')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = '/etc/prometheus/prometheus.yml'
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/prometheus')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # data
    data_src = '/var/lib/prometheus/metrics/'
    data_src = os.path.abspath(ctx.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)
def command_adopt_grafana(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy grafana instance: copy config, certs and data, redeploy.

    NOTE(review): elided else-branch/guard lines reconstructed — confirm.
    """
    daemon_type = 'grafana'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)

    _stop_and_disable(ctx, 'grafana-server')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = '/etc/grafana/grafana.ini'
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/grafana')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    prov_src = '/etc/grafana/provisioning/'
    prov_src = os.path.abspath(ctx.legacy_dir + prov_src)
    prov_dst = os.path.join(data_dir_dst, 'etc/grafana')
    copy_tree(ctx, [prov_src], prov_dst, uid=uid, gid=gid)

    # cert
    cert = '/etc/grafana/grafana.crt'
    key = '/etc/grafana/grafana.key'
    if os.path.exists(cert) and os.path.exists(key):
        cert_src = '/etc/grafana/grafana.crt'
        cert_src = os.path.abspath(ctx.legacy_dir + cert_src)
        makedirs(os.path.join(data_dir_dst, 'etc/grafana/certs'), uid, gid, 0o755)
        cert_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_file')
        copy_files(ctx, [cert_src], cert_dst, uid=uid, gid=gid)

        key_src = '/etc/grafana/grafana.key'
        key_src = os.path.abspath(ctx.legacy_dir + key_src)
        key_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_key')
        copy_files(ctx, [key_src], key_dst, uid=uid, gid=gid)

        # point the adopted grafana.ini at the relocated cert/key
        _adjust_grafana_ini(os.path.join(config_dst, 'grafana.ini'))
    else:
        logger.debug('Skipping ssl, missing cert {} or key {}'.format(cert, key))

    # data - possible custom dashboards/plugins
    data_src = '/var/lib/grafana/'
    data_src = os.path.abspath(ctx.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)
def command_adopt_alertmanager(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy alertmanager instance: copy config/data, redeploy.

    NOTE(review): elided section-comment lines reconstructed — confirm.
    """
    daemon_type = 'alertmanager'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)

    _stop_and_disable(ctx, 'prometheus-alertmanager')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = '/etc/prometheus/alertmanager.yml'
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/alertmanager')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # data
    data_src = '/var/lib/prometheus/alertmanager/'
    data_src = os.path.abspath(ctx.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'etc/alertmanager/data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)
5353 def _adjust_grafana_ini(filename
):
5354 # type: (str) -> None
5356 # Update cert_file, cert_key pathnames in server section
5357 # ConfigParser does not preserve comments
5359 with
open(filename
, 'r') as grafana_ini
:
5360 lines
= grafana_ini
.readlines()
5361 with
open('{}.new'.format(filename
), 'w') as grafana_ini
:
5362 server_section
= False
5364 if line
.startswith('['):
5365 server_section
= False
5366 if line
.startswith('[server]'):
5367 server_section
= True
5369 line
= re
.sub(r
'^cert_file.*',
5370 'cert_file = /etc/grafana/certs/cert_file', line
)
5371 line
= re
.sub(r
'^cert_key.*',
5372 'cert_key = /etc/grafana/certs/cert_key', line
)
5373 grafana_ini
.write(line
)
5374 os
.rename('{}.new'.format(filename
), filename
)
5375 except OSError as err
:
5376 raise Error('Cannot update {}: {}'.format(filename
, err
))
def _stop_and_disable(ctx, unit_name):
    # type: (CephadmContext, str) -> None
    """Stop a running systemd unit and disable it if enabled.

    NOTE(review): elided 'if enabled:' guard reconstructed — confirm upstream.
    """
    (enabled, state, _) = check_unit(ctx, unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'disable', unit_name])
5390 ##################################
def command_rm_daemon(ctx):
    # type: (CephadmContext) -> None
    """Stop, disable and remove (or back up) a single cephadm daemon.

    NOTE(review): elided lock.acquire()/os.rename/else lines reconstructed —
    confirm upstream.
    """
    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    if daemon_type in ['mon', 'osd'] and not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    call(ctx, ['systemctl', 'stop', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'reset-failed', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'disable', unit_name],
         verbosity=CallVerbosity.DEBUG)
    data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_id)
    if daemon_type in ['mon', 'osd', 'prometheus'] and \
       not ctx.force_delete_data:
        # rename it out of the way -- do not delete
        backup_dir = os.path.join(ctx.data_dir, ctx.fsid, 'removed')
        if not os.path.exists(backup_dir):
            makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
        dirname = '%s.%s_%s' % (daemon_type, daemon_id,
                                datetime.datetime.utcnow().strftime(DATEFMT))
        os.rename(data_dir,
                  os.path.join(backup_dir, dirname))
    else:
        if daemon_type == CephadmDaemon.daemon_type:
            CephadmDaemon.uninstall(ctx, ctx.fsid, daemon_type, daemon_id)
        call_throws(ctx, ['rm', '-rf', data_dir])
5427 ##################################
def _zap(ctx, what):
    """Destroy a device/LV via `ceph-volume lvm zap --destroy` in a container.

    NOTE(review): elided container-construction lines reconstructed — confirm.
    """
    mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/sbin/ceph-volume',
        envs=ctx.env,
        args=['lvm', 'zap', '--destroy', what],
        privileged=True,
        volume_mounts=mounts,
    )
    logger.info(f'Zapping {what}...')
    out, err, code = call_throws(ctx, c.run_cmd())
def _zap_osds(ctx):
    # type: (CephadmContext) -> None
    """Zap every device whose LVs all belong to this cluster's fsid.

    NOTE(review): the def line and several body lines were elided by the
    extraction; reconstructed — confirm against upstream cephadm.
    """
    # assume fsid lock already held

    # list
    mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/sbin/ceph-volume',
        envs=ctx.env,
        args=['inventory', '--format', 'json'],
        privileged=True,
        volume_mounts=mounts,
    )
    out, err, code = call_throws(ctx, c.run_cmd())
    if code:
        raise Error('failed to list osd inventory')
    try:
        ls = json.loads(out)
    except ValueError as e:
        raise Error(f'Invalid JSON in ceph-volume inventory: {e}')

    for i in ls:
        matches = [lv.get('cluster_fsid') == ctx.fsid for lv in i.get('lvs', [])]
        if any(matches) and all(matches):
            # every LV on this device belongs to us: safe to zap whole device
            _zap(ctx, i.get('path'))
        elif any(matches):
            lv_names = [lv['name'] for lv in i.get('lvs', [])]
            # TODO: we need to map the lv_names back to device paths (the vg
            # id isn't part of the output here!)
            logger.warning(f'Not zapping LVs (not implemented): {lv_names}')
def command_zap_osds(ctx):
    # type: (CephadmContext) -> None
    """Zap all OSD devices belonging to this cluster (requires --force).

    NOTE(review): elided guard/lock lines reconstructed — confirm upstream.
    """
    if not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    _zap_osds(ctx)
def command_rm_cluster(ctx):
    # type: (CephadmContext) -> None
    """Tear down every trace of a cluster on this host: units, data, logs,
    logrotate/sysctl config and (when they match the fsid) /etc/ceph files.

    NOTE(review): elided continue/guard lines reconstructed — confirm upstream.
    """
    if not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    # stop + disable individual daemon units
    for d in list_daemons(ctx, detail=False):
        if d['fsid'] != ctx.fsid:
            continue
        if d['style'] != 'cephadm:v1':
            continue
        unit_name = get_unit_name(ctx.fsid, d['name'])
        call(ctx, ['systemctl', 'stop', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'reset-failed', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'disable', unit_name],
             verbosity=CallVerbosity.DEBUG)

    # cluster units
    for unit_name in ['ceph-%s.target' % ctx.fsid]:
        call(ctx, ['systemctl', 'stop', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'reset-failed', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'disable', unit_name],
             verbosity=CallVerbosity.DEBUG)

    slice_name = 'system-%s.slice' % (('ceph-%s' % ctx.fsid).replace('-', '\\x2d'))
    call(ctx, ['systemctl', 'stop', slice_name],
         verbosity=CallVerbosity.DEBUG)

    # rm units
    call_throws(ctx, ['rm', '-f', ctx.unit_dir
                      + '/ceph-%s@.service' % ctx.fsid])
    call_throws(ctx, ['rm', '-f', ctx.unit_dir
                      + '/ceph-%s.target' % ctx.fsid])
    call_throws(ctx, ['rm', '-rf',
                      ctx.unit_dir + '/ceph-%s.target.wants' % ctx.fsid])
    # rm data
    call_throws(ctx, ['rm', '-rf', ctx.data_dir + '/' + ctx.fsid])

    if not ctx.keep_logs:
        # rm logs
        call_throws(ctx, ['rm', '-rf', ctx.log_dir + '/' + ctx.fsid])
        call_throws(ctx, ['rm', '-rf', ctx.log_dir
                          + '/*.wants/ceph-%s@*' % ctx.fsid])

    # rm logrotate config
    call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/ceph-%s' % ctx.fsid])

    # rm cephadm logrotate config if last cluster on host
    if not os.listdir(ctx.data_dir):
        call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/cephadm'])

    # rm sysctl settings
    sysctl_dir = Path(ctx.sysctl_dir)
    for p in sysctl_dir.glob(f'90-ceph-{ctx.fsid}-*.conf'):
        p.unlink()

    # clean up config, keyring, and pub key files
    files = ['/etc/ceph/ceph.conf', '/etc/ceph/ceph.pub', '/etc/ceph/ceph.client.admin.keyring']
    if os.path.exists(files[0]):
        # only remove the /etc/ceph files if they belong to this cluster
        valid_fsid = False
        with open(files[0]) as f:
            if ctx.fsid in f.read():
                valid_fsid = True
        if valid_fsid:
            for n in range(0, len(files)):
                if os.path.exists(files[n]):
                    os.remove(files[n])
5574 ##################################
def check_time_sync(ctx, enabler=None):
    # type: (CephadmContext, Optional[Packager]) -> bool
    """Return True when one of the known time-sync services is running.

    When `enabler` is given, check_units may use it to enable a service.
    Logs a warning and returns False when none of the candidates is active.
    """
    units = [
        'chrony.service',  # 18.04 (at least)
        'chronyd.service',  # el / opensuse
        'systemd-timesyncd.service',
        'ntpd.service',  # el7 (at least)
        'ntp.service',  # 18.04 (at least)
        'ntpsec.service',  # 20.04 (at least) / buster
    ]
    if check_units(ctx, units, enabler):
        return True
    logger.warning('No time sync service is running; checked for %s' % units)
    return False
def command_check_host(ctx: CephadmContext) -> None:
    """Verify this host meets cephadm's runtime requirements.

    Checks the container engine, required binaries (systemctl, lvcreate),
    active time synchronization and (optionally) the expected hostname.
    All problems are collected and raised together as a single Error.

    :raises Error: listing every failed check, one per line.
    """
    container_path = ctx.container_engine.path

    errors = []
    commands = ['systemctl', 'lvcreate']

    try:
        check_container_engine(ctx)
        logger.info('podman|docker (%s) is present' % container_path)
    except Error as e:
        errors.append(str(e))

    for command in commands:
        try:
            find_program(command)
            logger.info('%s is present' % command)
        except ValueError:
            errors.append('%s binary does not appear to be installed' % command)

    # check for configured+running chronyd or ntp
    if not check_time_sync(ctx):
        errors.append('No time synchronization is active')

    if 'expect_hostname' in ctx and ctx.expect_hostname:
        if get_hostname().lower() != ctx.expect_hostname.lower():
            errors.append('hostname "%s" does not match expected hostname "%s"' % (
                get_hostname(), ctx.expect_hostname))
        else:
            # BUGFIX: this confirmation was previously logged unconditionally,
            # claiming a match even when a mismatch error had just been recorded.
            logger.info('Hostname "%s" matches what is expected.',
                        ctx.expect_hostname)

    if errors:
        raise Error('\nERROR: '.join(errors))

    logger.info('Host looks OK')
5628 ##################################
def command_prepare_host(ctx: CephadmContext) -> None:
    """Install anything missing that command_check_host would complain about.

    Installs podman/docker, lvm2 and chrony as needed, optionally adjusts
    the hostname to match --expect-hostname, then re-runs the host check.
    """
    logger.info('Verifying podman|docker is present...')
    pkg = None
    try:
        check_container_engine(ctx)
    except Error as e:
        logger.warning(str(e))
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install_podman()

    logger.info('Verifying lvm2 is present...')
    if not find_executable('lvcreate'):
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install(['lvm2'])

    logger.info('Verifying time synchronization is in place...')
    if not check_time_sync(ctx):
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install(['chrony'])
        # check again, and this time try to enable
        # the service
        check_time_sync(ctx, enabler=pkg)

    if 'expect_hostname' in ctx and ctx.expect_hostname and ctx.expect_hostname != get_hostname():
        logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), ctx.expect_hostname))
        call_throws(ctx, ['hostname', ctx.expect_hostname])
        with open('/etc/hostname', 'w') as f:
            f.write(ctx.expect_hostname + '\n')

    logger.info('Repeating the final host check...')
    command_check_host(ctx)
5666 ##################################
class CustomValidation(argparse.Action):
    """argparse action that validates --name and --exporter-config values."""

    def _check_name(self, values):
        # a daemon name must look like '<type>.<id>' with a known type
        try:
            (daemon_type, daemon_id) = values.split('.', 1)
        except ValueError:
            raise argparse.ArgumentError(self,
                                         'must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com')

        daemons = get_supported_daemons()
        if daemon_type not in daemons:
            raise argparse.ArgumentError(self,
                                         'name must declare the type of daemon e.g. '
                                         '{}'.format(', '.join(daemons)))

    def __call__(self, parser, namespace, values, option_string=None):
        if self.dest == 'name':
            self._check_name(values)
            setattr(namespace, self.dest, values)
        elif self.dest == 'exporter_config':
            cfg = get_parm(values)
            # run the class' validate method, and convert to an argparse error
            # if problems are found
            try:
                CephadmDaemon.validate_config(cfg)
            except Error as e:
                raise argparse.ArgumentError(self,
                                             str(e))
            setattr(namespace, self.dest, cfg)
5699 ##################################
def get_distro():
    # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
    """Parse /etc/os-release into (ID, VERSION_ID, VERSION_CODENAME).

    Values are lower-cased and stripped of surrounding double quotes;
    any field absent from the file comes back as None.
    """
    distro = None
    distro_version = None
    distro_codename = None
    with open('/etc/os-release', 'r') as f:
        for line in f.readlines():
            line = line.strip()
            # skip comments and anything that is not KEY=VALUE
            if '=' not in line or line.startswith('#'):
                continue
            (var, val) = line.split('=', 1)
            if val[0] == '"' and val[-1] == '"':
                val = val[1:-1]
            if var == 'ID':
                distro = val.lower()
            elif var == 'VERSION_ID':
                distro_version = val.lower()
            elif var == 'VERSION_CODENAME':
                distro_codename = val.lower()
    return distro, distro_version, distro_codename
class Packager(object):
    """Base class for distro package managers used to add/remove Ceph repos."""

    def __init__(self, ctx: CephadmContext,
                 stable=None, version=None, branch=None, commit=None):
        # exactly one release selector (or none) may be set; a commit is
        # only meaningful together with a branch
        assert \
            (stable and not version and not branch and not commit) or \
            (not stable and version and not branch and not commit) or \
            (not stable and not version and branch) or \
            (not stable and not version and not branch and not commit)
        self.ctx = ctx
        self.stable = stable
        self.version = version
        self.branch = branch
        self.commit = commit

    def add_repo(self):
        raise NotImplementedError

    def rm_repo(self):
        raise NotImplementedError

    def query_shaman(self, distro, distro_version, branch, commit):
        """Resolve a dev build through shaman, then fetch the repo file from chacra."""
        logger.info('Fetching repo metadata from shaman and chacra...')
        shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
            distro=distro,
            distro_version=distro_version,
            branch=branch,
            sha1=commit or 'latest',
            # NOTE(review): assumes a get_arch() helper exists elsewhere in this
            # file -- confirm against the full source
            arch=get_arch()
        )
        try:
            shaman_response = urlopen(shaman_url)
        except HTTPError as err:
            logger.error('repository not found in shaman (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, shaman_url))
        chacra_url = ''
        try:
            chacra_url = shaman_response.geturl()
            chacra_response = urlopen(chacra_url)
        except HTTPError as err:
            logger.error('repository not found in chacra (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, chacra_url))
        return chacra_response.read().decode('utf-8')

    def repo_gpgkey(self):
        # NOTE(review): returns a bare URL string when ctx.gpg_url is set but a
        # (url, name) tuple otherwise -- callers must handle both shapes.
        if self.ctx.gpg_url:
            return self.ctx.gpg_url
        if self.stable or self.version:
            return 'https://download.ceph.com/keys/release.gpg', 'release'
        else:
            return 'https://download.ceph.com/keys/autobuild.gpg', 'autobuild'

    def enable_service(self, service):
        """
        Start and enable the service (typically using systemd).
        """
        call_throws(self.ctx, ['systemctl', 'enable', '--now', service])
class Apt(Packager):
    """Packager backend for Debian/Ubuntu hosts (apt-get)."""

    DISTRO_NAMES = {
        'ubuntu': 'ubuntu',
        'debian': 'debian',
    }

    def __init__(self, ctx: CephadmContext,
                 stable, version, branch, commit,
                 distro, distro_version, distro_codename):
        super(Apt, self).__init__(ctx, stable=stable, version=version,
                                  branch=branch, commit=commit)
        assert distro
        self.distro = self.DISTRO_NAMES[distro]
        self.distro_codename = distro_codename
        self.distro_version = distro_version

    def repo_path(self):
        return '/etc/apt/sources.list.d/ceph.list'

    def add_repo(self):
        """Install the Ceph GPG key and write the apt sources entry."""
        url, name = self.repo_gpgkey()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read()
        with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'wb') as f:
            f.write(key)

        if self.version:
            content = 'deb %s/debian-%s/ %s main\n' % (
                self.ctx.repo_url, self.version, self.distro_codename)
        elif self.stable:
            content = 'deb %s/debian-%s/ %s main\n' % (
                self.ctx.repo_url, self.stable, self.distro_codename)
        else:
            # dev build: resolve the repo file through shaman/chacra
            content = self.query_shaman(self.distro, self.distro_codename, self.branch,
                                        self.commit)

        logger.info('Installing repo file at %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        """Remove Ceph GPG keys and the apt sources entry (and kubic on ubuntu)."""
        for name in ['autobuild', 'release']:
            p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
            if os.path.exists(p):
                logger.info('Removing repo GPG key %s...' % p)
                os.unlink(p)

        if os.path.exists(self.repo_path()):
            logger.info('Removing repo at %s...' % self.repo_path())
            os.unlink(self.repo_path())

        if self.distro == 'ubuntu':
            self.rm_kubic_repo()

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, ['apt-get', 'install', '-y'] + ls)

    def update(self):
        logger.info('Updating package list...')
        call_throws(self.ctx, ['apt-get', 'update'])

    def install_podman(self):
        # ubuntu needs the kubic repo for podman; fall back to docker.io if
        # podman still cannot be installed
        if self.distro == 'ubuntu':
            logger.info('Setting up repo for podman...')
            self.add_kubic_repo()
            self.update()

        logger.info('Attempting podman install...')
        try:
            self.install(['podman'])
        except Error:
            logger.info('Podman did not work. Falling back to docker...')
            self.install(['docker.io'])

    def kubic_repo_url(self):
        return 'https://download.opensuse.org/repositories/devel:/kubic:/' \
               'libcontainers:/stable/xUbuntu_%s/' % self.distro_version

    def kubic_repo_path(self):
        return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list'

    def kubric_repo_gpgkey_url(self):
        # (method name typo "kubric" is preserved -- it is part of the interface)
        return '%s/Release.key' % self.kubic_repo_url()

    def kubric_repo_gpgkey_path(self):
        return '/etc/apt/trusted.gpg.d/kubic.release.gpg'

    def add_kubic_repo(self):
        url = self.kubric_repo_gpgkey_url()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        tmp_key = write_tmp(key, 0, 0)
        keyring = self.kubric_repo_gpgkey_path()
        call_throws(self.ctx, ['apt-key', '--keyring', keyring, 'add', tmp_key.name])

        logger.info('Installing repo file at %s...' % self.kubic_repo_path())
        content = 'deb %s /\n' % self.kubic_repo_url()
        with open(self.kubic_repo_path(), 'w') as f:
            f.write(content)

    def rm_kubic_repo(self):
        keyring = self.kubric_repo_gpgkey_path()
        if os.path.exists(keyring):
            logger.info('Removing repo GPG key %s...' % keyring)
            os.unlink(keyring)

        p = self.kubic_repo_path()
        if os.path.exists(p):
            logger.info('Removing repo at %s...' % p)
            os.unlink(p)
class YumDnf(Packager):
    """Packager backend for RPM-based distros (yum/dnf)."""

    DISTRO_NAMES = {
        'centos': ('centos', 'el'),
        'rhel': ('centos', 'el'),
        'scientific': ('centos', 'el'),
        'rocky': ('centos', 'el'),
        'fedora': ('fedora', 'fc'),
    }

    def __init__(self, ctx: CephadmContext,
                 stable, version, branch, commit,
                 distro, distro_version):
        super(YumDnf, self).__init__(ctx, stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.major = int(distro_version.split('.')[0])
        self.distro_normalized = self.DISTRO_NAMES[distro][0]
        self.distro_code = self.DISTRO_NAMES[distro][1] + str(self.major)
        # NOTE(review): distro_code always embeds the major version ('el8',
        # 'fc33'), so these equality tests against bare 'fc'/'el' can never be
        # true and the tool silently stays 'yum' -- confirm intent upstream.
        if (self.distro_code == 'fc' and self.major >= 30) or \
           (self.distro_code == 'el' and self.major >= 8):
            self.tool = 'dnf'
        else:
            self.tool = 'yum'

    def custom_repo(self, **kw):
        """Build one section of a .repo file from keyword arguments.

        A line is only emitted when its key has a real value (not None and
        not ''), so callers can conditionally omit settings without leaving
        a key with a blank value, which breaks yum/dnf.  Line order is
        fixed, starting with the [reponame] header.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self):
        return '/etc/yum.repos.d/ceph.repo'

    def repo_baseurl(self):
        assert self.stable or self.version
        if self.version:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.version,
                                     self.distro_code)
        else:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.stable,
                                     self.distro_code)

    def add_repo(self):
        """Write /etc/yum.repos.d/ceph.repo for the selected release/dev build."""
        if self.distro_code.startswith('fc'):
            raise Error('Ceph team does not build Fedora specific packages and therefore cannot add repos for this distro')
        if self.distro_code == 'el7':
            if self.stable and self.stable >= 'pacific':
                raise Error('Ceph does not support pacific or later for this version of this linux distro and therefore cannot add a repo for it')
            if self.version and self.version.split('.')[0] >= '16':
                raise Error('Ceph does not support 16.y.z or later for this version of this linux distro and therefore cannot add a repo for it')

        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name=n,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro_normalized, self.major,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        if self.distro_code.startswith('el'):
            logger.info('Enabling EPEL...')
            call_throws(self.ctx, [self.tool, 'install', '-y', 'epel-release'])

    def rm_repo(self):
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, [self.tool, 'install', '-y'] + ls)

    def install_podman(self):
        self.install(['podman'])
class Zypper(Packager):
    """Packager backend for SUSE/openSUSE hosts (zypper)."""

    DISTRO_NAMES = [
        'sles',
        'opensuse-tumbleweed',
        'opensuse-leap',
    ]

    def __init__(self, ctx: CephadmContext,
                 stable, version, branch, commit,
                 distro, distro_version):
        super(Zypper, self).__init__(ctx, stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.tool = 'zypper'
        self.distro = 'opensuse'
        # default to 15.1; tumbleweed has no meaningful version id
        self.distro_version = '15.1'
        if 'tumbleweed' not in distro and distro_version is not None:
            self.distro_version = distro_version

    def custom_repo(self, **kw):
        """
        See YumDnf for format explanation.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self):
        return '/etc/zypp/repos.d/ceph.repo'

    def repo_baseurl(self):
        assert self.stable or self.version
        if self.version:
            # BUGFIX: this branch previously built the URL from self.stable
            # (identical to the else branch), so --version was ignored when
            # composing the repo baseurl; use self.version like YumDnf does.
            return '%s/rpm-%s/%s' % (self.ctx.repo_url,
                                     self.version, self.distro)
        else:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url,
                                     self.stable, self.distro)

    def add_repo(self):
        """Write /etc/zypp/repos.d/ceph.repo for the selected release/dev build."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name=n,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro, self.distro_version,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, [self.tool, 'in', '-y'] + ls)

    def install_podman(self):
        self.install(['podman'])
def create_packager(ctx: CephadmContext,
                    stable=None, version=None, branch=None, commit=None):
    """Instantiate the Packager subclass matching the detected distro.

    :raises Error: when the host's distro is not one we support.
    """
    distro, distro_version, distro_codename = get_distro()
    selectors = dict(stable=stable, version=version, branch=branch, commit=commit)
    if distro in YumDnf.DISTRO_NAMES:
        return YumDnf(ctx, distro=distro, distro_version=distro_version,
                      **selectors)
    if distro in Apt.DISTRO_NAMES:
        return Apt(ctx, distro=distro, distro_version=distro_version,
                   distro_codename=distro_codename,
                   **selectors)
    if distro in Zypper.DISTRO_NAMES:
        return Zypper(ctx, distro=distro, distro_version=distro_version,
                      **selectors)
    raise Error('Distro %s version %s not supported' % (distro, distro_version))
def command_add_repo(ctx: CephadmContext):
    """Configure the Ceph package repo chosen by --release/--version/--dev[-commit]."""
    if ctx.version and ctx.release:
        raise Error('you can specify either --release or --version but not both')
    if not ctx.version and not ctx.release and not ctx.dev and not ctx.dev_commit:
        raise Error('please supply a --release, --version, --dev or --dev-commit argument')
    if ctx.version:
        try:
            (x, y, z) = ctx.version.split('.')
        except Exception:
            raise Error('version must be in the form x.y.z (e.g., 15.2.0)')
    if ctx.release:
        # Pacific =/= pacific in this case; lower-case to avoid confusion
        ctx.release = ctx.release.lower()

    pkg = create_packager(ctx, stable=ctx.release,
                          version=ctx.version,
                          branch=ctx.dev,
                          commit=ctx.dev_commit)
    pkg.add_repo()
    logger.info('Completed adding repo.')
def command_rm_repo(ctx: CephadmContext):
    """Remove the Ceph package repo configuration from this host."""
    create_packager(ctx).rm_repo()
def command_install(ctx: CephadmContext):
    """Install ctx.packages using the host's native package manager."""
    create_packager(ctx).install(ctx.packages)
6197 ##################################
def get_ipv4_address(ifname):
    # type: (str) -> str
    """Return 'addr/prefixlen' for an interface, or '' when it has no IPv4 address.

    Uses SIOCGIFADDR/SIOCGIFNETMASK ioctls; the dotted-quad netmask is
    converted to a prefix length by counting set bits per octet.
    """
    def _extract(sock, offset):
        return socket.inet_ntop(
            socket.AF_INET,
            fcntl.ioctl(
                sock.fileno(),
                offset,
                struct.pack('256s', bytes(ifname[:15], 'utf-8'))
            )[20:24])

    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        addr = _extract(s, 35093)  # '0x8915' = SIOCGIFADDR
        dq_mask = _extract(s, 35099)  # 0x891b = SIOCGIFNETMASK
    except OSError:
        # interface does not have an ipv4 address
        return ''

    dec_mask = sum([bin(int(i)).count('1')
                    for i in dq_mask.split('.')])
    return '{}/{}'.format(addr, dec_mask)
def get_ipv6_address(ifname):
    # type: (str) -> str
    """Return 'addr/prefixlen' for an interface's IPv6 address, or '' if none."""
    if not os.path.exists('/proc/net/if_inet6'):
        return ''

    raw = read_file(['/proc/net/if_inet6'])
    data = raw.splitlines()
    # based on docs @ https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/ch11s04.html
    # field 0 is ipv6, field 2 is scope
    for iface_setting in data:
        field = iface_setting.split()
        if field[-1] == ifname:
            ipv6_raw = field[0]
            # re-insert the ':' separators stripped by the kernel file
            ipv6_fmtd = ':'.join([ipv6_raw[_p:_p + 4] for _p in range(0, len(field[0]), 4)])
            # apply naming rules using ipaddress module
            ipv6 = ipaddress.ip_address(ipv6_fmtd)
            return '{}/{}'.format(str(ipv6), int('0x{}'.format(field[2]), 16))
    return ''
def bytes_to_human(num, mode='decimal'):
    # type: (float, str) -> str
    """Convert a bytes value into its human-readable form.

    :param num: number, in bytes, to convert
    :param mode: Either decimal (default) or binary to determine divisor
    :returns: string representing the bytes value in a more readable format
    """
    if mode == 'binary':
        unit_list = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
        divisor = 1024.0
        yotta = 'YiB'
    else:
        unit_list = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
        divisor = 1000.0
        yotta = 'YB'

    for unit in unit_list:
        if abs(num) < divisor:
            return '%3.1f%s' % (num, unit)
        num /= divisor

    # fell off the end of the table: express in yotta-units
    return '%.1f%s' % (num, yotta)
def read_file(path_list, file_name=''):
    # type: (List[str], str) -> str
    """Return the content of the first readable file found in `path_list`.

    :param path_list: candidate paths (directories when `file_name` is given)
    :param file_name: optional file name joined onto each candidate path
    :returns: stripped file content, or 'Unknown' when nothing could be read
    """
    for candidate in path_list:
        target = os.path.join(candidate, file_name) if file_name else candidate
        if not os.path.exists(target):
            continue
        with open(target, 'r') as fh:
            try:
                text = fh.read().strip()
            except OSError:
                # sysfs may populate the file, but for devices like
                # virtio reads can fail
                return 'Unknown'
            return text
    return 'Unknown'
6293 ##################################
6297 _dmi_path_list
= ['/sys/class/dmi/id']
6298 _nic_path_list
= ['/sys/class/net']
6299 _selinux_path_list
= ['/etc/selinux/config']
6300 _apparmor_path_list
= ['/etc/apparmor']
6301 _disk_vendor_workarounds
= {
6302 '0x1af4': 'Virtio Block Device'
6305 def __init__(self
, ctx
: CephadmContext
):
6306 self
.ctx
: CephadmContext
= ctx
6307 self
.cpu_model
: str = 'Unknown'
6308 self
.cpu_count
: int = 0
6309 self
.cpu_cores
: int = 0
6310 self
.cpu_threads
: int = 0
6311 self
.interfaces
: Dict
[str, Any
] = {}
6313 self
._meminfo
: List
[str] = read_file(['/proc/meminfo']).splitlines()
6315 self
._process
_nics
()
6316 self
.arch
: str = platform
.processor()
6317 self
.kernel
: str = platform
.release()
6319 def _get_cpuinfo(self
):
6321 """Determine cpu information via /proc/cpuinfo"""
6322 raw
= read_file(['/proc/cpuinfo'])
6323 output
= raw
.splitlines()
6327 field
= [f
.strip() for f
in line
.split(':')]
6328 if 'model name' in line
:
6329 self
.cpu_model
= field
[1]
6330 if 'physical id' in line
:
6331 cpu_set
.add(field
[1])
6332 if 'siblings' in line
:
6333 self
.cpu_threads
= int(field
[1].strip())
6334 if 'cpu cores' in line
:
6335 self
.cpu_cores
= int(field
[1].strip())
6337 self
.cpu_count
= len(cpu_set
)
6339 def _get_block_devs(self
):
6340 # type: () -> List[str]
6341 """Determine the list of block devices by looking at /sys/block"""
6342 return [dev
for dev
in os
.listdir('/sys/block')
6343 if not dev
.startswith('dm')]
6345 def _get_devs_by_type(self
, rota
='0'):
6346 # type: (str) -> List[str]
6347 """Filter block devices by a given rotational attribute (0=flash, 1=spinner)"""
6349 for blk_dev
in self
._get
_block
_devs
():
6350 rot_path
= '/sys/block/{}/queue/rotational'.format(blk_dev
)
6351 rot_value
= read_file([rot_path
])
6352 if rot_value
== rota
:
6353 devs
.append(blk_dev
)
6357 def operating_system(self
):
6359 """Determine OS version"""
6360 raw_info
= read_file(['/etc/os-release'])
6361 os_release
= raw_info
.splitlines()
6365 for line
in os_release
:
6367 var_name
, var_value
= line
.split('=')
6368 rel_dict
[var_name
] = var_value
.strip('"')
6370 # Would normally use PRETTY_NAME, but NAME and VERSION are more
6372 if all(_v
in rel_dict
for _v
in ['NAME', 'VERSION']):
6373 rel_str
= '{} {}'.format(rel_dict
['NAME'], rel_dict
['VERSION'])
6379 """Return the hostname"""
6380 return platform
.node()
6383 def subscribed(self
):
6385 """Highlevel check to see if the host is subscribed to receive updates/support"""
6389 entitlements_dir
= '/etc/pki/entitlement'
6390 if os
.path
.exists(entitlements_dir
):
6391 pems
= glob('{}/*.pem'.format(entitlements_dir
))
6397 os_name
= self
.operating_system
6398 if os_name
.upper().startswith('RED HAT'):
6404 def hdd_count(self
):
6406 """Return a count of HDDs (spinners)"""
6407 return len(self
._get
_devs
_by
_type
(rota
='1'))
6409 def _get_capacity(self
, dev
):
6410 # type: (str) -> int
6411 """Determine the size of a given device"""
6412 size_path
= os
.path
.join('/sys/block', dev
, 'size')
6413 size_blocks
= int(read_file([size_path
]))
6414 blk_path
= os
.path
.join('/sys/block', dev
, 'queue', 'logical_block_size')
6415 blk_count
= int(read_file([blk_path
]))
6416 return size_blocks
* blk_count
6418 def _get_capacity_by_type(self
, rota
='0'):
6419 # type: (str) -> int
6420 """Return the total capacity of a category of device (flash or hdd)"""
6421 devs
= self
._get
_devs
_by
_type
(rota
=rota
)
6424 capacity
+= self
._get
_capacity
(dev
)
6427 def _dev_list(self
, dev_list
):
6428 # type: (List[str]) -> List[Dict[str, object]]
6429 """Return a 'pretty' name list for each device in the `dev_list`"""
6432 for dev
in dev_list
:
6433 disk_model
= read_file(['/sys/block/{}/device/model'.format(dev
)]).strip()
6434 disk_rev
= read_file(['/sys/block/{}/device/rev'.format(dev
)]).strip()
6435 disk_wwid
= read_file(['/sys/block/{}/device/wwid'.format(dev
)]).strip()
6436 vendor
= read_file(['/sys/block/{}/device/vendor'.format(dev
)]).strip()
6437 disk_vendor
= HostFacts
._disk
_vendor
_workarounds
.get(vendor
, vendor
)
6438 disk_size_bytes
= self
._get
_capacity
(dev
)
6440 'description': '{} {} ({})'.format(disk_vendor
, disk_model
, bytes_to_human(disk_size_bytes
)),
6441 'vendor': disk_vendor
,
6442 'model': disk_model
,
6446 'disk_size_bytes': disk_size_bytes
,
6452 # type: () -> List[Dict[str, object]]
6453 """Return a list of devices that are HDDs (spinners)"""
6454 devs
= self
._get
_devs
_by
_type
(rota
='1')
6455 return self
._dev
_list
(devs
)
6458 def flash_list(self
):
6459 # type: () -> List[Dict[str, object]]
6460 """Return a list of devices that are flash based (SSD, NVMe)"""
6461 devs
= self
._get
_devs
_by
_type
(rota
='0')
6462 return self
._dev
_list
(devs
)
6465 def hdd_capacity_bytes(self
):
6467 """Return the total capacity for all HDD devices (bytes)"""
6468 return self
._get
_capacity
_by
_type
(rota
='1')
6471 def hdd_capacity(self
):
6473 """Return the total capacity for all HDD devices (human readable format)"""
6474 return bytes_to_human(self
.hdd_capacity_bytes
)
6478 # type: () -> Dict[str, float]
6479 """Return the cpu load average data for the host"""
6480 raw
= read_file(['/proc/loadavg']).strip()
6483 '1min': float(data
[0]),
6484 '5min': float(data
[1]),
6485 '15min': float(data
[2]),
6489 def flash_count(self
):
6491 """Return the number of flash devices in the system (SSD, NVMe)"""
6492 return len(self
._get
_devs
_by
_type
(rota
='0'))
6495 def flash_capacity_bytes(self
):
6497 """Return the total capacity for all flash devices (bytes)"""
6498 return self
._get
_capacity
_by
_type
(rota
='0')
6501 def flash_capacity(self
):
6503 """Return the total capacity for all Flash devices (human readable format)"""
6504 return bytes_to_human(self
.flash_capacity_bytes
)
6506 def _process_nics(self
):
6508 """Look at the NIC devices and extract network related metadata"""
6509 # from https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
6516 for nic_path
in HostFacts
._nic
_path
_list
:
6517 if not os
.path
.exists(nic_path
):
6519 for iface
in os
.listdir(nic_path
):
6521 lower_devs_list
= [os
.path
.basename(link
.replace('lower_', '')) for link
in glob(os
.path
.join(nic_path
, iface
, 'lower_*'))]
6522 upper_devs_list
= [os
.path
.basename(link
.replace('upper_', '')) for link
in glob(os
.path
.join(nic_path
, iface
, 'upper_*'))]
6525 mtu
= int(read_file([os
.path
.join(nic_path
, iface
, 'mtu')]))
6529 operstate
= read_file([os
.path
.join(nic_path
, iface
, 'operstate')])
6531 speed
= int(read_file([os
.path
.join(nic_path
, iface
, 'speed')]))
6532 except (OSError, ValueError):
6533 # OSError : device doesn't support the ethtool get_link_ksettings
6534 # ValueError : raised when the read fails, and returns Unknown
6536 # Either way, we show a -1 when speed isn't available
6539 if os
.path
.exists(os
.path
.join(nic_path
, iface
, 'bridge')):
6541 elif os
.path
.exists(os
.path
.join(nic_path
, iface
, 'bonding')):
6542 nic_type
= 'bonding'
6544 nic_type
= hw_lookup
.get(read_file([os
.path
.join(nic_path
, iface
, 'type')]), 'Unknown')
6546 dev_link
= os
.path
.join(nic_path
, iface
, 'device')
6547 if os
.path
.exists(dev_link
):
6549 driver_path
= os
.path
.join(dev_link
, 'driver')
6550 if os
.path
.exists(driver_path
):
6551 driver
= os
.path
.basename(os
.path
.realpath(driver_path
))
6559 self
.interfaces
[iface
] = {
6561 'upper_devs_list': upper_devs_list
,
6562 'lower_devs_list': lower_devs_list
,
6563 'operstate': operstate
,
6565 'nic_type': nic_type
,
6568 'ipv4_address': get_ipv4_address(iface
),
6569 'ipv6_address': get_ipv6_address(iface
),
6573 def nic_count(self
):
6575 """Return a total count of all physical NICs detected in the host"""
6577 for iface
in self
.interfaces
:
6578 if self
.interfaces
[iface
]['iftype'] == 'physical':
6579 phys_devs
.append(iface
)
6580 return len(phys_devs
)
6582 def _get_mem_data(self
, field_name
):
6583 # type: (str) -> int
6584 for line
in self
._meminfo
:
6585 if line
.startswith(field_name
):
6591 def memory_total_kb(self
):
6593 """Determine the memory installed (kb)"""
6594 return self
._get
_mem
_data
('MemTotal')
6597 def memory_free_kb(self
):
6599 """Determine the memory free (not cache, immediately usable)"""
6600 return self
._get
_mem
_data
('MemFree')
6603 def memory_available_kb(self
):
6605 """Determine the memory available to new applications without swapping"""
6606 return self
._get
_mem
_data
('MemAvailable')
6611 """Determine server vendor from DMI data in sysfs"""
6612 return read_file(HostFacts
._dmi
_path
_list
, 'sys_vendor')
6617 """Determine server model information from DMI data in sysfs"""
6618 family
= read_file(HostFacts
._dmi
_path
_list
, 'product_family')
6619 product
= read_file(HostFacts
._dmi
_path
_list
, 'product_name')
6620 if family
== 'Unknown' and product
:
6621 return '{}'.format(product
)
6623 return '{} ({})'.format(family
, product
)
6626 def bios_version(self
):
6628 """Determine server BIOS version from DMI data in sysfs"""
6629 return read_file(HostFacts
._dmi
_path
_list
, 'bios_version')
6632 def bios_date(self
):
6634 """Determine server BIOS date from DMI data in sysfs"""
6635 return read_file(HostFacts
._dmi
_path
_list
, 'bios_date')
6638 def timestamp(self
):
6640 """Return the current time as Epoch seconds"""
6644 def system_uptime(self
):
6646 """Return the system uptime (in secs)"""
6647 raw_time
= read_file(['/proc/uptime'])
6648 up_secs
, _
= raw_time
.split()
6649 return float(up_secs
)
6652 def kernel_security(self
):
6653 # type: () -> Dict[str, str]
6654 """Determine the security features enabled in the kernel - SELinux, AppArmor"""
6655 def _fetch_selinux() -> Dict
[str, str]:
6656 """Read the selinux config file to determine state"""
6658 for selinux_path
in HostFacts
._selinux
_path
_list
:
6659 if os
.path
.exists(selinux_path
):
6660 selinux_config
= read_file([selinux_path
]).splitlines()
6661 security
['type'] = 'SELinux'
6662 for line
in selinux_config
:
6663 if line
.strip().startswith('#'):
6665 k
, v
= line
.split('=')
6667 if security
['SELINUX'].lower() == 'disabled':
6668 security
['description'] = 'SELinux: Disabled'
6670 security
['description'] = 'SELinux: Enabled({}, {})'.format(security
['SELINUX'], security
['SELINUXTYPE'])
6674 def _fetch_apparmor() -> Dict
[str, str]:
6675 """Read the apparmor profiles directly, returning an overview of AppArmor status"""
6677 for apparmor_path
in HostFacts
._apparmor
_path
_list
:
6678 if os
.path
.exists(apparmor_path
):
6679 security
['type'] = 'AppArmor'
6680 security
['description'] = 'AppArmor: Enabled'
6682 profiles
= read_file(['/sys/kernel/security/apparmor/profiles'])
6683 if len(profiles
) == 0:
6688 summary
= {} # type: Dict[str, int]
6689 for line
in profiles
.split('\n'):
6690 item
, mode
= line
.split(' ')
6691 mode
= mode
.strip('()')
6696 summary_str
= ','.join(['{} {}'.format(v
, k
) for k
, v
in summary
.items()])
6697 security
= {**security
, **summary
} # type: ignore
6698 security
['description'] += '({})'.format(summary_str
)
6704 if os
.path
.exists('/sys/kernel/security/lsm'):
6705 lsm
= read_file(['/sys/kernel/security/lsm']).strip()
6706 if 'selinux' in lsm
:
6707 ret
= _fetch_selinux()
6708 elif 'apparmor' in lsm
:
6709 ret
= _fetch_apparmor()
6713 'description': 'Linux Security Module framework is active, but is not using SELinux or AppArmor'
6721 'description': 'Linux Security Module framework is not available'
def selinux_enabled(self):
    # type: () -> bool
    """Report whether SELinux is the active and enabled security module.

    True only when kernel_security identifies the module as SELinux AND
    its description does not mark SELinux as disabled.
    """
    security = self.kernel_security
    if security['type'] != 'SELinux':
        return False
    return security['description'] != 'SELinux: Disabled'
6730 def kernel_parameters(self
):
6731 # type: () -> Dict[str, str]
6732 """Get kernel parameters required/used in Ceph clusters"""
6735 out
, _
, _
= call_throws(self
.ctx
, ['sysctl', '-a'], verbosity
=CallVerbosity
.SILENT
)
6737 param_list
= out
.split('\n')
6738 param_dict
= {param
.split(' = ')[0]: param
.split(' = ')[-1] for param
in param_list
}
6740 # return only desired parameters
6741 if 'net.ipv4.ip_nonlocal_bind' in param_dict
:
6742 k_param
['net.ipv4.ip_nonlocal_bind'] = param_dict
['net.ipv4.ip_nonlocal_bind']
6748 """Return the attributes of this HostFacts object as json"""
6750 k
: getattr(self
, k
) for k
in dir(self
)
6751 if not k
.startswith('_')
6752 and isinstance(getattr(self
, k
), (float, int, str, list, dict, tuple))
6754 return json
.dumps(data
, indent
=2, sort_keys
=True)
6756 ##################################
def command_gather_facts(ctx: CephadmContext):
    """gather_facts is intended to provide host related metadata to the caller"""
    # Collect the full host inventory (OS, CPU, disks, network, ...).
    host = HostFacts(ctx)
6765 ##################################
6769 task_types
= ['disks', 'daemons', 'host', 'http_server']
6772 self
.started_epoch_secs
= time
.time()
6774 'daemons': 'inactive',
6775 'disks': 'inactive',
6777 'http_server': 'inactive',
6788 'started_epoch_secs': self
.started_epoch_secs
,
6789 'tasks': self
.tasks
,
6790 'errors': self
.errors
,
6795 'health': self
.health
,
6797 'daemons': self
.daemons
,
6798 'disks': self
.disks
,
6801 def update_health(self
, task_type
, task_status
, error_msg
=None):
6802 assert task_type
in CephadmCache
.task_types
6804 self
.tasks
[task_type
] = task_status
6806 self
.errors
.append(error_msg
)
6808 def update_task(self
, task_type
, content
):
6809 assert task_type
in CephadmCache
.task_types
6810 assert isinstance(content
, dict)
6812 current
= getattr(self
, task_type
)
6814 current
[k
] = content
[k
]
6816 setattr(self
, task_type
, current
)
class CephadmHTTPServer(ThreadingMixIn, HTTPServer):
    """Threaded HTTP server used by the cephadm exporter daemon."""
    # Rebind quickly on restart instead of failing while the old socket
    # lingers in TIME_WAIT.
    allow_reuse_address = True
    # Handler threads must not keep the process alive at shutdown.
    daemon_threads = True
    # Shared metadata cache; assigned by CephadmDaemon before serving.
    cephadm_cache: CephadmCache
):
6827 server
: CephadmHTTPServer
6830 f
'/{api_version}/metadata',
6831 f
'/{api_version}/metadata/health',
6832 f
'/{api_version}/metadata/disks',
6833 f
'/{api_version}/metadata/daemons',
6834 f
'/{api_version}/metadata/host',
6839 def authorize(cls
, f
):
6840 """Implement a basic token check.
6842 The token is installed at deployment time and must be provided to
6843 ensure we only respond to callers who know our token i.e. mgr
6846 def wrapper(self
, *args
, **kwargs
):
6847 auth
= self
.headers
.get('Authorization', None)
6848 if auth
!= 'Bearer ' + self
.server
.token
:
6849 self
.send_error(401)
6851 f(self
, *args
, **kwargs
)
6855 def _help_page(self
):
6856 return """<!DOCTYPE html>
6858 <head><title>cephadm metadata exporter</title></head>
6861 font-family: sans-serif;
6866 border-spacing: 0px;
6870 background: PowderBlue;
6877 <h1>cephadm metadata exporter {api_version}</h1>
6880 <tr><th>Endpoint</th><th>Methods</th><th>Response</th><th>Description</th></tr>
6882 <tr><td><a href='{api_version}/metadata'>{api_version}/metadata</a></td><td>GET</td><td>JSON</td><td>Return <b>all</b> metadata for the host</td></tr>
6883 <tr><td><a href='{api_version}/metadata/daemons'>{api_version}/metadata/daemons</a></td><td>GET</td><td>JSON</td><td>Return daemon and systemd states for ceph daemons (ls)</td></tr>
6884 <tr><td><a href='{api_version}/metadata/disks'>{api_version}/metadata/disks</a></td><td>GET</td><td>JSON</td><td>show disk inventory (ceph-volume)</td></tr>
6885 <tr><td><a href='{api_version}/metadata/health'>{api_version}/metadata/health</a></td><td>GET</td><td>JSON</td><td>Show current health of the exporter sub-tasks</td></tr>
6886 <tr><td><a href='{api_version}/metadata/host'>{api_version}/metadata/host</a></td><td>GET</td><td>JSON</td><td>Show host metadata (gather-facts)</td></tr>
6889 </html>""".format(api_version
=CephadmDaemonHandler
.api_version
)
6891 def _fetch_root(self
):
6892 self
.send_response(200)
6893 self
.send_header('Content-type', 'text/html; charset=utf-8')
6895 self
.wfile
.write(self
._help
_page
().encode('utf-8'))
6897 @Decorators.authorize
6899 """Handle *all* GET requests"""
6901 if self
.path
== '/':
6902 # provide a html response if someone hits the root url, to document the
6903 # available api endpoints
6904 return self
._fetch
_root
()
6905 elif self
.path
in CephadmDaemonHandler
.valid_routes
:
6906 u
= self
.path
.split('/')[-1]
6907 data
= json
.dumps({})
6910 tasks
= self
.server
.cephadm_cache
.health
.get('tasks', {})
6913 # We're using the http status code to help indicate thread health
6914 # - 200 (OK): request successful
6915 # - 204 (No Content): access to a cache relating to a dead thread
6916 # - 206 (Partial content): one or more theads are inactive
6917 # - 500 (Server Error): all threads inactive
6919 data
= json
.dumps(self
.server
.cephadm_cache
.to_json())
6920 if all([tasks
[task_name
] == 'inactive' for task_name
in tasks
if task_name
!= 'http_server']):
6921 # All the subtasks are dead!
6923 elif any([tasks
[task_name
] == 'inactive' for task_name
in tasks
if task_name
!= 'http_server']):
6926 # Individual GETs against the a tasks endpoint will also return a 503 if the corresponding thread is inactive
6927 elif u
== 'daemons':
6928 data
= json
.dumps(self
.server
.cephadm_cache
.daemons
)
6929 if tasks
['daemons'] == 'inactive':
6932 data
= json
.dumps(self
.server
.cephadm_cache
.disks
)
6933 if tasks
['disks'] == 'inactive':
6936 data
= json
.dumps(self
.server
.cephadm_cache
.host
)
6937 if tasks
['host'] == 'inactive':
6940 # a GET against health will always return a 200, since the op is always successful
6942 data
= json
.dumps(self
.server
.cephadm_cache
.health
)
6944 self
.send_response(status_code
)
6945 self
.send_header('Content-type', 'application/json')
6947 self
.wfile
.write(data
.encode('utf-8'))
6950 bad_request_msg
= 'Valid URLs are: {}'.format(', '.join(CephadmDaemonHandler
.valid_routes
))
6951 self
.send_response(404, message
=bad_request_msg
) # reason
6952 self
.send_header('Content-type', 'application/json')
6954 self
.wfile
.write(json
.dumps({'message': bad_request_msg
}).encode('utf-8'))
def log_message(self, format, *args):
    """Route http.server access-log lines through the module logger.

    Overrides BaseHTTPRequestHandler.log_message (which writes to
    stderr) so request logging follows our logging configuration.
    The ``format`` parameter name is fixed by the base-class signature.
    """
    rqst = ' '.join(map(str, args))
    logger.info(f'client:{self.address_string()} [{self.log_date_time_string()}] {rqst}')
6961 class CephadmDaemon():
6963 daemon_type
= 'cephadm-exporter'
6967 token_name
= 'token'
6968 config_requirements
= [
6974 thread_check_interval
= 5
6976 def __init__(self
, ctx
: CephadmContext
, fsid
, daemon_id
=None, port
=None):
6979 self
.daemon_id
= daemon_id
6981 self
.port
= CephadmDaemon
.default_port
6984 self
.workers
: List
[Thread
] = []
6985 self
.http_server
: CephadmHTTPServer
6987 self
.cephadm_cache
= CephadmCache()
6988 self
.errors
: List
[str] = []
6989 self
.token
= read_file([os
.path
.join(self
.daemon_path
, CephadmDaemon
.token_name
)])
6992 def validate_config(cls
, config
):
6993 reqs
= ', '.join(CephadmDaemon
.config_requirements
)
6996 if not config
or not all([k_name
in config
for k_name
in CephadmDaemon
.config_requirements
]):
6997 raise Error(f
'config must contain the following fields : {reqs}')
6999 if not all([isinstance(config
[k_name
], str) for k_name
in CephadmDaemon
.config_requirements
]):
7000 errors
.append(f
'the following fields must be strings: {reqs}')
7002 crt
= config
[CephadmDaemon
.crt_name
]
7003 key
= config
[CephadmDaemon
.key_name
]
7004 token
= config
[CephadmDaemon
.token_name
]
7006 if not crt
.startswith('-----BEGIN CERTIFICATE-----') or not crt
.endswith('-----END CERTIFICATE-----\n'):
7007 errors
.append('crt field is not a valid SSL certificate')
7008 if not key
.startswith('-----BEGIN PRIVATE KEY-----') or not key
.endswith('-----END PRIVATE KEY-----\n'):
7009 errors
.append('key is not a valid SSL private key')
7011 errors
.append("'token' must be more than 8 characters long")
7013 if 'port' in config
:
7015 p
= int(config
['port'])
7018 except (TypeError, ValueError):
7019 errors
.append('port must be an integer > 1024')
7022 raise Error('Parameter errors : {}'.format(', '.join(errors
)))
def port_active(self):
    """Return True when the exporter's configured TCP port is already in use.

    Used by the start-up validation so the daemon can fail fast with a
    clear error instead of failing on bind.
    """
    return port_in_use(self.ctx, self.port)
7031 if self
.port_active
:
7032 self
.errors
.append(f
'TCP port {self.port} already in use, unable to bind')
7033 if not os
.path
.exists(os
.path
.join(self
.daemon_path
, CephadmDaemon
.key_name
)):
7034 self
.errors
.append(f
"Key file '{CephadmDaemon.key_name}' is missing from {self.daemon_path}")
7035 if not os
.path
.exists(os
.path
.join(self
.daemon_path
, CephadmDaemon
.crt_name
)):
7036 self
.errors
.append(f
"Certificate file '{CephadmDaemon.crt_name}' is missing from {self.daemon_path}")
7037 if self
.token
== 'Unknown':
7038 self
.errors
.append(f
"Authentication token '{CephadmDaemon.token_name}' is missing from {self.daemon_path}")
7039 return len(self
.errors
) == 0
def _unit_name(fsid, daemon_id):
    # type: (str, str) -> str
    """Build the systemd unit file name for a cephadm exporter daemon."""
    # get_unit_name yields the cluster/daemon-qualified unit stem;
    # append the '.service' suffix expected under the systemd unit dir.
    return '{}.service'.format(get_unit_name(fsid, CephadmDaemon.daemon_type, daemon_id))
def unit_name(self):
    # type: () -> str
    """Systemd unit name for this daemon instance (derived from fsid + daemon_id)."""
    return CephadmDaemon._unit_name(self.fsid, self.daemon_id)
7050 def daemon_path(self
):
7051 return os
.path
.join(
7054 f
'{self.daemon_type}.{self.daemon_id}'
7058 def binary_path(self
):
7059 path
= os
.path
.realpath(__file__
)
7060 assert os
.path
.isfile(path
)
7063 def _handle_thread_exception(self
, exc
, thread_type
):
7064 e_msg
= f
'{exc.__class__.__name__} exception: {str(exc)}'
7065 thread_info
= getattr(self
.cephadm_cache
, thread_type
)
7066 errors
= thread_info
.get('scrape_errors', [])
7067 errors
.append(e_msg
)
7069 logger
.exception(exc
)
7070 self
.cephadm_cache
.update_task(
7073 'scrape_errors': errors
,
7078 def _scrape_host_facts(self
, refresh_interval
=10):
7080 exception_encountered
= False
7084 if self
.stop
or exception_encountered
:
7087 if ctr
>= refresh_interval
:
7089 logger
.debug('executing host-facts scrape')
7091 s_time
= time
.time()
7094 facts
= HostFacts(self
.ctx
)
7095 except Exception as e
:
7096 self
._handle
_thread
_exception
(e
, 'host')
7097 exception_encountered
= True
7099 elapsed
= time
.time() - s_time
7101 data
= json
.loads(facts
.dump())
7102 except json
.decoder
.JSONDecodeError
:
7103 errors
.append('host-facts provided invalid JSON')
7104 logger
.warning(errors
[-1])
7106 self
.cephadm_cache
.update_task(
7109 'scrape_timestamp': s_time
,
7110 'scrape_duration_secs': elapsed
,
7111 'scrape_errors': errors
,
7115 logger
.debug(f
'completed host-facts scrape - {elapsed}s')
7117 time
.sleep(CephadmDaemon
.loop_delay
)
7118 ctr
+= CephadmDaemon
.loop_delay
7119 logger
.info('host-facts thread stopped')
7121 def _scrape_ceph_volume(self
, refresh_interval
=15):
7122 # we're invoking the ceph_volume command, so we need to set the args that it
7124 self
.ctx
.command
= 'inventory --format=json'.split()
7125 self
.ctx
.fsid
= self
.fsid
7128 exception_encountered
= False
7131 if self
.stop
or exception_encountered
:
7134 if ctr
>= refresh_interval
:
7136 logger
.debug('executing ceph-volume scrape')
7138 s_time
= time
.time()
7139 stream
= io
.StringIO()
7141 with
redirect_stdout(stream
):
7142 command_ceph_volume(self
.ctx
)
7143 except Exception as e
:
7144 self
._handle
_thread
_exception
(e
, 'disks')
7145 exception_encountered
= True
7147 elapsed
= time
.time() - s_time
7149 # if the call to ceph-volume returns junk with the
7150 # json, it won't parse
7151 stdout
= stream
.getvalue()
7156 data
= json
.loads(stdout
)
7157 except json
.decoder
.JSONDecodeError
:
7158 errors
.append('ceph-volume thread provided bad json data')
7159 logger
.warning(errors
[-1])
7161 errors
.append('ceph-volume did not return any data')
7162 logger
.warning(errors
[-1])
7164 self
.cephadm_cache
.update_task(
7167 'scrape_timestamp': s_time
,
7168 'scrape_duration_secs': elapsed
,
7169 'scrape_errors': errors
,
7174 logger
.debug(f
'completed ceph-volume scrape - {elapsed}s')
7175 time
.sleep(CephadmDaemon
.loop_delay
)
7176 ctr
+= CephadmDaemon
.loop_delay
7178 logger
.info('ceph-volume thread stopped')
7180 def _scrape_list_daemons(self
, refresh_interval
=20):
7182 exception_encountered
= False
7184 if self
.stop
or exception_encountered
:
7187 if ctr
>= refresh_interval
:
7189 logger
.debug('executing list-daemons scrape')
7191 s_time
= time
.time()
7194 # list daemons should ideally be invoked with a fsid
7195 data
= list_daemons(self
.ctx
)
7196 except Exception as e
:
7197 self
._handle
_thread
_exception
(e
, 'daemons')
7198 exception_encountered
= True
7200 if not isinstance(data
, list):
7201 errors
.append('list-daemons did not supply a list?')
7202 logger
.warning(errors
[-1])
7204 elapsed
= time
.time() - s_time
7205 self
.cephadm_cache
.update_task(
7208 'scrape_timestamp': s_time
,
7209 'scrape_duration_secs': elapsed
,
7210 'scrape_errors': errors
,
7214 logger
.debug(f
'completed list-daemons scrape - {elapsed}s')
7216 time
.sleep(CephadmDaemon
.loop_delay
)
7217 ctr
+= CephadmDaemon
.loop_delay
7218 logger
.info('list-daemons thread stopped')
7220 def _create_thread(self
, target
, name
, refresh_interval
=None):
7221 if refresh_interval
:
7222 t
= Thread(target
=target
, args
=(refresh_interval
,))
7224 t
= Thread(target
=target
)
7227 self
.cephadm_cache
.update_health(name
, 'active')
7230 start_msg
= f
'Started {name} thread'
7231 if refresh_interval
:
7232 logger
.info(f
'{start_msg}, with a refresh interval of {refresh_interval}s')
7234 logger
.info(f
'{start_msg}')
def reload(self, *args):
    """Handle a reload request (SIGHUP received).

    This is a placeholder function only, and serves to provide the hook that could
    be exploited later if the exporter evolves to incorporate a config file.
    """
    logger.info('Reload request received - ignoring, no action needed')
7245 def shutdown(self
, *args
):
7246 logger
.info('Shutdown request received')
7248 self
.http_server
.shutdown()
7251 logger
.info(f
"cephadm exporter starting for FSID '{self.fsid}'")
7252 if not self
.can_run
:
7253 logger
.error('Unable to start the exporter daemon')
7254 for e
in self
.errors
:
7258 # register signal handlers for running under systemd control
7259 signal
.signal(signal
.SIGTERM
, self
.shutdown
)
7260 signal
.signal(signal
.SIGINT
, self
.shutdown
)
7261 signal
.signal(signal
.SIGHUP
, self
.reload)
7262 logger
.debug('Signal handlers attached')
7264 host_facts
= self
._create
_thread
(self
._scrape
_host
_facts
, 'host', 5)
7265 self
.workers
.append(host_facts
)
7267 daemons
= self
._create
_thread
(self
._scrape
_list
_daemons
, 'daemons', 20)
7268 self
.workers
.append(daemons
)
7270 disks
= self
._create
_thread
(self
._scrape
_ceph
_volume
, 'disks', 20)
7271 self
.workers
.append(disks
)
7273 self
.http_server
= CephadmHTTPServer(('0.0.0.0', self
.port
), CephadmDaemonHandler
) # IPv4 only
7274 self
.http_server
.socket
= ssl
.wrap_socket(self
.http_server
.socket
,
7275 keyfile
=os
.path
.join(self
.daemon_path
, CephadmDaemon
.key_name
),
7276 certfile
=os
.path
.join(self
.daemon_path
, CephadmDaemon
.crt_name
),
7279 self
.http_server
.cephadm_cache
= self
.cephadm_cache
7280 self
.http_server
.token
= self
.token
7281 server_thread
= self
._create
_thread
(self
.http_server
.serve_forever
, 'http_server')
7282 logger
.info(f
'https server listening on {self.http_server.server_address[0]}:{self.http_server.server_port}')
7285 while server_thread
.is_alive():
7289 if ctr
>= CephadmDaemon
.thread_check_interval
:
7291 for worker
in self
.workers
:
7292 if self
.cephadm_cache
.tasks
[worker
.name
] == 'inactive':
7294 if not worker
.is_alive():
7295 logger
.warning(f
'{worker.name} thread not running')
7296 stop_time
= datetime
.datetime
.now().strftime('%Y/%m/%d %H:%M:%S')
7297 self
.cephadm_cache
.update_health(worker
.name
, 'inactive', f
'{worker.name} stopped at {stop_time}')
7299 time
.sleep(CephadmDaemon
.loop_delay
)
7300 ctr
+= CephadmDaemon
.loop_delay
7302 logger
.info('Main http server thread stopped')
7308 {py3} {bin_path} exporter --fsid {fsid} --id {daemon_id} --port {port} &""".format(
7309 py3
=shutil
.which('python3'),
7310 bin_path
=self
.binary_path
,
7312 daemon_id
=self
.daemon_id
,
7317 def unit_file(self
):
7318 docker
= isinstance(self
.ctx
.container_engine
, Docker
)
7319 return """#generated by cephadm
7321 Description=cephadm exporter service for cluster {fsid}
7322 After=network-online.target{docker_after}
7323 Wants=network-online.target
7326 PartOf=ceph-{fsid}.target
7327 Before=ceph-{fsid}.target
7331 ExecStart=/bin/bash {daemon_path}/unit.run
7332 ExecReload=/bin/kill -HUP $MAINPID
7337 WantedBy=ceph-{fsid}.target
7338 """.format(fsid
=self
.fsid
,
7339 daemon_path
=self
.daemon_path
,
7340 # if docker, we depend on docker.service
7341 docker_after
=' docker.service' if docker
else '',
7342 docker_requires
='Requires=docker.service\n' if docker
else '')
7344 def deploy_daemon_unit(self
, config
=None):
7345 """deploy a specific unit file for cephadm
7347 The normal deploy_daemon_units doesn't apply for this
7348 daemon since it's not a container, so we just create a
7349 simple service definition and add it to the fsid's target
7352 raise Error('Attempting to deploy cephadm daemon without a config')
7353 assert isinstance(config
, dict)
7355 # Create the required config files in the daemons dir, with restricted permissions
7356 for filename
in config
:
7357 with
open(os
.open(os
.path
.join(self
.daemon_path
, filename
), os
.O_CREAT | os
.O_WRONLY
, mode
=0o600), 'w') as f
:
7358 f
.write(config
[filename
])
7360 # When __file__ is <stdin> we're being invoked over remoto via the orchestrator, so
7361 # we pick up the file from where the orchestrator placed it - otherwise we'll
7362 # copy it to the binary location for this cluster
7363 if not __file__
== '<stdin>':
7364 shutil
.copy(__file__
,
7367 with
open(os
.path
.join(self
.daemon_path
, 'unit.run'), 'w') as f
:
7368 f
.write(self
.unit_run
)
7371 os
.path
.join(self
.ctx
.unit_dir
,
7372 f
'{self.unit_name}.new'),
7375 f
.write(self
.unit_file
)
7377 os
.path
.join(self
.ctx
.unit_dir
, f
'{self.unit_name}.new'),
7378 os
.path
.join(self
.ctx
.unit_dir
, self
.unit_name
))
7380 call_throws(self
.ctx
, ['systemctl', 'daemon-reload'])
7381 call(self
.ctx
, ['systemctl', 'stop', self
.unit_name
],
7382 verbosity
=CallVerbosity
.DEBUG
)
7383 call(self
.ctx
, ['systemctl', 'reset-failed', self
.unit_name
],
7384 verbosity
=CallVerbosity
.DEBUG
)
7385 call_throws(self
.ctx
, ['systemctl', 'enable', '--now', self
.unit_name
])
7388 def uninstall(cls
, ctx
: CephadmContext
, fsid
, daemon_type
, daemon_id
):
7389 unit_name
= CephadmDaemon
._unit
_name
(fsid
, daemon_id
)
7390 unit_path
= os
.path
.join(ctx
.unit_dir
, unit_name
)
7391 unit_run
= os
.path
.join(ctx
.data_dir
, fsid
, f
'{daemon_type}.{daemon_id}', 'unit.run')
7394 with
open(unit_run
, 'r') as u
:
7395 contents
= u
.read().strip(' &')
7397 logger
.warning(f
'Unable to access the unit.run file @ {unit_run}')
7401 for line
in contents
.split('\n'):
7402 if '--port ' in line
:
7404 port
= int(line
.split('--port ')[-1])
7406 logger
.warning('Unexpected format in unit.run file: port is not numeric')
7407 logger
.warning('Unable to remove the systemd file and close the port')
7414 fw
.close_ports([port
])
7415 except RuntimeError:
7416 logger
.error(f
'Unable to close port {port}')
7418 stdout
, stderr
, rc
= call(ctx
, ['rm', '-f', unit_path
])
7420 logger
.error(f
'Unable to remove the systemd file @ {unit_path}')
7422 logger
.info(f
'removed systemd unit file @ {unit_path}')
7423 stdout
, stderr
, rc
= call(ctx
, ['systemctl', 'daemon-reload'])
7426 def command_exporter(ctx
: CephadmContext
):
7427 exporter
= CephadmDaemon(ctx
, ctx
.fsid
, daemon_id
=ctx
.id, port
=ctx
.port
)
7429 if ctx
.fsid
not in os
.listdir(ctx
.data_dir
):
7430 raise Error(f
"cluster fsid '{ctx.fsid}' not found in '{ctx.data_dir}'")
7434 ##################################
7437 def systemd_target_state(target_name
: str, subsystem
: str = 'ceph') -> bool:
7439 return os
.path
.exists(
7442 f
'{subsystem}.target.wants',
7449 def command_maintenance(ctx
: CephadmContext
):
7451 raise Error('must pass --fsid to specify cluster')
7453 target
= f
'ceph-{ctx.fsid}.target'
7455 if ctx
.maintenance_action
.lower() == 'enter':
7456 logger
.info('Requested to place host into maintenance')
7457 if systemd_target_state(target
):
7458 _out
, _err
, code
= call(ctx
,
7459 ['systemctl', 'disable', target
],
7460 verbosity
=CallVerbosity
.DEBUG
)
7462 logger
.error(f
'Failed to disable the {target} target')
7463 return 'failed - to disable the target'
7465 # stopping a target waits by default
7466 _out
, _err
, code
= call(ctx
,
7467 ['systemctl', 'stop', target
],
7468 verbosity
=CallVerbosity
.DEBUG
)
7470 logger
.error(f
'Failed to stop the {target} target')
7471 return 'failed - to disable the target'
7473 return f
'success - systemd target {target} disabled'
7476 return 'skipped - target already disabled'
7479 logger
.info('Requested to exit maintenance state')
7480 # exit maintenance request
7481 if not systemd_target_state(target
):
7482 _out
, _err
, code
= call(ctx
,
7483 ['systemctl', 'enable', target
],
7484 verbosity
=CallVerbosity
.DEBUG
)
7486 logger
.error(f
'Failed to enable the {target} target')
7487 return 'failed - unable to enable the target'
7489 # starting a target waits by default
7490 _out
, _err
, code
= call(ctx
,
7491 ['systemctl', 'start', target
],
7492 verbosity
=CallVerbosity
.DEBUG
)
7494 logger
.error(f
'Failed to start the {target} target')
7495 return 'failed - unable to start the target'
7497 return f
'success - systemd target {target} enabled and started'
7499 ##################################
7503 # type: () -> argparse.ArgumentParser
7504 parser
= argparse
.ArgumentParser(
7505 description
='Bootstrap Ceph daemons with systemd and containers.',
7506 formatter_class
=argparse
.ArgumentDefaultsHelpFormatter
)
7507 parser
.add_argument(
7509 help='container image. Can also be set via the "CEPHADM_IMAGE" '
7511 parser
.add_argument(
7513 action
='store_true',
7514 help='use docker instead of podman')
7515 parser
.add_argument(
7518 help='base directory for daemon data')
7519 parser
.add_argument(
7522 help='base directory for daemon logs')
7523 parser
.add_argument(
7525 default
=LOGROTATE_DIR
,
7526 help='location of logrotate configuration files')
7527 parser
.add_argument(
7530 help='location of sysctl configuration files')
7531 parser
.add_argument(
7534 help='base directory for systemd units')
7535 parser
.add_argument(
7537 action
='store_true',
7538 help='Show debug-level log messages')
7539 parser
.add_argument(
7542 default
=DEFAULT_TIMEOUT
,
7543 help='timeout in seconds')
7544 parser
.add_argument(
7547 default
=DEFAULT_RETRY
,
7548 help='max number of retries')
7549 parser
.add_argument(
7553 help='set environment variable')
7554 parser
.add_argument(
7555 '--no-container-init',
7556 action
='store_true',
7557 default
=not CONTAINER_INIT
,
7558 help='Do not run podman/docker with `--init`')
7560 subparsers
= parser
.add_subparsers(help='sub-command')
7562 parser_version
= subparsers
.add_parser(
7563 'version', help='get ceph version from container')
7564 parser_version
.set_defaults(func
=command_version
)
7566 parser_pull
= subparsers
.add_parser(
7567 'pull', help='pull latest image version')
7568 parser_pull
.set_defaults(func
=command_pull
)
7570 parser_inspect_image
= subparsers
.add_parser(
7571 'inspect-image', help='inspect local container image')
7572 parser_inspect_image
.set_defaults(func
=command_inspect_image
)
7574 parser_ls
= subparsers
.add_parser(
7575 'ls', help='list daemon instances on this host')
7576 parser_ls
.set_defaults(func
=command_ls
)
7577 parser_ls
.add_argument(
7579 action
='store_true',
7580 help='Do not include daemon status')
7581 parser_ls
.add_argument(
7584 help='base directory for legacy daemon data')
7586 parser_list_networks
= subparsers
.add_parser(
7587 'list-networks', help='list IP networks')
7588 parser_list_networks
.set_defaults(func
=command_list_networks
)
7590 parser_adopt
= subparsers
.add_parser(
7591 'adopt', help='adopt daemon deployed with a different tool')
7592 parser_adopt
.set_defaults(func
=command_adopt
)
7593 parser_adopt
.add_argument(
7596 help='daemon name (type.id)')
7597 parser_adopt
.add_argument(
7600 help='deployment style (legacy, ...)')
7601 parser_adopt
.add_argument(
7604 help='cluster name')
7605 parser_adopt
.add_argument(
7608 help='base directory for legacy daemon data')
7609 parser_adopt
.add_argument(
7611 help='Additional configuration information in JSON format')
7612 parser_adopt
.add_argument(
7614 action
='store_true',
7615 help='Do not configure firewalld')
7616 parser_adopt
.add_argument(
7618 action
='store_true',
7619 help='do not pull the latest image before adopting')
7620 parser_adopt
.add_argument(
7622 action
='store_true',
7623 help='start newly adoped daemon, even if it was not running previously')
7624 parser_adopt
.add_argument(
7626 action
='store_true',
7627 default
=CONTAINER_INIT
,
7628 help=argparse
.SUPPRESS
)
7630 parser_rm_daemon
= subparsers
.add_parser(
7631 'rm-daemon', help='remove daemon instance')
7632 parser_rm_daemon
.set_defaults(func
=command_rm_daemon
)
7633 parser_rm_daemon
.add_argument(
7636 action
=CustomValidation
,
7637 help='daemon name (type.id)')
7638 parser_rm_daemon
.add_argument(
7641 help='cluster FSID')
7642 parser_rm_daemon
.add_argument(
7644 action
='store_true',
7645 help='proceed, even though this may destroy valuable data')
7646 parser_rm_daemon
.add_argument(
7647 '--force-delete-data',
7648 action
='store_true',
7649 help='delete valuable daemon data instead of making a backup')
7651 parser_rm_cluster
= subparsers
.add_parser(
7652 'rm-cluster', help='remove all daemons for a cluster')
7653 parser_rm_cluster
.set_defaults(func
=command_rm_cluster
)
7654 parser_rm_cluster
.add_argument(
7657 help='cluster FSID')
7658 parser_rm_cluster
.add_argument(
7660 action
='store_true',
7661 help='proceed, even though this may destroy valuable data')
7662 parser_rm_cluster
.add_argument(
7664 action
='store_true',
7665 help='do not remove log files')
7666 parser_rm_cluster
.add_argument(
7668 action
='store_true',
7669 help='zap OSD devices for this cluster')
7671 parser_run
= subparsers
.add_parser(
7672 'run', help='run a ceph daemon, in a container, in the foreground')
7673 parser_run
.set_defaults(func
=command_run
)
7674 parser_run
.add_argument(
7677 help='daemon name (type.id)')
7678 parser_run
.add_argument(
7681 help='cluster FSID')
7683 parser_shell
= subparsers
.add_parser(
7684 'shell', help='run an interactive shell inside a daemon container')
7685 parser_shell
.set_defaults(func
=command_shell
)
7686 parser_shell
.add_argument(
7688 help='cluster FSID')
7689 parser_shell
.add_argument(
7691 help='daemon name (type.id)')
7692 parser_shell
.add_argument(
7694 help='ceph.conf to pass through to the container')
7695 parser_shell
.add_argument(
7697 help='ceph.keyring to pass through to the container')
7698 parser_shell
.add_argument(
7700 help=('mount a file or directory in the container. '
7701 'Support multiple mounts. '
7702 'ie: `--mount /foo /bar:/bar`. '
7703 'When no destination is passed, default is /mnt'),
7705 parser_shell
.add_argument(
7709 help='set environment variable')
7710 parser_shell
.add_argument(
7714 help='set environment variable')
7715 parser_shell
.add_argument(
7716 'command', nargs
=argparse
.REMAINDER
,
7717 help='command (optional)')
7718 parser_shell
.add_argument(
7720 action
='store_true',
7721 help='dont pass /etc/hosts through to the container')
7723 parser_enter
= subparsers
.add_parser(
7724 'enter', help='run an interactive shell inside a running daemon container')
7725 parser_enter
.set_defaults(func
=command_enter
)
7726 parser_enter
.add_argument(
7728 help='cluster FSID')
7729 parser_enter
.add_argument(
7732 help='daemon name (type.id)')
7733 parser_enter
.add_argument(
7734 'command', nargs
=argparse
.REMAINDER
,
7737 parser_ceph_volume
= subparsers
.add_parser(
7738 'ceph-volume', help='run ceph-volume inside a container')
7739 parser_ceph_volume
.set_defaults(func
=command_ceph_volume
)
7740 parser_ceph_volume
.add_argument(
7742 help='cluster FSID')
7743 parser_ceph_volume
.add_argument(
7745 help='JSON file with config and (client.bootrap-osd) key')
7746 parser_ceph_volume
.add_argument(
7748 help='ceph conf file')
7749 parser_ceph_volume
.add_argument(
7751 help='ceph.keyring to pass through to the container')
7752 parser_ceph_volume
.add_argument(
7753 'command', nargs
=argparse
.REMAINDER
,
7756 parser_zap_osds
= subparsers
.add_parser(
7757 'zap-osds', help='zap all OSDs associated with a particular fsid')
7758 parser_zap_osds
.set_defaults(func
=command_zap_osds
)
7759 parser_zap_osds
.add_argument(
7762 help='cluster FSID')
7763 parser_zap_osds
.add_argument(
7765 action
='store_true',
7766 help='proceed, even though this may destroy valuable data')
7768 parser_unit
= subparsers
.add_parser(
7769 'unit', help="operate on the daemon's systemd unit")
7770 parser_unit
.set_defaults(func
=command_unit
)
7771 parser_unit
.add_argument(
7773 help='systemd command (start, stop, restart, enable, disable, ...)')
7774 parser_unit
.add_argument(
7776 help='cluster FSID')
7777 parser_unit
.add_argument(
7780 help='daemon name (type.id)')
7782 parser_logs
= subparsers
.add_parser(
7783 'logs', help='print journald logs for a daemon container')
7784 parser_logs
.set_defaults(func
=command_logs
)
7785 parser_logs
.add_argument(
7787 help='cluster FSID')
7788 parser_logs
.add_argument(
7791 help='daemon name (type.id)')
7792 parser_logs
.add_argument(
7793 'command', nargs
='*',
7794 help='additional journalctl args')
7796 parser_bootstrap
= subparsers
.add_parser(
7797 'bootstrap', help='bootstrap a cluster (mon + mgr daemons)')
7798 parser_bootstrap
.set_defaults(func
=command_bootstrap
)
7799 parser_bootstrap
.add_argument(
7801 help='ceph conf file to incorporate')
7802 parser_bootstrap
.add_argument(
7805 help='mon id (default: local hostname)')
7806 parser_bootstrap
.add_argument(
7808 help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])')
7809 parser_bootstrap
.add_argument(
7812 parser_bootstrap
.add_argument(
7815 help='mgr id (default: randomly generated)')
7816 parser_bootstrap
.add_argument(
7818 help='cluster FSID')
7819 parser_bootstrap
.add_argument(
7821 default
='/etc/ceph',
7822 help='directory to write config, keyring, and pub key files')
7823 parser_bootstrap
.add_argument(
7825 help='location to write keyring file with new cluster admin and mon keys')
7826 parser_bootstrap
.add_argument(
7828 help='location to write conf file to connect to new cluster')
7829 parser_bootstrap
.add_argument(
7830 '--output-pub-ssh-key',
7831 help="location to write the cluster's public SSH key")
7832 parser_bootstrap
.add_argument(
7833 '--skip-admin-label',
7834 action
='store_true',
7835 help='do not create admin label for ceph.conf and client.admin keyring distribution')
7836 parser_bootstrap
.add_argument(
7838 action
='store_true',
7839 help='skip setup of ssh key on local host')
7840 parser_bootstrap
.add_argument(
7841 '--initial-dashboard-user',
7843 help='Initial user for the dashboard')
7844 parser_bootstrap
.add_argument(
7845 '--initial-dashboard-password',
7846 help='Initial password for the initial dashboard user')
7847 parser_bootstrap
.add_argument(
7848 '--ssl-dashboard-port',
7851 help='Port number used to connect with dashboard using SSL')
7852 parser_bootstrap
.add_argument(
7854 type=argparse
.FileType('r'),
7855 help='Dashboard key')
7856 parser_bootstrap
.add_argument(
7858 type=argparse
.FileType('r'),
7859 help='Dashboard certificate')
7861 parser_bootstrap
.add_argument(
7863 type=argparse
.FileType('r'),
7865 parser_bootstrap
.add_argument(
7866 '--ssh-private-key',
7867 type=argparse
.FileType('r'),
7868 help='SSH private key')
7869 parser_bootstrap
.add_argument(
7871 type=argparse
.FileType('r'),
7872 help='SSH public key')
7873 parser_bootstrap
.add_argument(
7876 help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users')
7878 parser_bootstrap
.add_argument(
7879 '--skip-mon-network',
7880 action
='store_true',
7881 help='set mon public_network based on bootstrap mon ip')
7882 parser_bootstrap
.add_argument(
7884 action
='store_true',
7885 help='do not enable the Ceph Dashboard')
7886 parser_bootstrap
.add_argument(
7887 '--dashboard-password-noupdate',
7888 action
='store_true',
7889 help='stop forced dashboard password change')
7890 parser_bootstrap
.add_argument(
7891 '--no-minimize-config',
7892 action
='store_true',
7893 help='do not assimilate and minimize the config file')
7894 parser_bootstrap
.add_argument(
7895 '--skip-ping-check',
7896 action
='store_true',
7897 help='do not verify that mon IP is pingable')
7898 parser_bootstrap
.add_argument(
7900 action
='store_true',
7901 help='do not pull the latest image before bootstrapping')
7902 parser_bootstrap
.add_argument(
7904 action
='store_true',
7905 help='Do not configure firewalld')
7906 parser_bootstrap
.add_argument(
7907 '--allow-overwrite',
7908 action
='store_true',
7909 help='allow overwrite of existing --output-* config/keyring/ssh files')
7910 parser_bootstrap
.add_argument(
7911 '--allow-fqdn-hostname',
7912 action
='store_true',
7913 help='allow hostname that is fully-qualified (contains ".")')
7914 parser_bootstrap
.add_argument(
7915 '--allow-mismatched-release',
7916 action
='store_true',
7917 help="allow bootstrap of ceph that doesn't match this version of cephadm")
7918 parser_bootstrap
.add_argument(
7919 '--skip-prepare-host',
7920 action
='store_true',
7921 help='Do not prepare host')
7922 parser_bootstrap
.add_argument(
7923 '--orphan-initial-daemons',
7924 action
='store_true',
7925 help='Set mon and mgr service to `unmanaged`, Do not create the crash service')
7926 parser_bootstrap
.add_argument(
7927 '--skip-monitoring-stack',
7928 action
='store_true',
7929 help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
7930 parser_bootstrap
.add_argument(
7932 help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
7934 parser_bootstrap
.add_argument(
7935 '--shared_ceph_folder',
7936 metavar
='CEPH_SOURCE_FOLDER',
7937 help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
7939 parser_bootstrap
.add_argument(
7941 help='url for custom registry')
7942 parser_bootstrap
.add_argument(
7943 '--registry-username',
7944 help='username for custom registry')
7945 parser_bootstrap
.add_argument(
7946 '--registry-password',
7947 help='password for custom registry')
7948 parser_bootstrap
.add_argument(
7950 help='json file with custom registry login info (URL, Username, Password)')
7951 parser_bootstrap
.add_argument(
7953 action
='store_true',
7954 default
=CONTAINER_INIT
,
7955 help=argparse
.SUPPRESS
)
7956 parser_bootstrap
.add_argument(
7958 action
='store_true',
7959 help='Automatically deploy cephadm metadata exporter to each node')
7960 parser_bootstrap
.add_argument(
7961 '--exporter-config',
7962 action
=CustomValidation
,
7963 help=f
'Exporter configuration information in JSON format (providing: {", ".join(CephadmDaemon.config_requirements)}, port information)')
7964 parser_bootstrap
.add_argument(
7965 '--cluster-network',
7966 help='subnet to use for cluster replication, recovery and heartbeats (in CIDR notation network/mask)')
7967 parser_bootstrap
.add_argument(
7968 '--single-host-defaults',
7969 action
='store_true',
7970 help='adjust configuration defaults to suit a single-host cluster')
7972 parser_deploy
= subparsers
.add_parser(
7973 'deploy', help='deploy a daemon')
7974 parser_deploy
.set_defaults(func
=command_deploy
)
7975 parser_deploy
.add_argument(
7978 action
=CustomValidation
,
7979 help='daemon name (type.id)')
7980 parser_deploy
.add_argument(
7983 help='cluster FSID')
7984 parser_deploy
.add_argument(
7986 help='config file for new daemon')
7987 parser_deploy
.add_argument(
7989 help='Additional configuration information in JSON format')
7990 parser_deploy
.add_argument(
7992 help='keyring for new daemon')
7993 parser_deploy
.add_argument(
7995 help='key for new daemon')
7996 parser_deploy
.add_argument(
7998 help='OSD uuid, if creating an OSD container')
7999 parser_deploy
.add_argument(
8001 action
='store_true',
8002 help='Do not configure firewalld')
8003 parser_deploy
.add_argument(
8005 help='List of tcp ports to open in the host firewall')
8006 parser_deploy
.add_argument(
8008 action
='store_true',
8009 help='Reconfigure a previously deployed daemon')
8010 parser_deploy
.add_argument(
8012 action
='store_true',
8013 help='Allow SYS_PTRACE on daemon container')
8014 parser_deploy
.add_argument(
8016 action
='store_true',
8017 default
=CONTAINER_INIT
,
8018 help=argparse
.SUPPRESS
)
8019 parser_deploy
.add_argument(
8021 help='Container memory request/target'
8023 parser_deploy
.add_argument(
8025 help='Container memory hard limit'
8027 parser_deploy
.add_argument(
8029 help='JSON dict of additional metadata'
8032 parser_check_host
= subparsers
.add_parser(
8033 'check-host', help='check host configuration')
8034 parser_check_host
.set_defaults(func
=command_check_host
)
8035 parser_check_host
.add_argument(
8036 '--expect-hostname',
8037 help='Check that hostname matches an expected value')
8039 parser_prepare_host
= subparsers
.add_parser(
8040 'prepare-host', help='prepare a host for cephadm use')
8041 parser_prepare_host
.set_defaults(func
=command_prepare_host
)
8042 parser_prepare_host
.add_argument(
8043 '--expect-hostname',
8044 help='Set hostname')
8046 parser_add_repo
= subparsers
.add_parser(
8047 'add-repo', help='configure package repository')
8048 parser_add_repo
.set_defaults(func
=command_add_repo
)
8049 parser_add_repo
.add_argument(
8051 help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE
))
8052 parser_add_repo
.add_argument(
8054 help='use specific upstream version (x.y.z)')
8055 parser_add_repo
.add_argument(
8057 help='use specified bleeding edge build from git branch or tag')
8058 parser_add_repo
.add_argument(
8060 help='use specified bleeding edge build from git commit')
8061 parser_add_repo
.add_argument(
8063 help='specify alternative GPG key location')
8064 parser_add_repo
.add_argument(
8066 default
='https://download.ceph.com',
8067 help='specify alternative repo location')
8070 parser_rm_repo
= subparsers
.add_parser(
8071 'rm-repo', help='remove package repository configuration')
8072 parser_rm_repo
.set_defaults(func
=command_rm_repo
)
8074 parser_install
= subparsers
.add_parser(
8075 'install', help='install ceph package(s)')
8076 parser_install
.set_defaults(func
=command_install
)
8077 parser_install
.add_argument(
8078 'packages', nargs
='*',
8079 default
=['cephadm'],
8082 parser_registry_login
= subparsers
.add_parser(
8083 'registry-login', help='log host into authenticated registry')
8084 parser_registry_login
.set_defaults(func
=command_registry_login
)
8085 parser_registry_login
.add_argument(
8087 help='url for custom registry')
8088 parser_registry_login
.add_argument(
8089 '--registry-username',
8090 help='username for custom registry')
8091 parser_registry_login
.add_argument(
8092 '--registry-password',
8093 help='password for custom registry')
8094 parser_registry_login
.add_argument(
8096 help='json file with custom registry login info (URL, Username, Password)')
8097 parser_registry_login
.add_argument(
8099 help='cluster FSID')
8101 parser_gather_facts
= subparsers
.add_parser(
8102 'gather-facts', help='gather and return host related information (JSON format)')
8103 parser_gather_facts
.set_defaults(func
=command_gather_facts
)
8105 parser_exporter
= subparsers
.add_parser(
8106 'exporter', help='Start cephadm in exporter mode (web service), providing host/daemon/disk metadata')
8107 parser_exporter
.add_argument(
8111 help='fsid of the cephadm exporter to run against')
8112 parser_exporter
.add_argument(
8115 default
=int(CephadmDaemon
.default_port
),
8116 help='port number for the cephadm exporter service')
8117 parser_exporter
.add_argument(
8120 default
=get_hostname().split('.')[0],
8121 help='daemon identifer for the exporter')
8122 parser_exporter
.set_defaults(func
=command_exporter
)
8124 parser_maintenance
= subparsers
.add_parser(
8125 'host-maintenance', help='Manage the maintenance state of a host')
8126 parser_maintenance
.add_argument(
8128 help='cluster FSID')
8129 parser_maintenance
.add_argument(
8130 'maintenance_action',
8132 choices
=['enter', 'exit'],
8133 help='Maintenance action - enter maintenance, or exit maintenance')
8134 parser_maintenance
.set_defaults(func
=command_maintenance
)
def _parse_args(av: List[str]) -> argparse.Namespace:
    """Parse the cephadm command line.

    :param av: argument vector (typically ``sys.argv[1:]``)
    :return: parsed Namespace, with ``container_init`` / ``no_container_init``
             normalized so exactly one of them is True.
    """
    parser = _get_parser()

    args = parser.parse_args(av)
    if 'command' in args and args.command and args.command[0] == '--':
        # argparse leaves the literal '--' separator in the trailing
        # command list; strip it so the subcommand only sees its own args.
        args.command.pop(0)

    # workaround argparse to deprecate the subparser `--container-init` flag
    # container_init and no_container_init must always be mutually exclusive
    container_init_args = ('--container-init', '--no-container-init')
    if set(container_init_args).issubset(av):
        # both flags given explicitly: contradictory, reject
        parser.error('argument %s: not allowed with argument %s' % (container_init_args))
    elif '--container-init' in av:
        # deprecated positive flag given: derive the negative from it
        args.no_container_init = not args.container_init
    else:
        # only the negative flag (or neither) given: derive the positive
        args.container_init = not args.no_container_init
    assert args.container_init is not args.no_container_init

    return args
def cephadm_init_ctx(args: List[str]) -> CephadmContext:
    """Create a CephadmContext populated from a raw argument vector.

    :param args: argument vector (typically ``sys.argv[1:]``)
    :return: a new CephadmContext carrying the parsed arguments.
    """
    ctx = CephadmContext()
    ctx.set_args(_parse_args(args))
    # without this return the function would yield None despite its
    # CephadmContext annotation, breaking every caller
    return ctx
8166 def cephadm_init(args
: List
[str]) -> CephadmContext
:
8168 ctx
= cephadm_init_ctx(args
)
8170 # Logger configuration
8171 if not os
.path
.exists(LOG_DIR
):
8172 os
.makedirs(LOG_DIR
)
8173 dictConfig(logging_config
)
8174 logger
= logging
.getLogger()
8176 if not os
.path
.exists(ctx
.logrotate_dir
+ '/cephadm'):
8177 with
open(ctx
.logrotate_dir
+ '/cephadm', 'w') as f
:
8178 f
.write("""# created by cephadm
8179 /var/log/ceph/cephadm.log {
8189 for handler
in logger
.handlers
:
8190 if handler
.name
== 'console':
8191 handler
.setLevel(logging
.DEBUG
)
8199 if os
.geteuid() != 0:
8200 sys
.stderr
.write('ERROR: cephadm should be run as root\n')
8206 ctx
= cephadm_init(av
)
8207 if not ctx
.has_function():
8208 sys
.stderr
.write('No command specified; pass -h or --help for usage\n')
8213 ctx
.container_engine
= find_container_engine(ctx
)
8214 if ctx
.func
not in \
8215 [command_check_host
, command_prepare_host
, command_add_repo
, command_install
]:
8216 check_container_engine(ctx
)
8222 logger
.error('ERROR: %s' % e
)
8229 if __name__
== '__main__':