3 DEFAULT_IMAGE
='docker.io/ceph/ceph:v15'
4 DEFAULT_IMAGE_IS_MASTER
=False
5 LATEST_STABLE_RELEASE
= 'octopus'
6 DATA_DIR
= '/var/lib/ceph'
7 LOG_DIR
= '/var/log/ceph'
8 LOCK_DIR
= '/run/cephadm'
9 LOGROTATE_DIR
= '/etc/logrotate.d'
10 UNIT_DIR
= '/etc/systemd/system'
14 CONTAINER_PREFERENCE
= ['podman', 'docker'] # prefer podman to docker
15 CUSTOM_PS1
= r
'[ceph: \u@\h \W]\$ '
16 DEFAULT_TIMEOUT
= None # in seconds
18 SHELL_DEFAULT_CONF
= '/etc/ceph/ceph.conf'
19 SHELL_DEFAULT_KEYRING
= '/etc/ceph/ceph.client.admin.keyring'
22 You can invoke cephadm in two ways:
24 1. The normal way, at the command line.
26 2. By piping the script to the python3 binary. In this latter case, you should
27 prepend one or more lines to the beginning of the script.
35 injected_argv = ['ls']
37 For reading stdin from the '--config-json -' argument,
39 injected_stdin = '...'
47 from logging
.config
import dictConfig
64 from typing
import Dict
, List
, Tuple
, Optional
, Union
, Any
, NoReturn
, Callable
, IO
71 from functools
import wraps
73 from threading
import Thread
75 if sys
.version_info
>= (3, 0):
76 from io
import StringIO
78 from StringIO
import StringIO
80 if sys
.version_info
>= (3, 2):
81 from configparser
import ConfigParser
83 from ConfigParser
import SafeConfigParser
85 if sys
.version_info
>= (3, 0):
86 from urllib
.request
import urlopen
87 from urllib
.error
import HTTPError
89 from urllib2
import urlopen
, HTTPError
91 if sys
.version_info
> (3, 0):
97 DATEFMT
= '%Y-%m-%dT%H:%M:%S.%fZ'
99 # Log and console output config
102 'disable_existing_loggers': True,
105 'format': '%(asctime)s %(levelname)s %(message)s'
111 'class':'logging.StreamHandler',
115 'class': 'logging.handlers.RotatingFileHandler',
116 'formatter': 'cephadm',
117 'filename': '%s/cephadm.log' % LOG_DIR
,
125 'handlers': ['console', 'log_file'],
136 class Error(Exception):
140 class TimeoutExpired(Error
):
143 ##################################
147 daemons
= ('mon', 'mgr', 'mds', 'osd', 'rgw', 'rbd-mirror',
150 ##################################
153 class Monitoring(object):
154 """Define the configs for the monitoring containers"""
157 "prometheus": [9095], # Avoid default 9090, due to conflict with cockpit UI
158 "node-exporter": [9100],
160 "alertmanager": [9093, 9094],
165 "image": "docker.io/prom/prometheus:v2.18.1",
169 "--config.file=/etc/prometheus/prometheus.yml",
170 "--storage.tsdb.path=/prometheus",
171 "--web.listen-address=:{}".format(port_map
['prometheus'][0]),
173 "config-json-files": [
178 "image": "docker.io/prom/node-exporter:v0.18.1",
182 "--no-collector.timex",
186 "image": "docker.io/ceph/ceph-grafana:6.7.4",
190 "config-json-files": [
192 "provisioning/datasources/ceph-dashboard.yml",
198 "image": "docker.io/prom/alertmanager:v0.20.0",
202 "--web.listen-address=:{}".format(port_map
['alertmanager'][0]),
203 "--cluster.listen-address=:{}".format(port_map
['alertmanager'][1]),
205 "config-json-files": [
208 "config-json-args": [
215 def get_version(container_path
, container_id
, daemon_type
):
216 # type: (str, str, str) -> str
218 :param: daemon_type Either "prometheus", "alertmanager" or "node-exporter"
220 assert daemon_type
in ('prometheus', 'alertmanager', 'node-exporter')
221 cmd
= daemon_type
.replace('-', '_')
225 if daemon_type
== 'alertmanager':
226 for cmd
in ['alertmanager', 'prometheus-alertmanager']:
227 _
, err
, code
= call([
228 container_path
, 'exec', container_id
, cmd
,
230 ], verbosity
=CallVerbosity
.SILENT
)
233 cmd
= 'alertmanager' # reset cmd for version extraction
235 _
, err
, code
= call([
236 container_path
, 'exec', container_id
, cmd
, '--version'
239 err
.startswith('%s, version ' % cmd
):
240 version
= err
.split(' ')[2]
243 ##################################
246 class NFSGanesha(object):
247 """Defines a NFS-Ganesha container"""
250 entrypoint
= '/usr/bin/ganesha.nfsd'
251 daemon_args
= ['-F', '-L', 'STDERR']
253 required_files
= ['ganesha.conf']
263 image
=DEFAULT_IMAGE
):
264 # type: (str, Union[int, str], Dict, str) -> None
266 self
.daemon_id
= daemon_id
269 # config-json options
270 self
.pool
= dict_get(config_json
, 'pool', require
=True)
271 self
.namespace
= dict_get(config_json
, 'namespace')
272 self
.userid
= dict_get(config_json
, 'userid')
273 self
.extra_args
= dict_get(config_json
, 'extra_args', [])
274 self
.files
= dict_get(config_json
, 'files', {})
275 self
.rgw
= dict_get(config_json
, 'rgw', {})
277 # validate the supplied args
281 def init(cls
, fsid
, daemon_id
):
282 # type: (str, Union[int, str]) -> NFSGanesha
283 return cls(fsid
, daemon_id
, get_parm(args
.config_json
), args
.image
)
285 def get_container_mounts(self
, data_dir
):
286 # type: (str) -> Dict[str, str]
288 mounts
[os
.path
.join(data_dir
, 'config')] = '/etc/ceph/ceph.conf:z'
289 mounts
[os
.path
.join(data_dir
, 'keyring')] = '/etc/ceph/keyring:z'
290 mounts
[os
.path
.join(data_dir
, 'etc/ganesha')] = '/etc/ganesha:z'
292 cluster
= self
.rgw
.get('cluster', 'ceph')
293 rgw_user
= self
.rgw
.get('user', 'admin')
294 mounts
[os
.path
.join(data_dir
, 'keyring.rgw')] = \
295 '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster
, rgw_user
)
299 def get_container_envs():
300 # type: () -> List[str]
302 'CEPH_CONF=%s' % ('/etc/ceph/ceph.conf')
307 def get_version(container_id
):
308 # type: (str) -> Optional[str]
310 out
, err
, code
= call(
311 [container_path
, 'exec', container_id
,
312 NFSGanesha
.entrypoint
, '-v'])
314 match
= re
.search(r
'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out
)
316 version
= match
.group(1)
321 if not is_fsid(self
.fsid
):
322 raise Error('not an fsid: %s' % self
.fsid
)
323 if not self
.daemon_id
:
324 raise Error('invalid daemon_id: %s' % self
.daemon_id
)
326 raise Error('invalid image: %s' % self
.image
)
328 # check for the required files
329 if self
.required_files
:
330 for fname
in self
.required_files
:
331 if fname
not in self
.files
:
332 raise Error('required file missing from config-json: %s' % fname
)
334 # check for an RGW config
336 if not self
.rgw
.get('keyring'):
337 raise Error('RGW keyring is missing')
338 if not self
.rgw
.get('user'):
339 raise Error('RGW user is missing')
    def get_daemon_name(self):
        # type: () -> str
        """Return this daemon's name in '<daemon_type>.<daemon_id>' form."""
        return '%s.%s' % (self.daemon_type, self.daemon_id)
345 def get_container_name(self
, desc
=None):
346 # type: (Optional[str]) -> str
347 cname
= 'ceph-%s-%s' % (self
.fsid
, self
.get_daemon_name())
349 cname
= '%s-%s' % (cname
, desc
)
352 def get_daemon_args(self
):
353 # type: () -> List[str]
354 return self
.daemon_args
+ self
.extra_args
356 def create_daemon_dirs(self
, data_dir
, uid
, gid
):
357 # type: (str, int, int) -> None
358 """Create files under the container data dir"""
359 if not os
.path
.isdir(data_dir
):
360 raise OSError('data_dir is not a directory: %s' % (data_dir
))
362 logger
.info('Creating ganesha config...')
364 # create the ganesha conf dir
365 config_dir
= os
.path
.join(data_dir
, 'etc/ganesha')
366 makedirs(config_dir
, uid
, gid
, 0o755)
368 # populate files from the config-json
369 for fname
in self
.files
:
370 config_file
= os
.path
.join(config_dir
, fname
)
371 config_content
= dict_get_join(self
.files
, fname
)
372 logger
.info('Write file: %s' % (config_file
))
373 with
open(config_file
, 'w') as f
:
374 os
.fchown(f
.fileno(), uid
, gid
)
375 os
.fchmod(f
.fileno(), 0o600)
376 f
.write(config_content
)
378 # write the RGW keyring
380 keyring_path
= os
.path
.join(data_dir
, 'keyring.rgw')
381 with
open(keyring_path
, 'w') as f
:
382 os
.fchmod(f
.fileno(), 0o600)
383 os
.fchown(f
.fileno(), uid
, gid
)
384 f
.write(self
.rgw
.get('keyring', ''))
386 def get_rados_grace_container(self
, action
):
387 # type: (str) -> CephContainer
388 """Container for a ganesha action on the grace db"""
389 entrypoint
= '/usr/bin/ganesha-rados-grace'
392 args
=['--pool', self
.pool
]
394 args
+= ['--ns', self
.namespace
]
396 args
+= ['--userid', self
.userid
]
397 args
+= [action
, self
.get_daemon_name()]
399 data_dir
= get_data_dir(self
.fsid
, self
.daemon_type
, self
.daemon_id
)
400 volume_mounts
= self
.get_container_mounts(data_dir
)
401 envs
= self
.get_container_envs()
403 logger
.info('Creating RADOS grace for action: %s' % action
)
406 entrypoint
=entrypoint
,
408 volume_mounts
=volume_mounts
,
409 cname
=self
.get_container_name(desc
='grace-%s' % action
),
414 ##################################
417 class CephIscsi(object):
418 """Defines a Ceph-Iscsi container"""
420 daemon_type
= 'iscsi'
421 entrypoint
= '/usr/bin/rbd-target-api'
423 required_files
= ['iscsi-gateway.cfg']
429 image
=DEFAULT_IMAGE
):
430 # type: (str, Union[int, str], Dict, str) -> None
432 self
.daemon_id
= daemon_id
435 # config-json options
436 self
.files
= dict_get(config_json
, 'files', {})
438 # validate the supplied args
442 def init(cls
, fsid
, daemon_id
):
443 # type: (str, Union[int, str]) -> CephIscsi
444 return cls(fsid
, daemon_id
, get_parm(args
.config_json
), args
.image
)
447 def get_container_mounts(data_dir
, log_dir
):
448 # type: (str, str) -> Dict[str, str]
450 mounts
[os
.path
.join(data_dir
, 'config')] = '/etc/ceph/ceph.conf:z'
451 mounts
[os
.path
.join(data_dir
, 'keyring')] = '/etc/ceph/keyring:z'
452 mounts
[os
.path
.join(data_dir
, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
453 mounts
[os
.path
.join(data_dir
, 'configfs')] = '/sys/kernel/config'
454 mounts
[log_dir
] = '/var/log/rbd-target-api:z'
455 mounts
['/dev'] = '/dev'
459 def get_container_binds():
460 # type: () -> List[List[str]]
462 lib_modules
= ['type=bind',
463 'source=/lib/modules',
464 'destination=/lib/modules',
466 binds
.append(lib_modules
)
470 def get_version(container_id
):
471 # type: (str) -> Optional[str]
473 out
, err
, code
= call(
474 [container_path
, 'exec', container_id
,
475 '/usr/bin/python3', '-c', "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"])
477 version
= out
.strip()
482 if not is_fsid(self
.fsid
):
483 raise Error('not an fsid: %s' % self
.fsid
)
484 if not self
.daemon_id
:
485 raise Error('invalid daemon_id: %s' % self
.daemon_id
)
487 raise Error('invalid image: %s' % self
.image
)
489 # check for the required files
490 if self
.required_files
:
491 for fname
in self
.required_files
:
492 if fname
not in self
.files
:
493 raise Error('required file missing from config-json: %s' % fname
)
    def get_daemon_name(self):
        # type: () -> str
        """Return this daemon's name in '<daemon_type>.<daemon_id>' form."""
        return '%s.%s' % (self.daemon_type, self.daemon_id)
499 def get_container_name(self
, desc
=None):
500 # type: (Optional[str]) -> str
501 cname
= 'ceph-%s-%s' % (self
.fsid
, self
.get_daemon_name())
503 cname
= '%s-%s' % (cname
, desc
)
506 def create_daemon_dirs(self
, data_dir
, uid
, gid
):
507 # type: (str, int, int) -> None
508 """Create files under the container data dir"""
509 if not os
.path
.isdir(data_dir
):
510 raise OSError('data_dir is not a directory: %s' % (data_dir
))
512 logger
.info('Creating ceph-iscsi config...')
513 configfs_dir
= os
.path
.join(data_dir
, 'configfs')
514 makedirs(configfs_dir
, uid
, gid
, 0o755)
516 # populate files from the config-json
517 for fname
in self
.files
:
518 config_file
= os
.path
.join(data_dir
, fname
)
519 config_content
= dict_get_join(self
.files
, fname
)
520 logger
.info('Write file: %s' % (config_file
))
521 with
open(config_file
, 'w') as f
:
522 os
.fchown(f
.fileno(), uid
, gid
)
523 os
.fchmod(f
.fileno(), 0o600)
524 f
.write(config_content
)
527 def configfs_mount_umount(data_dir
, mount
=True):
528 # type: (str, bool) -> List[str]
529 mount_path
= os
.path
.join(data_dir
, 'configfs')
531 cmd
= "if ! grep -qs {0} /proc/mounts; then " \
532 "mount -t configfs none {0}; fi".format(mount_path
)
534 cmd
= "if grep -qs {0} /proc/mounts; then " \
535 "umount {0}; fi".format(mount_path
)
538 def get_tcmu_runner_container(self
):
539 # type: () -> CephContainer
540 tcmu_container
= get_container(self
.fsid
, self
.daemon_type
, self
.daemon_id
)
541 tcmu_container
.entrypoint
= "/usr/bin/tcmu-runner"
542 tcmu_container
.cname
= self
.get_container_name(desc
='tcmu')
543 # remove extra container args for tcmu container.
544 # extra args could cause issue with forking service type
545 tcmu_container
.container_args
= []
546 return tcmu_container
548 ##################################
551 class CustomContainer(object):
552 """Defines a custom container"""
553 daemon_type
= 'container'
555 def __init__(self
, fsid
: str, daemon_id
: Union
[int, str],
556 config_json
: Dict
, image
: str) -> None:
558 self
.daemon_id
= daemon_id
561 # config-json options
562 self
.entrypoint
= dict_get(config_json
, 'entrypoint')
563 self
.uid
= dict_get(config_json
, 'uid', 65534) # nobody
564 self
.gid
= dict_get(config_json
, 'gid', 65534) # nobody
565 self
.volume_mounts
= dict_get(config_json
, 'volume_mounts', {})
566 self
.args
= dict_get(config_json
, 'args', [])
567 self
.envs
= dict_get(config_json
, 'envs', [])
568 self
.privileged
= dict_get(config_json
, 'privileged', False)
569 self
.bind_mounts
= dict_get(config_json
, 'bind_mounts', [])
570 self
.ports
= dict_get(config_json
, 'ports', [])
571 self
.dirs
= dict_get(config_json
, 'dirs', [])
572 self
.files
= dict_get(config_json
, 'files', {})
575 def init(cls
, fsid
: str, daemon_id
: Union
[int, str]) -> 'CustomContainer':
576 return cls(fsid
, daemon_id
, get_parm(args
.config_json
), args
.image
)
578 def create_daemon_dirs(self
, data_dir
: str, uid
: int, gid
: int) -> None:
580 Create dirs/files below the container data directory.
582 logger
.info('Creating custom container configuration '
583 'dirs/files in {} ...'.format(data_dir
))
585 if not os
.path
.isdir(data_dir
):
586 raise OSError('data_dir is not a directory: %s' % data_dir
)
588 for dir_path
in self
.dirs
:
589 logger
.info('Creating directory: {}'.format(dir_path
))
590 dir_path
= os
.path
.join(data_dir
, dir_path
.strip('/'))
591 makedirs(dir_path
, uid
, gid
, 0o755)
593 for file_path
in self
.files
:
594 logger
.info('Creating file: {}'.format(file_path
))
595 content
= dict_get_join(self
.files
, file_path
)
596 file_path
= os
.path
.join(data_dir
, file_path
.strip('/'))
597 with
open(file_path
, 'w', encoding
='utf-8') as f
:
598 os
.fchown(f
.fileno(), uid
, gid
)
599 os
.fchmod(f
.fileno(), 0o600)
602 def get_daemon_args(self
) -> List
[str]:
605 def get_container_args(self
) -> List
[str]:
608 def get_container_envs(self
) -> List
[str]:
611 def get_container_mounts(self
, data_dir
: str) -> Dict
[str, str]:
613 Get the volume mounts. Relative source paths will be located below
614 `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
624 /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
628 for source
, destination
in self
.volume_mounts
.items():
629 source
= os
.path
.join(data_dir
, source
)
630 mounts
[source
] = destination
633 def get_container_binds(self
, data_dir
: str) -> List
[List
[str]]:
635 Get the bind mounts. Relative `source=...` paths will be located below
636 `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
641 'source=lib/modules',
642 'destination=/lib/modules',
648 'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
652 binds
= self
.bind_mounts
.copy()
654 for index
, value
in enumerate(bind
):
655 match
= re
.match(r
'^source=(.+)$', value
)
657 bind
[index
] = 'source={}'.format(os
.path
.join(
658 data_dir
, match
.group(1)))
661 ##################################
def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
    """
    Helper function to get a key from a dictionary.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :param default: The default value in case the key does not
        exist. Default is `None`.
    :param require: Set to `True` if the key is required. An
        exception will be raised if the key does not exist in
        the given dictionary.
    :return: Returns the value of the given key.
    :raises: :exc:`Error` if the given key does not exist
        and `require` is set to `True`.
    """
    # Membership test directly on the dict — `key not in d.keys()` builds
    # an unnecessary view object and is unidiomatic.
    if require and key not in d:
        raise Error('{} missing from dict'.format(key))
    return d.get(key, default)
682 ##################################
def dict_get_join(d: Dict, key: str) -> Any:
    """
    Helper function to get the value of a given key from a dictionary.
    `List` values will be converted to a string by joining them with a
    line break.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :return: Returns the value of the given key. If it was a `list`, it
        will be joined with a line break.
    """
    value = d.get(key)
    if isinstance(value, list):
        value = '\n'.join(map(str, value))
    return value
700 ##################################
def get_supported_daemons():
    # type: () -> List[str]
    """Return every daemon type this script knows how to deploy."""
    names = list(Ceph.daemons)
    names.extend(Monitoring.components)
    for extra in (NFSGanesha.daemon_type,
                  CephIscsi.daemon_type,
                  CustomContainer.daemon_type):
        names.append(extra)
    assert len(names) == len(set(names))  # daemon types must be unique
    return names
713 ##################################
716 def attempt_bind(s
, address
, port
):
717 # type: (socket.socket, str, int) -> None
719 s
.setsockopt(socket
.SOL_SOCKET
, socket
.SO_REUSEADDR
, 1)
720 s
.bind((address
, port
))
721 except (socket
.error
, OSError) as e
: # py2 and py3
722 msg
= 'Cannot bind to IP %s port %d: %s' % (address
, port
, e
)
724 if e
.errno
== errno
.EADDRINUSE
:
726 elif e
.errno
== errno
.EADDRNOTAVAIL
:
732 def port_in_use(port_num
):
733 # type: (int) -> bool
734 """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
735 logger
.info('Verifying port %d ...' % port_num
)
737 s
= socket
.socket(socket
.AF_INET
, socket
.SOCK_STREAM
)
738 attempt_bind(s
, '0.0.0.0', port_num
)
740 s
= socket
.socket(socket
.AF_INET6
, socket
.SOCK_STREAM
)
741 attempt_bind(s
, '::', port_num
)
748 def check_ip_port(ip
, port
):
749 # type: (str, int) -> None
750 if not args
.skip_ping_check
:
751 logger
.info('Verifying IP %s port %d ...' % (ip
, port
))
753 s
= socket
.socket(socket
.AF_INET6
, socket
.SOCK_STREAM
)
756 s
= socket
.socket(socket
.AF_INET
, socket
.SOCK_STREAM
)
758 attempt_bind(s
, ip
, port
)
762 ##################################
764 # this is an abbreviated version of
765 # https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py
766 # that drops all of the compatibility (this is Unix/Linux only).
771 TimeoutError
= OSError
774 class Timeout(TimeoutError
):
776 Raised when the lock could not be acquired in *timeout*
780 def __init__(self
, lock_file
):
783 #: The path of the file lock.
784 self
.lock_file
= lock_file
788 temp
= "The file lock '{}' could not be acquired."\
789 .format(self
.lock_file
)
793 class _Acquire_ReturnProxy(object):
794 def __init__(self
, lock
):
801 def __exit__(self
, exc_type
, exc_value
, traceback
):
806 class FileLock(object):
807 def __init__(self
, name
, timeout
=-1):
808 if not os
.path
.exists(LOCK_DIR
):
809 os
.mkdir(LOCK_DIR
, 0o700)
810 self
._lock
_file
= os
.path
.join(LOCK_DIR
, name
+ '.lock')
812 # The file descriptor for the *_lock_file* as it is returned by the
813 # os.open() function.
814 # This file lock is only NOT None, if the object currently holds the
816 self
._lock
_file
_fd
= None
817 self
.timeout
= timeout
818 # The lock counter is used for implementing the nested locking
819 # mechanism. Whenever the lock is acquired, the counter is increased and
820 # the lock is only released, when this value is 0 again.
821 self
._lock
_counter
= 0
826 return self
._lock
_file
_fd
is not None
828 def acquire(self
, timeout
=None, poll_intervall
=0.05):
830 Acquires the file lock or fails with a :exc:`Timeout` error.
831 .. code-block:: python
832 # You can use this method in the context manager (recommended)
835 # Or use an equivalent try-finally construct:
842 The maximum time waited for the file lock.
843 If ``timeout < 0``, there is no timeout and this method will
844 block until the lock could be acquired.
845 If ``timeout`` is None, the default :attr:`~timeout` is used.
846 :arg float poll_intervall:
847 We check once in *poll_intervall* seconds if we can acquire the
850 if the lock could not be acquired in *timeout* seconds.
851 .. versionchanged:: 2.0.0
852 This method returns now a *proxy* object instead of *self*,
853 so that it can be used in a with statement without side effects.
855 # Use the default timeout, if no timeout is provided.
857 timeout
= self
.timeout
859 # Increment the number right at the beginning.
860 # We can still undo it, if something fails.
861 self
._lock
_counter
+= 1
864 lock_filename
= self
._lock
_file
865 start_time
= time
.time()
868 if not self
.is_locked
:
869 logger
.debug('Acquiring lock %s on %s', lock_id
,
874 logger
.debug('Lock %s acquired on %s', lock_id
,
877 elif timeout
>= 0 and time
.time() - start_time
> timeout
:
878 logger
.warning('Timeout acquiring lock %s on %s', lock_id
,
880 raise Timeout(self
._lock
_file
)
883 'Lock %s not acquired on %s, waiting %s seconds ...',
884 lock_id
, lock_filename
, poll_intervall
886 time
.sleep(poll_intervall
)
888 # Something did go wrong, so decrement the counter.
889 self
._lock
_counter
= max(0, self
._lock
_counter
- 1)
892 return _Acquire_ReturnProxy(lock
= self
)
894 def release(self
, force
=False):
896 Releases the file lock.
897 Please note, that the lock is only completly released, if the lock
899 Also note, that the lock file itself is not automatically deleted.
901 If true, the lock counter is ignored and the lock is released in
905 self
._lock
_counter
-= 1
907 if self
._lock
_counter
== 0 or force
:
909 lock_filename
= self
._lock
_file
911 logger
.debug('Releasing lock %s on %s', lock_id
, lock_filename
)
913 self
._lock
_counter
= 0
914 logger
.debug('Lock %s released on %s', lock_id
, lock_filename
)
922 def __exit__(self
, exc_type
, exc_value
, traceback
):
927 self
.release(force
=True)
931 open_mode
= os
.O_RDWR | os
.O_CREAT | os
.O_TRUNC
932 fd
= os
.open(self
._lock
_file
, open_mode
)
935 fcntl
.flock(fd
, fcntl
.LOCK_EX | fcntl
.LOCK_NB
)
936 except (IOError, OSError):
939 self
._lock
_file
_fd
= fd
943 # Do not remove the lockfile:
945 # https://github.com/benediktschmitt/py-filelock/issues/31
946 # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
947 fd
= self
._lock
_file
_fd
948 self
._lock
_file
_fd
= None
949 fcntl
.flock(fd
, fcntl
.LOCK_UN
) # type: ignore
950 os
.close(fd
) # type: ignore
954 ##################################
955 # Popen wrappers, lifted from ceph-volume
957 class CallVerbosity(Enum
):
959 # log stdout/stderr to logger.debug
961 # On a non-zero exit status, it will forcefully set
962 # logging ON for the terminal
963 VERBOSE_ON_FAILURE
= 2
964 # log at info (instead of debug) level.
968 def call(command
: List
[str],
969 desc
: Optional
[str] = None,
970 verbosity
: CallVerbosity
= CallVerbosity
.VERBOSE_ON_FAILURE
,
971 timeout
: Optional
[int] = DEFAULT_TIMEOUT
,
972 **kwargs
) -> Tuple
[str, str, int]:
974 Wrap subprocess.Popen to
976 - log stdout/stderr to a logger,
978 - cleanly return out, err, returncode
980 :param timeout: timeout in seconds
986 timeout
= timeout
or args
.timeout
988 logger
.debug("Running command: %s" % ' '.join(command
))
989 process
= subprocess
.Popen(
991 stdout
=subprocess
.PIPE
,
992 stderr
=subprocess
.PIPE
,
996 # get current p.stdout flags, add O_NONBLOCK
997 assert process
.stdout
is not None
998 assert process
.stderr
is not None
999 stdout_flags
= fcntl
.fcntl(process
.stdout
, fcntl
.F_GETFL
)
1000 stderr_flags
= fcntl
.fcntl(process
.stderr
, fcntl
.F_GETFL
)
1001 fcntl
.fcntl(process
.stdout
, fcntl
.F_SETFL
, stdout_flags | os
.O_NONBLOCK
)
1002 fcntl
.fcntl(process
.stderr
, fcntl
.F_SETFL
, stderr_flags | os
.O_NONBLOCK
)
1008 out_buffer
= '' # partial line (no newline yet)
1009 err_buffer
= '' # partial line (no newline yet)
1010 start_time
= time
.time()
1013 end_time
= start_time
+ timeout
1015 if end_time
and (time
.time() >= end_time
):
1017 if process
.poll() is None:
1018 logger
.info(desc
+ 'timeout after %s seconds' % timeout
)
1020 if reads
and process
.poll() is not None:
1021 # we want to stop, but first read off anything remaining
1025 reads
, _
, _
= select
.select(
1026 [process
.stdout
.fileno(), process
.stderr
.fileno()],
1031 message_b
= os
.read(fd
, 1024)
1032 if isinstance(message_b
, bytes
):
1033 message
= message_b
.decode('utf-8')
1034 if isinstance(message_b
, str):
1036 if stop
and message
:
1037 # process has terminated, but have more to read still, so not stopping yet
1038 # (os.read returns '' when it encounters EOF)
1042 if fd
== process
.stdout
.fileno():
1044 message
= out_buffer
+ message
1045 lines
= message
.split('\n')
1046 out_buffer
= lines
.pop()
1048 if verbosity
== CallVerbosity
.VERBOSE
:
1049 logger
.info(desc
+ 'stdout ' + line
)
1050 elif verbosity
!= CallVerbosity
.SILENT
:
1051 logger
.debug(desc
+ 'stdout ' + line
)
1052 elif fd
== process
.stderr
.fileno():
1054 message
= err_buffer
+ message
1055 lines
= message
.split('\n')
1056 err_buffer
= lines
.pop()
1058 if verbosity
== CallVerbosity
.VERBOSE
:
1059 logger
.info(desc
+ 'stderr ' + line
)
1060 elif verbosity
!= CallVerbosity
.SILENT
:
1061 logger
.debug(desc
+ 'stderr ' + line
)
1064 except (IOError, OSError):
1066 if verbosity
== CallVerbosity
.VERBOSE
:
1067 logger
.debug(desc
+ 'profile rt=%s, stop=%s, exit=%s, reads=%s'
1068 % (time
.time()-start_time
, stop
, process
.poll(), reads
))
1070 returncode
= process
.wait()
1072 if out_buffer
!= '':
1073 if verbosity
== CallVerbosity
.VERBOSE
:
1074 logger
.info(desc
+ 'stdout ' + out_buffer
)
1075 elif verbosity
!= CallVerbosity
.SILENT
:
1076 logger
.debug(desc
+ 'stdout ' + out_buffer
)
1077 if err_buffer
!= '':
1078 if verbosity
== CallVerbosity
.VERBOSE
:
1079 logger
.info(desc
+ 'stderr ' + err_buffer
)
1080 elif verbosity
!= CallVerbosity
.SILENT
:
1081 logger
.debug(desc
+ 'stderr ' + err_buffer
)
1083 if returncode
!= 0 and verbosity
== CallVerbosity
.VERBOSE_ON_FAILURE
:
1084 # dump stdout + stderr
1085 logger
.info('Non-zero exit code %d from %s' % (returncode
, ' '.join(command
)))
1086 for line
in out
.splitlines():
1087 logger
.info(desc
+ 'stdout ' + line
)
1088 for line
in err
.splitlines():
1089 logger
.info(desc
+ 'stderr ' + line
)
1091 return out
, err
, returncode
def call_throws(command: List[str],
                desc: Optional[str] = None,
                verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
                timeout: Optional[int] = DEFAULT_TIMEOUT,
                **kwargs) -> Tuple[str, str, int]:
    """Run *command* via call() and raise on a non-zero exit status.

    :param command: the command (argv list) to execute
    :param desc: optional description used for log prefixes
    :param verbosity: how chatty the wrapped call() should be
    :param timeout: timeout in seconds (None uses the global default)
    :return: (stdout, stderr, returncode) of the successful command
    :raises: RuntimeError if the command exits non-zero
    """
    out, err, ret = call(command, desc, verbosity, timeout, **kwargs)
    # Only raise on failure; a zero exit status falls through to the return.
    if ret:
        raise RuntimeError('Failed command: %s' % ' '.join(command))
    return out, err, ret
1105 def call_timeout(command
, timeout
):
1106 # type: (List[str], int) -> int
1108 logger
.debug('Running command (timeout=%s): %s'
1109 % (timeout
, ' '.join(command
)))
1111 def raise_timeout(command
, timeout
):
1112 # type: (List[str], int) -> NoReturn
1113 msg
= 'Command \'%s\' timed out after %s seconds' % (command
, timeout
)
1115 raise TimeoutExpired(msg
)
1117 def call_timeout_py2(command
, timeout
):
1118 # type: (List[str], int) -> int
1119 proc
= subprocess
.Popen(command
)
1120 thread
= Thread(target
=proc
.wait
)
1122 thread
.join(timeout
)
1123 if thread
.is_alive():
1126 raise_timeout(command
, timeout
)
1127 return proc
.returncode
1129 def call_timeout_py3(command
, timeout
):
1130 # type: (List[str], int) -> int
1132 return subprocess
.call(command
, timeout
=timeout
)
1133 except subprocess
.TimeoutExpired
as e
:
1134 raise_timeout(command
, timeout
)
1137 if sys
.version_info
>= (3, 3):
1138 ret
= call_timeout_py3(command
, timeout
)
1140 # py2 subprocess has no timeout arg
1141 ret
= call_timeout_py2(command
, timeout
)
1144 ##################################
1147 def is_available(what
, func
):
1148 # type: (str, Callable[[], bool]) -> None
1150 Wait for a service to become available
1152 :param what: the name of the service
1153 :param func: the callable object that determines availability
1156 logger
.info('Waiting for %s...' % what
)
1160 logger
.info('%s is available'
1164 raise Error('%s not available after %s tries'
1167 logger
.info('%s not available, waiting (%s/%s)...'
1168 % (what
, num
, retry
))
1174 def read_config(fn
):
1175 # type: (Optional[str]) -> ConfigParser
1176 # bend over backwards here because py2's ConfigParser doesn't like
1177 # whitespace before config option names (e.g., '\n foo = bar\n').
1179 if sys
.version_info
>= (3, 2):
1182 cp
= SafeConfigParser()
1185 with
open(fn
, 'r') as f
:
1187 nice_conf
= re
.sub(r
'\n(\s)+', r
'\n', raw_conf
)
1188 s_io
= StringIO(nice_conf
)
1189 if sys
.version_info
>= (3, 2):
1198 # type: (str) -> str
1199 p
= os
.path
.expanduser(p
)
1200 return os
.path
.abspath(p
)
1203 def get_file_timestamp(fn
):
1204 # type: (str) -> Optional[str]
1206 mt
= os
.path
.getmtime(fn
)
1207 return datetime
.datetime
.fromtimestamp(
1208 mt
, tz
=datetime
.timezone
.utc
1214 def try_convert_datetime(s
):
1215 # type: (str) -> Optional[str]
1216 # This is super irritating because
1217 # 1) podman and docker use different formats
1218 # 2) python's strptime can't parse either one
1221 # docker 18.09.7: 2020-03-03T09:21:43.636153304Z
1222 # podman 1.7.0: 2020-03-03T15:52:30.136257504-06:00
1223 # 2020-03-03 15:52:30.136257504 -0600 CST
1224 # (In the podman case, there is a different string format for
1225 # 'inspect' and 'inspect --format {{.Created}}'!!)
1227 # In *all* cases, the 9 digit second precision is too much for
1228 # python's strptime. Shorten it to 6 digits.
1229 p
= re
.compile(r
'(\.[\d]{6})[\d]*')
1232 # replace trailing Z with -0000, since (on python 3.6.8) it won't parse
1233 if s
and s
[-1] == 'Z':
1234 s
= s
[:-1] + '-0000'
1236 # cut off the redundant 'CST' part that strptime can't parse, if
1239 s
= ' '.join(v
[0:3])
1241 # try parsing with several format strings
1243 '%Y-%m-%dT%H:%M:%S.%f%z',
1244 '%Y-%m-%d %H:%M:%S.%f %z',
1248 # return timestamp normalized to UTC, rendered as DATEFMT.
1249 return datetime
.datetime
.strptime(s
, f
).astimezone(tz
=datetime
.timezone
.utc
).strftime(DATEFMT
)
def get_podman_version():
    # type: () -> Tuple[int, ...]
    """Return the installed podman version as a tuple of ints."""
    if 'podman' not in container_path:
        raise ValueError('not using podman')
    version_out, _, _ = call_throws([container_path, '--version'])
    return _parse_podman_version(version_out)
1263 def _parse_podman_version(out
):
1264 # type: (str) -> Tuple[int, ...]
1265 _
, _
, version_str
= out
.strip().split()
1267 def to_int(val
, org_e
=None):
1268 if not val
and org_e
:
1272 except ValueError as e
:
1273 return to_int(val
[0:-1], org_e
or e
)
1275 return tuple(map(to_int
, version_str
.split('.')))
1280 return socket
.gethostname()
1285 return socket
.getfqdn() or socket
.gethostname()
1290 return platform
.uname().machine
1293 def generate_service_id():
1295 return get_hostname() + '.' + ''.join(random
.choice(string
.ascii_lowercase
)
1299 def generate_password():
1301 return ''.join(random
.choice(string
.ascii_lowercase
+ string
.digits
)
1305 def normalize_container_id(i
):
1306 # type: (str) -> str
1307 # docker adds the sha256: prefix, but AFAICS both
1308 # docker (18.09.7 in bionic at least) and podman
1309 # both always use sha256, so leave off the prefix
1312 if i
.startswith(prefix
):
1319 return str(uuid
.uuid1())
1323 # type: (str) -> bool
1331 def infer_fsid(func
):
1333 If we only find a single fsid in /var/lib/ceph/*, use that
1338 logger
.debug('Using specified fsid: %s' % args
.fsid
)
1342 daemon_list
= list_daemons(detail
=False)
1343 for daemon
in daemon_list
:
1344 if not is_fsid(daemon
['fsid']):
1347 elif 'name' not in args
or not args
.name
:
1348 # args.name not specified
1349 fsids_set
.add(daemon
['fsid'])
1350 elif daemon
['name'] == args
.name
:
1351 # args.name is a match
1352 fsids_set
.add(daemon
['fsid'])
1353 fsids
= sorted(fsids_set
)
1356 # some commands do not always require an fsid
1358 elif len(fsids
) == 1:
1359 logger
.info('Inferring fsid %s' % fsids
[0])
1360 args
.fsid
= fsids
[0]
1362 raise Error('Cannot infer an fsid, one must be specified: %s' % fsids
)
1368 def infer_config(func
):
1370 If we find a MON daemon, use the config from that container
1373 def _infer_config():
1375 logger
.debug('Using specified config: %s' % args
.config
)
1381 daemon_list
= list_daemons(detail
=False)
1382 for daemon
in daemon_list
:
1383 if daemon
['name'].startswith('mon.'):
1384 name
= daemon
['name']
1387 config
= '/var/lib/ceph/{}/{}/config'.format(args
.fsid
, name
)
1389 logger
.info('Inferring config %s' % config
)
1390 args
.config
= config
1391 elif os
.path
.exists(SHELL_DEFAULT_CONF
):
1392 logger
.debug('Using default config: %s' % SHELL_DEFAULT_CONF
)
1393 args
.config
= SHELL_DEFAULT_CONF
1396 return _infer_config
def _get_default_image():
    # type: () -> str
    """Return the default ceph container image, emitting a colored
    warning first when this build tracks master instead of a release."""
    if DEFAULT_IMAGE_IS_MASTER:
        warn = '''This is a development version of cephadm.
For information regarding the latest stable release:
    https://docs.ceph.com/docs/{}/cephadm/install
'''.format(LATEST_STABLE_RELEASE)
        for warn_line in warn.splitlines():
            logger.warning('{}{}{}'.format(termcolor.yellow, warn_line, termcolor.end))
    return DEFAULT_IMAGE
1410 def infer_image(func
):
1412 Use the most recent ceph image
1417 args
.image
= os
.environ
.get('CEPHADM_IMAGE')
1419 args
.image
= get_last_local_ceph_image()
1421 args
.image
= _get_default_image()
1427 def default_image(func
):
1429 def _default_image():
1431 if 'name' in args
and args
.name
:
1432 type_
= args
.name
.split('.', 1)[0]
1433 if type_
in Monitoring
.components
:
1434 args
.image
= Monitoring
.components
[type_
]['image']
1436 args
.image
= os
.environ
.get('CEPHADM_IMAGE')
1438 args
.image
= _get_default_image()
1442 return _default_image
def get_last_local_ceph_image():
    # type: () -> Optional[str]
    """
    :return: The most recent local ceph image (already pulled)
    """
    list_cmd = [container_path, 'images',
                '--filter', 'label=ceph=True',
                '--filter', 'dangling=false',
                '--format', '{{.Repository}}@{{.Digest}}']
    out, _, _ = call_throws(list_cmd)
    return _filter_last_local_ceph_image(out)
1457 def _filter_last_local_ceph_image(out
):
1458 # str -> Optional[str]
1459 for image
in out
.splitlines():
1460 if image
and not image
.endswith('@'):
1461 logger
.info('Using recent ceph image %s' % image
)
def write_tmp(s, uid, gid):
    # type: (str, int, int) -> Any
    """Write *s* to a NamedTemporaryFile owned by uid:gid and return the
    still-open file object (deleted automatically on close)."""
    handle = tempfile.NamedTemporaryFile(mode='w',
                                         prefix='ceph-tmp')
    os.fchown(handle.fileno(), uid, gid)
    handle.write(s)
    handle.flush()
    return handle
def makedirs(dir, uid, gid, mode):
    # type: (str, int, int, int) -> None
    """Create *dir* if missing, then force ownership and permissions."""
    if not os.path.exists(dir):
        os.makedirs(dir, mode=mode)
    else:
        logger.warning('%s already exists' % dir)
    os.chown(dir, uid, gid)
    os.chmod(dir, mode)   # the above is masked by umask...
def get_data_dir(fsid, t, n):
    # type: (str, str, Union[int, str]) -> str
    """Return the daemon data directory: <data_dir>/<fsid>/<type>.<id>."""
    daemon_subdir = '%s.%s' % (t, n)
    return os.path.join(args.data_dir, fsid, daemon_subdir)
def get_log_dir(fsid):
    # type: (str) -> str
    """Return the per-cluster log directory: <log_dir>/<fsid>."""
    cluster_log_dir = os.path.join(args.log_dir, fsid)
    return cluster_log_dir
def make_data_dir_base(fsid, uid, gid):
    # type: (str, int, int) -> str
    """Create <data_dir>/<fsid> plus its crash/ and crash/posted
    subdirectories, all owned by uid:gid, and return the base path."""
    base = os.path.join(args.data_dir, fsid)
    for d in (base,
              os.path.join(base, 'crash'),
              os.path.join(base, 'crash', 'posted')):
        makedirs(d, uid, gid, DATA_DIR_MODE)
    return base
def make_data_dir(fsid, daemon_type, daemon_id, uid=None, gid=None):
    # type: (str, str, Union[int, str], Optional[int], Optional[int]) -> str
    """Create (and return) the data directory for one daemon, resolving
    the ceph uid/gid from the container image when not supplied."""
    if uid is None or gid is None:
        uid, gid = extract_uid_gid()
    make_data_dir_base(fsid, uid, gid)
    data_dir = get_data_dir(fsid, daemon_type, daemon_id)
    makedirs(data_dir, uid, gid, DATA_DIR_MODE)
    return data_dir
def make_log_dir(fsid, uid=None, gid=None):
    # type: (str, Optional[int], Optional[int]) -> str
    """Create (and return) the per-cluster log directory, resolving the
    ceph uid/gid from the container image when not supplied."""
    if uid is None or gid is None:
        uid, gid = extract_uid_gid()
    log_dir = get_log_dir(fsid)
    makedirs(log_dir, uid, gid, LOG_DIR_MODE)
    return log_dir
def make_var_run(fsid, uid, gid):
    # type: (str, int, int) -> None
    """Create /var/run/ceph/<fsid> with mode 0770 owned by uid:gid."""
    install_cmd = ['install', '-d', '-m0770', '-o', str(uid), '-g', str(gid),
                   '/var/run/ceph/%s' % fsid]
    call_throws(install_cmd)
def copy_tree(src, dst, uid=None, gid=None):
    # type: (List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy a directory tree from src to dst
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid()

    for src_dir in src:
        dst_dir = dst
        if os.path.isdir(dst):
            dst_dir = os.path.join(dst, os.path.basename(src_dir))

        logger.debug('copy directory \'%s\' -> \'%s\'' % (src_dir, dst_dir))
        # remove any stale destination first; copytree refuses to overwrite
        shutil.rmtree(dst_dir, ignore_errors=True)
        shutil.copytree(src_dir, dst_dir)  # dirs_exist_ok needs python 3.8

        # fix up ownership of everything we just copied
        for dirpath, dirnames, filenames in os.walk(dst_dir):
            logger.debug('chown %s:%s \'%s\'' % (uid, gid, dirpath))
            os.chown(dirpath, uid, gid)
            for filename in filenames:
                logger.debug('chown %s:%s \'%s\'' % (uid, gid, filename))
                os.chown(os.path.join(dirpath, filename), uid, gid)
def copy_files(src, dst, uid=None, gid=None):
    # type: (List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy a files from src to dst
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid()

    for src_file in src:
        dst_file = dst
        if os.path.isdir(dst):
            dst_file = os.path.join(dst, os.path.basename(src_file))

        logger.debug('copy file \'%s\' -> \'%s\'' % (src_file, dst_file))
        shutil.copyfile(src_file, dst_file)

        logger.debug('chown %s:%s \'%s\'' % (uid, gid, dst_file))
        os.chown(dst_file, uid, gid)
def move_files(src, dst, uid=None, gid=None):
    # type: (List[str], str, Optional[int], Optional[int]) -> None
    """
    Move files from src to dst
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid()

    for src_file in src:
        dst_file = dst
        if os.path.isdir(dst):
            dst_file = os.path.join(dst, os.path.basename(src_file))

        if os.path.islink(src_file):
            # shutil.move() in py2 does not handle symlinks correctly
            src_rl = os.readlink(src_file)
            logger.debug("symlink '%s' -> '%s'" % (dst_file, src_rl))
            os.symlink(src_rl, dst_file)
            os.unlink(src_file)
        else:
            logger.debug("move file '%s' -> '%s'" % (src_file, dst_file))
            shutil.move(src_file, dst_file)
        logger.debug('chown %s:%s \'%s\'' % (uid, gid, dst_file))
        os.chown(dst_file, uid, gid)
## copied from distutils ##
def find_executable(executable, path=None):
    """Tries to find 'executable' in the directories listed in 'path'.
    A string listing directories separated by 'os.pathsep'; defaults to
    os.environ['PATH']. Returns the complete filename or None if not found.
    """
    _, ext = os.path.splitext(executable)
    if (sys.platform == 'win32') and (ext != '.exe'):
        executable = executable + '.exe'

    # an explicit path to an existing file wins outright
    if os.path.isfile(executable):
        return executable

    if path is None:
        path = os.environ.get('PATH', None)
        if path is None:
            try:
                path = os.confstr("CS_PATH")
            except (AttributeError, ValueError):
                # os.confstr() or CS_PATH is not available
                path = os.defpath
        # bpo-35755: Don't use os.defpath if the PATH environment variable is
        # set to an empty string

    # PATH='' doesn't match, whereas PATH=':' looks in the current directory
    if not path:
        return None

    for p in path.split(os.pathsep):
        f = os.path.join(p, executable)
        if os.path.isfile(f):
            # the file exists, we have a shot at spawn working
            return f
    return None
def find_program(filename):
    # type: (str) -> str
    """Like find_executable(), but raise ValueError instead of
    returning None when the program cannot be located."""
    located = find_executable(filename)
    if located is None:
        raise ValueError('%s not found' % filename)
    return located
def get_unit_name(fsid, daemon_type, daemon_id=None):
    # type: (str, str, Optional[Union[int, str]]) -> str
    """Build the systemd unit name for a daemon.

    Accepts either a bare type or type + id.
    """
    if daemon_id is not None:
        return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
    return 'ceph-%s@%s' % (fsid, daemon_type)
def get_unit_name_by_daemon_name(fsid, name):
    """Look up the systemd unit for a deployed daemon by its name."""
    daemon = get_daemon_description(fsid, name)
    try:
        return daemon['systemd_unit']
    except KeyError:
        raise Error('Failed to get unit name for {}'.format(daemon))
def check_unit(unit_name):
    # type: (str) -> Tuple[bool, str, bool]
    """Query systemd for a unit and return (enabled, state, installed).

    state is one of 'running', 'stopped', 'error', 'unknown'.
    """
    # NOTE: we ignore the exit code here because systemctl outputs
    # various exit codes based on the state of the service, but the
    # string result is more explicit (and sufficient).
    enabled = False
    installed = False
    try:
        out, err, code = call(['systemctl', 'is-enabled', unit_name],
                              verbosity=CallVerbosity.DEBUG)
        if code == 0:
            enabled = True
            installed = True
        elif "disabled" in out:
            installed = True
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        enabled = False
        installed = False

    state = 'unknown'
    try:
        out, err, code = call(['systemctl', 'is-active', unit_name],
                              verbosity=CallVerbosity.DEBUG)
        out = out.strip()
        if out in ['active']:
            state = 'running'
        elif out in ['inactive']:
            state = 'stopped'
        elif out in ['failed', 'auto-restart']:
            state = 'error'
        else:
            state = 'unknown'
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        state = 'unknown'
    return (enabled, state, installed)
def check_units(units, enabler=None):
    # type: (List[str], Optional[Packager]) -> bool
    """Return True as soon as one unit is enabled and running; otherwise
    optionally ask *enabler* to enable each installed unit."""
    for u in units:
        (enabled, state, installed) = check_unit(u)
        if enabled and state == 'running':
            logger.info('Unit %s is enabled and running' % u)
            return True
        if enabler is not None:
            if installed:
                logger.info('Enabling unit %s' % u)
                enabler.enable_service(u)
    return False
def is_container_running(name: str) -> bool:
    """Return True when the container engine lists *name* as running."""
    out, err, ret = call_throws([
        container_path, 'ps',
        '--format', '{{.Names}}'])
    return name in out.splitlines()
def get_legacy_config_fsid(cluster, legacy_dir=None):
    # type: (str, Optional[str]) -> Optional[str]
    """Read the fsid from a legacy /etc/ceph/<cluster>.conf, if present."""
    config_file = '/etc/ceph/%s.conf' % cluster
    if legacy_dir is not None:
        config_file = os.path.abspath(legacy_dir + config_file)

    if os.path.exists(config_file):
        config = read_config(config_file)
        if config.has_section('global') and config.has_option('global', 'fsid'):
            return config.get('global', 'fsid')
    return None
def get_legacy_daemon_fsid(cluster, daemon_type, daemon_id, legacy_dir=None):
    # type: (str, str, Union[int, str], Optional[str]) -> Optional[str]
    """Determine the fsid of a legacy (pre-cephadm) daemon.

    OSDs record their fsid in a ceph_fsid file; for everything else
    (and as a fallback) consult the legacy cluster config.
    """
    fsid = None
    if daemon_type == 'osd':
        try:
            fsid_file = os.path.join(args.data_dir,
                                     daemon_type,
                                     'ceph-%s' % daemon_id,
                                     'ceph_fsid')
            if legacy_dir is not None:
                fsid_file = os.path.abspath(legacy_dir + fsid_file)
            with open(fsid_file, 'r') as f:
                fsid = f.read().strip()
        except IOError:
            pass
    if not fsid:
        fsid = get_legacy_config_fsid(cluster, legacy_dir=legacy_dir)
    return fsid
def get_daemon_args(fsid, daemon_type, daemon_id):
    # type: (str, str, Union[int, str]) -> List[str]
    """Assemble the daemon-type-specific command line arguments."""
    r = list()  # type: List[str]

    if daemon_type in Ceph.daemons and daemon_type != 'crash':
        # route daemon logging to stderr so the container runtime
        # captures it instead of writing files inside the container
        r += [
            '--setuser', 'ceph',
            '--setgroup', 'ceph',
            '--default-log-to-file=false',
            '--default-log-to-stderr=true',
            '--default-log-stderr-prefix="debug "',
        ]
        if daemon_type == 'mon':
            r += [
                '--default-mon-cluster-log-to-file=false',
                '--default-mon-cluster-log-to-stderr=true',
            ]
    elif daemon_type in Monitoring.components:
        metadata = Monitoring.components[daemon_type]
        r += metadata.get('args', list())
        if daemon_type == 'alertmanager':
            config = get_parm(args.config_json)
            peers = config.get('peers', list())  # type: ignore
            for peer in peers:
                r += ["--cluster.peer={}".format(peer)]
            # some alertmanager, by default, look elsewhere for a config
            r += ["--config.file=/etc/alertmanager/alertmanager.yml"]
    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
        r += nfs_ganesha.get_daemon_args()
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(fsid, daemon_id)
        r.extend(cc.get_daemon_args())

    return r
1795 def create_daemon_dirs(fsid
, daemon_type
, daemon_id
, uid
, gid
,
1796 config
=None, keyring
=None):
1797 # type: (str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None
1798 data_dir
= make_data_dir(fsid
, daemon_type
, daemon_id
, uid
=uid
, gid
=gid
)
1799 make_log_dir(fsid
, uid
=uid
, gid
=gid
)
1802 config_path
= os
.path
.join(data_dir
, 'config')
1803 with
open(config_path
, 'w') as f
:
1804 os
.fchown(f
.fileno(), uid
, gid
)
1805 os
.fchmod(f
.fileno(), 0o600)
1809 keyring_path
= os
.path
.join(data_dir
, 'keyring')
1810 with
open(keyring_path
, 'w') as f
:
1811 os
.fchmod(f
.fileno(), 0o600)
1812 os
.fchown(f
.fileno(), uid
, gid
)
1815 if daemon_type
in Monitoring
.components
.keys():
1816 config_json
: Dict
[str, Any
] = get_parm(args
.config_json
)
1817 required_files
= Monitoring
.components
[daemon_type
].get('config-json-files', list())
1819 # Set up directories specific to the monitoring component
1821 if daemon_type
== 'prometheus':
1822 data_dir_root
= get_data_dir(fsid
, daemon_type
, daemon_id
)
1823 config_dir
= 'etc/prometheus'
1824 makedirs(os
.path
.join(data_dir_root
, config_dir
), uid
, gid
, 0o755)
1825 makedirs(os
.path
.join(data_dir_root
, config_dir
, 'alerting'), uid
, gid
, 0o755)
1826 makedirs(os
.path
.join(data_dir_root
, 'data'), uid
, gid
, 0o755)
1827 elif daemon_type
== 'grafana':
1828 data_dir_root
= get_data_dir(fsid
, daemon_type
, daemon_id
)
1829 config_dir
= 'etc/grafana'
1830 makedirs(os
.path
.join(data_dir_root
, config_dir
), uid
, gid
, 0o755)
1831 makedirs(os
.path
.join(data_dir_root
, config_dir
, 'certs'), uid
, gid
, 0o755)
1832 makedirs(os
.path
.join(data_dir_root
, config_dir
, 'provisioning/datasources'), uid
, gid
, 0o755)
1833 makedirs(os
.path
.join(data_dir_root
, 'data'), uid
, gid
, 0o755)
1834 elif daemon_type
== 'alertmanager':
1835 data_dir_root
= get_data_dir(fsid
, daemon_type
, daemon_id
)
1836 config_dir
= 'etc/alertmanager'
1837 makedirs(os
.path
.join(data_dir_root
, config_dir
), uid
, gid
, 0o755)
1838 makedirs(os
.path
.join(data_dir_root
, config_dir
, 'data'), uid
, gid
, 0o755)
1840 # populate the config directory for the component from the config-json
1841 for fname
in required_files
:
1842 if 'files' in config_json
: # type: ignore
1843 content
= dict_get_join(config_json
['files'], fname
)
1844 with
open(os
.path
.join(data_dir_root
, config_dir
, fname
), 'w') as f
:
1845 os
.fchown(f
.fileno(), uid
, gid
)
1846 os
.fchmod(f
.fileno(), 0o600)
1849 elif daemon_type
== NFSGanesha
.daemon_type
:
1850 nfs_ganesha
= NFSGanesha
.init(fsid
, daemon_id
)
1851 nfs_ganesha
.create_daemon_dirs(data_dir
, uid
, gid
)
1853 elif daemon_type
== CephIscsi
.daemon_type
:
1854 ceph_iscsi
= CephIscsi
.init(fsid
, daemon_id
)
1855 ceph_iscsi
.create_daemon_dirs(data_dir
, uid
, gid
)
1857 elif daemon_type
== CustomContainer
.daemon_type
:
1858 cc
= CustomContainer
.init(fsid
, daemon_id
)
1859 cc
.create_daemon_dirs(data_dir
, uid
, gid
)
1862 def get_parm(option
):
1863 # type: (str) -> Dict[str, str]
1870 if cached_stdin
is not None:
1874 j
= injected_stdin
# type: ignore
1876 j
= sys
.stdin
.read()
1879 # inline json string
1880 if option
[0] == '{' and option
[-1] == '}':
1883 elif os
.path
.exists(option
):
1884 with
open(option
, 'r') as f
:
1887 raise Error("Config file {} not found".format(option
))
1891 except ValueError as e
:
1892 raise Error("Invalid JSON in {}: {}".format(option
, e
))
def get_config_and_keyring():
    # type: () -> Tuple[Optional[str], Optional[str]]
    """Resolve the ceph config and keyring contents from the CLI args.

    --config-json supplies both; --config / --key / --keyring override
    the respective pieces individually.
    """
    config = None
    keyring = None

    if 'config_json' in args and args.config_json:
        d = get_parm(args.config_json)
        config = d.get('config')
        keyring = d.get('keyring')

    if 'config' in args and args.config:
        with open(args.config, 'r') as f:
            config = f.read()

    if 'key' in args and args.key:
        keyring = '[%s]\n\tkey = %s\n' % (args.name, args.key)
    elif 'keyring' in args and args.keyring:
        with open(args.keyring, 'r') as f:
            keyring = f.read()

    return config, keyring
def get_container_binds(fsid, daemon_type, daemon_id):
    # type: (str, str, Union[int, str, None]) -> List[List[str]]
    """Collect extra --mount bind specs required by certain daemon types."""
    binds = list()

    if daemon_type == CephIscsi.daemon_type:
        binds.extend(CephIscsi.get_container_binds())
    elif daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(fsid, daemon_id)
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        binds.extend(cc.get_container_binds(data_dir))

    return binds
1935 def get_container_mounts(fsid
, daemon_type
, daemon_id
,
1937 # type: (str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str]
1940 if daemon_type
in Ceph
.daemons
:
1942 run_path
= os
.path
.join('/var/run/ceph', fsid
);
1943 if os
.path
.exists(run_path
):
1944 mounts
[run_path
] = '/var/run/ceph:z'
1945 log_dir
= get_log_dir(fsid
)
1946 mounts
[log_dir
] = '/var/log/ceph:z'
1947 crash_dir
= '/var/lib/ceph/%s/crash' % fsid
1948 if os
.path
.exists(crash_dir
):
1949 mounts
[crash_dir
] = '/var/lib/ceph/crash:z'
1951 if daemon_type
in Ceph
.daemons
and daemon_id
:
1952 data_dir
= get_data_dir(fsid
, daemon_type
, daemon_id
)
1953 if daemon_type
== 'rgw':
1954 cdata_dir
= '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id
)
1956 cdata_dir
= '/var/lib/ceph/%s/ceph-%s' % (daemon_type
, daemon_id
)
1957 if daemon_type
!= 'crash':
1958 mounts
[data_dir
] = cdata_dir
+ ':z'
1960 mounts
[data_dir
+ '/config'] = '/etc/ceph/ceph.conf:z'
1961 if daemon_type
== 'rbd-mirror' or daemon_type
== 'crash':
1962 # these do not search for their keyrings in a data directory
1963 mounts
[data_dir
+ '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type
, daemon_id
)
1965 if daemon_type
in ['mon', 'osd']:
1966 mounts
['/dev'] = '/dev' # FIXME: narrow this down?
1967 mounts
['/run/udev'] = '/run/udev'
1968 if daemon_type
== 'osd':
1969 mounts
['/sys'] = '/sys' # for numa.cc, pick_address, cgroups, ...
1970 mounts
['/run/lvm'] = '/run/lvm'
1971 mounts
['/run/lock/lvm'] = '/run/lock/lvm'
1974 if args
.shared_ceph_folder
: # make easy manager modules/ceph-volume development
1975 ceph_folder
= pathify(args
.shared_ceph_folder
)
1976 if os
.path
.exists(ceph_folder
):
1977 mounts
[ceph_folder
+ '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
1978 mounts
[ceph_folder
+ '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
1979 mounts
[ceph_folder
+ '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
1980 mounts
[ceph_folder
+ '/monitoring/grafana/dashboards'] = '/etc/grafana/dashboards/ceph-dashboard'
1981 mounts
[ceph_folder
+ '/monitoring/prometheus/alerts'] = '/etc/prometheus/ceph'
1983 logger
.error('{}{}{}'.format(termcolor
.red
,
1984 'Ceph shared source folder does not exist.',
1986 except AttributeError:
1989 if daemon_type
in Monitoring
.components
and daemon_id
:
1990 data_dir
= get_data_dir(fsid
, daemon_type
, daemon_id
)
1991 if daemon_type
== 'prometheus':
1992 mounts
[os
.path
.join(data_dir
, 'etc/prometheus')] = '/etc/prometheus:Z'
1993 mounts
[os
.path
.join(data_dir
, 'data')] = '/prometheus:Z'
1994 elif daemon_type
== 'node-exporter':
1995 mounts
['/proc'] = '/host/proc:ro'
1996 mounts
['/sys'] = '/host/sys:ro'
1997 mounts
['/'] = '/rootfs:ro'
1998 elif daemon_type
== "grafana":
1999 mounts
[os
.path
.join(data_dir
, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
2000 mounts
[os
.path
.join(data_dir
, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
2001 mounts
[os
.path
.join(data_dir
, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
2002 elif daemon_type
== 'alertmanager':
2003 mounts
[os
.path
.join(data_dir
, 'etc/alertmanager')] = '/etc/alertmanager:Z'
2005 if daemon_type
== NFSGanesha
.daemon_type
:
2007 data_dir
= get_data_dir(fsid
, daemon_type
, daemon_id
)
2008 nfs_ganesha
= NFSGanesha
.init(fsid
, daemon_id
)
2009 mounts
.update(nfs_ganesha
.get_container_mounts(data_dir
))
2011 if daemon_type
== CephIscsi
.daemon_type
:
2013 data_dir
= get_data_dir(fsid
, daemon_type
, daemon_id
)
2014 log_dir
= get_log_dir(fsid
)
2015 mounts
.update(CephIscsi
.get_container_mounts(data_dir
, log_dir
))
2017 if daemon_type
== CustomContainer
.daemon_type
:
2019 cc
= CustomContainer
.init(fsid
, daemon_id
)
2020 data_dir
= get_data_dir(fsid
, daemon_type
, daemon_id
)
2021 mounts
.update(cc
.get_container_mounts(data_dir
))
2026 def get_container(fsid
: str, daemon_type
: str, daemon_id
: Union
[int, str],
2027 privileged
: bool = False,
2028 ptrace
: bool = False,
2029 container_args
: Optional
[List
[str]] = None) -> 'CephContainer':
2030 entrypoint
: str = ''
2032 ceph_args
: List
[str] = []
2033 envs
: List
[str] = []
2034 host_network
: bool = True
2036 if container_args
is None:
2038 if daemon_type
in ['mon', 'osd']:
2039 # mon and osd need privileged in order for libudev to query devices
2041 if daemon_type
== 'rgw':
2042 entrypoint
= '/usr/bin/radosgw'
2043 name
= 'client.rgw.%s' % daemon_id
2044 elif daemon_type
== 'rbd-mirror':
2045 entrypoint
= '/usr/bin/rbd-mirror'
2046 name
= 'client.rbd-mirror.%s' % daemon_id
2047 elif daemon_type
== 'crash':
2048 entrypoint
= '/usr/bin/ceph-crash'
2049 name
= 'client.crash.%s' % daemon_id
2050 elif daemon_type
in ['mon', 'mgr', 'mds', 'osd']:
2051 entrypoint
= '/usr/bin/ceph-' + daemon_type
2052 name
= '%s.%s' % (daemon_type
, daemon_id
)
2053 elif daemon_type
in Monitoring
.components
:
2055 elif daemon_type
== NFSGanesha
.daemon_type
:
2056 entrypoint
= NFSGanesha
.entrypoint
2057 name
= '%s.%s' % (daemon_type
, daemon_id
)
2058 envs
.extend(NFSGanesha
.get_container_envs())
2059 elif daemon_type
== CephIscsi
.daemon_type
:
2060 entrypoint
= CephIscsi
.entrypoint
2061 name
= '%s.%s' % (daemon_type
, daemon_id
)
2062 # So the container can modprobe iscsi_target_mod and have write perms
2063 # to configfs we need to make this a privileged container.
2065 elif daemon_type
== CustomContainer
.daemon_type
:
2066 cc
= CustomContainer
.init(fsid
, daemon_id
)
2067 entrypoint
= cc
.entrypoint
2068 host_network
= False
2069 envs
.extend(cc
.get_container_envs())
2070 container_args
.extend(cc
.get_container_args())
2072 if daemon_type
in Monitoring
.components
:
2073 uid
, gid
= extract_uid_gid_monitoring(daemon_type
)
2077 # FIXME: disable cpu/memory limits for the time being (not supported
2078 # by ubuntu 18.04 kernel!)
2080 container_args
.extend(monitoring_args
)
2081 elif daemon_type
== 'crash':
2082 ceph_args
= ['-n', name
]
2083 elif daemon_type
in Ceph
.daemons
:
2084 ceph_args
= ['-n', name
, '-f']
2086 # if using podman, set -d, --conmon-pidfile & --cidfile flags
2087 # so service can have Type=Forking
2088 if 'podman' in container_path
:
2089 runtime_dir
= '/run'
2090 container_args
.extend(['-d',
2092 runtime_dir
+ '/ceph-%s@%s.%s.service-pid' % (fsid
, daemon_type
, daemon_id
),
2094 runtime_dir
+ '/ceph-%s@%s.%s.service-cid' % (fsid
, daemon_type
, daemon_id
)])
2096 return CephContainer(
2098 entrypoint
=entrypoint
,
2099 args
=ceph_args
+ get_daemon_args(fsid
, daemon_type
, daemon_id
),
2100 container_args
=container_args
,
2101 volume_mounts
=get_container_mounts(fsid
, daemon_type
, daemon_id
),
2102 bind_mounts
=get_container_binds(fsid
, daemon_type
, daemon_id
),
2103 cname
='ceph-%s-%s.%s' % (fsid
, daemon_type
, daemon_id
),
2105 privileged
=privileged
,
2107 host_network
=host_network
,
2111 def extract_uid_gid(img
='', file_path
='/var/lib/ceph'):
2112 # type: (str, Union[str, List[str]]) -> Tuple[int, int]
2117 if isinstance(file_path
, str):
2124 out
= CephContainer(
2127 args
=['-c', '%u %g', fp
]
2129 uid
, gid
= out
.split(' ')
2130 return int(uid
), int(gid
)
2131 except RuntimeError:
2133 raise RuntimeError('uid/gid not found')
2136 def deploy_daemon(fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
2137 config
=None, keyring
=None,
2141 # type: (str, str, Union[int, str], CephContainer, int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None
2144 if any([port_in_use(port
) for port
in ports
]):
2145 raise Error("TCP Port(s) '{}' required for {} already in use".format(",".join(map(str, ports
)), daemon_type
))
2147 data_dir
= get_data_dir(fsid
, daemon_type
, daemon_id
)
2148 if reconfig
and not os
.path
.exists(data_dir
):
2149 raise Error('cannot reconfig, data path %s does not exist' % data_dir
)
2150 if daemon_type
== 'mon' and not os
.path
.exists(data_dir
):
2154 tmp_keyring
= write_tmp(keyring
, uid
, gid
)
2157 tmp_config
= write_tmp(config
, uid
, gid
)
2160 create_daemon_dirs(fsid
, daemon_type
, daemon_id
, uid
, gid
)
2161 mon_dir
= get_data_dir(fsid
, 'mon', daemon_id
)
2162 log_dir
= get_log_dir(fsid
)
2163 out
= CephContainer(
2165 entrypoint
='/usr/bin/ceph-mon',
2167 '-i', str(daemon_id
),
2169 '-c', '/tmp/config',
2170 '--keyring', '/tmp/keyring',
2171 ] + get_daemon_args(fsid
, 'mon', daemon_id
),
2173 log_dir
: '/var/log/ceph:z',
2174 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id
),
2175 tmp_keyring
.name
: '/tmp/keyring:z',
2176 tmp_config
.name
: '/tmp/config:z',
2181 with
open(mon_dir
+ '/config', 'w') as f
:
2182 os
.fchown(f
.fileno(), uid
, gid
)
2183 os
.fchmod(f
.fileno(), 0o600)
2186 # dirs, conf, keyring
2188 fsid
, daemon_type
, daemon_id
,
2193 deploy_daemon_units(fsid
, uid
, gid
, daemon_type
, daemon_id
, c
,
2196 if not os
.path
.exists(data_dir
+ '/unit.created'):
2197 with
open(data_dir
+ '/unit.created', 'w') as f
:
2198 os
.fchmod(f
.fileno(), 0o600)
2199 os
.fchown(f
.fileno(), uid
, gid
)
2200 f
.write('mtime is time the daemon deployment was created\n')
2202 with
open(data_dir
+ '/unit.configured', 'w') as f
:
2203 f
.write('mtime is time we were last configured\n')
2204 os
.fchmod(f
.fileno(), 0o600)
2205 os
.fchown(f
.fileno(), uid
, gid
)
2207 update_firewalld(daemon_type
)
2209 # Open ports explicitly required for the daemon
2212 fw
.open_ports(ports
)
2215 if reconfig
and daemon_type
not in Ceph
.daemons
:
2216 # ceph daemons do not need a restart; others (presumably) do to pick
2218 call_throws(['systemctl', 'reset-failed',
2219 get_unit_name(fsid
, daemon_type
, daemon_id
)])
2220 call_throws(['systemctl', 'restart',
2221 get_unit_name(fsid
, daemon_type
, daemon_id
)])
def _write_container_cmd_to_bash(file_obj, container, comment=None, background=False):
    # type: (IO[str], CephContainer, Optional[str], Optional[bool]) -> None
    """Append the shell commands that (re)start *container* to a unit.run
    style script: best-effort removal of stale containers, then the run."""
    if comment:
        # Sometimes adding a comment, especially if there are multiple containers in one
        # unit file, makes it easier to read and grok.
        file_obj.write('# ' + comment + '\n')
    # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
    file_obj.write('! '+ ' '.join(container.rm_cmd()) + ' 2> /dev/null\n')
    # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
    if 'podman' in container_path:
        file_obj.write('! '+ ' '.join(container.rm_cmd(storage=True)) + ' 2> /dev/null\n')

    # container run command
    file_obj.write(' '.join(container.run_cmd()) + (' &' if background else '') + '\n')
2239 def deploy_daemon_units(fsid
, uid
, gid
, daemon_type
, daemon_id
, c
,
2240 enable
=True, start
=True,
2242 # type: (str, int, int, str, Union[int, str], CephContainer, bool, bool, Optional[str]) -> None
2244 data_dir
= get_data_dir(fsid
, daemon_type
, daemon_id
)
2245 with
open(data_dir
+ '/unit.run.new', 'w') as f
:
2248 if daemon_type
in Ceph
.daemons
:
2249 install_path
= find_program('install')
2250 f
.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path
=install_path
, fsid
=fsid
, uid
=uid
, gid
=gid
))
2253 if daemon_type
== 'osd':
2254 # osds have a pre-start step
2256 simple_fn
= os
.path
.join('/etc/ceph/osd',
2257 '%s-%s.json.adopted-by-cephadm' % (daemon_id
, osd_fsid
))
2258 if os
.path
.exists(simple_fn
):
2259 f
.write('# Simple OSDs need chown on startup:\n')
2260 for n
in ['block', 'block.db', 'block.wal']:
2261 p
= os
.path
.join(data_dir
, n
)
2262 f
.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p
=p
, uid
=uid
, gid
=gid
))
2264 prestart
= CephContainer(
2266 entrypoint
='/usr/sbin/ceph-volume',
2269 str(daemon_id
), osd_fsid
,
2273 volume_mounts
=get_container_mounts(fsid
, daemon_type
, daemon_id
),
2274 bind_mounts
=get_container_binds(fsid
, daemon_type
, daemon_id
),
2275 cname
='ceph-%s-%s.%s-activate' % (fsid
, daemon_type
, daemon_id
),
2277 _write_container_cmd_to_bash(f
, prestart
, 'LVM OSDs use ceph-volume lvm activate')
2278 elif daemon_type
== NFSGanesha
.daemon_type
:
2279 # add nfs to the rados grace db
2280 nfs_ganesha
= NFSGanesha
.init(fsid
, daemon_id
)
2281 prestart
= nfs_ganesha
.get_rados_grace_container('add')
2282 _write_container_cmd_to_bash(f
, prestart
, 'add daemon to rados grace')
2283 elif daemon_type
== CephIscsi
.daemon_type
:
2284 f
.write(' '.join(CephIscsi
.configfs_mount_umount(data_dir
, mount
=True)) + '\n')
2285 ceph_iscsi
= CephIscsi
.init(fsid
, daemon_id
)
2286 tcmu_container
= ceph_iscsi
.get_tcmu_runner_container()
2287 _write_container_cmd_to_bash(f
, tcmu_container
, 'iscsi tcmu-runnter container', background
=True)
2289 _write_container_cmd_to_bash(f
, c
, '%s.%s' % (daemon_type
, str(daemon_id
)))
2290 os
.fchmod(f
.fileno(), 0o600)
2291 os
.rename(data_dir
+ '/unit.run.new',
2292 data_dir
+ '/unit.run')
2294 # post-stop command(s)
2295 with
open(data_dir
+ '/unit.poststop.new', 'w') as f
:
2296 if daemon_type
== 'osd':
2298 poststop
= CephContainer(
2300 entrypoint
='/usr/sbin/ceph-volume',
2302 'lvm', 'deactivate',
2303 str(daemon_id
), osd_fsid
,
2306 volume_mounts
=get_container_mounts(fsid
, daemon_type
, daemon_id
),
2307 bind_mounts
=get_container_binds(fsid
, daemon_type
, daemon_id
),
2308 cname
='ceph-%s-%s.%s-deactivate' % (fsid
, daemon_type
,
2311 _write_container_cmd_to_bash(f
, poststop
, 'deactivate osd')
2312 elif daemon_type
== NFSGanesha
.daemon_type
:
2313 # remove nfs from the rados grace db
2314 nfs_ganesha
= NFSGanesha
.init(fsid
, daemon_id
)
2315 poststop
= nfs_ganesha
.get_rados_grace_container('remove')
2316 _write_container_cmd_to_bash(f
, poststop
, 'remove daemon from rados grace')
2317 elif daemon_type
== CephIscsi
.daemon_type
:
2318 # make sure we also stop the tcmu container
2319 ceph_iscsi
= CephIscsi
.init(fsid
, daemon_id
)
2320 tcmu_container
= ceph_iscsi
.get_tcmu_runner_container()
2321 f
.write('! '+ ' '.join(tcmu_container
.stop_cmd()) + '\n')
2322 f
.write(' '.join(CephIscsi
.configfs_mount_umount(data_dir
, mount
=False)) + '\n')
2323 os
.fchmod(f
.fileno(), 0o600)
2324 os
.rename(data_dir
+ '/unit.poststop.new',
2325 data_dir
+ '/unit.poststop')
2327 with
open(data_dir
+ '/unit.image.new', 'w') as f
:
2328 f
.write(c
.image
+ '\n')
2329 os
.fchmod(f
.fileno(), 0o600)
2330 os
.rename(data_dir
+ '/unit.image.new',
2331 data_dir
+ '/unit.image')
2334 install_base_units(fsid
)
2335 unit
= get_unit_file(fsid
)
2336 unit_file
= 'ceph-%s@.service' % (fsid
)
2337 with
open(args
.unit_dir
+ '/' + unit_file
+ '.new', 'w') as f
:
2339 os
.rename(args
.unit_dir
+ '/' + unit_file
+ '.new',
2340 args
.unit_dir
+ '/' + unit_file
)
2341 call_throws(['systemctl', 'daemon-reload'])
2343 unit_name
= get_unit_name(fsid
, daemon_type
, daemon_id
)
2344 call(['systemctl', 'stop', unit_name
],
2345 verbosity
=CallVerbosity
.DEBUG
)
2346 call(['systemctl', 'reset-failed', unit_name
],
2347 verbosity
=CallVerbosity
.DEBUG
)
2349 call_throws(['systemctl', 'enable', unit_name
])
2351 call_throws(['systemctl', 'start', unit_name
])
class Firewalld(object):
    """Thin wrapper around the ``firewall-cmd`` CLI.

    Availability is probed once at construction; every public method
    degrades to a debug log and a no-op when firewalld is absent,
    disabled, or not running.
    """

    def __init__(self):
        # type: () -> None
        self.available = self.check()

    def check(self):
        # type: () -> bool
        """Return True iff firewall-cmd exists and firewalld.service is enabled and running."""
        self.cmd = find_executable('firewall-cmd')
        if not self.cmd:
            logger.debug('firewalld does not appear to be present')
            return False
        (enabled, state, _) = check_unit('firewalld.service')
        if not enabled:
            logger.debug('firewalld.service is not enabled')
            return False
        if state != "running":
            logger.debug('firewalld.service is not running')
            return False

        logger.info("firewalld ready")
        return True

    def enable_service_for(self, daemon_type):
        # type: (str) -> None
        """Permanently enable the firewalld service matching a ceph daemon type."""
        if not self.available:
            logger.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type)
            return

        # map daemon type -> firewalld service name
        if daemon_type == 'mon':
            svc = 'ceph-mon'
        elif daemon_type in ['mgr', 'mds', 'osd']:
            svc = 'ceph'
        elif daemon_type == NFSGanesha.daemon_type:
            svc = 'nfs'
        else:
            # no firewalld service is defined for the remaining daemon types
            return

        out, err, ret = call([self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG)
        if ret:
            logger.info('Enabling firewalld service %s in current zone...' % svc)
            out, err, ret = call([self.cmd, '--permanent', '--add-service', svc])
            if ret:
                raise RuntimeError(
                    'unable to add service %s to current zone: %s' % (svc, err))
        else:
            logger.debug('firewalld service %s is enabled in current zone' % svc)

    def open_ports(self, fw_ports):
        # type: (List[int]) -> None
        """Permanently open the given TCP ports in the current zone."""
        if not self.available:
            logger.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports)
            return

        for port in fw_ports:
            tcp_port = str(port) + '/tcp'
            out, err, ret = call([self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
            if ret:
                logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
                out, err, ret = call([self.cmd, '--permanent', '--add-port', tcp_port])
                if ret:
                    raise RuntimeError('unable to add port %s to current zone: %s' %
                                       (tcp_port, err))
            else:
                logger.debug('firewalld port %s is enabled in current zone' % tcp_port)

    def apply_rules(self):
        # type: () -> None
        """Reload firewalld so the --permanent changes above take effect now."""
        if not self.available:
            return

        call_throws([self.cmd, '--reload'])
def update_firewalld(daemon_type):
    # type: (str) -> None
    """Open the firewalld service and any extra ports needed by *daemon_type*."""
    firewall = Firewalld()

    firewall.enable_service_for(daemon_type)

    fw_ports = []

    if daemon_type in Monitoring.port_map.keys():
        fw_ports.extend(Monitoring.port_map[daemon_type])  # prometheus etc

    firewall.open_ports(fw_ports)
    firewall.apply_rules()
2442 def install_base_units(fsid
):
2443 # type: (str) -> None
2445 Set up ceph.target and ceph-$fsid.target units.
2448 existed
= os
.path
.exists(args
.unit_dir
+ '/ceph.target')
2449 with
open(args
.unit_dir
+ '/ceph.target.new', 'w') as f
:
2451 'Description=All Ceph clusters and services\n'
2454 'WantedBy=multi-user.target\n')
2455 os
.rename(args
.unit_dir
+ '/ceph.target.new',
2456 args
.unit_dir
+ '/ceph.target')
2458 # we disable before enable in case a different ceph.target
2459 # (from the traditional package) is present; while newer
2460 # systemd is smart enough to disable the old
2461 # (/lib/systemd/...) and enable the new (/etc/systemd/...),
2462 # some older versions of systemd error out with EEXIST.
2463 call_throws(['systemctl', 'disable', 'ceph.target'])
2464 call_throws(['systemctl', 'enable', 'ceph.target'])
2465 call_throws(['systemctl', 'start', 'ceph.target'])
2468 existed
= os
.path
.exists(args
.unit_dir
+ '/ceph-%s.target' % fsid
)
2469 with
open(args
.unit_dir
+ '/ceph-%s.target.new' % fsid
, 'w') as f
:
2471 'Description=Ceph cluster {fsid}\n'
2472 'PartOf=ceph.target\n'
2473 'Before=ceph.target\n'
2476 'WantedBy=multi-user.target ceph.target\n'.format(
2479 os
.rename(args
.unit_dir
+ '/ceph-%s.target.new' % fsid
,
2480 args
.unit_dir
+ '/ceph-%s.target' % fsid
)
2482 call_throws(['systemctl', 'enable', 'ceph-%s.target' % fsid
])
2483 call_throws(['systemctl', 'start', 'ceph-%s.target' % fsid
])
2485 # logrotate for the cluster
2486 with
open(args
.logrotate_dir
+ '/ceph-%s' % fsid
, 'w') as f
:
2488 This is a bit sloppy in that the killall/pkill will touch all ceph daemons
2489 in all containers, but I don't see an elegant way to send SIGHUP *just* to
2490 the daemons for this cluster. (1) systemd kill -s will get the signal to
2491 podman, but podman will exit. (2) podman kill will get the signal to the
2492 first child (bash), but that isn't the ceph daemon. This is simpler and
2495 f
.write("""# created by cephadm
2496 /var/log/ceph/%s/*.log {
2502 killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror || pkill -1 -x "ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror" || true
2511 def get_unit_file(fsid
):
2512 # type: (str) -> str
2514 if 'podman' in container_path
:
2515 extra_args
= ('ExecStartPre=-/bin/rm -f /%t/%n-pid /%t/%n-cid\n'
2516 'ExecStopPost=-/bin/rm -f /%t/%n-pid /%t/%n-cid\n'
2518 'PIDFile=/%t/%n-pid\n')
2520 docker
= 'docker' in container_path
2521 u
= """# generated by cephadm
2523 Description=Ceph %i for {fsid}
2526 # http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
2527 # these can be removed once ceph-mon will dynamically change network
2529 After=network-online.target local-fs.target time-sync.target{docker_after}
2530 Wants=network-online.target local-fs.target time-sync.target
2533 PartOf=ceph-{fsid}.target
2534 Before=ceph-{fsid}.target
2539 EnvironmentFile=-/etc/environment
2540 ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
2541 ExecStop=-{container_path} stop ceph-{fsid}-%i
2542 ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
2548 StartLimitInterval=30min
2552 WantedBy=ceph-{fsid}.target
2554 container_path
=container_path
,
2556 data_dir
=args
.data_dir
,
2557 extra_args
=extra_args
,
2558 # if docker, we depend on docker.service
2559 docker_after
=' docker.service' if docker
else '',
2560 docker_requires
='Requires=docker.service\n' if docker
else '',
2565 ##################################
2568 class CephContainer
:
2572 args
: List
[str] = [],
2573 volume_mounts
: Dict
[str, str] = {},
2575 container_args
: List
[str] = [],
2576 envs
: Optional
[List
[str]] = None,
2577 privileged
: bool = False,
2578 ptrace
: bool = False,
2579 bind_mounts
: Optional
[List
[List
[str]]] = None,
2580 init
: Optional
[bool] = None,
2581 host_network
: bool = True,
2584 self
.entrypoint
= entrypoint
2586 self
.volume_mounts
= volume_mounts
2588 self
.container_args
= container_args
2590 self
.privileged
= privileged
2591 self
.ptrace
= ptrace
2592 self
.bind_mounts
= bind_mounts
if bind_mounts
else []
2593 self
.init
= init
if init
else container_init
2594 self
.host_network
= host_network
2596 def run_cmd(self
) -> List
[str]:
2597 cmd_args
: List
[str] = [
2598 str(container_path
),
2604 '-e', 'CONTAINER_IMAGE=%s' % self
.image
,
2605 '-e', 'NODE_NAME=%s' % get_hostname(),
2607 vols
: List
[str] = []
2608 binds
: List
[str] = []
2610 if self
.host_network
:
2611 cmd_args
.append('--net=host')
2613 cmd_args
.extend(['--entrypoint', self
.entrypoint
])
2617 # let OSD etc read block devs that haven't been chowned
2618 '--group-add=disk'])
2619 if self
.ptrace
and not self
.privileged
:
2620 # if privileged, the SYS_PTRACE cap is already added
2621 # in addition, --cap-add and --privileged are mutually
2622 # exclusive since podman >= 2.0
2623 cmd_args
.append('--cap-add=SYS_PTRACE')
2625 cmd_args
.append('--init')
2626 envs
+= ['-e', 'CEPH_USE_RANDOM_NONCE=1']
2628 cmd_args
.extend(['--name', self
.cname
])
2630 for env
in self
.envs
:
2631 envs
.extend(['-e', env
])
2634 [['-v', '%s:%s' % (host_dir
, container_dir
)]
2635 for host_dir
, container_dir
in self
.volume_mounts
.items()], [])
2636 binds
= sum([['--mount', '{}'.format(','.join(bind
))]
2637 for bind
in self
.bind_mounts
], [])
2639 return cmd_args
+ self
.container_args
+ envs
+ vols
+ binds
+ [
2641 ] + self
.args
# type: ignore
2643 def shell_cmd(self
, cmd
: List
[str]) -> List
[str]:
2644 cmd_args
: List
[str] = [
2645 str(container_path
),
2651 '-e', 'CONTAINER_IMAGE=%s' % self
.image
,
2652 '-e', 'NODE_NAME=%s' % get_hostname(),
2654 vols
: List
[str] = []
2655 binds
: List
[str] = []
2657 if self
.host_network
:
2658 cmd_args
.append('--net=host')
2662 # let OSD etc read block devs that haven't been chowned
2666 cmd_args
.append('--init')
2667 envs
+= ['-e', 'CEPH_USE_RANDOM_NONCE=1']
2669 for env
in self
.envs
:
2670 envs
.extend(['-e', env
])
2673 [['-v', '%s:%s' % (host_dir
, container_dir
)]
2674 for host_dir
, container_dir
in self
.volume_mounts
.items()], [])
2675 binds
= sum([['--mount', '{}'.format(','.join(bind
))]
2676 for bind
in self
.bind_mounts
], [])
2678 return cmd_args
+ self
.container_args
+ envs
+ vols
+ binds
+ [
2679 '--entrypoint', cmd
[0],
2683 def exec_cmd(self
, cmd
):
2684 # type: (List[str]) -> List[str]
2686 str(container_path
),
2688 ] + self
.container_args
+ [
2692 def rm_cmd(self
, storage
=False):
2693 # type: (bool) -> List[str]
2695 str(container_path
),
2699 ret
.append('--storage')
2700 ret
.append(self
.cname
)
2704 # type () -> List[str]
2706 str(container_path
),
2711 def run(self
, timeout
=DEFAULT_TIMEOUT
):
2712 # type: (Optional[int]) -> str
2713 out
, _
, _
= call_throws(
2714 self
.run_cmd(), desc
=self
.entrypoint
, timeout
=timeout
)
2717 ##################################
def command_version():
    # type: () -> int
    """Print the ceph version reported by the configured container image."""
    out = CephContainer(args.image, 'ceph', ['--version']).run()
    print(out.strip())
    return 0
2727 ##################################
def command_pull():
    # type: () -> int
    """Pull args.image (with retries) and then print its inspect info."""
    _pull_image(args.image)
    return command_inspect_image()
def _pull_image(image):
    # type: (str) -> None
    """Pull a container image, retrying a few times on known-transient errors.

    Raises RuntimeError when the pull fails with a non-transient error or
    when all retries are exhausted.
    """
    logger.info('Pulling container image %s...' % image)

    # stderr snippets that indicate a transient (retryable) failure
    ignorelist = [
        "error creating read-write layer with ID",
        "net/http: TLS handshake timeout",
        "Digest did not match, expected",
    ]

    cmd = [container_path, 'pull', image]
    cmd_str = ' '.join(cmd)

    # back off progressively between attempts
    for sleep_secs in [1, 4, 25]:
        out, err, ret = call(cmd)
        if not ret:
            return

        if not any(pattern in err for pattern in ignorelist):
            raise RuntimeError('Failed command: %s' % cmd_str)

        # fix: the log message had an unbalanced leading double quote
        logger.info('%s failed transiently. Retrying. waiting %s seconds...' % (cmd_str, sleep_secs))
        time.sleep(sleep_secs)

    raise RuntimeError('Failed command: %s: maximum retries reached' % cmd_str)
2763 ##################################
def command_inspect_image():
    # type: () -> int
    """Inspect args.image and print its id, repo digest and ceph version as JSON."""
    out, err, ret = call_throws([
        container_path, 'inspect',
        '--format', '{{.ID}},{{.RepoDigests}}',
        args.image])
    if ret:
        return errno.ENOENT
    info_from = get_image_info_from_inspect(out.strip(), args.image)

    ver = CephContainer(args.image, 'ceph', ['--version']).run().strip()
    info_from['ceph_version'] = ver

    print(json.dumps(info_from, indent=4, sort_keys=True))
    return 0
def get_image_info_from_inspect(out, image):
    # type: (str, str) -> Dict[str, str]
    """Parse the '<id>,[<digest> ...]' line produced by `<container> inspect`.

    Returns a dict with 'image_id' and, when available, 'repo_digest'.
    Raises Error when the inspect output is empty.
    """
    # fix: check for empty output *before* unpacking -- ''.split(',', 1)
    # yields a single element, so the unpack raised ValueError before the
    # intended Error could be reported
    if not out:
        raise Error('inspect {}: empty result'.format(image))
    image_id, digests = out.split(',', 1)
    r = {
        'image_id': normalize_container_id(image_id)
    }
    if digests:
        # digests look like '[repo@sha256:... repo@sha256:...]'
        json_digests = digests[1:-1].split(' ')
        if json_digests:
            r['repo_digest'] = json_digests[0]
    return r
2799 ##################################
def unwrap_ipv6(address):
    # type: (str) -> str
    """Strip enclosing square brackets from a bracketed IPv6 literal, if present."""
    bracketed = address.startswith('[') and address.endswith(']')
    return address[1:-1] if bracketed else address
def wrap_ipv6(address):
    # type: (str) -> str
    """Bracket an IPv6 literal so it can be joined with a port; pass others through."""

    # We cannot assume the input is an IP at all: hostnames (and strings
    # that are already bracketed) make ip_address() raise ValueError, in
    # which case the input is returned unchanged.
    try:
        is_v6 = ipaddress.ip_address(unicode(address)).version == 6
    except ValueError:
        is_v6 = False

    return f"[{address}]" if is_v6 else address
def is_ipv6(address):
    # type: (str) -> bool
    """Return True when *address* (optionally bracketed) parses as IPv6."""
    address = unwrap_ipv6(address)
    try:
        return ipaddress.ip_address(unicode(address)).version == 6
    except ValueError:
        # not an IP address at all (e.g. a hostname)
        logger.warning("Address: {} isn't a valid IP address".format(address))
        return False
2835 def command_bootstrap():
2838 if not args
.output_config
:
2839 args
.output_config
= os
.path
.join(args
.output_dir
, 'ceph.conf')
2840 if not args
.output_keyring
:
2841 args
.output_keyring
= os
.path
.join(args
.output_dir
,
2842 'ceph.client.admin.keyring')
2843 if not args
.output_pub_ssh_key
:
2844 args
.output_pub_ssh_key
= os
.path
.join(args
.output_dir
, 'ceph.pub')
2846 # verify output files
2847 for f
in [args
.output_config
, args
.output_keyring
, args
.output_pub_ssh_key
]:
2848 if not args
.allow_overwrite
:
2849 if os
.path
.exists(f
):
2850 raise Error('%s already exists; delete or pass '
2851 '--allow-overwrite to overwrite' % f
)
2852 dirname
= os
.path
.dirname(f
)
2853 if dirname
and not os
.path
.exists(dirname
):
2854 fname
= os
.path
.basename(f
)
2855 logger
.info(f
"Creating directory {dirname} for {fname}")
2857 # use makedirs to create intermediate missing dirs
2858 os
.makedirs(dirname
, 0o755)
2859 except PermissionError
:
2860 raise Error(f
"Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.")
2863 if not args
.skip_prepare_host
:
2864 command_prepare_host()
2866 logger
.info('Skip prepare_host')
2869 fsid
= args
.fsid
or make_fsid()
2870 hostname
= get_hostname()
2871 if '.' in hostname
and not args
.allow_fqdn_hostname
:
2872 raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname
, hostname
.split('.')[0]))
2873 mon_id
= args
.mon_id
or hostname
2874 mgr_id
= args
.mgr_id
or generate_service_id()
2875 logger
.info('Cluster fsid: %s' % fsid
)
2882 r
= re
.compile(r
':(\d+)$')
2885 ipv6
= is_ipv6(args
.mon_ip
)
2887 args
.mon_ip
= wrap_ipv6(args
.mon_ip
)
2888 hasport
= r
.findall(args
.mon_ip
)
2890 port
= int(hasport
[0])
2892 addr_arg
= '[v1:%s]' % args
.mon_ip
2894 addr_arg
= '[v2:%s]' % args
.mon_ip
2896 logger
.warning('Using msgr2 protocol for unrecognized port %d' %
2898 addr_arg
= '[v2:%s]' % args
.mon_ip
2899 base_ip
= args
.mon_ip
[0:-(len(str(port
)))-1]
2900 check_ip_port(base_ip
, port
)
2902 base_ip
= args
.mon_ip
2903 addr_arg
= '[v2:%s:3300,v1:%s:6789]' % (args
.mon_ip
, args
.mon_ip
)
2904 check_ip_port(args
.mon_ip
, 3300)
2905 check_ip_port(args
.mon_ip
, 6789)
2906 elif args
.mon_addrv
:
2907 addr_arg
= args
.mon_addrv
2908 if addr_arg
[0] != '[' or addr_arg
[-1] != ']':
2909 raise Error('--mon-addrv value %s must use square backets' %
2911 ipv6
= addr_arg
.count('[') > 1
2912 for addr
in addr_arg
[1:-1].split(','):
2913 hasport
= r
.findall(addr
)
2915 raise Error('--mon-addrv value %s must include port number' %
2917 port
= int(hasport
[0])
2918 # strip off v1: or v2: prefix
2919 addr
= re
.sub(r
'^\w+:', '', addr
)
2920 base_ip
= addr
[0:-(len(str(port
)))-1]
2921 check_ip_port(base_ip
, port
)
2923 raise Error('must specify --mon-ip or --mon-addrv')
2924 logger
.debug('Base mon IP is %s, final addrv is %s' % (base_ip
, addr_arg
))
2927 if not args
.skip_mon_network
:
2928 # make sure IP is configured locally, and then figure out the
2930 for net
, ips
in list_networks().items():
2931 if ipaddress
.ip_address(unicode(unwrap_ipv6(base_ip
))) in \
2932 [ipaddress
.ip_address(unicode(ip
)) for ip
in ips
]:
2934 logger
.info('Mon IP %s is in CIDR network %s' % (base_ip
,
2938 raise Error('Failed to infer CIDR network for mon ip %s; pass '
2939 '--skip-mon-network to configure it later' % base_ip
)
2942 cp
= read_config(args
.config
)
2943 if not cp
.has_section('global'):
2944 cp
.add_section('global')
2945 cp
.set('global', 'fsid', fsid
);
2946 cp
.set('global', 'mon_host', addr_arg
)
2947 cp
.set('global', 'container_image', args
.image
)
2948 if not cp
.has_section('mon'):
2949 cp
.add_section('mon')
2951 not cp
.has_option('mon', 'auth_allow_insecure_global_id_reclaim')
2952 and not cp
.has_option('mon', 'auth allow insecure global id reclaim')
2954 cp
.set('mon', 'auth_allow_insecure_global_id_reclaim', 'false')
2957 config
= cpf
.getvalue()
2959 if args
.registry_json
or args
.registry_url
:
2960 command_registry_login()
2962 if not args
.skip_pull
:
2963 _pull_image(args
.image
)
2965 logger
.info('Extracting ceph user uid/gid from container image...')
2966 (uid
, gid
) = extract_uid_gid()
2968 # create some initial keys
2969 logger
.info('Creating initial keys...')
2970 mon_key
= CephContainer(
2972 entrypoint
='/usr/bin/ceph-authtool',
2973 args
=['--gen-print-key'],
2975 admin_key
= CephContainer(
2977 entrypoint
='/usr/bin/ceph-authtool',
2978 args
=['--gen-print-key'],
2980 mgr_key
= CephContainer(
2982 entrypoint
='/usr/bin/ceph-authtool',
2983 args
=['--gen-print-key'],
2986 keyring
= ('[mon.]\n'
2988 '\tcaps mon = allow *\n'
2991 '\tcaps mon = allow *\n'
2992 '\tcaps mds = allow *\n'
2993 '\tcaps mgr = allow *\n'
2994 '\tcaps osd = allow *\n'
2997 '\tcaps mon = profile mgr\n'
2998 '\tcaps mds = allow *\n'
2999 '\tcaps osd = allow *\n'
3000 % (mon_key
, admin_key
, mgr_id
, mgr_key
))
3003 tmp_bootstrap_keyring
= write_tmp(keyring
, uid
, gid
)
3005 # create initial monmap, tmp monmap file
3006 logger
.info('Creating initial monmap...')
3007 tmp_monmap
= write_tmp('', 0, 0)
3008 out
= CephContainer(
3010 entrypoint
='/usr/bin/monmaptool',
3014 '--addv', mon_id
, addr_arg
,
3018 tmp_monmap
.name
: '/tmp/monmap:z',
3022 # pass monmap file to ceph user for use by ceph-mon --mkfs below
3023 os
.fchown(tmp_monmap
.fileno(), uid
, gid
)
3026 logger
.info('Creating mon...')
3027 create_daemon_dirs(fsid
, 'mon', mon_id
, uid
, gid
)
3028 mon_dir
= get_data_dir(fsid
, 'mon', mon_id
)
3029 log_dir
= get_log_dir(fsid
)
3030 out
= CephContainer(
3032 entrypoint
='/usr/bin/ceph-mon',
3037 '--monmap', '/tmp/monmap',
3038 '--keyring', '/tmp/keyring',
3039 ] + get_daemon_args(fsid
, 'mon', mon_id
),
3041 log_dir
: '/var/log/ceph:z',
3042 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id
),
3043 tmp_bootstrap_keyring
.name
: '/tmp/keyring:z',
3044 tmp_monmap
.name
: '/tmp/monmap:z',
3048 with
open(mon_dir
+ '/config', 'w') as f
:
3049 os
.fchown(f
.fileno(), uid
, gid
)
3050 os
.fchmod(f
.fileno(), 0o600)
3053 make_var_run(fsid
, uid
, gid
)
3054 mon_c
= get_container(fsid
, 'mon', mon_id
)
3055 deploy_daemon(fsid
, 'mon', mon_id
, mon_c
, uid
, gid
,
3056 config
=None, keyring
=None)
3058 # client.admin key + config to issue various CLI commands
3059 tmp_admin_keyring
= write_tmp('[client.admin]\n'
3060 '\tkey = ' + admin_key
+ '\n',
3062 tmp_config
= write_tmp(config
, uid
, gid
)
3064 # a CLI helper to reduce our typing
3065 def cli(cmd
, extra_mounts
={}, timeout
=DEFAULT_TIMEOUT
):
3066 # type: (List[str], Dict[str, str], Optional[int]) -> str
3068 log_dir
: '/var/log/ceph:z',
3069 tmp_admin_keyring
.name
: '/etc/ceph/ceph.client.admin.keyring:z',
3070 tmp_config
.name
: '/etc/ceph/ceph.conf:z',
3072 for k
, v
in extra_mounts
.items():
3074 timeout
= timeout
or args
.timeout
3075 return CephContainer(
3077 entrypoint
='/usr/bin/ceph',
3079 volume_mounts
=mounts
,
3080 ).run(timeout
=timeout
)
3082 logger
.info('Waiting for mon to start...')
3085 entrypoint
='/usr/bin/ceph',
3089 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id
),
3090 tmp_admin_keyring
.name
: '/etc/ceph/ceph.client.admin.keyring:z',
3091 tmp_config
.name
: '/etc/ceph/ceph.conf:z',
3095 # wait for the service to become available
3096 def is_mon_available():
3098 timeout
=args
.timeout
if args
.timeout
else 60 # seconds
3099 out
, err
, ret
= call(c
.run_cmd(),
3103 is_available('mon', is_mon_available
)
3105 # assimilate and minimize config
3106 if not args
.no_minimize_config
:
3107 logger
.info('Assimilating anything we can from ceph.conf...')
3109 'config', 'assimilate-conf',
3110 '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
3112 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
3114 logger
.info('Generating new minimal ceph.conf...')
3116 'config', 'generate-minimal-conf',
3117 '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
3119 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
3121 # re-read our minimized config
3122 with
open(mon_dir
+ '/config', 'r') as f
:
3124 logger
.info('Restarting the monitor...')
3128 get_unit_name(fsid
, 'mon', mon_id
)
3132 logger
.info('Setting mon public_network...')
3133 cli(['config', 'set', 'mon', 'public_network', mon_network
])
3136 logger
.info('Enabling IPv6 (ms_bind_ipv6)')
3137 cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])
3140 logger
.info('Creating mgr...')
3141 mgr_keyring
= '[mgr.%s]\n\tkey = %s\n' % (mgr_id
, mgr_key
)
3142 mgr_c
= get_container(fsid
, 'mgr', mgr_id
)
3143 # Note:the default port used by the Prometheus node exporter is opened in fw
3144 deploy_daemon(fsid
, 'mgr', mgr_id
, mgr_c
, uid
, gid
,
3145 config
=config
, keyring
=mgr_keyring
, ports
=[9283])
3148 with
open(args
.output_keyring
, 'w') as f
:
3149 os
.fchmod(f
.fileno(), 0o600)
3150 f
.write('[client.admin]\n'
3151 '\tkey = ' + admin_key
+ '\n')
3152 logger
.info('Wrote keyring to %s' % args
.output_keyring
)
3154 with
open(args
.output_config
, 'w') as f
:
3156 logger
.info('Wrote config to %s' % args
.output_config
)
3158 # wait for the service to become available
3159 logger
.info('Waiting for mgr to start...')
3160 def is_mgr_available():
3162 timeout
=args
.timeout
if args
.timeout
else 60 # seconds
3164 out
= cli(['status', '-f', 'json-pretty'], timeout
=timeout
)
3166 return j
.get('mgrmap', {}).get('available', False)
3167 except Exception as e
:
3168 logger
.debug('status failed: %s' % e
)
3170 is_available('mgr', is_mgr_available
)
3172 # wait for mgr to restart (after enabling a module)
3173 def wait_for_mgr_restart():
3174 # first get latest mgrmap epoch from the mon
3175 out
= cli(['mgr', 'dump'])
3178 # wait for mgr to have it
3179 logger
.info('Waiting for the mgr to restart...')
3180 def mgr_has_latest_epoch():
3183 out
= cli(['tell', 'mgr', 'mgr_status'])
3185 return j
['mgrmap_epoch'] >= epoch
3186 except Exception as e
:
3187 logger
.debug('tell mgr mgr_status failed: %s' % e
)
3189 is_available('Mgr epoch %d' % epoch
, mgr_has_latest_epoch
)
3192 if not args
.skip_ssh
:
3193 cli(['config-key', 'set', 'mgr/cephadm/ssh_user', args
.ssh_user
])
3195 logger
.info('Enabling cephadm module...')
3196 cli(['mgr', 'module', 'enable', 'cephadm'])
3197 wait_for_mgr_restart()
3199 logger
.info('Setting orchestrator backend to cephadm...')
3200 cli(['orch', 'set', 'backend', 'cephadm'])
3203 logger
.info('Using provided ssh config...')
3205 pathify(args
.ssh_config
.name
): '/tmp/cephadm-ssh-config:z',
3207 cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts
=mounts
)
3209 if args
.ssh_private_key
and args
.ssh_public_key
:
3210 logger
.info('Using provided ssh keys...')
3212 pathify(args
.ssh_private_key
.name
): '/tmp/cephadm-ssh-key:z',
3213 pathify(args
.ssh_public_key
.name
): '/tmp/cephadm-ssh-key.pub:z'
3215 cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts
=mounts
)
3216 cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts
=mounts
)
3218 logger
.info('Generating ssh key...')
3219 cli(['cephadm', 'generate-key'])
3220 ssh_pub
= cli(['cephadm', 'get-pub-key'])
3222 with
open(args
.output_pub_ssh_key
, 'w') as f
:
3224 logger
.info('Wrote public SSH key to to %s' % args
.output_pub_ssh_key
)
3226 logger
.info('Adding key to %s@localhost\'s authorized_keys...' % args
.ssh_user
)
3228 s_pwd
= pwd
.getpwnam(args
.ssh_user
)
3229 except KeyError as e
:
3230 raise Error('Cannot find uid/gid for ssh-user: %s' % (args
.ssh_user
))
3231 ssh_uid
= s_pwd
.pw_uid
3232 ssh_gid
= s_pwd
.pw_gid
3233 ssh_dir
= os
.path
.join(s_pwd
.pw_dir
, '.ssh')
3235 if not os
.path
.exists(ssh_dir
):
3236 makedirs(ssh_dir
, ssh_uid
, ssh_gid
, 0o700)
3238 auth_keys_file
= '%s/authorized_keys' % ssh_dir
3241 if os
.path
.exists(auth_keys_file
):
3242 with
open(auth_keys_file
, 'r') as f
:
3243 f
.seek(0, os
.SEEK_END
)
3245 f
.seek(f
.tell()-1, os
.SEEK_SET
) # go to last char
3246 if f
.read() != '\n':
3249 with
open(auth_keys_file
, 'a') as f
:
3250 os
.fchown(f
.fileno(), ssh_uid
, ssh_gid
) # just in case we created it
3251 os
.fchmod(f
.fileno(), 0o600) # just in case we created it
3254 f
.write(ssh_pub
.strip() + '\n')
3256 host
= get_hostname()
3257 logger
.info('Adding host %s...' % host
)
3259 cli(['orch', 'host', 'add', host
])
3260 except RuntimeError as e
:
3261 raise Error('Failed to add host <%s>: %s' % (host
, e
))
3263 if not args
.orphan_initial_daemons
:
3264 for t
in ['mon', 'mgr', 'crash']:
3265 logger
.info('Deploying %s service with default placement...' % t
)
3266 cli(['orch', 'apply', t
])
3268 if not args
.skip_monitoring_stack
:
3269 logger
.info('Enabling mgr prometheus module...')
3270 cli(['mgr', 'module', 'enable', 'prometheus'])
3271 for t
in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
3272 logger
.info('Deploying %s service with default placement...' % t
)
3273 cli(['orch', 'apply', t
])
3275 if args
.registry_url
and args
.registry_username
and args
.registry_password
:
3276 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_url', args
.registry_url
, '--force'])
3277 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_username', args
.registry_username
, '--force'])
3278 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_password', args
.registry_password
, '--force'])
3280 cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(container_init
), '--force'])
3282 if not args
.skip_dashboard
:
3283 # Configure SSL port (cephadm only allows to configure dashboard SSL port)
3284 # if the user does not want to use SSL he can change this setting once the cluster is up
3285 cli(["config", "set", "mgr", "mgr/dashboard/ssl_server_port" , str(args
.ssl_dashboard_port
)])
3287 # configuring dashboard parameters
3288 logger
.info('Enabling the dashboard module...')
3289 cli(['mgr', 'module', 'enable', 'dashboard'])
3290 wait_for_mgr_restart()
3292 # dashboard crt and key
3293 if args
.dashboard_key
and args
.dashboard_crt
:
3294 logger
.info('Using provided dashboard certificate...')
3296 pathify(args
.dashboard_crt
.name
): '/tmp/dashboard.crt:z',
3297 pathify(args
.dashboard_key
.name
): '/tmp/dashboard.key:z'
3299 cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts
=mounts
)
3300 cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts
=mounts
)
3302 logger
.info('Generating a dashboard self-signed certificate...')
3303 cli(['dashboard', 'create-self-signed-cert'])
3305 logger
.info('Creating initial admin user...')
3306 password
= args
.initial_dashboard_password
or generate_password()
3307 tmp_password_file
= write_tmp(password
, uid
, gid
)
3308 cmd
= ['dashboard', 'ac-user-create', args
.initial_dashboard_user
, '-i', '/tmp/dashboard.pw', 'administrator', '--force-password']
3309 if not args
.dashboard_password_noupdate
:
3310 cmd
.append('--pwd-update-required')
3311 cli(cmd
, extra_mounts
={pathify(tmp_password_file
.name
): '/tmp/dashboard.pw:z'})
3312 logger
.info('Fetching dashboard port number...')
3313 out
= cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
3316 # Open dashboard port
3318 fw
.open_ports([port
])
3321 logger
.info('Ceph Dashboard is now available at:\n\n'
3322 '\t URL: https://%s:%s/\n'
3324 '\tPassword: %s\n' % (
3326 args
.initial_dashboard_user
,
3330 logger
.info('Applying %s to cluster' % args
.apply_spec
)
3332 with
open(args
.apply_spec
) as f
:
3334 if 'hostname:' in line
:
3335 line
= line
.replace('\n', '')
3336 split
= line
.split(': ')
3337 if split
[1] != host
:
3338 logger
.info('Adding ssh key to %s' % split
[1])
3340 ssh_key
= '/etc/ceph/ceph.pub'
3341 if args
.ssh_public_key
:
3342 ssh_key
= args
.ssh_public_key
.name
3343 out
, err
, code
= call_throws(['sudo', '-u', args
.ssh_user
, 'ssh-copy-id', '-f', '-i', ssh_key
, '-o StrictHostKeyChecking=no', '%s@%s' % (args
.ssh_user
, split
[1])])
3346 mounts
[pathify(args
.apply_spec
)] = '/tmp/spec.yml:z'
3348 out
= cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts
=mounts
)
3351 logger
.info('You can access the Ceph CLI with:\n\n'
3352 '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
3356 args
.output_keyring
))
3357 logger
.info('Please consider enabling telemetry to help improve Ceph:\n\n'
3358 '\tceph telemetry on\n\n'
3359 'For more information see:\n\n'
3360 '\thttps://docs.ceph.com/docs/master/mgr/telemetry/\n')
3361 logger
.info('Bootstrap complete.')
3364 ##################################
def command_registry_login():
    """Log into a custom container registry from CLI args or a --registry-json file."""
    if args.registry_json:
        logger.info("Pulling custom registry login info from %s." % args.registry_json)
        d = get_parm(args.registry_json)
        if d.get('url') and d.get('username') and d.get('password'):
            # propagate the parsed values onto args so later steps
            # (e.g. storing them in the mgr config) can reuse them
            args.registry_url = d.get('url')
            args.registry_username = d.get('username')
            args.registry_password = d.get('password')
            registry_login(args.registry_url, args.registry_username, args.registry_password)
        else:
            raise Error("json provided for custom registry login did not include all necessary fields. "
                        "Please setup json file as\n"
                        "{\n"
                        " \"url\": \"REGISTRY_URL\",\n"
                        " \"username\": \"REGISTRY_USERNAME\",\n"
                        " \"password\": \"REGISTRY_PASSWORD\"\n"
                        "}\n")
    elif args.registry_url and args.registry_username and args.registry_password:
        registry_login(args.registry_url, args.registry_username, args.registry_password)
    else:
        raise Error("Invalid custom registry arguments received. To login to a custom registry include "
                    "--registry-url, --registry-username and --registry-password "
                    "options or --registry-json option")
    return 0
def registry_login(url, username, password):
    # type: (str, str, str) -> None
    """Run `<container> login` against a custom registry.

    Raises Error (with the registry URL and user) when the login fails.
    """
    logger.info("Logging into custom registry.")
    try:
        out, _, _ = call_throws([container_path, 'login',
                                 '-u', username,
                                 '-p', password,
                                 url])
    except Exception:
        # fix: report the parameters actually used for this login attempt
        # rather than the global args values, which may differ
        raise Error("Failed to login to custom registry @ %s as %s with given password" % (url, username))
3401 ##################################
def extract_uid_gid_monitoring(daemon_type):
    # type: (str) -> Tuple[int, int]
    """Return the (uid, gid) a monitoring daemon runs as inside its container image."""

    if daemon_type == 'prometheus':
        return extract_uid_gid(file_path='/etc/prometheus')
    if daemon_type == 'node-exporter':
        # node-exporter images run as the fixed 'nobody' user
        return 65534, 65534
    if daemon_type == 'grafana':
        return extract_uid_gid(file_path='/var/lib/grafana')
    if daemon_type == 'alertmanager':
        return extract_uid_gid(file_path=['/etc/alertmanager', '/etc/prometheus'])

    raise Error("{} not implemented yet".format(daemon_type))
3421 def command_deploy():
3423 daemon_type
, daemon_id
= args
.name
.split('.', 1)
3425 l
= FileLock(args
.fsid
)
3428 if daemon_type
not in get_supported_daemons():
3429 raise Error('daemon type %s not recognized' % daemon_type
)
3432 unit_name
= get_unit_name(args
.fsid
, daemon_type
, daemon_id
)
3433 container_name
= 'ceph-%s-%s.%s' % (args
.fsid
, daemon_type
, daemon_id
)
3434 (_
, state
, _
) = check_unit(unit_name
)
3435 if state
== 'running' or is_container_running(container_name
):
3439 logger
.info('%s daemon %s ...' % ('Reconfig', args
.name
))
3441 logger
.info('%s daemon %s ...' % ('Redeploy', args
.name
))
3443 logger
.info('%s daemon %s ...' % ('Deploy', args
.name
))
3445 # Get and check ports explicitly required to be opened
3446 daemon_ports
= [] # type: List[int]
3448 daemon_ports
= list(map(int, args
.tcp_ports
.split()))
3450 if daemon_type
in Ceph
.daemons
:
3451 config
, keyring
= get_config_and_keyring()
3452 uid
, gid
= extract_uid_gid()
3453 make_var_run(args
.fsid
, uid
, gid
)
3455 c
= get_container(args
.fsid
, daemon_type
, daemon_id
,
3456 ptrace
=args
.allow_ptrace
)
3457 deploy_daemon(args
.fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
3458 config
=config
, keyring
=keyring
,
3459 osd_fsid
=args
.osd_fsid
,
3460 reconfig
=args
.reconfig
,
3463 elif daemon_type
in Monitoring
.components
:
3464 # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
3466 if not args
.reconfig
and not redeploy
:
3467 daemon_ports
.extend(Monitoring
.port_map
[daemon_type
])
3469 # make sure provided config-json is sufficient
3470 config
= get_parm(args
.config_json
) # type: ignore
3471 required_files
= Monitoring
.components
[daemon_type
].get('config-json-files', list())
3472 required_args
= Monitoring
.components
[daemon_type
].get('config-json-args', list())
3474 if not config
or not all(c
in config
.get('files', {}).keys() for c
in required_files
): # type: ignore
3475 raise Error("{} deployment requires config-json which must "
3476 "contain file content for {}".format(daemon_type
.capitalize(), ', '.join(required_files
)))
3478 if not config
or not all(c
in config
.keys() for c
in required_args
): # type: ignore
3479 raise Error("{} deployment requires config-json which must "
3480 "contain arg for {}".format(daemon_type
.capitalize(), ', '.join(required_args
)))
3482 uid
, gid
= extract_uid_gid_monitoring(daemon_type
)
3483 c
= get_container(args
.fsid
, daemon_type
, daemon_id
)
3484 deploy_daemon(args
.fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
3485 reconfig
=args
.reconfig
,
3488 elif daemon_type
== NFSGanesha
.daemon_type
:
3489 if not args
.reconfig
and not redeploy
:
3490 daemon_ports
.extend(NFSGanesha
.port_map
.values())
3492 config
, keyring
= get_config_and_keyring()
3493 # TODO: extract ganesha uid/gid (997, 994) ?
3494 uid
, gid
= extract_uid_gid()
3495 c
= get_container(args
.fsid
, daemon_type
, daemon_id
)
3496 deploy_daemon(args
.fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
3497 config
=config
, keyring
=keyring
,
3498 reconfig
=args
.reconfig
,
3501 elif daemon_type
== CephIscsi
.daemon_type
:
3502 config
, keyring
= get_config_and_keyring()
3503 uid
, gid
= extract_uid_gid()
3504 c
= get_container(args
.fsid
, daemon_type
, daemon_id
)
3505 deploy_daemon(args
.fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
3506 config
=config
, keyring
=keyring
,
3507 reconfig
=args
.reconfig
,
3510 elif daemon_type
== CustomContainer
.daemon_type
:
3511 cc
= CustomContainer
.init(args
.fsid
, daemon_id
)
3512 if not args
.reconfig
and not redeploy
:
3513 daemon_ports
.extend(cc
.ports
)
3514 c
= get_container(args
.fsid
, daemon_type
, daemon_id
,
3515 privileged
=cc
.privileged
,
3516 ptrace
=args
.allow_ptrace
)
3517 deploy_daemon(args
.fsid
, daemon_type
, daemon_id
, c
,
3518 uid
=cc
.uid
, gid
=cc
.gid
, config
=None,
3519 keyring
=None, reconfig
=args
.reconfig
,
3523 raise Error('daemon type {} not implemented in command_deploy function'
3524 .format(daemon_type
))
3526 ##################################
3532 (daemon_type
, daemon_id
) = args
.name
.split('.', 1)
3533 c
= get_container(args
.fsid
, daemon_type
, daemon_id
)
3534 command
= c
.run_cmd()
3535 return call_timeout(command
, args
.timeout
)
3537 ##################################
3543 def command_shell():
3546 make_log_dir(args
.fsid
)
3548 if '.' in args
.name
:
3549 (daemon_type
, daemon_id
) = args
.name
.split('.', 1)
3551 daemon_type
= args
.name
3554 daemon_type
= 'osd' # get the most mounts
3557 if daemon_id
and not args
.fsid
:
3558 raise Error('must pass --fsid to specify cluster')
3560 # use /etc/ceph files by default, if present. we do this instead of
3561 # making these defaults in the arg parser because we don't want an error
3562 # if they don't exist.
3563 if not args
.keyring
and os
.path
.exists(SHELL_DEFAULT_KEYRING
):
3564 args
.keyring
= SHELL_DEFAULT_KEYRING
3566 container_args
= [] # type: List[str]
3567 mounts
= get_container_mounts(args
.fsid
, daemon_type
, daemon_id
,
3568 no_config
=True if args
.config
else False)
3569 binds
= get_container_binds(args
.fsid
, daemon_type
, daemon_id
)
3571 mounts
[pathify(args
.config
)] = '/etc/ceph/ceph.conf:z'
3573 mounts
[pathify(args
.keyring
)] = '/etc/ceph/ceph.keyring:z'
3575 for _mount
in args
.mount
:
3576 split_src_dst
= _mount
.split(':')
3577 mount
= pathify(split_src_dst
[0])
3578 filename
= os
.path
.basename(split_src_dst
[0])
3579 if len(split_src_dst
) > 1:
3580 dst
= split_src_dst
[1] + ':z' if len(split_src_dst
) == 3 else split_src_dst
[1]
3583 mounts
[mount
] = '/mnt/{}:z'.format(filename
)
3585 command
= args
.command
3591 '-e', "PS1=%s" % CUSTOM_PS1
,
3594 home
= os
.path
.join(args
.data_dir
, args
.fsid
, 'home')
3595 if not os
.path
.exists(home
):
3596 logger
.debug('Creating root home at %s' % home
)
3597 makedirs(home
, 0, 0, 0o660)
3598 if os
.path
.exists('/etc/skel'):
3599 for f
in os
.listdir('/etc/skel'):
3600 if f
.startswith('.bash'):
3601 shutil
.copyfile(os
.path
.join('/etc/skel', f
),
3602 os
.path
.join(home
, f
))
3603 mounts
[home
] = '/root'
3607 entrypoint
='doesnotmatter',
3609 container_args
=container_args
,
3610 volume_mounts
=mounts
,
3614 command
= c
.shell_cmd(command
)
3616 return call_timeout(command
, args
.timeout
)
3618 ##################################
3622 def command_enter():
3625 raise Error('must pass --fsid to specify cluster')
3626 (daemon_type
, daemon_id
) = args
.name
.split('.', 1)
3627 container_args
= [] # type: List[str]
3629 command
= args
.command
3635 '-e', "PS1=%s" % CUSTOM_PS1
,
3639 entrypoint
='doesnotmatter',
3640 container_args
=container_args
,
3641 cname
='ceph-%s-%s.%s' % (args
.fsid
, daemon_type
, daemon_id
),
3643 command
= c
.exec_cmd(command
)
3644 return call_timeout(command
, args
.timeout
)
3646 ##################################
3651 def command_ceph_volume():
3654 make_log_dir(args
.fsid
)
3656 l
= FileLock(args
.fsid
)
3659 (uid
, gid
) = (0, 0) # ceph-volume runs as root
3660 mounts
= get_container_mounts(args
.fsid
, 'osd', None)
3665 (config
, keyring
) = get_config_and_keyring()
3669 tmp_config
= write_tmp(config
, uid
, gid
)
3670 mounts
[tmp_config
.name
] = '/etc/ceph/ceph.conf:z'
3674 tmp_keyring
= write_tmp(keyring
, uid
, gid
)
3675 mounts
[tmp_keyring
.name
] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'
3679 entrypoint
='/usr/sbin/ceph-volume',
3683 volume_mounts
=mounts
,
3685 out
, err
, code
= call_throws(c
.run_cmd(), verbosity
=CallVerbosity
.VERBOSE
)
3689 ##################################
3696 raise Error('must pass --fsid to specify cluster')
3698 unit_name
= get_unit_name_by_daemon_name(args
.fsid
, args
.name
)
3704 verbosity
=CallVerbosity
.VERBOSE
,
3708 ##################################
3715 raise Error('must pass --fsid to specify cluster')
3717 unit_name
= get_unit_name_by_daemon_name(args
.fsid
, args
.name
)
3719 cmd
= [find_program('journalctl')]
3720 cmd
.extend(['-u', unit_name
])
3722 cmd
.extend(args
.command
)
3724 # call this directly, without our wrapper, so that we get an unmolested
3725 # stdout with logger prefixing.
3726 logger
.debug("Running command: %s" % ' '.join(cmd
))
3727 subprocess
.call(cmd
) # type: ignore
3729 ##################################
3732 def list_networks():
3733 # type: () -> Dict[str,List[str]]
3735 ## sadly, 18.04's iproute2 4.15.0-2ubun doesn't support the -j flag,
3736 ## so we'll need to use a regex to parse 'ip' command output.
3737 #out, _, _ = call_throws(['ip', '-j', 'route', 'ls'])
3738 #j = json.loads(out)
3741 res
= _list_ipv4_networks()
3742 res
.update(_list_ipv6_networks())
def _list_ipv4_networks():
    # type: () -> Dict[str, List[str]]
    """Return the host's IPv4 networks, parsed from `ip route ls` output."""
    route_text = call_throws([find_executable('ip'), 'route', 'ls'])[0]
    return _parse_ipv4_route(route_text)
3751 def _parse_ipv4_route(out
):
3752 r
= {} # type: Dict[str,List[str]]
3753 p
= re
.compile(r
'^(\S+) (.*)scope link (.*)src (\S+)')
3754 for line
in out
.splitlines():
def _list_ipv6_networks():
    # type: () -> Dict[str, List[str]]
    """Return the host's IPv6 networks from `ip -6 route ls` + `ip -6 addr ls`."""
    route_text = call_throws([find_executable('ip'), '-6', 'route', 'ls'])[0]
    addr_text = call_throws([find_executable('ip'), '-6', 'addr', 'ls'])[0]
    return _parse_ipv6_route(route_text, addr_text)
3772 def _parse_ipv6_route(routes
, ips
):
3773 r
= {} # type: Dict[str,List[str]]
3774 route_p
= re
.compile(r
'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$')
3775 ip_p
= re
.compile(r
'^\s+inet6 (\S+)/(.*)scope (.*)$')
3776 for line
in routes
.splitlines():
3777 m
= route_p
.findall(line
)
3778 if not m
or m
[0][0].lower() == 'default':
3784 for line
in ips
.splitlines():
3785 m
= ip_p
.findall(line
)
3789 # find the network it belongs to
3790 net
= [n
for n
in r
.keys()
3791 if ipaddress
.ip_address(unicode(ip
)) in ipaddress
.ip_network(unicode(n
))]
3793 r
[net
[0]].append(ip
)
3798 def command_list_networks():
3801 print(json
.dumps(r
, indent
=4))
3803 ##################################
3809 ls
= list_daemons(detail
=not args
.no_detail
,
3810 legacy_dir
=args
.legacy_dir
)
3811 print(json
.dumps(ls
, indent
=4))
3814 def list_daemons(detail
=True, legacy_dir
=None):
3815 # type: (bool, Optional[str]) -> List[Dict[str, str]]
3819 data_dir
= args
.data_dir
3820 if legacy_dir
is not None:
3821 data_dir
= os
.path
.abspath(legacy_dir
+ data_dir
)
3823 # keep track of ceph versions we see
3824 seen_versions
= {} # type: Dict[str, Optional[str]]
3827 if os
.path
.exists(data_dir
):
3828 for i
in os
.listdir(data_dir
):
3829 if i
in ['mon', 'osd', 'mds', 'mgr']:
3831 for j
in os
.listdir(os
.path
.join(data_dir
, i
)):
3834 (cluster
, daemon_id
) = j
.split('-', 1)
3835 fsid
= get_legacy_daemon_fsid(
3836 cluster
, daemon_type
, daemon_id
,
3837 legacy_dir
=legacy_dir
)
3838 legacy_unit_name
= 'ceph-%s@%s' % (daemon_type
, daemon_id
)
3841 'name': '%s.%s' % (daemon_type
, daemon_id
),
3842 'fsid': fsid
if fsid
is not None else 'unknown',
3843 'systemd_unit': legacy_unit_name
,
3846 (i
['enabled'], i
['state'], _
) = check_unit(legacy_unit_name
)
3847 if not host_version
:
3849 out
, err
, code
= call(['ceph', '-v'])
3850 if not code
and out
.startswith('ceph version '):
3851 host_version
= out
.split(' ')[2]
3854 i
['host_version'] = host_version
3857 fsid
= str(i
) # convince mypy that fsid is a str here
3858 for j
in os
.listdir(os
.path
.join(data_dir
, i
)):
3861 (daemon_type
, daemon_id
) = j
.split('.', 1)
3862 unit_name
= get_unit_name(fsid
,
3868 'style': 'cephadm:v1',
3871 'systemd_unit': unit_name
,
3875 (i
['enabled'], i
['state'], _
) = check_unit(unit_name
)
3882 if 'podman' in container_path
and get_podman_version() < (1, 6, 2):
3883 image_field
= '.ImageID'
3885 image_field
= '.Image'
3887 out
, err
, code
= call(
3889 container_path
, 'inspect',
3890 '--format', '{{.Id}},{{.Config.Image}},{{%s}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}' % image_field
,
3891 'ceph-%s-%s' % (fsid
, j
)
3893 verbosity
=CallVerbosity
.DEBUG
)
3895 (container_id
, image_name
, image_id
, start
,
3896 version
) = out
.strip().split(',')
3897 image_id
= normalize_container_id(image_id
)
3898 daemon_type
= name
.split('.', 1)[0]
3899 start_stamp
= try_convert_datetime(start
)
3900 if not version
or '.' not in version
:
3901 version
= seen_versions
.get(image_id
, None)
3902 if daemon_type
== NFSGanesha
.daemon_type
:
3903 version
= NFSGanesha
.get_version(container_id
)
3904 if daemon_type
== CephIscsi
.daemon_type
:
3905 version
= CephIscsi
.get_version(container_id
)
3907 if daemon_type
in Ceph
.daemons
:
3908 out
, err
, code
= call(
3909 [container_path
, 'exec', container_id
,
3912 out
.startswith('ceph version '):
3913 version
= out
.split(' ')[2]
3914 seen_versions
[image_id
] = version
3915 elif daemon_type
== 'grafana':
3916 out
, err
, code
= call(
3917 [container_path
, 'exec', container_id
,
3918 'grafana-server', '-v'])
3920 out
.startswith('Version '):
3921 version
= out
.split(' ')[1]
3922 seen_versions
[image_id
] = version
3923 elif daemon_type
in ['prometheus',
3926 version
= Monitoring
.get_version(container_path
, container_id
, daemon_type
)
3927 seen_versions
[image_id
] = version
3928 elif daemon_type
== CustomContainer
.daemon_type
:
3929 # Because a custom container can contain
3930 # everything, we do not know which command
3931 # to execute to get the version.
3934 logger
.warning('version for unknown daemon type %s' % daemon_type
)
3936 vfile
= os
.path
.join(data_dir
, fsid
, j
, 'unit.image') # type: ignore
3938 with
open(vfile
, 'r') as f
:
3939 image_name
= f
.read().strip() or None
3942 i
['container_id'] = container_id
3943 i
['container_image_name'] = image_name
3944 i
['container_image_id'] = image_id
3945 i
['version'] = version
3946 i
['started'] = start_stamp
3947 i
['created'] = get_file_timestamp(
3948 os
.path
.join(data_dir
, fsid
, j
, 'unit.created')
3950 i
['deployed'] = get_file_timestamp(
3951 os
.path
.join(data_dir
, fsid
, j
, 'unit.image'))
3952 i
['configured'] = get_file_timestamp(
3953 os
.path
.join(data_dir
, fsid
, j
, 'unit.configured'))
3960 def get_daemon_description(fsid
, name
, detail
=False, legacy_dir
=None):
3961 # type: (str, str, bool, Optional[str]) -> Dict[str, str]
3963 for d
in list_daemons(detail
=detail
, legacy_dir
=legacy_dir
):
3964 if d
['fsid'] != fsid
:
3966 if d
['name'] != name
:
3969 raise Error('Daemon not found: {}. See `cephadm ls`'.format(name
))
3972 ##################################
3975 def command_adopt():
3978 if not args
.skip_pull
:
3979 _pull_image(args
.image
)
3981 (daemon_type
, daemon_id
) = args
.name
.split('.', 1)
3984 if args
.style
!= 'legacy':
3985 raise Error('adoption of style %s not implemented' % args
.style
)
3988 fsid
= get_legacy_daemon_fsid(args
.cluster
,
3991 legacy_dir
=args
.legacy_dir
)
3993 raise Error('could not detect legacy fsid; set fsid in ceph.conf')
3997 # call correct adoption
3998 if daemon_type
in Ceph
.daemons
:
3999 command_adopt_ceph(daemon_type
, daemon_id
, fsid
);
4000 elif daemon_type
== 'prometheus':
4001 command_adopt_prometheus(daemon_id
, fsid
)
4002 elif daemon_type
== 'grafana':
4003 command_adopt_grafana(daemon_id
, fsid
)
4004 elif daemon_type
== 'node-exporter':
4005 raise Error('adoption of node-exporter not implemented')
4006 elif daemon_type
== 'alertmanager':
4007 command_adopt_alertmanager(daemon_id
, fsid
)
4009 raise Error('daemon type %s not recognized' % daemon_type
)
4012 class AdoptOsd(object):
    def __init__(self, osd_data_dir, osd_id):
        # type: (str, str) -> None
        # Path to the legacy OSD's data directory on the host (the caller in
        # command_adopt_ceph passes '/var/lib/ceph/osd/<cluster>-<id>').
        self.osd_data_dir = osd_data_dir
        # OSD id, passed as a string (the daemon_id portion of 'osd.<id>').
        self.osd_id = osd_id
4018 def check_online_osd(self
):
4019 # type: () -> Tuple[Optional[str], Optional[str]]
4021 osd_fsid
, osd_type
= None, None
4023 path
= os
.path
.join(self
.osd_data_dir
, 'fsid')
4025 with
open(path
, 'r') as f
:
4026 osd_fsid
= f
.read().strip()
4027 logger
.info("Found online OSD at %s" % path
)
4029 logger
.info('Unable to read OSD fsid from %s' % path
)
4030 if os
.path
.exists(os
.path
.join(self
.osd_data_dir
, 'type')):
4031 with
open(os
.path
.join(self
.osd_data_dir
, 'type')) as f
:
4032 osd_type
= f
.read().strip()
4034 logger
.info('"type" file missing for OSD data dir')
4036 return osd_fsid
, osd_type
4038 def check_offline_lvm_osd(self
):
4039 # type: () -> Tuple[Optional[str], Optional[str]]
4041 osd_fsid
, osd_type
= None, None
4045 entrypoint
='/usr/sbin/ceph-volume',
4046 args
=['lvm', 'list', '--format=json'],
4049 out
, err
, code
= call_throws(c
.run_cmd())
4052 js
= json
.loads(out
)
4053 if self
.osd_id
in js
:
4054 logger
.info("Found offline LVM OSD {}".format(self
.osd_id
))
4055 osd_fsid
= js
[self
.osd_id
][0]['tags']['ceph.osd_fsid']
4056 for device
in js
[self
.osd_id
]:
4057 if device
['tags']['ceph.type'] == 'block':
4058 osd_type
= 'bluestore'
4060 if device
['tags']['ceph.type'] == 'data':
4061 osd_type
= 'filestore'
4063 except ValueError as e
:
4064 logger
.info("Invalid JSON in ceph-volume lvm list: {}".format(e
))
4066 return osd_fsid
, osd_type
4068 def check_offline_simple_osd(self
):
4069 # type: () -> Tuple[Optional[str], Optional[str]]
4071 osd_fsid
, osd_type
= None, None
4073 osd_file
= glob("/etc/ceph/osd/{}-[a-f0-9-]*.json".format(self
.osd_id
))
4074 if len(osd_file
) == 1:
4075 with
open(osd_file
[0], 'r') as f
:
4077 js
= json
.loads(f
.read())
4078 logger
.info("Found offline simple OSD {}".format(self
.osd_id
))
4079 osd_fsid
= js
["fsid"]
4080 osd_type
= js
["type"]
4081 if osd_type
!= "filestore":
4082 # need this to be mounted for the adopt to work, as it
4083 # needs to move files from this directory
4084 call_throws(['mount', js
["data"]["path"], self
.osd_data_dir
])
4085 except ValueError as e
:
4086 logger
.info("Invalid JSON in {}: {}".format(osd_file
, e
))
4088 return osd_fsid
, osd_type
4091 def command_adopt_ceph(daemon_type
, daemon_id
, fsid
):
4092 # type: (str, str, str) -> None
4094 (uid
, gid
) = extract_uid_gid()
4096 data_dir_src
= ('/var/lib/ceph/%s/%s-%s' %
4097 (daemon_type
, args
.cluster
, daemon_id
))
4098 data_dir_src
= os
.path
.abspath(args
.legacy_dir
+ data_dir_src
)
4100 if not os
.path
.exists(data_dir_src
):
4101 raise Error("{}.{} data directory '{}' does not exist. "
4102 "Incorrect ID specified, or daemon alrady adopted?".format(
4103 daemon_type
, daemon_id
, data_dir_src
))
4106 if daemon_type
== 'osd':
4107 adopt_osd
= AdoptOsd(data_dir_src
, daemon_id
)
4108 osd_fsid
, osd_type
= adopt_osd
.check_online_osd()
4110 osd_fsid
, osd_type
= adopt_osd
.check_offline_lvm_osd()
4112 osd_fsid
, osd_type
= adopt_osd
.check_offline_simple_osd()
4114 raise Error('Unable to find OSD {}'.format(daemon_id
))
4115 logger
.info('objectstore_type is %s' % osd_type
)
4117 if osd_type
== 'filestore':
4118 raise Error('FileStore is not supported by cephadm')
4120 # NOTE: implicit assumption here that the units correspond to the
4121 # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
4123 unit_name
= 'ceph-%s@%s' % (daemon_type
, daemon_id
)
4124 (enabled
, state
, _
) = check_unit(unit_name
)
4125 if state
== 'running':
4126 logger
.info('Stopping old systemd unit %s...' % unit_name
)
4127 call_throws(['systemctl', 'stop', unit_name
])
4129 logger
.info('Disabling old systemd unit %s...' % unit_name
)
4130 call_throws(['systemctl', 'disable', unit_name
])
4133 logger
.info('Moving data...')
4134 data_dir_dst
= make_data_dir(fsid
, daemon_type
, daemon_id
,
4136 move_files(glob(os
.path
.join(data_dir_src
, '*')),
4139 logger
.debug('Remove dir \'%s\'' % (data_dir_src
))
4140 if os
.path
.ismount(data_dir_src
):
4141 call_throws(['umount', data_dir_src
])
4142 os
.rmdir(data_dir_src
)
4144 logger
.info('Chowning content...')
4145 call_throws(['chown', '-c', '-R', '%d.%d' % (uid
, gid
), data_dir_dst
])
4147 if daemon_type
== 'mon':
4148 # rename *.ldb -> *.sst, in case they are coming from ubuntu
4149 store
= os
.path
.join(data_dir_dst
, 'store.db')
4151 if os
.path
.exists(store
):
4152 for oldf
in os
.listdir(store
):
4153 if oldf
.endswith('.ldb'):
4154 newf
= oldf
.replace('.ldb', '.sst')
4155 oldp
= os
.path
.join(store
, oldf
)
4156 newp
= os
.path
.join(store
, newf
)
4157 logger
.debug('Renaming %s -> %s' % (oldp
, newp
))
4158 os
.rename(oldp
, newp
)
4160 logger
.info('Renamed %d leveldb *.ldb files to *.sst',
4162 if daemon_type
== 'osd':
4163 for n
in ['block', 'block.db', 'block.wal']:
4164 p
= os
.path
.join(data_dir_dst
, n
)
4165 if os
.path
.exists(p
):
4166 logger
.info('Chowning %s...' % p
)
4167 os
.chown(p
, uid
, gid
)
4168 # disable the ceph-volume 'simple' mode files on the host
4169 simple_fn
= os
.path
.join('/etc/ceph/osd',
4170 '%s-%s.json' % (daemon_id
, osd_fsid
))
4171 if os
.path
.exists(simple_fn
):
4172 new_fn
= simple_fn
+ '.adopted-by-cephadm'
4173 logger
.info('Renaming %s -> %s', simple_fn
, new_fn
)
4174 os
.rename(simple_fn
, new_fn
)
4175 logger
.info('Disabling host unit ceph-volume@ simple unit...')
4176 call(['systemctl', 'disable',
4177 'ceph-volume@simple-%s-%s.service' % (daemon_id
, osd_fsid
)])
4179 # assume this is an 'lvm' c-v for now, but don't error
4181 logger
.info('Disabling host unit ceph-volume@ lvm unit...')
4182 call(['systemctl', 'disable',
4183 'ceph-volume@lvm-%s-%s.service' % (daemon_id
, osd_fsid
)])
4186 config_src
= '/etc/ceph/%s.conf' % (args
.cluster
)
4187 config_src
= os
.path
.abspath(args
.legacy_dir
+ config_src
)
4188 config_dst
= os
.path
.join(data_dir_dst
, 'config')
4189 copy_files([config_src
], config_dst
, uid
=uid
, gid
=gid
)
4192 logger
.info('Moving logs...')
4193 log_dir_src
= ('/var/log/ceph/%s-%s.%s.log*' %
4194 (args
.cluster
, daemon_type
, daemon_id
))
4195 log_dir_src
= os
.path
.abspath(args
.legacy_dir
+ log_dir_src
)
4196 log_dir_dst
= make_log_dir(fsid
, uid
=uid
, gid
=gid
)
4197 move_files(glob(log_dir_src
),
4201 logger
.info('Creating new units...')
4202 make_var_run(fsid
, uid
, gid
)
4203 c
= get_container(fsid
, daemon_type
, daemon_id
)
4204 deploy_daemon_units(fsid
, uid
, gid
, daemon_type
, daemon_id
, c
,
4205 enable
=True, # unconditionally enable the new unit
4206 start
=(state
== 'running' or args
.force_start
),
4208 update_firewalld(daemon_type
)
4211 def command_adopt_prometheus(daemon_id
, fsid
):
4212 # type: (str, str) -> None
4214 daemon_type
= 'prometheus'
4215 (uid
, gid
) = extract_uid_gid_monitoring(daemon_type
)
4217 _stop_and_disable('prometheus')
4219 data_dir_dst
= make_data_dir(fsid
, daemon_type
, daemon_id
,
4223 config_src
= '/etc/prometheus/prometheus.yml'
4224 config_src
= os
.path
.abspath(args
.legacy_dir
+ config_src
)
4225 config_dst
= os
.path
.join(data_dir_dst
, 'etc/prometheus')
4226 makedirs(config_dst
, uid
, gid
, 0o755)
4227 copy_files([config_src
], config_dst
, uid
=uid
, gid
=gid
)
4230 data_src
= '/var/lib/prometheus/metrics/'
4231 data_src
= os
.path
.abspath(args
.legacy_dir
+ data_src
)
4232 data_dst
= os
.path
.join(data_dir_dst
, 'data')
4233 copy_tree([data_src
], data_dst
, uid
=uid
, gid
=gid
)
4235 make_var_run(fsid
, uid
, gid
)
4236 c
= get_container(fsid
, daemon_type
, daemon_id
)
4237 deploy_daemon(fsid
, daemon_type
, daemon_id
, c
, uid
, gid
)
4238 update_firewalld(daemon_type
)
4241 def command_adopt_grafana(daemon_id
, fsid
):
4242 # type: (str, str) -> None
4244 daemon_type
= 'grafana'
4245 (uid
, gid
) = extract_uid_gid_monitoring(daemon_type
)
4247 _stop_and_disable('grafana-server')
4249 data_dir_dst
= make_data_dir(fsid
, daemon_type
, daemon_id
,
4253 config_src
= '/etc/grafana/grafana.ini'
4254 config_src
= os
.path
.abspath(args
.legacy_dir
+ config_src
)
4255 config_dst
= os
.path
.join(data_dir_dst
, 'etc/grafana')
4256 makedirs(config_dst
, uid
, gid
, 0o755)
4257 copy_files([config_src
], config_dst
, uid
=uid
, gid
=gid
)
4259 prov_src
= '/etc/grafana/provisioning/'
4260 prov_src
= os
.path
.abspath(args
.legacy_dir
+ prov_src
)
4261 prov_dst
= os
.path
.join(data_dir_dst
, 'etc/grafana')
4262 copy_tree([prov_src
], prov_dst
, uid
=uid
, gid
=gid
)
4265 cert
= '/etc/grafana/grafana.crt'
4266 key
= '/etc/grafana/grafana.key'
4267 if os
.path
.exists(cert
) and os
.path
.exists(key
):
4268 cert_src
= '/etc/grafana/grafana.crt'
4269 cert_src
= os
.path
.abspath(args
.legacy_dir
+ cert_src
)
4270 makedirs(os
.path
.join(data_dir_dst
, 'etc/grafana/certs'), uid
, gid
, 0o755)
4271 cert_dst
= os
.path
.join(data_dir_dst
, 'etc/grafana/certs/cert_file')
4272 copy_files([cert_src
], cert_dst
, uid
=uid
, gid
=gid
)
4274 key_src
= '/etc/grafana/grafana.key'
4275 key_src
= os
.path
.abspath(args
.legacy_dir
+ key_src
)
4276 key_dst
= os
.path
.join(data_dir_dst
, 'etc/grafana/certs/cert_key')
4277 copy_files([key_src
], key_dst
, uid
=uid
, gid
=gid
)
4279 _adjust_grafana_ini(os
.path
.join(config_dst
, 'grafana.ini'))
4281 logger
.debug("Skipping ssl, missing cert {} or key {}".format(cert
, key
))
4283 # data - possible custom dashboards/plugins
4284 data_src
= '/var/lib/grafana/'
4285 data_src
= os
.path
.abspath(args
.legacy_dir
+ data_src
)
4286 data_dst
= os
.path
.join(data_dir_dst
, 'data')
4287 copy_tree([data_src
], data_dst
, uid
=uid
, gid
=gid
)
4289 make_var_run(fsid
, uid
, gid
)
4290 c
= get_container(fsid
, daemon_type
, daemon_id
)
4291 deploy_daemon(fsid
, daemon_type
, daemon_id
, c
, uid
, gid
)
4292 update_firewalld(daemon_type
)
4295 def command_adopt_alertmanager(daemon_id
, fsid
):
4296 # type: (str, str) -> None
4298 daemon_type
= 'alertmanager'
4299 (uid
, gid
) = extract_uid_gid_monitoring(daemon_type
)
4301 _stop_and_disable('prometheus-alertmanager')
4303 data_dir_dst
= make_data_dir(fsid
, daemon_type
, daemon_id
,
4307 config_src
= '/etc/prometheus/alertmanager.yml'
4308 config_src
= os
.path
.abspath(args
.legacy_dir
+ config_src
)
4309 config_dst
= os
.path
.join(data_dir_dst
, 'etc/alertmanager')
4310 makedirs(config_dst
, uid
, gid
, 0o755)
4311 copy_files([config_src
], config_dst
, uid
=uid
, gid
=gid
)
4314 data_src
= '/var/lib/prometheus/alertmanager/'
4315 data_src
= os
.path
.abspath(args
.legacy_dir
+ data_src
)
4316 data_dst
= os
.path
.join(data_dir_dst
, 'etc/alertmanager/data')
4317 copy_tree([data_src
], data_dst
, uid
=uid
, gid
=gid
)
4319 make_var_run(fsid
, uid
, gid
)
4320 c
= get_container(fsid
, daemon_type
, daemon_id
)
4321 deploy_daemon(fsid
, daemon_type
, daemon_id
, c
, uid
, gid
)
4322 update_firewalld(daemon_type
)
4325 def _adjust_grafana_ini(filename
):
4326 # type: (str) -> None
4328 # Update cert_file, cert_key pathnames in server section
4329 # ConfigParser does not preserve comments
4331 with
open(filename
, "r") as grafana_ini
:
4332 lines
= grafana_ini
.readlines()
4333 with
open("{}.new".format(filename
), "w") as grafana_ini
:
4334 server_section
=False
4336 if line
.startswith('['):
4337 server_section
=False
4338 if line
.startswith('[server]'):
4341 line
= re
.sub(r
'^cert_file.*',
4342 'cert_file = /etc/grafana/certs/cert_file', line
)
4343 line
= re
.sub(r
'^cert_key.*',
4344 'cert_key = /etc/grafana/certs/cert_key', line
)
4345 grafana_ini
.write(line
)
4346 os
.rename("{}.new".format(filename
), filename
)
4347 except OSError as err
:
4348 raise Error("Cannot update {}: {}".format(filename
, err
))
4351 def _stop_and_disable(unit_name
):
4352 # type: (str) -> None
4354 (enabled
, state
, _
) = check_unit(unit_name
)
4355 if state
== 'running':
4356 logger
.info('Stopping old systemd unit %s...' % unit_name
)
4357 call_throws(['systemctl', 'stop', unit_name
])
4359 logger
.info('Disabling old systemd unit %s...' % unit_name
)
4360 call_throws(['systemctl', 'disable', unit_name
])
4363 ##################################
4365 def command_rm_daemon():
4368 l
= FileLock(args
.fsid
)
4371 unit_name
= get_unit_name_by_daemon_name(args
.fsid
, args
.name
)
4373 (daemon_type
, daemon_id
) = args
.name
.split('.', 1)
4374 if daemon_type
in ['mon', 'osd'] and not args
.force
:
4375 raise Error('must pass --force to proceed: '
4376 'this command may destroy precious data!')
4378 call(['systemctl', 'stop', unit_name
],
4379 verbosity
=CallVerbosity
.DEBUG
)
4380 call(['systemctl', 'reset-failed', unit_name
],
4381 verbosity
=CallVerbosity
.DEBUG
)
4382 call(['systemctl', 'disable', unit_name
],
4383 verbosity
=CallVerbosity
.DEBUG
)
4384 data_dir
= get_data_dir(args
.fsid
, daemon_type
, daemon_id
)
4385 if daemon_type
in ['mon', 'osd', 'prometheus'] and \
4386 not args
.force_delete_data
:
4387 # rename it out of the way -- do not delete
4388 backup_dir
= os
.path
.join(args
.data_dir
, args
.fsid
, 'removed')
4389 if not os
.path
.exists(backup_dir
):
4390 makedirs(backup_dir
, 0, 0, DATA_DIR_MODE
)
4391 dirname
= '%s.%s_%s' % (daemon_type
, daemon_id
,
4392 datetime
.datetime
.utcnow().strftime(DATEFMT
))
4394 os
.path
.join(backup_dir
, dirname
))
4396 call_throws(['rm', '-rf', data_dir
])
4398 ##################################
4401 def command_rm_cluster():
4404 raise Error('must pass --force to proceed: '
4405 'this command may destroy precious data!')
4407 l
= FileLock(args
.fsid
)
4410 # stop + disable individual daemon units
4411 for d
in list_daemons(detail
=False):
4412 if d
['fsid'] != args
.fsid
:
4414 if d
['style'] != 'cephadm:v1':
4416 unit_name
= get_unit_name(args
.fsid
, d
['name'])
4417 call(['systemctl', 'stop', unit_name
],
4418 verbosity
=CallVerbosity
.DEBUG
)
4419 call(['systemctl', 'reset-failed', unit_name
],
4420 verbosity
=CallVerbosity
.DEBUG
)
4421 call(['systemctl', 'disable', unit_name
],
4422 verbosity
=CallVerbosity
.DEBUG
)
4425 for unit_name
in ['ceph-%s.target' % args
.fsid
]:
4426 call(['systemctl', 'stop', unit_name
],
4427 verbosity
=CallVerbosity
.DEBUG
)
4428 call(['systemctl', 'reset-failed', unit_name
],
4429 verbosity
=CallVerbosity
.DEBUG
)
4430 call(['systemctl', 'disable', unit_name
],
4431 verbosity
=CallVerbosity
.DEBUG
)
4433 slice_name
= 'system-%s.slice' % (('ceph-%s' % args
.fsid
).replace('-',
4435 call(['systemctl', 'stop', slice_name
],
4436 verbosity
=CallVerbosity
.DEBUG
)
4439 call_throws(['rm', '-f', args
.unit_dir
+
4440 '/ceph-%s@.service' % args
.fsid
])
4441 call_throws(['rm', '-f', args
.unit_dir
+
4442 '/ceph-%s.target' % args
.fsid
])
4443 call_throws(['rm', '-rf',
4444 args
.unit_dir
+ '/ceph-%s.target.wants' % args
.fsid
])
4446 call_throws(['rm', '-rf', args
.data_dir
+ '/' + args
.fsid
])
4448 call_throws(['rm', '-rf', args
.log_dir
+ '/' + args
.fsid
])
4449 call_throws(['rm', '-rf', args
.log_dir
+
4450 '/*.wants/ceph-%s@*' % args
.fsid
])
4451 # rm logrotate config
4452 call_throws(['rm', '-f', args
.logrotate_dir
+ '/ceph-%s' % args
.fsid
])
4454 # clean up config, keyring, and pub key files
4455 files
= ['/etc/ceph/ceph.conf', '/etc/ceph/ceph.pub', '/etc/ceph/ceph.client.admin.keyring']
4457 if os
.path
.exists(files
[0]):
4459 with
open(files
[0]) as f
:
4460 if args
.fsid
in f
.read():
4463 for n
in range(0, len(files
)):
4464 if os
.path
.exists(files
[n
]):
4468 ##################################
4470 def check_time_sync(enabler
=None):
4471 # type: (Optional[Packager]) -> bool
4473 'chrony.service', # 18.04 (at least)
4474 'chronyd.service', # el / opensuse
4475 'systemd-timesyncd.service',
4476 'ntpd.service', # el7 (at least)
4477 'ntp.service', # 18.04 (at least)
4478 'ntpsec.service', # 20.04 (at least) / buster
4480 if not check_units(units
, enabler
):
4481 logger
.warning('No time sync service is running; checked for %s' % units
)
4486 def command_check_host():
4488 global container_path
4491 commands
= ['systemctl', 'lvcreate']
4494 container_path
= find_program('docker')
4496 for i
in CONTAINER_PREFERENCE
:
4498 container_path
= find_program(i
)
4500 except Exception as e
:
4501 logger
.debug('Could not locate %s: %s' % (i
, e
))
4502 if not container_path
:
4503 errors
.append('Unable to locate any of %s' % CONTAINER_PREFERENCE
)
4505 logger
.info('podman|docker (%s) is present' % container_path
)
4507 for command
in commands
:
4509 find_program(command
)
4510 logger
.info('%s is present' % command
)
4512 errors
.append('%s binary does not appear to be installed' % command
)
4514 # check for configured+running chronyd or ntp
4515 if not check_time_sync():
4516 errors
.append('No time synchronization is active')
4518 if 'expect_hostname' in args
and args
.expect_hostname
:
4519 if get_hostname().lower() != args
.expect_hostname
.lower():
4520 errors
.append('hostname "%s" does not match expected hostname "%s"' % (
4521 get_hostname(), args
.expect_hostname
))
4522 logger
.info('Hostname "%s" matches what is expected.',
4523 args
.expect_hostname
)
4526 raise Error('\n'.join(errors
))
4528 logger
.info('Host looks OK')
4530 ##################################
4533 def command_prepare_host():
4535 logger
.info('Verifying podman|docker is present...')
4537 if not container_path
:
4539 pkg
= create_packager()
4540 pkg
.install_podman()
4542 logger
.info('Verifying lvm2 is present...')
4543 if not find_executable('lvcreate'):
4545 pkg
= create_packager()
4546 pkg
.install(['lvm2'])
4548 logger
.info('Verifying time synchronization is in place...')
4549 if not check_time_sync():
4551 pkg
= create_packager()
4552 pkg
.install(['chrony'])
4553 # check again, and this time try to enable
4555 check_time_sync(enabler
=pkg
)
4557 if 'expect_hostname' in args
and args
.expect_hostname
and args
.expect_hostname
!= get_hostname():
4558 logger
.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), args
.expect_hostname
))
4559 call_throws(['hostname', args
.expect_hostname
])
4560 with
open('/etc/hostname', 'w') as f
:
4561 f
.write(args
.expect_hostname
+ '\n')
4563 logger
.info('Repeating the final host check...')
4564 command_check_host()
4566 ##################################
4569 class CustomValidation(argparse
.Action
):
4571 def _check_name(self
, values
):
4573 (daemon_type
, daemon_id
) = values
.split('.', 1)
4575 raise argparse
.ArgumentError(self
,
4576 "must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com")
4578 daemons
= get_supported_daemons()
4579 if daemon_type
not in daemons
:
4580 raise argparse
.ArgumentError(self
,
4581 "name must declare the type of daemon e.g. "
4582 "{}".format(', '.join(daemons
)))
4584 def __call__(self
, parser
, namespace
, values
, option_string
=None):
4585 if self
.dest
== "name":
4586 self
._check
_name
(values
)
4587 setattr(namespace
, self
.dest
, values
)
4589 ##################################
def get_distro():
    # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
    """Parse /etc/os-release and return (distro id, version id, codename).

    Any field missing from the file is returned as None.
    """
    distro = None
    distro_version = None
    distro_codename = None
    with open('/etc/os-release', 'r') as f:
        for line in f.readlines():
            line = line.strip()
            # skip blanks, comments and anything that is not KEY=value
            if '=' not in line or line.startswith('#'):
                continue
            (var, val) = line.split('=', 1)
            # strip surrounding double quotes, if present
            if val[0] == '"' and val[-1] == '"':
                val = val[1:-1]
            if var == 'ID':
                distro = val.lower()
            elif var == 'VERSION_ID':
                distro_version = val.lower()
            elif var == 'VERSION_CODENAME':
                distro_codename = val.lower()
    return distro, distro_version, distro_codename
class Packager(object):
    """Base class for OS-specific package/repo management backends."""

    def __init__(self, stable=None, version=None, branch=None, commit=None):
        # Exactly one selection mode is allowed: a stable release name, a
        # specific version, a dev branch (optionally with a commit), or nothing.
        assert \
            (stable and not version and not branch and not commit) or \
            (not stable and version and not branch and not commit) or \
            (not stable and not version and branch) or \
            (not stable and not version and not branch and not commit)
        self.stable = stable
        self.version = version
        self.branch = branch
        self.commit = commit

    def add_repo(self):
        raise NotImplementedError

    def rm_repo(self):
        raise NotImplementedError

    def query_shaman(self, distro, distro_version, branch, commit):
        """Resolve a dev repo via shaman and return the repo file content from chacra."""
        logger.info('Fetching repo metadata from shaman and chacra...')
        shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
            distro=distro,
            distro_version=distro_version,
            branch=branch,
            sha1=commit or 'latest',
            arch=platform.uname().machine  # NOTE(review): arch kwarg reconstructed — confirm against upstream
        )
        try:
            shaman_response = urlopen(shaman_url)
        except HTTPError as err:
            logger.error('repository not found in shaman (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, shaman_url))
        try:
            # shaman redirects to the concrete chacra repo URL
            chacra_url = shaman_response.geturl()
            chacra_response = urlopen(chacra_url)
        except HTTPError as err:
            logger.error('repository not found in chacra (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, chacra_url))
        return chacra_response.read().decode('utf-8')

    def repo_gpgkey(self):
        """Return (key URL, key name): release key for stable/version, autobuild otherwise."""
        if self.stable or self.version:
            return 'https://download.ceph.com/keys/release.asc', 'release'
        else:
            return 'https://download.ceph.com/keys/autobuild.asc', 'autobuild'

    def enable_service(self, service):
        """
        Start and enable the service (typically using systemd).
        """
        call_throws(['systemctl', 'enable', '--now', service])
class Apt(Packager):
    # NOTE(review): this mapping was dropped by the extraction; reconstructed
    # from the Debian-family distros this class is selected for — confirm.
    DISTRO_NAMES = {
        'ubuntu': 'ubuntu',
        'debian': 'debian',
    }

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version, distro_codename):
        super(Apt, self).__init__(stable=stable, version=version,
                                  branch=branch, commit=commit)
        self.distro = self.DISTRO_NAMES[distro]
        self.distro_codename = distro_codename
        self.distro_version = distro_version

    def repo_path(self):
        return '/etc/apt/sources.list.d/ceph.list'

    def add_repo(self):
        """Install the ceph GPG key and write the apt source list entry."""
        url, name = self.repo_gpgkey()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'w') as f:
            f.write(key)

        if self.version:
            content = 'deb %s/debian-%s/ %s main\n' % (
                args.repo_url, self.version, self.distro_codename)
        elif self.stable:
            content = 'deb %s/debian-%s/ %s main\n' % (
                args.repo_url, self.stable, self.distro_codename)
        else:
            # dev branch/commit: let shaman/chacra hand us the repo file
            content = self.query_shaman(self.distro, self.distro_codename, self.branch,
                                        self.commit)

        logger.info('Installing repo file at %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        """Remove both possible GPG keys and the source list entry."""
        for name in ['autobuild', 'release']:
            p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
            if os.path.exists(p):
                logger.info('Removing repo GPG key %s...' % p)
                os.unlink(p)
        if os.path.exists(self.repo_path()):
            logger.info('Removing repo at %s...' % self.repo_path())
            os.unlink(self.repo_path())
        if self.distro == 'ubuntu':
            self.rm_kubic_repo()

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws(['apt-get', 'install', '-y'] + ls)

    def install_podman(self):
        # Ubuntu needs the kubic repo for podman; fall back to docker on failure.
        if self.distro == 'ubuntu':
            logger.info('Setting up repo for podman...')
            self.add_kubic_repo()
            call_throws(['apt-get', 'update'])

        logger.info('Attempting podman install...')
        try:
            self.install(['podman'])
        except Error:
            logger.info('Podman did not work. Falling back to docker...')
            self.install(['docker.io'])

    def kubic_repo_url(self):
        return 'https://download.opensuse.org/repositories/devel:/kubic:/' \
               'libcontainers:/stable/xUbuntu_%s/' % self.distro_version

    def kubic_repo_path(self):
        return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list'

    def kubric_repo_gpgkey_url(self):
        # (sic: 'kubric' typo kept — renaming would change the class interface)
        return '%s/Release.key' % self.kubic_repo_url()

    def kubric_repo_gpgkey_path(self):
        return '/etc/apt/trusted.gpg.d/kubic.release.gpg'

    def add_kubic_repo(self):
        """Install the kubic (podman) repo key and source list entry."""
        url = self.kubric_repo_gpgkey_url()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        tmp_key = write_tmp(key, 0, 0)
        keyring = self.kubric_repo_gpgkey_path()
        call_throws(['apt-key', '--keyring', keyring, 'add', tmp_key.name])

        logger.info('Installing repo file at %s...' % self.kubic_repo_path())
        content = 'deb %s /\n' % self.kubic_repo_url()
        with open(self.kubic_repo_path(), 'w') as f:
            f.write(content)

    def rm_kubic_repo(self):
        keyring = self.kubric_repo_gpgkey_path()
        if os.path.exists(keyring):
            logger.info('Removing repo GPG key %s...' % keyring)
            os.unlink(keyring)

        p = self.kubic_repo_path()
        if os.path.exists(p):
            logger.info('Removing repo at %s...' % p)
            os.unlink(p)
class YumDnf(Packager):
    # distro id -> (shaman distro name, repo code prefix)
    DISTRO_NAMES = {
        'centos': ('centos', 'el'),
        'rhel': ('centos', 'el'),
        'scientific': ('centos', 'el'),
        'fedora': ('fedora', 'fc'),
    }

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version):
        super(YumDnf, self).__init__(stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.major = int(distro_version.split('.')[0])
        self.distro_normalized = self.DISTRO_NAMES[distro][0]
        self.distro_code = self.DISTRO_NAMES[distro][1] + str(self.major)
        # BUG FIX: distro_code embeds the major version (e.g. 'el8', 'fc31'),
        # so the original equality tests against bare 'fc'/'el' could never
        # match and the tool silently always stayed 'yum'. Match the prefix,
        # like rm_repo's startswith('el') below.
        if (self.distro_code.startswith('fc') and self.major >= 30) or \
           (self.distro_code.startswith('el') and self.major >= 8):
            self.tool = 'dnf'
        else:
            self.tool = 'yum'

    def custom_repo(self, **kw):
        """
        Repo files need special care in that a whole line should not be present
        if there is no value for it. Because we were using `format()` we could
        not conditionally add a line for a repo file. So the end result would
        contain a key with a missing value (say if we were passing `None`).

        Which breaks. This function allows us to conditionally add lines,
        preserving an order and be more careful.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self):
        return '/etc/yum.repos.d/ceph.repo'

    def repo_baseurl(self):
        assert self.stable or self.version
        if self.version:
            return '%s/rpm-%s/%s' % (args.repo_url, self.version,
                                     self.distro_code)
        else:
            return '%s/rpm-%s/%s' % (args.repo_url, self.stable,
                                     self.distro_code)

    def add_repo(self):
        """Write the ceph repo file; for dev builds fetch it from shaman/chacra."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                # NOTE(review): kwargs partially reconstructed — confirm
                # 'name' value against upstream.
                content += self.custom_repo(
                    name=n,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro_normalized, self.major,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        # EPEL carries dependencies (e.g. podman on older el) we need
        if self.distro_code.startswith('el'):
            logger.info('Enabling EPEL...')
            call_throws([self.tool, 'install', '-y', 'epel-release'])

    def rm_repo(self):
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws([self.tool, 'install', '-y'] + ls)

    def install_podman(self):
        self.install(['podman'])
class Zypper(Packager):
    DISTRO_NAMES = [
        'sles',
        'opensuse-tumbleweed',
        'opensuse-leap'
    ]

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version):
        super(Zypper, self).__init__(stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.tool = 'zypper'
        self.distro = 'opensuse'
        # default to Leap 15.1; tumbleweed keeps the default
        self.distro_version = '15.1'
        if 'tumbleweed' not in distro and distro_version is not None:
            self.distro_version = distro_version

    def custom_repo(self, **kw):
        """
        See YumDnf for format explanation.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self):
        return '/etc/zypp/repos.d/ceph.repo'

    def repo_baseurl(self):
        assert self.stable or self.version
        if self.version:
            # BUG FIX: this branch previously used self.stable in the URL
            # (copy/paste from the stable branch), so requesting a specific
            # version produced a wrong (or None) repo URL. Mirror
            # YumDnf.repo_baseurl, which uses self.version here.
            return '%s/rpm-%s/%s' % (args.repo_url, self.version, self.distro)
        else:
            return '%s/rpm-%s/%s' % (args.repo_url, self.stable, self.distro)

    def add_repo(self):
        """Write the ceph repo file; for dev builds fetch it from shaman/chacra."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                # NOTE(review): kwargs partially reconstructed — confirm
                # 'name' value against upstream.
                content += self.custom_repo(
                    name=n,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro, self.distro_version,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws([self.tool, 'in', '-y'] + ls)

    def install_podman(self):
        self.install(['podman'])
def create_packager(stable=None, version=None, branch=None, commit=None):
    """Detect the host distro and return the matching Packager backend.

    :raises Error: when the detected distro is not supported
    """
    distro, distro_version, distro_codename = get_distro()
    if distro in YumDnf.DISTRO_NAMES:
        return YumDnf(stable=stable, version=version,
                      branch=branch, commit=commit,
                      distro=distro, distro_version=distro_version)
    elif distro in Apt.DISTRO_NAMES:
        # apt additionally needs the codename for sources.list entries
        return Apt(stable=stable, version=version,
                   branch=branch, commit=commit,
                   distro=distro, distro_version=distro_version,
                   distro_codename=distro_codename)
    elif distro in Zypper.DISTRO_NAMES:
        return Zypper(stable=stable, version=version,
                      branch=branch, commit=commit,
                      distro=distro, distro_version=distro_version)
    raise Error('Distro %s version %s not supported' % (distro, distro_version))
def command_add_repo():
    """Validate the --release/--version/--dev arguments and add the ceph repo."""
    if args.version and args.release:
        raise Error('you can specify either --release or --version but not both')
    if not args.version and not args.release and not args.dev and not args.dev_commit:
        raise Error('please supply a --release, --version, --dev or --dev-commit argument')
    if args.version:
        # IMPROVED: the original unpacked into three unused names inside a
        # broad `except Exception`; an explicit length check says what is
        # actually being validated.
        if len(args.version.split('.')) != 3:
            raise Error('version must be in the form x.y.z (e.g., 15.2.0)')

    pkg = create_packager(stable=args.release,
                          version=args.version,
                          branch=args.dev,
                          commit=args.dev_commit)
    pkg.add_repo()
def command_rm_repo():
    """Remove the ceph package repo for the detected distro."""
    pkg = create_packager()
    pkg.rm_repo()
def command_install():
    """Install the packages listed on the command line via the distro backend."""
    pkg = create_packager()
    pkg.install(args.packages)
5056 ##################################
def get_ipv4_address(ifname):
    # type: (str) -> str
    """Return 'addr/prefixlen' for an interface's IPv4 address, or '' if none."""
    def _extract(sock, offset):
        # SIOCGIF* ioctls return a packed sockaddr; the address lives at 20:24
        return socket.inet_ntop(
                socket.AF_INET,
                fcntl.ioctl(
                    sock.fileno(),
                    offset,
                    struct.pack('256s', bytes(ifname[:15], 'utf-8'))
                )[20:24])

    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        addr = _extract(s, 35093)  # '0x8915' = SIOCGIFADDR
        dq_mask = _extract(s, 35099)  # 0x891b = SIOCGIFNETMASK
    except OSError:
        # interface does not have an ipv4 address
        return ''

    # convert the dotted-quad netmask to a prefix length by counting set bits
    dec_mask = sum([bin(int(i)).count('1')
                    for i in dq_mask.split('.')])
    return '{}/{}'.format(addr, dec_mask)
def get_ipv6_address(ifname):
    # type: (str) -> str
    """Return 'addr/scope' for an interface's IPv6 address, or '' if none."""
    if not os.path.exists('/proc/net/if_inet6'):
        return ''

    raw = read_file(['/proc/net/if_inet6'])
    data = raw.splitlines()
    # based on docs @ https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/ch11s04.html
    # field 0 is ipv6, field 2 is scope
    for iface_setting in data:
        field = iface_setting.split()
        if field[-1] == ifname:
            ipv6_raw = field[0]
            # re-insert the ':' separators every 4 hex digits
            ipv6_fmtd = ":".join([ipv6_raw[_p:_p + 4] for _p in range(0, len(field[0]), 4)])
            # apply naming rules using ipaddress module
            ipv6 = ipaddress.ip_address(ipv6_fmtd)
            return "{}/{}".format(str(ipv6), int('0x{}'.format(field[2]), 16))
    return ''
def bytes_to_human(num, mode='decimal'):
    # type: (float, str) -> str
    """Convert a bytes value into it's human-readable form.

    :param num: number, in bytes, to convert
    :param mode: Either decimal (default) or binary to determine divisor
    :returns: string representing the bytes value in a more readable format
    """
    unit_list = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
    divisor = 1000.0
    yotta = 'YB'

    if mode == 'binary':
        unit_list = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
        divisor = 1024.0
        yotta = 'YiB'

    for unit in unit_list:
        if abs(num) < divisor:
            return "%3.1f%s" % (num, unit)
        num /= divisor
    # fell off the end of the table: express in yotta-units
    return "%.1f%s" % (num, yotta)
def read_file(path_list, file_name=''):
    # type: (List[str], str) -> str
    """Returns the content of the first file found within the `path_list`

    :param path_list: list of file paths to search
    :param file_name: optional file_name to be applied to a file path
    :returns: content of the file or 'Unknown'
    """
    for path in path_list:
        if file_name:
            file_path = os.path.join(path, file_name)
        else:
            file_path = path
        if os.path.exists(file_path):
            with open(file_path, 'r') as f:
                try:
                    content = f.read().strip()
                except OSError:
                    # sysfs may populate the file, but for devices like
                    # virtio reads can fail
                    return "Unknown"
                else:
                    return content
    return "Unknown"
5152 ##################################
5154 _dmi_path_list
= ['/sys/class/dmi/id']
5155 _nic_path_list
= ['/sys/class/net']
5156 _selinux_path_list
= ['/etc/selinux/config']
5157 _apparmor_path_list
= ['/etc/apparmor']
5158 _disk_vendor_workarounds
= {
5159 "0x1af4": "Virtio Block Device"
5163 self
.cpu_model
= 'Unknown'
5166 self
.cpu_threads
= 0
5167 self
.interfaces
= {}
5169 self
._meminfo
= read_file(['/proc/meminfo']).splitlines()
5171 self
._process
_nics
()
5172 self
.arch
= platform
.processor()
5173 self
.kernel
= platform
.release()
5175 def _get_cpuinfo(self
):
5177 """Determine cpu information via /proc/cpuinfo"""
5178 raw
= read_file(['/proc/cpuinfo'])
5179 output
= raw
.splitlines()
5183 field
= [l
.strip() for l
in line
.split(':')]
5184 if "model name" in line
:
5185 self
.cpu_model
= field
[1]
5186 if "physical id" in line
:
5187 cpu_set
.add(field
[1])
5188 if "siblings" in line
:
5189 self
.cpu_threads
= int(field
[1].strip())
5190 if "cpu cores" in line
:
5191 self
.cpu_cores
= int(field
[1].strip())
5193 self
.cpu_count
= len(cpu_set
)
5195 def _get_block_devs(self
):
5196 # type: () -> List[str]
5197 """Determine the list of block devices by looking at /sys/block"""
5198 return [dev
for dev
in os
.listdir('/sys/block')
5199 if not dev
.startswith('dm')]
5201 def _get_devs_by_type(self
, rota
='0'):
5202 # type: (str) -> List[str]
5203 """Filter block devices by a given rotational attribute (0=flash, 1=spinner)"""
5205 for blk_dev
in self
._get
_block
_devs
():
5206 rot_path
= '/sys/block/{}/queue/rotational'.format(blk_dev
)
5207 rot_value
= read_file([rot_path
])
5208 if rot_value
== rota
:
5209 devs
.append(blk_dev
)
5213 def operating_system(self
):
5215 """Determine OS version"""
5216 raw_info
= read_file(['/etc/os-release'])
5217 os_release
= raw_info
.splitlines()
5221 for line
in os_release
:
5223 var_name
, var_value
= line
.split('=')
5224 rel_dict
[var_name
] = var_value
.strip('"')
5226 # Would normally use PRETTY_NAME, but NAME and VERSION are more
5228 if all(_v
in rel_dict
for _v
in ["NAME", "VERSION"]):
5229 rel_str
= "{} {}".format(rel_dict
['NAME'], rel_dict
['VERSION'])
5235 """Return the hostname"""
5236 return platform
.node()
5239 def subscribed(self
):
5241 """Highlevel check to see if the host is subscribed to receive updates/support"""
5245 entitlements_dir
= '/etc/pki/entitlement'
5246 if os
.path
.exists(entitlements_dir
):
5247 pems
= glob('{}/*.pem'.format(entitlements_dir
))
5253 os_name
= self
.operating_system
5254 if os_name
.upper().startswith("RED HAT"):
5260 def hdd_count(self
):
5262 """Return a count of HDDs (spinners)"""
5263 return len(self
._get
_devs
_by
_type
(rota
='1'))
5265 def _get_capacity(self
, dev
):
5266 # type: (str) -> int
5267 """Determine the size of a given device"""
5268 size_path
= os
.path
.join('/sys/block', dev
, 'size')
5269 size_blocks
= int(read_file([size_path
]))
5270 blk_path
= os
.path
.join('/sys/block', dev
, 'queue', 'logical_block_size')
5271 blk_count
= int(read_file([blk_path
]))
5272 return size_blocks
* blk_count
5274 def _get_capacity_by_type(self
, rota
='0'):
5275 # type: (str) -> int
5276 """Return the total capacity of a category of device (flash or hdd)"""
5277 devs
= self
._get
_devs
_by
_type
(rota
=rota
)
5280 capacity
+= self
._get
_capacity
(dev
)
5283 def _dev_list(self
, dev_list
):
5284 # type: (List[str]) -> List[Dict[str, object]]
5285 """Return a 'pretty' name list for each device in the `dev_list`"""
5288 for dev
in dev_list
:
5289 disk_model
= read_file(['/sys/block/{}/device/model'.format(dev
)]).strip()
5290 disk_rev
= read_file(['/sys/block/{}/device/rev'.format(dev
)]).strip()
5291 disk_wwid
= read_file(['/sys/block/{}/device/wwid'.format(dev
)]).strip()
5292 vendor
= read_file(['/sys/block/{}/device/vendor'.format(dev
)]).strip()
5293 disk_vendor
= HostFacts
._disk
_vendor
_workarounds
.get(vendor
, vendor
)
5294 disk_size_bytes
= self
._get
_capacity
(dev
)
5296 "description": "{} {} ({})".format(disk_vendor
, disk_model
, bytes_to_human(disk_size_bytes
)),
5297 "vendor": disk_vendor
,
5298 "model": disk_model
,
5302 "disk_size_bytes": disk_size_bytes
,
5309 # type: () -> List[Dict[str, object]]
5310 """Return a list of devices that are HDDs (spinners)"""
5311 devs
= self
._get
_devs
_by
_type
(rota
='1')
5312 return self
._dev
_list
(devs
)
5315 def flash_list(self
):
5316 # type: () -> List[Dict[str, object]]
5317 """Return a list of devices that are flash based (SSD, NVMe)"""
5318 devs
= self
._get
_devs
_by
_type
(rota
='0')
5319 return self
._dev
_list
(devs
)
5322 def hdd_capacity_bytes(self
):
5324 """Return the total capacity for all HDD devices (bytes)"""
5325 return self
._get
_capacity
_by
_type
(rota
='1')
5328 def hdd_capacity(self
):
5330 """Return the total capacity for all HDD devices (human readable format)"""
5331 return bytes_to_human(self
.hdd_capacity_bytes
)
5335 # type: () -> Dict[str, float]
5336 """Return the cpu load average data for the host"""
5337 raw
= read_file(['/proc/loadavg']).strip()
5340 "1min": float(data
[0]),
5341 "5min": float(data
[1]),
5342 "15min": float(data
[2]),
5346 def flash_count(self
):
5348 """Return the number of flash devices in the system (SSD, NVMe)"""
5349 return len(self
._get
_devs
_by
_type
(rota
='0'))
5352 def flash_capacity_bytes(self
):
5354 """Return the total capacity for all flash devices (bytes)"""
5355 return self
._get
_capacity
_by
_type
(rota
='0')
5358 def flash_capacity(self
):
5360 """Return the total capacity for all Flash devices (human readable format)"""
5361 return bytes_to_human(self
.flash_capacity_bytes
)
5363 def _process_nics(self
):
5365 """Look at the NIC devices and extract network related metadata"""
5366 # from https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
5373 for nic_path
in HostFacts
._nic
_path
_list
:
5374 if not os
.path
.exists(nic_path
):
5376 for iface
in os
.listdir(nic_path
):
5378 lower_devs_list
= [os
.path
.basename(link
.replace("lower_", "")) for link
in glob(os
.path
.join(nic_path
, iface
, "lower_*"))]
5379 upper_devs_list
= [os
.path
.basename(link
.replace("upper_", "")) for link
in glob(os
.path
.join(nic_path
, iface
, "upper_*"))]
5382 mtu
= int(read_file([os
.path
.join(nic_path
, iface
, 'mtu')]))
5386 operstate
= read_file([os
.path
.join(nic_path
, iface
, 'operstate')])
5388 speed
= int(read_file([os
.path
.join(nic_path
, iface
, 'speed')]))
5389 except (OSError, ValueError):
5390 # OSError : device doesn't support the ethtool get_link_ksettings
5391 # ValueError : raised when the read fails, and returns Unknown
5393 # Either way, we show a -1 when speed isn't available
5396 if os
.path
.exists(os
.path
.join(nic_path
, iface
, 'bridge')):
5398 elif os
.path
.exists(os
.path
.join(nic_path
, iface
, 'bonding')):
5399 nic_type
= "bonding"
5401 nic_type
= hw_lookup
.get(read_file([os
.path
.join(nic_path
, iface
, 'type')]), "Unknown")
5403 dev_link
= os
.path
.join(nic_path
, iface
, 'device')
5404 if os
.path
.exists(dev_link
):
5406 driver_path
= os
.path
.join(dev_link
, 'driver')
5407 if os
.path
.exists(driver_path
):
5408 driver
= os
.path
.basename(
5409 os
.path
.realpath(driver_path
))
5417 self
.interfaces
[iface
] = {
5419 "upper_devs_list": upper_devs_list
,
5420 "lower_devs_list": lower_devs_list
,
5421 "operstate": operstate
,
5423 "nic_type": nic_type
,
5426 "ipv4_address": get_ipv4_address(iface
),
5427 "ipv6_address": get_ipv6_address(iface
),
5431 def nic_count(self
):
5433 """Return a total count of all physical NICs detected in the host"""
5435 for iface
in self
.interfaces
:
5436 if self
.interfaces
[iface
]["iftype"] == 'physical':
5437 phys_devs
.append(iface
)
5438 return len(phys_devs
)
5441 def _get_mem_data(self
, field_name
):
5442 # type: (str) -> int
5443 for line
in self
._meminfo
:
5444 if line
.startswith(field_name
):
5450 def memory_total_kb(self
):
5452 """Determine the memory installed (kb)"""
5453 return self
._get
_mem
_data
('MemTotal')
5456 def memory_free_kb(self
):
5458 """Determine the memory free (not cache, immediately usable)"""
5459 return self
._get
_mem
_data
('MemFree')
5462 def memory_available_kb(self
):
5464 """Determine the memory available to new applications without swapping"""
5465 return self
._get
_mem
_data
('MemAvailable')
5470 """Determine server vendor from DMI data in sysfs"""
5471 return read_file(HostFacts
._dmi
_path
_list
, "sys_vendor")
5476 """Determine server model information from DMI data in sysfs"""
5477 family
= read_file(HostFacts
._dmi
_path
_list
, "product_family")
5478 product
= read_file(HostFacts
._dmi
_path
_list
, "product_name")
5479 if family
== 'Unknown' and product
:
5480 return "{}".format(product
)
5482 return "{} ({})".format(family
, product
)
5485 def bios_version(self
):
5487 """Determine server BIOS version from DMI data in sysfs"""
5488 return read_file(HostFacts
._dmi
_path
_list
, "bios_version")
5491 def bios_date(self
):
5493 """Determine server BIOS date from DMI data in sysfs"""
5494 return read_file(HostFacts
._dmi
_path
_list
, "bios_date")
5497 def timestamp(self
):
5499 """Return the current time as Epoch seconds"""
5503 def system_uptime(self
):
5505 """Return the system uptime (in secs)"""
5506 raw_time
= read_file(['/proc/uptime'])
5507 up_secs
, _
= raw_time
.split()
5508 return float(up_secs
)
5510 def kernel_security(self
):
5511 # type: () -> Dict[str, str]
5512 """Determine the security features enabled in the kernel - SELinux, AppArmor"""
5513 def _fetch_selinux():
5514 """Read the selinux config file to determine state"""
5516 for selinux_path
in HostFacts
._selinux
_path
_list
:
5517 if os
.path
.exists(selinux_path
):
5518 selinux_config
= read_file([selinux_path
]).splitlines()
5519 security
['type'] = 'SELinux'
5520 for line
in selinux_config
:
5521 if line
.strip().startswith('#'):
5523 k
, v
= line
.split('=')
5525 if security
['SELINUX'].lower() == "disabled":
5526 security
['description'] = "SELinux: Disabled"
5528 security
['description'] = "SELinux: Enabled({}, {})".format(security
['SELINUX'], security
['SELINUXTYPE'])
5531 def _fetch_apparmor():
5532 """Read the apparmor profiles directly, returning an overview of AppArmor status"""
5534 for apparmor_path
in HostFacts
._apparmor
_path
_list
:
5535 if os
.path
.exists(apparmor_path
):
5536 security
['type'] = "AppArmor"
5537 security
['description'] = "AppArmor: Enabled"
5539 profiles
= read_file(['/sys/kernel/security/apparmor/profiles'])
5543 summary
= {} # type: Dict[str, int]
5544 for line
in profiles
.split('\n'):
5545 item
, mode
= line
.split(' ')
5546 mode
= mode
.strip('()')
5551 summary_str
= ",".join(["{} {}".format(v
, k
) for k
, v
in summary
.items()])
5552 security
= {**security
, **summary
} # type: ignore
5553 security
['description'] += "({})".format(summary_str
)
5557 if os
.path
.exists('/sys/kernel/security/lsm'):
5558 lsm
= read_file(['/sys/kernel/security/lsm']).strip()
5559 if 'selinux' in lsm
:
5560 return _fetch_selinux()
5561 elif 'apparmor' in lsm
:
5562 return _fetch_apparmor()
5566 "description": "Linux Security Module framework is active, but is not using SELinux or AppArmor"
5571 "description": "Linux Security Module framework is not available"
5575 def kernel_parameters(self
):
5576 # type: () -> Dict[str, str]
5577 """Get kernel parameters required/used in Ceph clusters"""
5580 out
, _
, _
= call_throws(['sysctl', '-a'], verbosity
=CallVerbosity
.SILENT
)
5582 param_list
= out
.split('\n')
5583 param_dict
= { param
.split(" = ")[0]:param
.split(" = ")[-1] for param
in param_list
}
5585 # return only desired parameters
5586 if 'net.ipv4.ip_nonlocal_bind' in param_dict
:
5587 k_param
['net.ipv4.ip_nonlocal_bind'] = param_dict
['net.ipv4.ip_nonlocal_bind']
5593 """Return the attributes of this HostFacts object as json"""
5594 data
= {k
: getattr(self
, k
) for k
in dir(self
)
5595 if not k
.startswith('_') and
5596 isinstance(getattr(self
, k
),
5597 (float, int, str, list, dict, tuple))
5599 return json
.dumps(data
, indent
=2, sort_keys
=True)
5601 ##################################
def command_gather_facts():
    """gather_facts is intended to provide host releated metadata to the caller"""
    host = HostFacts()
    print(host.dump())
5613 # type: () -> argparse.ArgumentParser
5614 parser
= argparse
.ArgumentParser(
5615 description
='Bootstrap Ceph daemons with systemd and containers.',
5616 formatter_class
=argparse
.ArgumentDefaultsHelpFormatter
)
5617 parser
.add_argument(
5619 help='container image. Can also be set via the "CEPHADM_IMAGE" '
5621 parser
.add_argument(
5623 action
='store_true',
5624 help='use docker instead of podman')
5625 parser
.add_argument(
5628 help='base directory for daemon data')
5629 parser
.add_argument(
5632 help='base directory for daemon logs')
5633 parser
.add_argument(
5635 default
=LOGROTATE_DIR
,
5636 help='location of logrotate configuration files')
5637 parser
.add_argument(
5640 help='base directory for systemd units')
5641 parser
.add_argument(
5643 action
='store_true',
5644 help='Show debug-level log messages')
5645 parser
.add_argument(
5648 default
=DEFAULT_TIMEOUT
,
5649 help='timeout in seconds')
5650 parser
.add_argument(
5653 default
=DEFAULT_RETRY
,
5654 help='max number of retries')
5655 parser
.add_argument(
5659 help='set environment variable')
5660 parser
.add_argument(
5661 '--no-container-init',
5662 action
='store_true',
5663 default
=not CONTAINER_INIT
,
5664 help='Do not run podman/docker with `--init`')
5666 subparsers
= parser
.add_subparsers(help='sub-command')
5668 parser_version
= subparsers
.add_parser(
5669 'version', help='get ceph version from container')
5670 parser_version
.set_defaults(func
=command_version
)
5672 parser_pull
= subparsers
.add_parser(
5673 'pull', help='pull latest image version')
5674 parser_pull
.set_defaults(func
=command_pull
)
5676 parser_inspect_image
= subparsers
.add_parser(
5677 'inspect-image', help='inspect local container image')
5678 parser_inspect_image
.set_defaults(func
=command_inspect_image
)
5680 parser_ls
= subparsers
.add_parser(
5681 'ls', help='list daemon instances on this host')
5682 parser_ls
.set_defaults(func
=command_ls
)
5683 parser_ls
.add_argument(
5685 action
='store_true',
5686 help='Do not include daemon status')
5687 parser_ls
.add_argument(
5690 help='base directory for legacy daemon data')
5692 parser_list_networks
= subparsers
.add_parser(
5693 'list-networks', help='list IP networks')
5694 parser_list_networks
.set_defaults(func
=command_list_networks
)
5696 parser_adopt
= subparsers
.add_parser(
5697 'adopt', help='adopt daemon deployed with a different tool')
5698 parser_adopt
.set_defaults(func
=command_adopt
)
5699 parser_adopt
.add_argument(
5702 help='daemon name (type.id)')
5703 parser_adopt
.add_argument(
5706 help='deployment style (legacy, ...)')
5707 parser_adopt
.add_argument(
5710 help='cluster name')
5711 parser_adopt
.add_argument(
5714 help='base directory for legacy daemon data')
5715 parser_adopt
.add_argument(
5717 help='Additional configuration information in JSON format')
5718 parser_adopt
.add_argument(
5720 action
='store_true',
5721 help='Do not configure firewalld')
5722 parser_adopt
.add_argument(
5724 action
='store_true',
5725 help='do not pull the latest image before adopting')
5726 parser_adopt
.add_argument(
5728 action
='store_true',
5729 help="start newly adoped daemon, even if it wasn't running previously")
5730 parser_adopt
.add_argument(
5732 action
='store_true',
5733 default
=CONTAINER_INIT
,
5734 help=argparse
.SUPPRESS
)
5736 parser_rm_daemon
= subparsers
.add_parser(
5737 'rm-daemon', help='remove daemon instance')
5738 parser_rm_daemon
.set_defaults(func
=command_rm_daemon
)
5739 parser_rm_daemon
.add_argument(
5742 action
=CustomValidation
,
5743 help='daemon name (type.id)')
5744 parser_rm_daemon
.add_argument(
5747 help='cluster FSID')
5748 parser_rm_daemon
.add_argument(
5750 action
='store_true',
5751 help='proceed, even though this may destroy valuable data')
5752 parser_rm_daemon
.add_argument(
5753 '--force-delete-data',
5754 action
='store_true',
5755 help='delete valuable daemon data instead of making a backup')
5757 parser_rm_cluster
= subparsers
.add_parser(
5758 'rm-cluster', help='remove all daemons for a cluster')
5759 parser_rm_cluster
.set_defaults(func
=command_rm_cluster
)
5760 parser_rm_cluster
.add_argument(
5763 help='cluster FSID')
5764 parser_rm_cluster
.add_argument(
5766 action
='store_true',
5767 help='proceed, even though this may destroy valuable data')
5769 parser_run
= subparsers
.add_parser(
5770 'run', help='run a ceph daemon, in a container, in the foreground')
5771 parser_run
.set_defaults(func
=command_run
)
5772 parser_run
.add_argument(
5775 help='daemon name (type.id)')
5776 parser_run
.add_argument(
5779 help='cluster FSID')
5781 parser_shell
= subparsers
.add_parser(
5782 'shell', help='run an interactive shell inside a daemon container')
5783 parser_shell
.set_defaults(func
=command_shell
)
5784 parser_shell
.add_argument(
5786 help='cluster FSID')
5787 parser_shell
.add_argument(
5789 help='daemon name (type.id)')
5790 parser_shell
.add_argument(
5792 help='ceph.conf to pass through to the container')
5793 parser_shell
.add_argument(
5795 help='ceph.keyring to pass through to the container')
5796 parser_shell
.add_argument(
5798 help=("mount a file or directory in the container. "
5799 "Support multiple mounts. "
5800 "ie: `--mount /foo /bar:/bar`. "
5801 "When no destination is passed, default is /mnt"),
5803 parser_shell
.add_argument(
5807 help='set environment variable')
5808 parser_shell
.add_argument(
5809 'command', nargs
=argparse
.REMAINDER
,
5810 help='command (optional)')
5812 parser_enter
= subparsers
.add_parser(
5813 'enter', help='run an interactive shell inside a running daemon container')
5814 parser_enter
.set_defaults(func
=command_enter
)
5815 parser_enter
.add_argument(
5817 help='cluster FSID')
5818 parser_enter
.add_argument(
5821 help='daemon name (type.id)')
5822 parser_enter
.add_argument(
5823 'command', nargs
=argparse
.REMAINDER
,
5826 parser_ceph_volume
= subparsers
.add_parser(
5827 'ceph-volume', help='run ceph-volume inside a container')
5828 parser_ceph_volume
.set_defaults(func
=command_ceph_volume
)
5829 parser_ceph_volume
.add_argument(
5831 help='cluster FSID')
5832 parser_ceph_volume
.add_argument(
5834 help='JSON file with config and (client.bootrap-osd) key')
5835 parser_ceph_volume
.add_argument(
5837 help='ceph conf file')
5838 parser_ceph_volume
.add_argument(
5840 help='ceph.keyring to pass through to the container')
5841 parser_ceph_volume
.add_argument(
5842 'command', nargs
=argparse
.REMAINDER
,
5845 parser_unit
= subparsers
.add_parser(
5846 'unit', help='operate on the daemon\'s systemd unit')
5847 parser_unit
.set_defaults(func
=command_unit
)
5848 parser_unit
.add_argument(
5850 help='systemd command (start, stop, restart, enable, disable, ...)')
5851 parser_unit
.add_argument(
5853 help='cluster FSID')
5854 parser_unit
.add_argument(
5857 help='daemon name (type.id)')
5859 parser_logs
= subparsers
.add_parser(
5860 'logs', help='print journald logs for a daemon container')
5861 parser_logs
.set_defaults(func
=command_logs
)
5862 parser_logs
.add_argument(
5864 help='cluster FSID')
5865 parser_logs
.add_argument(
5868 help='daemon name (type.id)')
5869 parser_logs
.add_argument(
5870 'command', nargs
='*',
5871 help='additional journalctl args')
5873 parser_bootstrap
= subparsers
.add_parser(
5874 'bootstrap', help='bootstrap a cluster (mon + mgr daemons)')
5875 parser_bootstrap
.set_defaults(func
=command_bootstrap
)
5876 parser_bootstrap
.add_argument(
5878 help='ceph conf file to incorporate')
5879 parser_bootstrap
.add_argument(
5882 help='mon id (default: local hostname)')
5883 parser_bootstrap
.add_argument(
5885 help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])')
5886 parser_bootstrap
.add_argument(
5889 parser_bootstrap
.add_argument(
5892 help='mgr id (default: randomly generated)')
5893 parser_bootstrap
.add_argument(
5895 help='cluster FSID')
5896 parser_bootstrap
.add_argument(
5898 default
='/etc/ceph',
5899 help='directory to write config, keyring, and pub key files')
5900 parser_bootstrap
.add_argument(
5902 help='location to write keyring file with new cluster admin and mon keys')
5903 parser_bootstrap
.add_argument(
5905 help='location to write conf file to connect to new cluster')
5906 parser_bootstrap
.add_argument(
5907 '--output-pub-ssh-key',
5908 help='location to write the cluster\'s public SSH key')
5909 parser_bootstrap
.add_argument(
5911 action
='store_true',
5912 help='skip setup of ssh key on local host')
5913 parser_bootstrap
.add_argument(
5914 '--initial-dashboard-user',
5916 help='Initial user for the dashboard')
5917 parser_bootstrap
.add_argument(
5918 '--initial-dashboard-password',
5919 help='Initial password for the initial dashboard user')
5920 parser_bootstrap
.add_argument(
5921 '--ssl-dashboard-port',
5924 help='Port number used to connect with dashboard using SSL')
5925 parser_bootstrap
.add_argument(
5927 type=argparse
.FileType('r'),
5928 help='Dashboard key')
5929 parser_bootstrap
.add_argument(
5931 type=argparse
.FileType('r'),
5932 help='Dashboard certificate')
5934 parser_bootstrap
.add_argument(
5936 type=argparse
.FileType('r'),
5938 parser_bootstrap
.add_argument(
5939 '--ssh-private-key',
5940 type=argparse
.FileType('r'),
5941 help='SSH private key')
5942 parser_bootstrap
.add_argument(
5944 type=argparse
.FileType('r'),
5945 help='SSH public key')
5946 parser_bootstrap
.add_argument(
5949 help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users')
5951 parser_bootstrap
.add_argument(
5952 '--skip-mon-network',
5953 action
='store_true',
5954 help='set mon public_network based on bootstrap mon ip')
5955 parser_bootstrap
.add_argument(
5957 action
='store_true',
5958 help='do not enable the Ceph Dashboard')
5959 parser_bootstrap
.add_argument(
5960 '--dashboard-password-noupdate',
5961 action
='store_true',
5962 help='stop forced dashboard password change')
5963 parser_bootstrap
.add_argument(
5964 '--no-minimize-config',
5965 action
='store_true',
5966 help='do not assimilate and minimize the config file')
5967 parser_bootstrap
.add_argument(
5968 '--skip-ping-check',
5969 action
='store_true',
5970 help='do not verify that mon IP is pingable')
5971 parser_bootstrap
.add_argument(
5973 action
='store_true',
5974 help='do not pull the latest image before bootstrapping')
5975 parser_bootstrap
.add_argument(
5977 action
='store_true',
5978 help='Do not configure firewalld')
5979 parser_bootstrap
.add_argument(
5980 '--allow-overwrite',
5981 action
='store_true',
5982 help='allow overwrite of existing --output-* config/keyring/ssh files')
5983 parser_bootstrap
.add_argument(
5984 '--allow-fqdn-hostname',
5985 action
='store_true',
5986 help='allow hostname that is fully-qualified (contains ".")')
5987 parser_bootstrap
.add_argument(
5988 '--skip-prepare-host',
5989 action
='store_true',
5990 help='Do not prepare host')
5991 parser_bootstrap
.add_argument(
5992 '--orphan-initial-daemons',
5993 action
='store_true',
5994 help='Do not create initial mon, mgr, and crash service specs')
5995 parser_bootstrap
.add_argument(
5996 '--skip-monitoring-stack',
5997 action
='store_true',
5998 help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
5999 parser_bootstrap
.add_argument(
6001 help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
6003 parser_bootstrap
.add_argument(
6004 '--shared_ceph_folder',
6005 metavar
='CEPH_SOURCE_FOLDER',
6006 help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
6008 parser_bootstrap
.add_argument(
6010 help='url for custom registry')
6011 parser_bootstrap
.add_argument(
6012 '--registry-username',
6013 help='username for custom registry')
6014 parser_bootstrap
.add_argument(
6015 '--registry-password',
6016 help='password for custom registry')
6017 parser_bootstrap
.add_argument(
6019 help='json file with custom registry login info (URL, Username, Password)')
6020 parser_bootstrap
.add_argument(
6022 action
='store_true',
6023 default
=CONTAINER_INIT
,
6024 help=argparse
.SUPPRESS
)
6026 parser_deploy
= subparsers
.add_parser(
6027 'deploy', help='deploy a daemon')
6028 parser_deploy
.set_defaults(func
=command_deploy
)
6029 parser_deploy
.add_argument(
6032 action
=CustomValidation
,
6033 help='daemon name (type.id)')
6034 parser_deploy
.add_argument(
6037 help='cluster FSID')
6038 parser_deploy
.add_argument(
6040 help='config file for new daemon')
6041 parser_deploy
.add_argument(
6043 help='Additional configuration information in JSON format')
6044 parser_deploy
.add_argument(
6046 help='keyring for new daemon')
6047 parser_deploy
.add_argument(
6049 help='key for new daemon')
6050 parser_deploy
.add_argument(
6052 help='OSD uuid, if creating an OSD container')
6053 parser_deploy
.add_argument(
6055 action
='store_true',
6056 help='Do not configure firewalld')
6057 parser_deploy
.add_argument(
6059 help='List of tcp ports to open in the host firewall')
6060 parser_deploy
.add_argument(
6062 action
='store_true',
6063 help='Reconfigure a previously deployed daemon')
6064 parser_deploy
.add_argument(
6066 action
='store_true',
6067 help='Allow SYS_PTRACE on daemon container')
6068 parser_deploy
.add_argument(
6070 action
='store_true',
6071 default
=CONTAINER_INIT
,
6072 help=argparse
.SUPPRESS
)
6074 parser_check_host
= subparsers
.add_parser(
6075 'check-host', help='check host configuration')
6076 parser_check_host
.set_defaults(func
=command_check_host
)
6077 parser_check_host
.add_argument(
6078 '--expect-hostname',
6079 help='Check that hostname matches an expected value')
6081 parser_prepare_host
= subparsers
.add_parser(
6082 'prepare-host', help='prepare a host for cephadm use')
6083 parser_prepare_host
.set_defaults(func
=command_prepare_host
)
6084 parser_prepare_host
.add_argument(
6085 '--expect-hostname',
6086 help='Set hostname')
6088 parser_add_repo
= subparsers
.add_parser(
6089 'add-repo', help='configure package repository')
6090 parser_add_repo
.set_defaults(func
=command_add_repo
)
6091 parser_add_repo
.add_argument(
6093 help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE
))
6094 parser_add_repo
.add_argument(
6096 help='use specific upstream version (x.y.z)')
6097 parser_add_repo
.add_argument(
6099 help='use specified bleeding edge build from git branch or tag')
6100 parser_add_repo
.add_argument(
6102 help='use specified bleeding edge build from git commit')
6103 parser_add_repo
.add_argument(
6105 help='specify alternative GPG key location')
6106 parser_add_repo
.add_argument(
6108 default
='https://download.ceph.com',
6109 help='specify alternative repo location')
6112 parser_rm_repo
= subparsers
.add_parser(
6113 'rm-repo', help='remove package repository configuration')
6114 parser_rm_repo
.set_defaults(func
=command_rm_repo
)
6116 parser_install
= subparsers
.add_parser(
6117 'install', help='install ceph package(s)')
6118 parser_install
.set_defaults(func
=command_install
)
6119 parser_install
.add_argument(
6120 'packages', nargs
='*',
6121 default
=['cephadm'],
6124 parser_registry_login
= subparsers
.add_parser(
6125 'registry-login', help='log host into authenticated registry')
6126 parser_registry_login
.set_defaults(func
=command_registry_login
)
6127 parser_registry_login
.add_argument(
6129 help='url for custom registry')
6130 parser_registry_login
.add_argument(
6131 '--registry-username',
6132 help='username for custom registry')
6133 parser_registry_login
.add_argument(
6134 '--registry-password',
6135 help='password for custom registry')
6136 parser_registry_login
.add_argument(
6138 help='json file with custom registry login info (URL, Username, Password)')
6139 parser_registry_login
.add_argument(
6141 help='cluster FSID')
6143 parser_gather_facts
= subparsers
.add_parser(
6144 'gather-facts', help='gather and return host related information (JSON format)')
6145 parser_gather_facts
.set_defaults(func
=command_gather_facts
)
def _parse_args(av):
    # type: (List[str]) -> argparse.Namespace
    """Parse the cephadm command line.

    :param av: argument vector (without the program name), i.e. sys.argv[1:]
               or an injected argv when the script is piped to python3.
    :return: the populated argparse Namespace; callers rely on the returned
             value (the __main__ block does ``args = _parse_args(av)``).
    """
    parser = _get_parser()

    args = parser.parse_args(av)
    # Subcommands that capture a trailing command via argparse.REMAINDER
    # (shell, enter, ceph-volume, logs) can keep a leading '--' separator
    # in the captured list; strip it so the inner command runs cleanly.
    if 'command' in args and args.command and args.command[0] == "--":
        args.command.pop(0)

    # workaround argparse to deprecate the subparser `--container-init` flag
    # container_init and no_container_init must always be mutually exclusive
    container_init_args = ('--container-init', '--no-container-init')
    if set(container_init_args).issubset(av):
        # both flags given: contradictory, reject outright
        parser.error('argument %s: not allowed with argument %s' % (container_init_args))
    elif '--container-init' in av:
        # explicit opt-in wins; derive the negative flag from it
        args.no_container_init = not args.container_init
    else:
        # otherwise the negative flag (or the CONTAINER_INIT default
        # behind it) is authoritative
        args.container_init = not args.no_container_init
    assert args.container_init is not args.no_container_init

    return args
6171 if __name__
== "__main__":
6174 if os
.geteuid() != 0:
6175 sys
.stderr
.write('ERROR: cephadm should be run as root\n')
6178 # Logger configuration
6179 if not os
.path
.exists(LOG_DIR
):
6180 os
.makedirs(LOG_DIR
)
6181 dictConfig(logging_config
)
6182 logger
= logging
.getLogger()
6184 # allow argv to be injected
6186 av
= injected_argv
# type: ignore
6189 logger
.debug("%s\ncephadm %s" % ("-" * 80, av
))
6190 args
= _parse_args(av
)
6192 # More verbose console output
6194 for handler
in logger
.handlers
:
6195 if handler
.name
== "console":
6196 handler
.setLevel(logging
.DEBUG
)
6198 if 'func' not in args
:
6199 sys
.stderr
.write('No command specified; pass -h or --help for usage\n')
6203 if args
.func
!= command_check_host
:
6205 container_path
= find_program('docker')
6207 for i
in CONTAINER_PREFERENCE
:
6209 container_path
= find_program(i
)
6211 except Exception as e
:
6212 logger
.debug('Could not locate %s: %s' % (i
, e
))
6213 if not container_path
and args
.func
!= command_prepare_host\
6214 and args
.func
!= command_add_repo
:
6215 sys
.stderr
.write('Unable to locate any of %s\n' % CONTAINER_PREFERENCE
)
6219 container_init
= args
.container_init
6220 logger
.debug('container_init=%s' % (container_init
))
6227 sys
.stderr
.write('ERROR: %s\n' % e
)