3 DEFAULT_IMAGE
='docker.io/ceph/ceph:v15'
4 DEFAULT_IMAGE_IS_MASTER
=False
5 LATEST_STABLE_RELEASE
= 'octopus'
6 DATA_DIR
= '/var/lib/ceph'
7 LOG_DIR
= '/var/log/ceph'
8 LOCK_DIR
= '/run/cephadm'
9 LOGROTATE_DIR
= '/etc/logrotate.d'
10 UNIT_DIR
= '/etc/systemd/system'
13 CONTAINER_PREFERENCE
= ['podman', 'docker'] # prefer podman to docker
14 CUSTOM_PS1
= r
'[ceph: \u@\h \W]\$ '
15 DEFAULT_TIMEOUT
= None # in seconds
17 SHELL_DEFAULT_CONF
= '/etc/ceph/ceph.conf'
18 SHELL_DEFAULT_KEYRING
= '/etc/ceph/ceph.client.admin.keyring'
21 You can invoke cephadm in two ways:
23 1. The normal way, at the command line.
25 2. By piping the script to the python3 binary. In this latter case, you should
26 prepend one or more lines to the beginning of the script.
34 injected_argv = ['ls']
36 For reading stdin from the '--config-json -' argument,
38 injected_stdin = '...'
46 from logging
.config
import dictConfig
63 from typing
import Dict
, List
, Tuple
, Optional
, Union
, Any
, NoReturn
, Callable
, IO
70 from functools
import wraps
72 from threading
import Thread
74 if sys
.version_info
>= (3, 0):
75 from io
import StringIO
77 from StringIO
import StringIO
79 if sys
.version_info
>= (3, 2):
80 from configparser
import ConfigParser
82 from ConfigParser
import SafeConfigParser
84 if sys
.version_info
>= (3, 0):
85 from urllib
.request
import urlopen
86 from urllib
.error
import HTTPError
88 from urllib2
import urlopen
, HTTPError
90 if sys
.version_info
> (3, 0):
96 DATEFMT
= '%Y-%m-%dT%H:%M:%S.%fZ'
98 # Log and console output config
101 'disable_existing_loggers': True,
104 'format': '%(asctime)s %(levelname)s %(message)s'
110 'class':'logging.StreamHandler',
114 'class': 'logging.handlers.RotatingFileHandler',
115 'formatter': 'cephadm',
116 'filename': '%s/cephadm.log' % LOG_DIR
,
124 'handlers': ['console', 'log_file'],
135 class Error(Exception):
139 class TimeoutExpired(Error
):
142 ##################################
146 daemons
= ('mon', 'mgr', 'mds', 'osd', 'rgw', 'rbd-mirror',
149 ##################################
152 class Monitoring(object):
153 """Define the configs for the monitoring containers"""
156 "prometheus": [9095], # Avoid default 9090, due to conflict with cockpit UI
157 "node-exporter": [9100],
159 "alertmanager": [9093, 9094],
164 "image": "docker.io/prom/prometheus:v2.18.1",
168 "--config.file=/etc/prometheus/prometheus.yml",
169 "--storage.tsdb.path=/prometheus",
170 "--web.listen-address=:{}".format(port_map
['prometheus'][0]),
172 "config-json-files": [
177 "image": "docker.io/prom/node-exporter:v0.18.1",
181 "--no-collector.timex",
185 "image": "docker.io/ceph/ceph-grafana:6.6.2",
189 "config-json-files": [
191 "provisioning/datasources/ceph-dashboard.yml",
197 "image": "docker.io/prom/alertmanager:v0.20.0",
201 "--web.listen-address=:{}".format(port_map
['alertmanager'][0]),
202 "--cluster.listen-address=:{}".format(port_map
['alertmanager'][1]),
204 "config-json-files": [
207 "config-json-args": [
213 ##################################
216 class NFSGanesha(object):
217 """Defines a NFS-Ganesha container"""
220 entrypoint
= '/usr/bin/ganesha.nfsd'
221 daemon_args
= ['-F', '-L', 'STDERR']
223 required_files
= ['ganesha.conf']
233 image
=DEFAULT_IMAGE
):
234 # type: (str, Union[int, str], Dict, str) -> None
236 self
.daemon_id
= daemon_id
239 # config-json options
240 self
.pool
= dict_get(config_json
, 'pool', require
=True)
241 self
.namespace
= dict_get(config_json
, 'namespace')
242 self
.userid
= dict_get(config_json
, 'userid')
243 self
.extra_args
= dict_get(config_json
, 'extra_args', [])
244 self
.files
= dict_get(config_json
, 'files', {})
245 self
.rgw
= dict_get(config_json
, 'rgw', {})
247 # validate the supplied args
251 def init(cls
, fsid
, daemon_id
):
252 # type: (str, Union[int, str]) -> NFSGanesha
253 return cls(fsid
, daemon_id
, get_parm(args
.config_json
), args
.image
)
255 def get_container_mounts(self
, data_dir
):
256 # type: (str) -> Dict[str, str]
258 mounts
[os
.path
.join(data_dir
, 'config')] = '/etc/ceph/ceph.conf:z'
259 mounts
[os
.path
.join(data_dir
, 'keyring')] = '/etc/ceph/keyring:z'
260 mounts
[os
.path
.join(data_dir
, 'etc/ganesha')] = '/etc/ganesha:z'
262 cluster
= self
.rgw
.get('cluster', 'ceph')
263 rgw_user
= self
.rgw
.get('user', 'admin')
264 mounts
[os
.path
.join(data_dir
, 'keyring.rgw')] = \
265 '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster
, rgw_user
)
269 def get_container_envs():
270 # type: () -> List[str]
272 'CEPH_CONF=%s' % ('/etc/ceph/ceph.conf')
277 def get_version(container_id
):
278 # type: (str) -> Optional[str]
280 out
, err
, code
= call(
281 [container_path
, 'exec', container_id
,
282 NFSGanesha
.entrypoint
, '-v'])
284 match
= re
.search(r
'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out
)
286 version
= match
.group(1)
291 if not is_fsid(self
.fsid
):
292 raise Error('not an fsid: %s' % self
.fsid
)
293 if not self
.daemon_id
:
294 raise Error('invalid daemon_id: %s' % self
.daemon_id
)
296 raise Error('invalid image: %s' % self
.image
)
298 # check for the required files
299 if self
.required_files
:
300 for fname
in self
.required_files
:
301 if fname
not in self
.files
:
302 raise Error('required file missing from config-json: %s' % fname
)
304 # check for an RGW config
306 if not self
.rgw
.get('keyring'):
307 raise Error('RGW keyring is missing')
308 if not self
.rgw
.get('user'):
309 raise Error('RGW user is missing')
311 def get_daemon_name(self
):
313 return '%s.%s' % (self
.daemon_type
, self
.daemon_id
)
315 def get_container_name(self
, desc
=None):
316 # type: (Optional[str]) -> str
317 cname
= 'ceph-%s-%s' % (self
.fsid
, self
.get_daemon_name())
319 cname
= '%s-%s' % (cname
, desc
)
322 def get_daemon_args(self
):
323 # type: () -> List[str]
324 return self
.daemon_args
+ self
.extra_args
326 def create_daemon_dirs(self
, data_dir
, uid
, gid
):
327 # type: (str, int, int) -> None
328 """Create files under the container data dir"""
329 if not os
.path
.isdir(data_dir
):
330 raise OSError('data_dir is not a directory: %s' % (data_dir
))
332 logger
.info('Creating ganesha config...')
334 # create the ganesha conf dir
335 config_dir
= os
.path
.join(data_dir
, 'etc/ganesha')
336 makedirs(config_dir
, uid
, gid
, 0o755)
338 # populate files from the config-json
339 for fname
in self
.files
:
340 config_file
= os
.path
.join(config_dir
, fname
)
341 config_content
= dict_get_join(self
.files
, fname
)
342 logger
.info('Write file: %s' % (config_file
))
343 with
open(config_file
, 'w') as f
:
344 os
.fchown(f
.fileno(), uid
, gid
)
345 os
.fchmod(f
.fileno(), 0o600)
346 f
.write(config_content
)
348 # write the RGW keyring
350 keyring_path
= os
.path
.join(data_dir
, 'keyring.rgw')
351 with
open(keyring_path
, 'w') as f
:
352 os
.fchmod(f
.fileno(), 0o600)
353 os
.fchown(f
.fileno(), uid
, gid
)
354 f
.write(self
.rgw
.get('keyring', ''))
356 def get_rados_grace_container(self
, action
):
357 # type: (str) -> CephContainer
358 """Container for a ganesha action on the grace db"""
359 entrypoint
= '/usr/bin/ganesha-rados-grace'
362 args
=['--pool', self
.pool
]
364 args
+= ['--ns', self
.namespace
]
366 args
+= ['--userid', self
.userid
]
367 args
+= [action
, self
.get_daemon_name()]
369 data_dir
= get_data_dir(self
.fsid
, self
.daemon_type
, self
.daemon_id
)
370 volume_mounts
= self
.get_container_mounts(data_dir
)
371 envs
= self
.get_container_envs()
373 logger
.info('Creating RADOS grace for action: %s' % action
)
376 entrypoint
=entrypoint
,
378 volume_mounts
=volume_mounts
,
379 cname
=self
.get_container_name(desc
='grace-%s' % action
),
384 ##################################
387 class CephIscsi(object):
388 """Defines a Ceph-Iscsi container"""
390 daemon_type
= 'iscsi'
391 entrypoint
= '/usr/bin/rbd-target-api'
393 required_files
= ['iscsi-gateway.cfg']
399 image
=DEFAULT_IMAGE
):
400 # type: (str, Union[int, str], Dict, str) -> None
402 self
.daemon_id
= daemon_id
405 # config-json options
406 self
.files
= dict_get(config_json
, 'files', {})
408 # validate the supplied args
412 def init(cls
, fsid
, daemon_id
):
413 # type: (str, Union[int, str]) -> CephIscsi
414 return cls(fsid
, daemon_id
, get_parm(args
.config_json
), args
.image
)
417 def get_container_mounts(data_dir
, log_dir
):
418 # type: (str, str) -> Dict[str, str]
420 mounts
[os
.path
.join(data_dir
, 'config')] = '/etc/ceph/ceph.conf:z'
421 mounts
[os
.path
.join(data_dir
, 'keyring')] = '/etc/ceph/keyring:z'
422 mounts
[os
.path
.join(data_dir
, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
423 mounts
[os
.path
.join(data_dir
, 'configfs')] = '/sys/kernel/config'
424 mounts
[log_dir
] = '/var/log/rbd-target-api:z'
425 mounts
['/dev'] = '/dev'
429 def get_container_binds():
430 # type: () -> List[List[str]]
432 lib_modules
= ['type=bind',
433 'source=/lib/modules',
434 'destination=/lib/modules',
436 binds
.append(lib_modules
)
440 def get_version(container_id
):
441 # type: (str) -> Optional[str]
443 out
, err
, code
= call(
444 [container_path
, 'exec', container_id
,
445 '/usr/bin/python3', '-c', "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"])
447 version
= out
.strip()
452 if not is_fsid(self
.fsid
):
453 raise Error('not an fsid: %s' % self
.fsid
)
454 if not self
.daemon_id
:
455 raise Error('invalid daemon_id: %s' % self
.daemon_id
)
457 raise Error('invalid image: %s' % self
.image
)
459 # check for the required files
460 if self
.required_files
:
461 for fname
in self
.required_files
:
462 if fname
not in self
.files
:
463 raise Error('required file missing from config-json: %s' % fname
)
465 def get_daemon_name(self
):
467 return '%s.%s' % (self
.daemon_type
, self
.daemon_id
)
469 def get_container_name(self
, desc
=None):
470 # type: (Optional[str]) -> str
471 cname
= 'ceph-%s-%s' % (self
.fsid
, self
.get_daemon_name())
473 cname
= '%s-%s' % (cname
, desc
)
476 def create_daemon_dirs(self
, data_dir
, uid
, gid
):
477 # type: (str, int, int) -> None
478 """Create files under the container data dir"""
479 if not os
.path
.isdir(data_dir
):
480 raise OSError('data_dir is not a directory: %s' % (data_dir
))
482 logger
.info('Creating ceph-iscsi config...')
483 configfs_dir
= os
.path
.join(data_dir
, 'configfs')
484 makedirs(configfs_dir
, uid
, gid
, 0o755)
486 # populate files from the config-json
487 for fname
in self
.files
:
488 config_file
= os
.path
.join(data_dir
, fname
)
489 config_content
= dict_get_join(self
.files
, fname
)
490 logger
.info('Write file: %s' % (config_file
))
491 with
open(config_file
, 'w') as f
:
492 os
.fchown(f
.fileno(), uid
, gid
)
493 os
.fchmod(f
.fileno(), 0o600)
494 f
.write(config_content
)
497 def configfs_mount_umount(data_dir
, mount
=True):
498 # type: (str, bool) -> List[str]
499 mount_path
= os
.path
.join(data_dir
, 'configfs')
501 cmd
= "if ! grep -qs {0} /proc/mounts; then " \
502 "mount -t configfs none {0}; fi".format(mount_path
)
504 cmd
= "if grep -qs {0} /proc/mounts; then " \
505 "umount {0}; fi".format(mount_path
)
508 def get_tcmu_runner_container(self
):
509 # type: () -> CephContainer
510 tcmu_container
= get_container(self
.fsid
, self
.daemon_type
, self
.daemon_id
)
511 tcmu_container
.entrypoint
= "/usr/bin/tcmu-runner"
512 tcmu_container
.cname
= self
.get_container_name(desc
='tcmu')
513 # remove extra container args for tcmu container.
514 # extra args could cause issue with forking service type
515 tcmu_container
.container_args
= []
516 return tcmu_container
518 ##################################
521 class CustomContainer(object):
522 """Defines a custom container"""
523 daemon_type
= 'container'
525 def __init__(self
, fsid
: str, daemon_id
: Union
[int, str],
526 config_json
: Dict
, image
: str) -> None:
528 self
.daemon_id
= daemon_id
531 # config-json options
532 self
.entrypoint
= dict_get(config_json
, 'entrypoint')
533 self
.uid
= dict_get(config_json
, 'uid', 65534) # nobody
534 self
.gid
= dict_get(config_json
, 'gid', 65534) # nobody
535 self
.volume_mounts
= dict_get(config_json
, 'volume_mounts', {})
536 self
.args
= dict_get(config_json
, 'args', [])
537 self
.envs
= dict_get(config_json
, 'envs', [])
538 self
.privileged
= dict_get(config_json
, 'privileged', False)
539 self
.bind_mounts
= dict_get(config_json
, 'bind_mounts', [])
540 self
.ports
= dict_get(config_json
, 'ports', [])
541 self
.dirs
= dict_get(config_json
, 'dirs', [])
542 self
.files
= dict_get(config_json
, 'files', {})
545 def init(cls
, fsid
: str, daemon_id
: Union
[int, str]) -> 'CustomContainer':
546 return cls(fsid
, daemon_id
, get_parm(args
.config_json
), args
.image
)
548 def create_daemon_dirs(self
, data_dir
: str, uid
: int, gid
: int) -> None:
550 Create dirs/files below the container data directory.
552 logger
.info('Creating custom container configuration '
553 'dirs/files in {} ...'.format(data_dir
))
555 if not os
.path
.isdir(data_dir
):
556 raise OSError('data_dir is not a directory: %s' % data_dir
)
558 for dir_path
in self
.dirs
:
559 logger
.info('Creating directory: {}'.format(dir_path
))
560 dir_path
= os
.path
.join(data_dir
, dir_path
.strip('/'))
561 makedirs(dir_path
, uid
, gid
, 0o755)
563 for file_path
in self
.files
:
564 logger
.info('Creating file: {}'.format(file_path
))
565 content
= dict_get_join(self
.files
, file_path
)
566 file_path
= os
.path
.join(data_dir
, file_path
.strip('/'))
567 with
open(file_path
, 'w', encoding
='utf-8') as f
:
568 os
.fchown(f
.fileno(), uid
, gid
)
569 os
.fchmod(f
.fileno(), 0o600)
572 def get_daemon_args(self
) -> List
[str]:
575 def get_container_args(self
) -> List
[str]:
578 def get_container_envs(self
) -> List
[str]:
581 def get_container_mounts(self
, data_dir
: str) -> Dict
[str, str]:
583 Get the volume mounts. Relative source paths will be located below
584 `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
594 /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
598 for source
, destination
in self
.volume_mounts
.items():
599 source
= os
.path
.join(data_dir
, source
)
600 mounts
[source
] = destination
603 def get_container_binds(self
, data_dir
: str) -> List
[List
[str]]:
605 Get the bind mounts. Relative `source=...` paths will be located below
606 `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
611 'source=lib/modules',
612 'destination=/lib/modules',
618 'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
622 binds
= self
.bind_mounts
.copy()
624 for index
, value
in enumerate(bind
):
625 match
= re
.match(r
'^source=(.+)$', value
)
627 bind
[index
] = 'source={}'.format(os
.path
.join(
628 data_dir
, match
.group(1)))
631 ##################################
def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
    """
    Helper function to get a key from a dictionary.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :param default: The default value in case the key does not
        exist. Default is `None`.
    :param require: Set to `True` if the key is required. An
        exception will be raised if the key does not exist in
        the given dictionary.
    :return: Returns the value of the given key.
    :raises: :exc:`Error` if the given key does not exist
        and `require` is set to `True`.
    """
    # Membership test directly on the dict; no need to build a keys view.
    if require and key not in d:
        raise Error('{} missing from dict'.format(key))
    return d.get(key, default)
652 ##################################
def dict_get_join(d: Dict, key: str) -> Any:
    """
    Helper function to get the value of a given key from a dictionary.
    `List` values will be converted to a string by joining them with a
    line break.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :return: Returns the value of the given key. If it was a `list`, it
        will be joined with a line break.
    """
    value = d.get(key)
    if isinstance(value, list):
        # Non-string list elements are stringified before joining.
        value = '\n'.join(map(str, value))
    return value
670 ##################################
def get_supported_daemons():
    # type: () -> List[str]
    """Return the names of all daemon types this script can deploy."""
    names = list(Ceph.daemons)
    names.extend(Monitoring.components)
    names.extend([NFSGanesha.daemon_type,
                  CephIscsi.daemon_type,
                  CustomContainer.daemon_type])
    # Daemon type names must be unique across all the sources above.
    assert len(names) == len(set(names))
    return names
683 ##################################
def attempt_bind(s, address, port):
    # type: (socket.socket, str, int) -> None
    """
    Probe whether *address*:*port* is bindable using socket *s*.

    Raises OSError with a descriptive message if the bind fails; the
    socket is always closed afterwards (it is only used as a probe).
    """
    try:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((address, port))
    except (socket.error, OSError) as e:  # py2 and py3
        msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
        logger.warning(msg)
        if e.errno == errno.EADDRINUSE:
            raise OSError(errno.EADDRINUSE, msg)
        elif e.errno == errno.EADDRNOTAVAIL:
            raise OSError(errno.EADDRNOTAVAIL, msg)
        else:
            # Do not silently swallow unexpected bind errors.
            raise
    finally:
        s.close()
def port_in_use(port_num):
    # type: (int) -> bool
    """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
    logger.info('Verifying port %d ...' % port_num)
    # Probe IPv4 first; a failed bind means the port is taken.
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        attempt_bind(s, '0.0.0.0', port_num)
    except OSError:
        return True
    # Then probe IPv6 on the wildcard address.
    try:
        s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
        attempt_bind(s, '::', port_num)
    except OSError:
        return True
    return False
718 def check_ip_port(ip
, port
):
719 # type: (str, int) -> None
720 if not args
.skip_ping_check
:
721 logger
.info('Verifying IP %s port %d ...' % (ip
, port
))
723 s
= socket
.socket(socket
.AF_INET6
, socket
.SOCK_STREAM
)
726 s
= socket
.socket(socket
.AF_INET
, socket
.SOCK_STREAM
)
728 attempt_bind(s
, ip
, port
)
732 ##################################
734 # this is an abbreviated version of
735 # https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py
736 # that drops all of the compatibility (this is Unix/Linux only).
741 TimeoutError
= OSError
744 class Timeout(TimeoutError
):
746 Raised when the lock could not be acquired in *timeout*
750 def __init__(self
, lock_file
):
753 #: The path of the file lock.
754 self
.lock_file
= lock_file
758 temp
= "The file lock '{}' could not be acquired."\
759 .format(self
.lock_file
)
763 class _Acquire_ReturnProxy(object):
764 def __init__(self
, lock
):
771 def __exit__(self
, exc_type
, exc_value
, traceback
):
776 class FileLock(object):
777 def __init__(self
, name
, timeout
=-1):
778 if not os
.path
.exists(LOCK_DIR
):
779 os
.mkdir(LOCK_DIR
, 0o700)
780 self
._lock
_file
= os
.path
.join(LOCK_DIR
, name
+ '.lock')
782 # The file descriptor for the *_lock_file* as it is returned by the
783 # os.open() function.
784 # This file lock is only NOT None, if the object currently holds the
786 self
._lock
_file
_fd
= None
787 self
.timeout
= timeout
788 # The lock counter is used for implementing the nested locking
789 # mechanism. Whenever the lock is acquired, the counter is increased and
790 # the lock is only released, when this value is 0 again.
791 self
._lock
_counter
= 0
796 return self
._lock
_file
_fd
is not None
798 def acquire(self
, timeout
=None, poll_intervall
=0.05):
800 Acquires the file lock or fails with a :exc:`Timeout` error.
801 .. code-block:: python
802 # You can use this method in the context manager (recommended)
805 # Or use an equivalent try-finally construct:
812 The maximum time waited for the file lock.
813 If ``timeout < 0``, there is no timeout and this method will
814 block until the lock could be acquired.
815 If ``timeout`` is None, the default :attr:`~timeout` is used.
816 :arg float poll_intervall:
817 We check once in *poll_intervall* seconds if we can acquire the
820 if the lock could not be acquired in *timeout* seconds.
821 .. versionchanged:: 2.0.0
822 This method returns now a *proxy* object instead of *self*,
823 so that it can be used in a with statement without side effects.
825 # Use the default timeout, if no timeout is provided.
827 timeout
= self
.timeout
829 # Increment the number right at the beginning.
830 # We can still undo it, if something fails.
831 self
._lock
_counter
+= 1
834 lock_filename
= self
._lock
_file
835 start_time
= time
.time()
838 if not self
.is_locked
:
839 logger
.debug('Acquiring lock %s on %s', lock_id
,
844 logger
.debug('Lock %s acquired on %s', lock_id
,
847 elif timeout
>= 0 and time
.time() - start_time
> timeout
:
848 logger
.warning('Timeout acquiring lock %s on %s', lock_id
,
850 raise Timeout(self
._lock
_file
)
853 'Lock %s not acquired on %s, waiting %s seconds ...',
854 lock_id
, lock_filename
, poll_intervall
856 time
.sleep(poll_intervall
)
858 # Something did go wrong, so decrement the counter.
859 self
._lock
_counter
= max(0, self
._lock
_counter
- 1)
862 return _Acquire_ReturnProxy(lock
= self
)
864 def release(self
, force
=False):
866 Releases the file lock.
867 Please note, that the lock is only completly released, if the lock
869 Also note, that the lock file itself is not automatically deleted.
871 If true, the lock counter is ignored and the lock is released in
875 self
._lock
_counter
-= 1
877 if self
._lock
_counter
== 0 or force
:
879 lock_filename
= self
._lock
_file
881 logger
.debug('Releasing lock %s on %s', lock_id
, lock_filename
)
883 self
._lock
_counter
= 0
884 logger
.debug('Lock %s released on %s', lock_id
, lock_filename
)
892 def __exit__(self
, exc_type
, exc_value
, traceback
):
897 self
.release(force
=True)
901 open_mode
= os
.O_RDWR | os
.O_CREAT | os
.O_TRUNC
902 fd
= os
.open(self
._lock
_file
, open_mode
)
905 fcntl
.flock(fd
, fcntl
.LOCK_EX | fcntl
.LOCK_NB
)
906 except (IOError, OSError):
909 self
._lock
_file
_fd
= fd
913 # Do not remove the lockfile:
915 # https://github.com/benediktschmitt/py-filelock/issues/31
916 # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
917 fd
= self
._lock
_file
_fd
918 self
._lock
_file
_fd
= None
919 fcntl
.flock(fd
, fcntl
.LOCK_UN
) # type: ignore
920 os
.close(fd
) # type: ignore
924 ##################################
925 # Popen wrappers, lifted from ceph-volume
927 class CallVerbosity(Enum
):
929 # log stdout/stderr to logger.debug
931 # On a non-zero exit status, it will forcefully set
932 # logging ON for the terminal
933 VERBOSE_ON_FAILURE
= 2
934 # log at info (instead of debug) level.
938 def call(command
: List
[str],
939 desc
: Optional
[str] = None,
940 verbosity
: CallVerbosity
= CallVerbosity
.VERBOSE_ON_FAILURE
,
941 timeout
: Optional
[int] = DEFAULT_TIMEOUT
,
942 **kwargs
) -> Tuple
[str, str, int]:
944 Wrap subprocess.Popen to
946 - log stdout/stderr to a logger,
948 - cleanly return out, err, returncode
950 :param timeout: timeout in seconds
956 timeout
= timeout
or args
.timeout
958 logger
.debug("Running command: %s" % ' '.join(command
))
959 process
= subprocess
.Popen(
961 stdout
=subprocess
.PIPE
,
962 stderr
=subprocess
.PIPE
,
966 # get current p.stdout flags, add O_NONBLOCK
967 assert process
.stdout
is not None
968 assert process
.stderr
is not None
969 stdout_flags
= fcntl
.fcntl(process
.stdout
, fcntl
.F_GETFL
)
970 stderr_flags
= fcntl
.fcntl(process
.stderr
, fcntl
.F_GETFL
)
971 fcntl
.fcntl(process
.stdout
, fcntl
.F_SETFL
, stdout_flags | os
.O_NONBLOCK
)
972 fcntl
.fcntl(process
.stderr
, fcntl
.F_SETFL
, stderr_flags | os
.O_NONBLOCK
)
978 out_buffer
= '' # partial line (no newline yet)
979 err_buffer
= '' # partial line (no newline yet)
980 start_time
= time
.time()
983 end_time
= start_time
+ timeout
985 if end_time
and (time
.time() >= end_time
):
987 if process
.poll() is None:
988 logger
.info(desc
+ 'timeout after %s seconds' % timeout
)
990 if reads
and process
.poll() is not None:
991 # we want to stop, but first read off anything remaining
995 reads
, _
, _
= select
.select(
996 [process
.stdout
.fileno(), process
.stderr
.fileno()],
1001 message_b
= os
.read(fd
, 1024)
1002 if isinstance(message_b
, bytes
):
1003 message
= message_b
.decode('utf-8')
1004 if isinstance(message_b
, str):
1006 if stop
and message
:
1007 # process has terminated, but have more to read still, so not stopping yet
1008 # (os.read returns '' when it encounters EOF)
1012 if fd
== process
.stdout
.fileno():
1014 message
= out_buffer
+ message
1015 lines
= message
.split('\n')
1016 out_buffer
= lines
.pop()
1018 if verbosity
== CallVerbosity
.VERBOSE
:
1019 logger
.info(desc
+ 'stdout ' + line
)
1020 elif verbosity
!= CallVerbosity
.SILENT
:
1021 logger
.debug(desc
+ 'stdout ' + line
)
1022 elif fd
== process
.stderr
.fileno():
1024 message
= err_buffer
+ message
1025 lines
= message
.split('\n')
1026 err_buffer
= lines
.pop()
1028 if verbosity
== CallVerbosity
.VERBOSE
:
1029 logger
.info(desc
+ 'stderr ' + line
)
1030 elif verbosity
!= CallVerbosity
.SILENT
:
1031 logger
.debug(desc
+ 'stderr ' + line
)
1034 except (IOError, OSError):
1036 if verbosity
== CallVerbosity
.VERBOSE
:
1037 logger
.debug(desc
+ 'profile rt=%s, stop=%s, exit=%s, reads=%s'
1038 % (time
.time()-start_time
, stop
, process
.poll(), reads
))
1040 returncode
= process
.wait()
1042 if out_buffer
!= '':
1043 if verbosity
== CallVerbosity
.VERBOSE
:
1044 logger
.info(desc
+ 'stdout ' + out_buffer
)
1045 elif verbosity
!= CallVerbosity
.SILENT
:
1046 logger
.debug(desc
+ 'stdout ' + out_buffer
)
1047 if err_buffer
!= '':
1048 if verbosity
== CallVerbosity
.VERBOSE
:
1049 logger
.info(desc
+ 'stderr ' + err_buffer
)
1050 elif verbosity
!= CallVerbosity
.SILENT
:
1051 logger
.debug(desc
+ 'stderr ' + err_buffer
)
1053 if returncode
!= 0 and verbosity
== CallVerbosity
.VERBOSE_ON_FAILURE
:
1054 # dump stdout + stderr
1055 logger
.info('Non-zero exit code %d from %s' % (returncode
, ' '.join(command
)))
1056 for line
in out
.splitlines():
1057 logger
.info(desc
+ 'stdout ' + line
)
1058 for line
in err
.splitlines():
1059 logger
.info(desc
+ 'stderr ' + line
)
1061 return out
, err
, returncode
def call_throws(command: List[str],
                desc: Optional[str] = None,
                verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
                timeout: Optional[int] = DEFAULT_TIMEOUT,
                **kwargs) -> Tuple[str, str, int]:
    """Run *command* via call() and raise RuntimeError on non-zero exit.

    :return: (stdout, stderr, returncode), as produced by call().
    :raises: RuntimeError if the command exits with a non-zero status.
    """
    out, err, ret = call(command, desc, verbosity, timeout, **kwargs)
    if ret:
        raise RuntimeError('Failed command: %s' % ' '.join(command))
    return out, err, ret
1075 def call_timeout(command
, timeout
):
1076 # type: (List[str], int) -> int
1078 logger
.debug('Running command (timeout=%s): %s'
1079 % (timeout
, ' '.join(command
)))
1081 def raise_timeout(command
, timeout
):
1082 # type: (List[str], int) -> NoReturn
1083 msg
= 'Command \'%s\' timed out after %s seconds' % (command
, timeout
)
1085 raise TimeoutExpired(msg
)
1087 def call_timeout_py2(command
, timeout
):
1088 # type: (List[str], int) -> int
1089 proc
= subprocess
.Popen(command
)
1090 thread
= Thread(target
=proc
.wait
)
1092 thread
.join(timeout
)
1093 if thread
.is_alive():
1096 raise_timeout(command
, timeout
)
1097 return proc
.returncode
1099 def call_timeout_py3(command
, timeout
):
1100 # type: (List[str], int) -> int
1102 return subprocess
.call(command
, timeout
=timeout
)
1103 except subprocess
.TimeoutExpired
as e
:
1104 raise_timeout(command
, timeout
)
1107 if sys
.version_info
>= (3, 3):
1108 ret
= call_timeout_py3(command
, timeout
)
1110 # py2 subprocess has no timeout arg
1111 ret
= call_timeout_py2(command
, timeout
)
1114 ##################################
1117 def is_available(what
, func
):
1118 # type: (str, Callable[[], bool]) -> None
1120 Wait for a service to become available
1122 :param what: the name of the service
1123 :param func: the callable object that determines availability
1126 logger
.info('Waiting for %s...' % what
)
1130 logger
.info('%s is available'
1134 raise Error('%s not available after %s tries'
1137 logger
.info('%s not available, waiting (%s/%s)...'
1138 % (what
, num
, retry
))
1144 def read_config(fn
):
1145 # type: (Optional[str]) -> ConfigParser
1146 # bend over backwards here because py2's ConfigParser doesn't like
1147 # whitespace before config option names (e.g., '\n foo = bar\n').
1149 if sys
.version_info
>= (3, 2):
1152 cp
= SafeConfigParser()
1155 with
open(fn
, 'r') as f
:
1157 nice_conf
= re
.sub(r
'\n(\s)+', r
'\n', raw_conf
)
1158 s_io
= StringIO(nice_conf
)
1159 if sys
.version_info
>= (3, 2):
1168 # type: (str) -> str
1169 p
= os
.path
.expanduser(p
)
1170 return os
.path
.abspath(p
)
def get_file_timestamp(fn):
    # type: (str) -> Optional[str]
    """Return fn's mtime as a UTC timestamp string (DATEFMT), or None.

    Best-effort: any failure (missing file, permission error) yields None.
    """
    try:
        mt = os.path.getmtime(fn)
        return datetime.datetime.fromtimestamp(
            mt, tz=datetime.timezone.utc
        ).strftime(DATEFMT)
    except Exception:
        return None
):
1185 # type: (str) -> Optional[str]
1186 # This is super irritating because
1187 # 1) podman and docker use different formats
1188 # 2) python's strptime can't parse either one
1191 # docker 18.09.7: 2020-03-03T09:21:43.636153304Z
1192 # podman 1.7.0: 2020-03-03T15:52:30.136257504-06:00
1193 # 2020-03-03 15:52:30.136257504 -0600 CST
1194 # (In the podman case, there is a different string format for
1195 # 'inspect' and 'inspect --format {{.Created}}'!!)
1197 # In *all* cases, the 9 digit second precision is too much for
1198 # python's strptime. Shorten it to 6 digits.
1199 p
= re
.compile(r
'(\.[\d]{6})[\d]*')
1202 # replace trailing Z with -0000, since (on python 3.6.8) it won't parse
1203 if s
and s
[-1] == 'Z':
1204 s
= s
[:-1] + '-0000'
1206 # cut off the redundant 'CST' part that strptime can't parse, if
1209 s
= ' '.join(v
[0:3])
1211 # try parsing with several format strings
1213 '%Y-%m-%dT%H:%M:%S.%f%z',
1214 '%Y-%m-%d %H:%M:%S.%f %z',
1218 # return timestamp normalized to UTC, rendered as DATEFMT.
1219 return datetime
.datetime
.strptime(s
, f
).astimezone(tz
=datetime
.timezone
.utc
).strftime(DATEFMT
)
1225 def get_podman_version():
1226 # type: () -> Tuple[int, ...]
1227 if 'podman' not in container_path
:
1228 raise ValueError('not using podman')
1229 out
, _
, _
= call_throws([container_path
, '--version'])
1230 return _parse_podman_version(out
)
1233 def _parse_podman_version(out
):
1234 # type: (str) -> Tuple[int, ...]
1235 _
, _
, version_str
= out
.strip().split()
1237 def to_int(val
, org_e
=None):
1238 if not val
and org_e
:
1242 except ValueError as e
:
1243 return to_int(val
[0:-1], org_e
or e
)
1245 return tuple(map(to_int
, version_str
.split('.')))
1250 return socket
.gethostname()
1255 return socket
.getfqdn() or socket
.gethostname()
1260 return platform
.uname().machine
1263 def generate_service_id():
1265 return get_hostname() + '.' + ''.join(random
.choice(string
.ascii_lowercase
)
1269 def generate_password():
1271 return ''.join(random
.choice(string
.ascii_lowercase
+ string
.digits
)
def normalize_container_id(i):
    # type: (str) -> str
    """Strip a leading 'sha256:' prefix from a container/image id.

    docker adds the sha256: prefix, but AFAICS both
    docker (18.09.7 in bionic at least) and podman
    both always use sha256, so leave off the prefix.
    """
    prefix = 'sha256:'
    if i.startswith(prefix):
        i = i[len(prefix):]
    return i
1289 return str(uuid
.uuid1())
1293 # type: (str) -> bool
1301 def infer_fsid(func
):
1303 If we only find a single fsid in /var/lib/ceph/*, use that
1308 logger
.debug('Using specified fsid: %s' % args
.fsid
)
1312 daemon_list
= list_daemons(detail
=False)
1313 for daemon
in daemon_list
:
1314 if not is_fsid(daemon
['fsid']):
1317 elif 'name' not in args
or not args
.name
:
1318 # args.name not specified
1319 fsids_set
.add(daemon
['fsid'])
1320 elif daemon
['name'] == args
.name
:
1321 # args.name is a match
1322 fsids_set
.add(daemon
['fsid'])
1323 fsids
= sorted(fsids_set
)
1326 # some commands do not always require an fsid
1328 elif len(fsids
) == 1:
1329 logger
.info('Inferring fsid %s' % fsids
[0])
1330 args
.fsid
= fsids
[0]
1332 raise Error('Cannot infer an fsid, one must be specified: %s' % fsids
)
1338 def infer_config(func
):
1340 If we find a MON daemon, use the config from that container
1343 def _infer_config():
1345 logger
.debug('Using specified config: %s' % args
.config
)
1351 daemon_list
= list_daemons(detail
=False)
1352 for daemon
in daemon_list
:
1353 if daemon
['name'].startswith('mon.'):
1354 name
= daemon
['name']
1357 config
= '/var/lib/ceph/{}/{}/config'.format(args
.fsid
, name
)
1359 logger
.info('Inferring config %s' % config
)
1360 args
.config
= config
1361 elif os
.path
.exists(SHELL_DEFAULT_CONF
):
1362 logger
.debug('Using default config: %s' % SHELL_DEFAULT_CONF
)
1363 args
.config
= SHELL_DEFAULT_CONF
1366 return _infer_config
1369 def _get_default_image():
1370 if DEFAULT_IMAGE_IS_MASTER
:
1371 warn
= '''This is a development version of cephadm.
1372 For information regarding the latest stable release:
1373 https://docs.ceph.com/docs/{}/cephadm/install
1374 '''.format(LATEST_STABLE_RELEASE
)
1375 for line
in warn
.splitlines():
1376 logger
.warning('{}{}{}'.format(termcolor
.yellow
, line
, termcolor
.end
))
1377 return DEFAULT_IMAGE
1380 def infer_image(func
):
1382 Use the most recent ceph image
1387 args
.image
= os
.environ
.get('CEPHADM_IMAGE')
1389 args
.image
= get_last_local_ceph_image()
1391 args
.image
= _get_default_image()
1397 def default_image(func
):
1399 def _default_image():
1401 if 'name' in args
and args
.name
:
1402 type_
= args
.name
.split('.', 1)[0]
1403 if type_
in Monitoring
.components
:
1404 args
.image
= Monitoring
.components
[type_
]['image']
1406 args
.image
= os
.environ
.get('CEPHADM_IMAGE')
1408 args
.image
= _get_default_image()
1412 return _default_image
def get_last_local_ceph_image():
    # type: () -> Optional[str]
    """
    :return: The most recent local ceph image (already pulled)
    """
    cmd = [
        container_path, 'images',
        '--filter', 'label=ceph=True',
        '--filter', 'dangling=false',
        '--format', '{{.Repository}}@{{.Digest}}',
    ]
    out, _, _ = call_throws(cmd)
    return _filter_last_local_ceph_image(out)
def _filter_last_local_ceph_image(out):
    # str -> Optional[str]
    """Pick the first usable image from '<repo>@<digest>' listing output.

    Lines ending in '@' have no digest and are skipped.
    """
    for image in out.splitlines():
        if not image or image.endswith('@'):
            continue
        logger.info('Using recent ceph image %s' % image)
        return image
    return None
def write_tmp(s, uid, gid):
    # type: (str, int, int) -> Any
    """Write `s` to a new NamedTemporaryFile owned by uid:gid and return
    the still-open file object (the file is removed when it is closed)."""
    handle = tempfile.NamedTemporaryFile(mode='w',
                                         prefix='ceph-tmp')
    os.fchown(handle.fileno(), uid, gid)
    handle.write(s)
    handle.flush()
    return handle
def makedirs(dir, uid, gid, mode):
    # type: (str, int, int, int) -> None
    """Create `dir` if needed, then force ownership uid:gid and `mode`."""
    if not os.path.exists(dir):
        os.makedirs(dir, mode=mode)
    os.chown(dir, uid, gid)
    # the mode passed to os.makedirs is masked by the umask, so
    # apply the requested mode explicitly
    os.chmod(dir, mode)
def get_data_dir(fsid, t, n):
    # type: (str, str, Union[int, str]) -> str
    """Return the daemon data dir: <data_dir>/<fsid>/<type>.<id>."""
    leaf = '%s.%s' % (t, n)
    return os.path.join(args.data_dir, fsid, leaf)
def get_log_dir(fsid):
    # type: (str) -> str
    """Return the per-cluster log dir: <log_dir>/<fsid>."""
    return os.path.join(args.log_dir, fsid)
def make_data_dir_base(fsid, uid, gid):
    # type: (str, int, int) -> str
    """Create <data_dir>/<fsid> plus its crash/ and crash/posted/
    subdirectories, owned by uid:gid, and return the base path."""
    data_dir_base = os.path.join(args.data_dir, fsid)
    for parts in ([], ['crash'], ['crash', 'posted']):
        makedirs(os.path.join(data_dir_base, *parts), uid, gid, DATA_DIR_MODE)
    return data_dir_base
def make_data_dir(fsid, daemon_type, daemon_id, uid=None, gid=None):
    # type: (str, str, Union[int, str], Optional[int], Optional[int]) -> str
    """Create (and chown) the data dir for one daemon and return its path.

    When uid/gid are omitted they are looked up from the container image.
    """
    if uid is None or gid is None:
        uid, gid = extract_uid_gid()
    make_data_dir_base(fsid, uid, gid)
    data_dir = get_data_dir(fsid, daemon_type, daemon_id)
    makedirs(data_dir, uid, gid, DATA_DIR_MODE)
    return data_dir
def make_log_dir(fsid, uid=None, gid=None):
    # type: (str, Optional[int], Optional[int]) -> str
    """Create (and chown) the per-cluster log dir and return its path.

    When uid/gid are omitted they are looked up from the container image.
    """
    if uid is None or gid is None:
        uid, gid = extract_uid_gid()
    log_dir = get_log_dir(fsid)
    makedirs(log_dir, uid, gid, LOG_DIR_MODE)
    return log_dir
def make_var_run(fsid, uid, gid):
    # type: (str, int, int) -> None
    """Create /var/run/ceph/<fsid> with mode 0770, owned by uid:gid."""
    call_throws(['install', '-d', '-m0770', '-o', str(uid), '-g', str(gid),
                 '/var/run/ceph/%s' % fsid])
def copy_tree(src, dst, uid=None, gid=None):
    # type: (List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy a directory tree from src to dst
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid()

    for src_dir in src:
        dst_dir = dst
        if os.path.isdir(dst):
            dst_dir = os.path.join(dst, os.path.basename(src_dir))

        logger.debug('copy directory \'%s\' -> \'%s\'' % (src_dir, dst_dir))
        shutil.rmtree(dst_dir, ignore_errors=True)
        shutil.copytree(src_dir, dst_dir)  # dirs_exist_ok needs python 3.8

        # fix ownership of everything that was just copied
        for dirpath, dirnames, filenames in os.walk(dst_dir):
            logger.debug('chown %s:%s \'%s\'' % (uid, gid, dirpath))
            os.chown(dirpath, uid, gid)
            for filename in filenames:
                logger.debug('chown %s:%s \'%s\'' % (uid, gid, filename))
                os.chown(os.path.join(dirpath, filename), uid, gid)
def copy_files(src, dst, uid=None, gid=None):
    # type: (List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy a files from src to dst
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid()

    for src_file in src:
        dst_file = dst
        if os.path.isdir(dst):
            dst_file = os.path.join(dst, os.path.basename(src_file))

        logger.debug('copy file \'%s\' -> \'%s\'' % (src_file, dst_file))
        shutil.copyfile(src_file, dst_file)

        logger.debug('chown %s:%s \'%s\'' % (uid, gid, dst_file))
        os.chown(dst_file, uid, gid)
def move_files(src, dst, uid=None, gid=None):
    # type: (List[str], str, Optional[int], Optional[int]) -> None
    """
    Move files from src to dst
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid()

    for src_file in src:
        dst_file = dst
        if os.path.isdir(dst):
            dst_file = os.path.join(dst, os.path.basename(src_file))

        if os.path.islink(src_file):
            # shutil.move() in py2 does not handle symlinks correctly
            src_rl = os.readlink(src_file)
            logger.debug("symlink '%s' -> '%s'" % (dst_file, src_rl))
            os.symlink(src_rl, dst_file)
            os.unlink(src_file)
        else:
            logger.debug("move file '%s' -> '%s'" % (src_file, dst_file))
            shutil.move(src_file, dst_file)
            logger.debug('chown %s:%s \'%s\'' % (uid, gid, dst_file))
            os.chown(dst_file, uid, gid)
## copied from distutils ##
def find_executable(executable, path=None):
    """Tries to find 'executable' in the directories listed in 'path'.
    A string listing directories separated by 'os.pathsep'; defaults to
    os.environ['PATH'].  Returns the complete filename or None if not found.
    """
    _, ext = os.path.splitext(executable)
    if (sys.platform == 'win32') and (ext != '.exe'):
        executable = executable + '.exe'

    if os.path.isfile(executable):
        return executable

    if path is None:
        path = os.environ.get('PATH', None)
        if path is None:
            try:
                path = os.confstr("CS_PATH")
            except (AttributeError, ValueError):
                # os.confstr() or CS_PATH is not available
                path = os.defpath
        # bpo-35755: Don't use os.defpath if the PATH environment variable is
        # set to an empty string

    # PATH='' doesn't match, whereas PATH=':' looks in the current directory
    if not path:
        return None

    paths = path.split(os.pathsep)
    for p in paths:
        f = os.path.join(p, executable)
        if os.path.isfile(f):
            # the file exists, we have a shot at spawn working
            return f
    return None
def find_program(filename):
    # type: (str) -> str
    """Like find_executable(), but raise ValueError when nothing is found."""
    name = find_executable(filename)
    if name is None:
        raise ValueError('%s not found' % filename)
    return name
def get_unit_name(fsid, daemon_type, daemon_id=None):
    # type: (str, str, Optional[Union[int, str]]) -> str
    """Return the systemd unit name for a daemon."""
    # accept either name or type + id
    if daemon_id is None:
        return 'ceph-%s@%s' % (fsid, daemon_type)
    return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
def get_unit_name_by_daemon_name(fsid, name):
    """Look up the systemd unit of an existing daemon by its name."""
    daemon = get_daemon_description(fsid, name)
    try:
        return daemon['systemd_unit']
    except KeyError:
        raise Error('Failed to get unit name for {}'.format(daemon))
def check_unit(unit_name):
    # type: (str) -> Tuple[bool, str, bool]
    """Query systemd for a unit; return (enabled, state, installed)."""
    # NOTE: we ignore the exit code here because systemctl outputs
    # various exit codes based on the state of the service, but the
    # string result is more explicit (and sufficient).
    enabled = False
    installed = False
    try:
        out, err, code = call(['systemctl', 'is-enabled', unit_name],
                              verbosity=CallVerbosity.DEBUG)
        if code == 0:
            enabled = True
            installed = True
        elif "disabled" in out:
            installed = True
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        enabled = False
        installed = False

    state = 'unknown'
    try:
        out, err, code = call(['systemctl', 'is-active', unit_name],
                              verbosity=CallVerbosity.DEBUG)
        out = out.strip()
        if out in ['active']:
            state = 'running'
        elif out in ['inactive']:
            state = 'stopped'
        elif out in ['failed', 'auto-restart']:
            state = 'error'
        else:
            state = 'unknown'
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        state = 'unknown'

    return (enabled, state, installed)
def check_units(units, enabler=None):
    # type: (List[str], Optional[Packager]) -> bool
    """Return True as soon as one of `units` is enabled and running;
    otherwise optionally let `enabler` enable each installed unit."""
    for u in units:
        (enabled, state, installed) = check_unit(u)
        if enabled and state == 'running':
            logger.info('Unit %s is enabled and running' % u)
            return True
        if enabler is not None:
            if installed:
                logger.info('Enabling unit %s' % u)
                enabler.enable_service(u)
    return False
def get_legacy_config_fsid(cluster, legacy_dir=None):
    # type: (str, Optional[str]) -> Optional[str]
    """Read the fsid out of a legacy /etc/ceph/<cluster>.conf, if present."""
    config_file = '/etc/ceph/%s.conf' % cluster
    if legacy_dir is not None:
        config_file = os.path.abspath(legacy_dir + config_file)

    if os.path.exists(config_file):
        config = read_config(config_file)
        if config.has_section('global') and config.has_option('global', 'fsid'):
            return config.get('global', 'fsid')
    return None
def get_legacy_daemon_fsid(cluster, daemon_type, daemon_id, legacy_dir=None):
    # type: (str, str, Union[int, str], Optional[str]) -> Optional[str]
    """Determine the fsid of a legacy (pre-cephadm) daemon.

    For OSDs, prefer the per-daemon ceph_fsid file; otherwise fall back to
    the cluster config's [global] fsid.
    """
    fsid = None
    if daemon_type == 'osd':
        try:
            fsid_file = os.path.join(args.data_dir,
                                     daemon_type,
                                     'ceph-%s' % daemon_id,
                                     'ceph_fsid')
            if legacy_dir is not None:
                fsid_file = os.path.abspath(legacy_dir + fsid_file)
            with open(fsid_file, 'r') as f:
                fsid = f.read().strip()
        except IOError:
            pass
    if not fsid:
        fsid = get_legacy_config_fsid(cluster, legacy_dir=legacy_dir)
    return fsid
1721 def get_daemon_args(fsid
, daemon_type
, daemon_id
):
1722 # type: (str, str, Union[int, str]) -> List[str]
1723 r
= list() # type: List[str]
1725 if daemon_type
in Ceph
.daemons
and daemon_type
!= 'crash':
1727 '--setuser', 'ceph',
1728 '--setgroup', 'ceph',
1729 '--default-log-to-file=false',
1730 '--default-log-to-stderr=true',
1731 '--default-log-stderr-prefix="debug "',
1733 if daemon_type
== 'mon':
1735 '--default-mon-cluster-log-to-file=false',
1736 '--default-mon-cluster-log-to-stderr=true',
1738 elif daemon_type
in Monitoring
.components
:
1739 metadata
= Monitoring
.components
[daemon_type
]
1740 r
+= metadata
.get('args', list())
1741 if daemon_type
== 'alertmanager':
1742 config
= get_parm(args
.config_json
)
1743 peers
= config
.get('peers', list()) # type: ignore
1745 r
+= ["--cluster.peer={}".format(peer
)]
1746 # some alertmanager, by default, look elsewhere for a config
1747 r
+= ["--config.file=/etc/alertmanager/alertmanager.yml"]
1748 elif daemon_type
== NFSGanesha
.daemon_type
:
1749 nfs_ganesha
= NFSGanesha
.init(fsid
, daemon_id
)
1750 r
+= nfs_ganesha
.get_daemon_args()
1751 elif daemon_type
== CustomContainer
.daemon_type
:
1752 cc
= CustomContainer
.init(fsid
, daemon_id
)
1753 r
.extend(cc
.get_daemon_args())
1758 def create_daemon_dirs(fsid
, daemon_type
, daemon_id
, uid
, gid
,
1759 config
=None, keyring
=None):
1760 # type: (str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None
1761 data_dir
= make_data_dir(fsid
, daemon_type
, daemon_id
, uid
=uid
, gid
=gid
)
1762 make_log_dir(fsid
, uid
=uid
, gid
=gid
)
1765 config_path
= os
.path
.join(data_dir
, 'config')
1766 with
open(config_path
, 'w') as f
:
1767 os
.fchown(f
.fileno(), uid
, gid
)
1768 os
.fchmod(f
.fileno(), 0o600)
1772 keyring_path
= os
.path
.join(data_dir
, 'keyring')
1773 with
open(keyring_path
, 'w') as f
:
1774 os
.fchmod(f
.fileno(), 0o600)
1775 os
.fchown(f
.fileno(), uid
, gid
)
1778 if daemon_type
in Monitoring
.components
.keys():
1779 config_json
: Dict
[str, Any
] = get_parm(args
.config_json
)
1780 required_files
= Monitoring
.components
[daemon_type
].get('config-json-files', list())
1782 # Set up directories specific to the monitoring component
1784 if daemon_type
== 'prometheus':
1785 data_dir_root
= get_data_dir(fsid
, daemon_type
, daemon_id
)
1786 config_dir
= 'etc/prometheus'
1787 makedirs(os
.path
.join(data_dir_root
, config_dir
), uid
, gid
, 0o755)
1788 makedirs(os
.path
.join(data_dir_root
, config_dir
, 'alerting'), uid
, gid
, 0o755)
1789 makedirs(os
.path
.join(data_dir_root
, 'data'), uid
, gid
, 0o755)
1790 elif daemon_type
== 'grafana':
1791 data_dir_root
= get_data_dir(fsid
, daemon_type
, daemon_id
)
1792 config_dir
= 'etc/grafana'
1793 makedirs(os
.path
.join(data_dir_root
, config_dir
), uid
, gid
, 0o755)
1794 makedirs(os
.path
.join(data_dir_root
, config_dir
, 'certs'), uid
, gid
, 0o755)
1795 makedirs(os
.path
.join(data_dir_root
, config_dir
, 'provisioning/datasources'), uid
, gid
, 0o755)
1796 makedirs(os
.path
.join(data_dir_root
, 'data'), uid
, gid
, 0o755)
1797 elif daemon_type
== 'alertmanager':
1798 data_dir_root
= get_data_dir(fsid
, daemon_type
, daemon_id
)
1799 config_dir
= 'etc/alertmanager'
1800 makedirs(os
.path
.join(data_dir_root
, config_dir
), uid
, gid
, 0o755)
1801 makedirs(os
.path
.join(data_dir_root
, config_dir
, 'data'), uid
, gid
, 0o755)
1803 # populate the config directory for the component from the config-json
1804 for fname
in required_files
:
1805 if 'files' in config_json
: # type: ignore
1806 content
= dict_get_join(config_json
['files'], fname
)
1807 with
open(os
.path
.join(data_dir_root
, config_dir
, fname
), 'w') as f
:
1808 os
.fchown(f
.fileno(), uid
, gid
)
1809 os
.fchmod(f
.fileno(), 0o600)
1812 elif daemon_type
== NFSGanesha
.daemon_type
:
1813 nfs_ganesha
= NFSGanesha
.init(fsid
, daemon_id
)
1814 nfs_ganesha
.create_daemon_dirs(data_dir
, uid
, gid
)
1816 elif daemon_type
== CephIscsi
.daemon_type
:
1817 ceph_iscsi
= CephIscsi
.init(fsid
, daemon_id
)
1818 ceph_iscsi
.create_daemon_dirs(data_dir
, uid
, gid
)
1820 elif daemon_type
== CustomContainer
.daemon_type
:
1821 cc
= CustomContainer
.init(fsid
, daemon_id
)
1822 cc
.create_daemon_dirs(data_dir
, uid
, gid
)
def get_parm(option):
    # type: (str) -> Dict[str, str]
    """Parse a --config-json style option into a dict.

    `option` may be '-' (read stdin, cached across calls), an inline JSON
    string, or a path to a JSON file.
    """
    global cached_stdin
    if not option:
        return dict()

    if option == '-':
        if cached_stdin is not None:
            j = cached_stdin
        else:
            try:
                j = injected_stdin  # type: ignore
            except NameError:
                j = sys.stdin.read()
                cached_stdin = j
    else:
        # inline json string
        if option[0] == '{' and option[-1] == '}':
            j = option
        # json file
        elif os.path.exists(option):
            with open(option, 'r') as f:
                j = f.read()
        else:
            raise Error("Config file {} not found".format(option))

    try:
        js = json.loads(j)
    except ValueError as e:
        raise Error("Invalid JSON in {}: {}".format(option, e))
    else:
        return js
def get_config_and_keyring():
    # type: () -> Tuple[Optional[str], Optional[str]]
    """Resolve config and keyring text from args (config-json, then
    explicit --config/--key/--keyring overrides)."""
    config = None
    keyring = None

    if 'config_json' in args and args.config_json:
        d = get_parm(args.config_json)
        config = d.get('config')
        keyring = d.get('keyring')

    if 'config' in args and args.config:
        with open(args.config, 'r') as f:
            config = f.read()

    if 'key' in args and args.key:
        keyring = '[%s]\n\tkey = %s\n' % (args.name, args.key)
    elif 'keyring' in args and args.keyring:
        with open(args.keyring, 'r') as f:
            keyring = f.read()

    return config, keyring
def get_container_binds(fsid, daemon_type, daemon_id):
    # type: (str, str, Union[int, str, None]) -> List[List[str]]
    """Collect extra container bind specs for daemon types that need them."""
    binds = list()

    if daemon_type == CephIscsi.daemon_type:
        binds.extend(CephIscsi.get_container_binds())
    elif daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(fsid, daemon_id)
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        binds.extend(cc.get_container_binds(data_dir))

    return binds
1898 def get_container_mounts(fsid
, daemon_type
, daemon_id
,
1900 # type: (str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str]
1903 if daemon_type
in Ceph
.daemons
:
1905 run_path
= os
.path
.join('/var/run/ceph', fsid
);
1906 if os
.path
.exists(run_path
):
1907 mounts
[run_path
] = '/var/run/ceph:z'
1908 log_dir
= get_log_dir(fsid
)
1909 mounts
[log_dir
] = '/var/log/ceph:z'
1910 crash_dir
= '/var/lib/ceph/%s/crash' % fsid
1911 if os
.path
.exists(crash_dir
):
1912 mounts
[crash_dir
] = '/var/lib/ceph/crash:z'
1914 if daemon_type
in Ceph
.daemons
and daemon_id
:
1915 data_dir
= get_data_dir(fsid
, daemon_type
, daemon_id
)
1916 if daemon_type
== 'rgw':
1917 cdata_dir
= '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id
)
1919 cdata_dir
= '/var/lib/ceph/%s/ceph-%s' % (daemon_type
, daemon_id
)
1920 if daemon_type
!= 'crash':
1921 mounts
[data_dir
] = cdata_dir
+ ':z'
1923 mounts
[data_dir
+ '/config'] = '/etc/ceph/ceph.conf:z'
1924 if daemon_type
== 'rbd-mirror' or daemon_type
== 'crash':
1925 # these do not search for their keyrings in a data directory
1926 mounts
[data_dir
+ '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type
, daemon_id
)
1928 if daemon_type
in ['mon', 'osd']:
1929 mounts
['/dev'] = '/dev' # FIXME: narrow this down?
1930 mounts
['/run/udev'] = '/run/udev'
1931 if daemon_type
== 'osd':
1932 mounts
['/sys'] = '/sys' # for numa.cc, pick_address, cgroups, ...
1933 mounts
['/run/lvm'] = '/run/lvm'
1934 mounts
['/run/lock/lvm'] = '/run/lock/lvm'
1937 if args
.shared_ceph_folder
: # make easy manager modules/ceph-volume development
1938 ceph_folder
= pathify(args
.shared_ceph_folder
)
1939 if os
.path
.exists(ceph_folder
):
1940 mounts
[ceph_folder
+ '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
1941 mounts
[ceph_folder
+ '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
1942 mounts
[ceph_folder
+ '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
1943 mounts
[ceph_folder
+ '/monitoring/grafana/dashboards'] = '/etc/grafana/dashboards/ceph-dashboard'
1944 mounts
[ceph_folder
+ '/monitoring/prometheus/alerts'] = '/etc/prometheus/ceph'
1946 logger
.error('{}{}{}'.format(termcolor
.red
,
1947 'Ceph shared source folder does not exist.',
1949 except AttributeError:
1952 if daemon_type
in Monitoring
.components
and daemon_id
:
1953 data_dir
= get_data_dir(fsid
, daemon_type
, daemon_id
)
1954 if daemon_type
== 'prometheus':
1955 mounts
[os
.path
.join(data_dir
, 'etc/prometheus')] = '/etc/prometheus:Z'
1956 mounts
[os
.path
.join(data_dir
, 'data')] = '/prometheus:Z'
1957 elif daemon_type
== 'node-exporter':
1958 mounts
['/proc'] = '/host/proc:ro'
1959 mounts
['/sys'] = '/host/sys:ro'
1960 mounts
['/'] = '/rootfs:ro'
1961 elif daemon_type
== "grafana":
1962 mounts
[os
.path
.join(data_dir
, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
1963 mounts
[os
.path
.join(data_dir
, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
1964 mounts
[os
.path
.join(data_dir
, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
1965 elif daemon_type
== 'alertmanager':
1966 mounts
[os
.path
.join(data_dir
, 'etc/alertmanager')] = '/etc/alertmanager:Z'
1968 if daemon_type
== NFSGanesha
.daemon_type
:
1970 data_dir
= get_data_dir(fsid
, daemon_type
, daemon_id
)
1971 nfs_ganesha
= NFSGanesha
.init(fsid
, daemon_id
)
1972 mounts
.update(nfs_ganesha
.get_container_mounts(data_dir
))
1974 if daemon_type
== CephIscsi
.daemon_type
:
1976 data_dir
= get_data_dir(fsid
, daemon_type
, daemon_id
)
1977 log_dir
= get_log_dir(fsid
)
1978 mounts
.update(CephIscsi
.get_container_mounts(data_dir
, log_dir
))
1980 if daemon_type
== CustomContainer
.daemon_type
:
1982 cc
= CustomContainer
.init(fsid
, daemon_id
)
1983 data_dir
= get_data_dir(fsid
, daemon_type
, daemon_id
)
1984 mounts
.update(cc
.get_container_mounts(data_dir
))
1989 def get_container(fsid
: str, daemon_type
: str, daemon_id
: Union
[int, str],
1990 privileged
: bool = False,
1991 ptrace
: bool = False,
1992 container_args
: Optional
[List
[str]] = None) -> 'CephContainer':
1993 entrypoint
: str = ''
1995 ceph_args
: List
[str] = []
1996 envs
: List
[str] = []
1997 host_network
: bool = True
1999 if container_args
is None:
2001 if daemon_type
in ['mon', 'osd']:
2002 # mon and osd need privileged in order for libudev to query devices
2004 if daemon_type
== 'rgw':
2005 entrypoint
= '/usr/bin/radosgw'
2006 name
= 'client.rgw.%s' % daemon_id
2007 elif daemon_type
== 'rbd-mirror':
2008 entrypoint
= '/usr/bin/rbd-mirror'
2009 name
= 'client.rbd-mirror.%s' % daemon_id
2010 elif daemon_type
== 'crash':
2011 entrypoint
= '/usr/bin/ceph-crash'
2012 name
= 'client.crash.%s' % daemon_id
2013 elif daemon_type
in ['mon', 'mgr', 'mds', 'osd']:
2014 entrypoint
= '/usr/bin/ceph-' + daemon_type
2015 name
= '%s.%s' % (daemon_type
, daemon_id
)
2016 elif daemon_type
in Monitoring
.components
:
2018 elif daemon_type
== NFSGanesha
.daemon_type
:
2019 entrypoint
= NFSGanesha
.entrypoint
2020 name
= '%s.%s' % (daemon_type
, daemon_id
)
2021 envs
.extend(NFSGanesha
.get_container_envs())
2022 elif daemon_type
== CephIscsi
.daemon_type
:
2023 entrypoint
= CephIscsi
.entrypoint
2024 name
= '%s.%s' % (daemon_type
, daemon_id
)
2025 # So the container can modprobe iscsi_target_mod and have write perms
2026 # to configfs we need to make this a privileged container.
2028 elif daemon_type
== CustomContainer
.daemon_type
:
2029 cc
= CustomContainer
.init(fsid
, daemon_id
)
2030 entrypoint
= cc
.entrypoint
2031 host_network
= False
2032 envs
.extend(cc
.get_container_envs())
2033 container_args
.extend(cc
.get_container_args())
2035 if daemon_type
in Monitoring
.components
:
2036 uid
, gid
= extract_uid_gid_monitoring(daemon_type
)
2040 # FIXME: disable cpu/memory limits for the time being (not supported
2041 # by ubuntu 18.04 kernel!)
2043 container_args
.extend(monitoring_args
)
2044 elif daemon_type
== 'crash':
2045 ceph_args
= ['-n', name
]
2046 elif daemon_type
in Ceph
.daemons
:
2047 ceph_args
= ['-n', name
, '-f']
2049 # if using podman, set -d, --conmon-pidfile & --cidfile flags
2050 # so service can have Type=Forking
2051 if 'podman' in container_path
:
2052 runtime_dir
= '/run'
2053 container_args
.extend(['-d',
2055 runtime_dir
+ '/ceph-%s@%s.%s.service-pid' % (fsid
, daemon_type
, daemon_id
),
2057 runtime_dir
+ '/ceph-%s@%s.%s.service-cid' % (fsid
, daemon_type
, daemon_id
)])
2059 return CephContainer(
2061 entrypoint
=entrypoint
,
2062 args
=ceph_args
+ get_daemon_args(fsid
, daemon_type
, daemon_id
),
2063 container_args
=container_args
,
2064 volume_mounts
=get_container_mounts(fsid
, daemon_type
, daemon_id
),
2065 bind_mounts
=get_container_binds(fsid
, daemon_type
, daemon_id
),
2066 cname
='ceph-%s-%s.%s' % (fsid
, daemon_type
, daemon_id
),
2068 privileged
=privileged
,
2070 init
=args
.container_init
,
2071 host_network
=host_network
,
def extract_uid_gid(img='', file_path='/var/lib/ceph'):
    # type: (str, Union[str, List[str]]) -> Tuple[int, int]
    """Discover the uid/gid used inside the container image by stat-ing
    one of `file_path` (a path or list of candidate paths)."""
    if not img:
        img = args.image

    if isinstance(file_path, str):
        paths = [file_path]
    else:
        paths = file_path

    for fp in paths:
        try:
            out = CephContainer(
                image=img,
                entrypoint='stat',
                args=['-c', '%u %g', fp]
            ).run()
            uid, gid = out.split(' ')
            return int(uid), int(gid)
        except RuntimeError:
            pass
    raise RuntimeError('uid/gid not found')
2100 def deploy_daemon(fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
2101 config
=None, keyring
=None,
2105 # type: (str, str, Union[int, str], CephContainer, int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None
2108 if any([port_in_use(port
) for port
in ports
]):
2109 raise Error("TCP Port(s) '{}' required for {} already in use".format(",".join(map(str, ports
)), daemon_type
))
2111 data_dir
= get_data_dir(fsid
, daemon_type
, daemon_id
)
2112 if reconfig
and not os
.path
.exists(data_dir
):
2113 raise Error('cannot reconfig, data path %s does not exist' % data_dir
)
2114 if daemon_type
== 'mon' and not os
.path
.exists(data_dir
):
2118 tmp_keyring
= write_tmp(keyring
, uid
, gid
)
2121 tmp_config
= write_tmp(config
, uid
, gid
)
2124 create_daemon_dirs(fsid
, daemon_type
, daemon_id
, uid
, gid
)
2125 mon_dir
= get_data_dir(fsid
, 'mon', daemon_id
)
2126 log_dir
= get_log_dir(fsid
)
2127 out
= CephContainer(
2129 entrypoint
='/usr/bin/ceph-mon',
2131 '-i', str(daemon_id
),
2133 '-c', '/tmp/config',
2134 '--keyring', '/tmp/keyring',
2135 ] + get_daemon_args(fsid
, 'mon', daemon_id
),
2137 log_dir
: '/var/log/ceph:z',
2138 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id
),
2139 tmp_keyring
.name
: '/tmp/keyring:z',
2140 tmp_config
.name
: '/tmp/config:z',
2145 with
open(mon_dir
+ '/config', 'w') as f
:
2146 os
.fchown(f
.fileno(), uid
, gid
)
2147 os
.fchmod(f
.fileno(), 0o600)
2150 # dirs, conf, keyring
2152 fsid
, daemon_type
, daemon_id
,
2157 deploy_daemon_units(fsid
, uid
, gid
, daemon_type
, daemon_id
, c
,
2160 if not os
.path
.exists(data_dir
+ '/unit.created'):
2161 with
open(data_dir
+ '/unit.created', 'w') as f
:
2162 os
.fchmod(f
.fileno(), 0o600)
2163 os
.fchown(f
.fileno(), uid
, gid
)
2164 f
.write('mtime is time the daemon deployment was created\n')
2166 with
open(data_dir
+ '/unit.configured', 'w') as f
:
2167 f
.write('mtime is time we were last configured\n')
2168 os
.fchmod(f
.fileno(), 0o600)
2169 os
.fchown(f
.fileno(), uid
, gid
)
2171 update_firewalld(daemon_type
)
2173 # Open ports explicitly required for the daemon
2176 fw
.open_ports(ports
)
2179 if reconfig
and daemon_type
not in Ceph
.daemons
:
2180 # ceph daemons do not need a restart; others (presumably) do to pick
2182 call_throws(['systemctl', 'reset-failed',
2183 get_unit_name(fsid
, daemon_type
, daemon_id
)])
2184 call_throws(['systemctl', 'restart',
2185 get_unit_name(fsid
, daemon_type
, daemon_id
)])
def _write_container_cmd_to_bash(file_obj, container, comment=None, background=False):
    # type: (IO[str], CephContainer, Optional[str], Optional[bool]) -> None
    """Emit the shell lines that remove any stale container and then run it."""
    if comment:
        # Sometimes adding a comment, especially if there are multiple containers in one
        # unit file, makes it easier to read and grok.
        file_obj.write('# ' + comment + '\n')
    # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
    file_obj.write('! '+ ' '.join(container.rm_cmd()) + ' 2> /dev/null\n')
    # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
    if 'podman' in container_path:
        file_obj.write('! '+ ' '.join(container.rm_cmd(storage=True)) + ' 2> /dev/null\n')

    # container run command
    file_obj.write(' '.join(container.run_cmd()) + (' &' if background else '') + '\n')
2203 def deploy_daemon_units(fsid
, uid
, gid
, daemon_type
, daemon_id
, c
,
2204 enable
=True, start
=True,
2206 # type: (str, int, int, str, Union[int, str], CephContainer, bool, bool, Optional[str]) -> None
2208 data_dir
= get_data_dir(fsid
, daemon_type
, daemon_id
)
2209 with
open(data_dir
+ '/unit.run.new', 'w') as f
:
2212 if daemon_type
in Ceph
.daemons
:
2213 install_path
= find_program('install')
2214 f
.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path
=install_path
, fsid
=fsid
, uid
=uid
, gid
=gid
))
2217 if daemon_type
== 'osd':
2218 # osds have a pre-start step
2220 simple_fn
= os
.path
.join('/etc/ceph/osd',
2221 '%s-%s.json.adopted-by-cephadm' % (daemon_id
, osd_fsid
))
2222 if os
.path
.exists(simple_fn
):
2223 f
.write('# Simple OSDs need chown on startup:\n')
2224 for n
in ['block', 'block.db', 'block.wal']:
2225 p
= os
.path
.join(data_dir
, n
)
2226 f
.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p
=p
, uid
=uid
, gid
=gid
))
2228 prestart
= CephContainer(
2230 entrypoint
='/usr/sbin/ceph-volume',
2233 str(daemon_id
), osd_fsid
,
2237 volume_mounts
=get_container_mounts(fsid
, daemon_type
, daemon_id
),
2238 bind_mounts
=get_container_binds(fsid
, daemon_type
, daemon_id
),
2239 cname
='ceph-%s-%s.%s-activate' % (fsid
, daemon_type
, daemon_id
),
2241 _write_container_cmd_to_bash(f
, prestart
, 'LVM OSDs use ceph-volume lvm activate')
2242 elif daemon_type
== NFSGanesha
.daemon_type
:
2243 # add nfs to the rados grace db
2244 nfs_ganesha
= NFSGanesha
.init(fsid
, daemon_id
)
2245 prestart
= nfs_ganesha
.get_rados_grace_container('add')
2246 _write_container_cmd_to_bash(f
, prestart
, 'add daemon to rados grace')
2247 elif daemon_type
== CephIscsi
.daemon_type
:
2248 f
.write(' '.join(CephIscsi
.configfs_mount_umount(data_dir
, mount
=True)) + '\n')
2249 ceph_iscsi
= CephIscsi
.init(fsid
, daemon_id
)
2250 tcmu_container
= ceph_iscsi
.get_tcmu_runner_container()
2251 _write_container_cmd_to_bash(f
, tcmu_container
, 'iscsi tcmu-runnter container', background
=True)
2253 _write_container_cmd_to_bash(f
, c
, '%s.%s' % (daemon_type
, str(daemon_id
)))
2254 os
.fchmod(f
.fileno(), 0o600)
2255 os
.rename(data_dir
+ '/unit.run.new',
2256 data_dir
+ '/unit.run')
2258 # post-stop command(s)
2259 with
open(data_dir
+ '/unit.poststop.new', 'w') as f
:
2260 if daemon_type
== 'osd':
2262 poststop
= CephContainer(
2264 entrypoint
='/usr/sbin/ceph-volume',
2266 'lvm', 'deactivate',
2267 str(daemon_id
), osd_fsid
,
2270 volume_mounts
=get_container_mounts(fsid
, daemon_type
, daemon_id
),
2271 bind_mounts
=get_container_binds(fsid
, daemon_type
, daemon_id
),
2272 cname
='ceph-%s-%s.%s-deactivate' % (fsid
, daemon_type
,
2275 _write_container_cmd_to_bash(f
, poststop
, 'deactivate osd')
2276 elif daemon_type
== NFSGanesha
.daemon_type
:
2277 # remove nfs from the rados grace db
2278 nfs_ganesha
= NFSGanesha
.init(fsid
, daemon_id
)
2279 poststop
= nfs_ganesha
.get_rados_grace_container('remove')
2280 _write_container_cmd_to_bash(f
, poststop
, 'remove daemon from rados grace')
2281 elif daemon_type
== CephIscsi
.daemon_type
:
2282 # make sure we also stop the tcmu container
2283 ceph_iscsi
= CephIscsi
.init(fsid
, daemon_id
)
2284 tcmu_container
= ceph_iscsi
.get_tcmu_runner_container()
2285 f
.write('! '+ ' '.join(tcmu_container
.stop_cmd()) + '\n')
2286 f
.write(' '.join(CephIscsi
.configfs_mount_umount(data_dir
, mount
=False)) + '\n')
2287 os
.fchmod(f
.fileno(), 0o600)
2288 os
.rename(data_dir
+ '/unit.poststop.new',
2289 data_dir
+ '/unit.poststop')
2291 with
open(data_dir
+ '/unit.image.new', 'w') as f
:
2292 f
.write(c
.image
+ '\n')
2293 os
.fchmod(f
.fileno(), 0o600)
2294 os
.rename(data_dir
+ '/unit.image.new',
2295 data_dir
+ '/unit.image')
2298 install_base_units(fsid
)
2299 unit
= get_unit_file(fsid
)
2300 unit_file
= 'ceph-%s@.service' % (fsid
)
2301 with
open(args
.unit_dir
+ '/' + unit_file
+ '.new', 'w') as f
:
2303 os
.rename(args
.unit_dir
+ '/' + unit_file
+ '.new',
2304 args
.unit_dir
+ '/' + unit_file
)
2305 call_throws(['systemctl', 'daemon-reload'])
2307 unit_name
= get_unit_name(fsid
, daemon_type
, daemon_id
)
2308 call(['systemctl', 'stop', unit_name
],
2309 verbosity
=CallVerbosity
.DEBUG
)
2310 call(['systemctl', 'reset-failed', unit_name
],
2311 verbosity
=CallVerbosity
.DEBUG
)
2313 call_throws(['systemctl', 'enable', unit_name
])
2315 call_throws(['systemctl', 'start', unit_name
])
2319 class Firewalld(object):
2322 self
.available
= self
.check()
2326 self
.cmd
= find_executable('firewall-cmd')
2328 logger
.debug('firewalld does not appear to be present')
2330 (enabled
, state
, _
) = check_unit('firewalld.service')
2332 logger
.debug('firewalld.service is not enabled')
2334 if state
!= "running":
2335 logger
.debug('firewalld.service is not running')
2338 logger
.info("firewalld ready")
2341 def enable_service_for(self
, daemon_type
):
2342 # type: (str) -> None
2343 if not self
.available
:
2344 logger
.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type
)
2347 if daemon_type
== 'mon':
2349 elif daemon_type
in ['mgr', 'mds', 'osd']:
2351 elif daemon_type
== NFSGanesha
.daemon_type
:
2356 out
, err
, ret
= call([self
.cmd
, '--permanent', '--query-service', svc
], verbosity
=CallVerbosity
.DEBUG
)
2358 logger
.info('Enabling firewalld service %s in current zone...' % svc
)
2359 out
, err
, ret
= call([self
.cmd
, '--permanent', '--add-service', svc
])
2362 'unable to add service %s to current zone: %s' % (svc
, err
))
2364 logger
.debug('firewalld service %s is enabled in current zone' % svc
)
2366 def open_ports(self
, fw_ports
):
2367 # type: (List[int]) -> None
2368 if not self
.available
:
2369 logger
.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports
)
2372 for port
in fw_ports
:
2373 tcp_port
= str(port
) + '/tcp'
2374 out
, err
, ret
= call([self
.cmd
, '--permanent', '--query-port', tcp_port
], verbosity
=CallVerbosity
.DEBUG
)
2376 logger
.info('Enabling firewalld port %s in current zone...' % tcp_port
)
2377 out
, err
, ret
= call([self
.cmd
, '--permanent', '--add-port', tcp_port
])
2379 raise RuntimeError('unable to add port %s to current zone: %s' %
2382 logger
.debug('firewalld port %s is enabled in current zone' % tcp_port
)
2384 out
, err
, ret
= call([self
.cmd
, '--permanent', '--query-port', tcp_port
], verbose_on_failure
=False)
2385 def apply_rules(self
):
2387 if not self
.available
:
2390 call_throws([self
.cmd
, '--reload'])
def update_firewalld(daemon_type):
    # type: (str) -> None
    """Open the firewalld service and ports required by a daemon type.

    Enables the well-known firewalld service for the daemon (mon/mgr/etc.)
    and opens any extra TCP ports the daemon listens on, then reloads the
    firewall so the permanent rules take effect. A no-op if firewalld is
    not available on the host (Firewalld methods check availability).
    """
    firewall = Firewalld()
    firewall.enable_service_for(daemon_type)

    # fix: initialize the port list before extending it; the garbled source
    # referenced fw_ports without ever assigning it (NameError at runtime)
    fw_ports = []  # type: List[int]
    if daemon_type in Monitoring.port_map.keys():
        fw_ports.extend(Monitoring.port_map[daemon_type])  # prometheus etc

    firewall.open_ports(fw_ports)
    firewall.apply_rules()
2407 def install_base_units(fsid
):
2408 # type: (str) -> None
2410 Set up ceph.target and ceph-$fsid.target units.
2413 existed
= os
.path
.exists(args
.unit_dir
+ '/ceph.target')
2414 with
open(args
.unit_dir
+ '/ceph.target.new', 'w') as f
:
2416 'Description=All Ceph clusters and services\n'
2419 'WantedBy=multi-user.target\n')
2420 os
.rename(args
.unit_dir
+ '/ceph.target.new',
2421 args
.unit_dir
+ '/ceph.target')
2423 # we disable before enable in case a different ceph.target
2424 # (from the traditional package) is present; while newer
2425 # systemd is smart enough to disable the old
2426 # (/lib/systemd/...) and enable the new (/etc/systemd/...),
2427 # some older versions of systemd error out with EEXIST.
2428 call_throws(['systemctl', 'disable', 'ceph.target'])
2429 call_throws(['systemctl', 'enable', 'ceph.target'])
2430 call_throws(['systemctl', 'start', 'ceph.target'])
2433 existed
= os
.path
.exists(args
.unit_dir
+ '/ceph-%s.target' % fsid
)
2434 with
open(args
.unit_dir
+ '/ceph-%s.target.new' % fsid
, 'w') as f
:
2436 'Description=Ceph cluster {fsid}\n'
2437 'PartOf=ceph.target\n'
2438 'Before=ceph.target\n'
2441 'WantedBy=multi-user.target ceph.target\n'.format(
2444 os
.rename(args
.unit_dir
+ '/ceph-%s.target.new' % fsid
,
2445 args
.unit_dir
+ '/ceph-%s.target' % fsid
)
2447 call_throws(['systemctl', 'enable', 'ceph-%s.target' % fsid
])
2448 call_throws(['systemctl', 'start', 'ceph-%s.target' % fsid
])
2450 # logrotate for the cluster
2451 with
open(args
.logrotate_dir
+ '/ceph-%s' % fsid
, 'w') as f
:
2453 This is a bit sloppy in that the killall/pkill will touch all ceph daemons
2454 in all containers, but I don't see an elegant way to send SIGHUP *just* to
2455 the daemons for this cluster. (1) systemd kill -s will get the signal to
2456 podman, but podman will exit. (2) podman kill will get the signal to the
2457 first child (bash), but that isn't the ceph daemon. This is simpler and
2460 f
.write("""# created by cephadm
2461 /var/log/ceph/%s/*.log {
2467 killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror || pkill -1 -x "ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror" || true
2476 def get_unit_file(fsid
):
2477 # type: (str) -> str
2479 if 'podman' in container_path
:
2480 extra_args
= ('ExecStartPre=-/bin/rm -f /%t/%n-pid /%t/%n-cid\n'
2481 'ExecStopPost=-/bin/rm -f /%t/%n-pid /%t/%n-cid\n'
2483 'PIDFile=/%t/%n-pid\n')
2485 u
= """# generated by cephadm
2487 Description=Ceph %i for {fsid}
2490 # http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
2491 # these can be removed once ceph-mon will dynamically change network
2493 After=network-online.target local-fs.target time-sync.target
2494 Wants=network-online.target local-fs.target time-sync.target
2496 PartOf=ceph-{fsid}.target
2497 Before=ceph-{fsid}.target
2502 EnvironmentFile=-/etc/environment
2503 ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
2504 ExecStop=-{container_path} stop ceph-{fsid}-%i
2505 ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
2511 StartLimitInterval=30min
2515 WantedBy=ceph-{fsid}.target
2517 container_path
=container_path
,
2519 data_dir
=args
.data_dir
,
2520 extra_args
=extra_args
)
2524 ##################################
2527 class CephContainer
:
2531 args
: List
[str] = [],
2532 volume_mounts
: Dict
[str, str] = {},
2534 container_args
: List
[str] = [],
2535 envs
: Optional
[List
[str]] = None,
2536 privileged
: bool = False,
2537 ptrace
: bool = False,
2538 bind_mounts
: Optional
[List
[List
[str]]] = None,
2540 host_network
: bool = True,
2543 self
.entrypoint
= entrypoint
2545 self
.volume_mounts
= volume_mounts
2547 self
.container_args
= container_args
2549 self
.privileged
= privileged
2550 self
.ptrace
= ptrace
2551 self
.bind_mounts
= bind_mounts
if bind_mounts
else []
2553 self
.host_network
= host_network
2555 def run_cmd(self
) -> List
[str]:
2556 cmd_args
: List
[str] = [
2557 str(container_path
),
2563 '-e', 'CONTAINER_IMAGE=%s' % self
.image
,
2564 '-e', 'NODE_NAME=%s' % get_hostname(),
2566 vols
: List
[str] = []
2567 binds
: List
[str] = []
2569 if self
.host_network
:
2570 cmd_args
.append('--net=host')
2572 cmd_args
.extend(['--entrypoint', self
.entrypoint
])
2576 # let OSD etc read block devs that haven't been chowned
2577 '--group-add=disk'])
2578 if self
.ptrace
and not self
.privileged
:
2579 # if privileged, the SYS_PTRACE cap is already added
2580 # in addition, --cap-add and --privileged are mutually
2581 # exclusive since podman >= 2.0
2582 cmd_args
.append('--cap-add=SYS_PTRACE')
2584 cmd_args
.append('--init')
2586 cmd_args
.extend(['--name', self
.cname
])
2588 for env
in self
.envs
:
2589 envs
.extend(['-e', env
])
2592 [['-v', '%s:%s' % (host_dir
, container_dir
)]
2593 for host_dir
, container_dir
in self
.volume_mounts
.items()], [])
2594 binds
= sum([['--mount', '{}'.format(','.join(bind
))]
2595 for bind
in self
.bind_mounts
], [])
2597 return cmd_args
+ self
.container_args
+ envs
+ vols
+ binds
+ [
2599 ] + self
.args
# type: ignore
2601 def shell_cmd(self
, cmd
: List
[str]) -> List
[str]:
2602 cmd_args
: List
[str] = [
2603 str(container_path
),
2609 '-e', 'CONTAINER_IMAGE=%s' % self
.image
,
2610 '-e', 'NODE_NAME=%s' % get_hostname(),
2612 vols
: List
[str] = []
2613 binds
: List
[str] = []
2615 if self
.host_network
:
2616 cmd_args
.append('--net=host')
2620 # let OSD etc read block devs that haven't been chowned
2624 for env
in self
.envs
:
2625 envs
.extend(['-e', env
])
2628 [['-v', '%s:%s' % (host_dir
, container_dir
)]
2629 for host_dir
, container_dir
in self
.volume_mounts
.items()], [])
2630 binds
= sum([['--mount', '{}'.format(','.join(bind
))]
2631 for bind
in self
.bind_mounts
], [])
2633 return cmd_args
+ self
.container_args
+ envs
+ vols
+ binds
+ [
2634 '--entrypoint', cmd
[0],
2638 def exec_cmd(self
, cmd
):
2639 # type: (List[str]) -> List[str]
2641 str(container_path
),
2643 ] + self
.container_args
+ [
2647 def rm_cmd(self
, storage
=False):
2648 # type: (bool) -> List[str]
2650 str(container_path
),
2654 ret
.append('--storage')
2655 ret
.append(self
.cname
)
2659 # type () -> List[str]
2661 str(container_path
),
2666 def run(self
, timeout
=DEFAULT_TIMEOUT
):
2667 # type: (Optional[int]) -> str
2668 out
, _
, _
= call_throws(
2669 self
.run_cmd(), desc
=self
.entrypoint
, timeout
=timeout
)
2672 ##################################
# NOTE(review): extraction-garbled fragment. Runs `ceph --version` inside the
# configured container image via CephContainer; the lines that print the
# output and return a status code are missing from this view — TODO confirm
# against the full source before relying on this function's return value.
2676 def command_version():
2678 out
= CephContainer(args
.image
, 'ceph', ['--version']).run()
2682 ##################################
2689 _pull_image(args
.image
)
2690 return command_inspect_image()
def _pull_image(image):
    # type: (str) -> None
    """Pull a container image, retrying known-transient failures.

    Runs ``<container_path> pull <image>`` up to four times (initial try
    plus three retries with increasing back-off). Errors matching the
    ignorelist are treated as transient; anything else raises immediately.

    :raises RuntimeError: on a non-transient failure, or when retries
                          are exhausted.
    """
    logger.info('Pulling container image %s...' % image)

    # stderr substrings that indicate a transient registry/network problem
    # worth retrying rather than failing outright
    ignorelist = [
        "error creating read-write layer with ID",
        "net/http: TLS handshake timeout",
        "Digest did not match, expected",
    ]

    cmd = [container_path, 'pull', image]
    cmd_str = ' '.join(cmd)

    for sleep_secs in [1, 4, 25]:
        out, err, ret = call(cmd)
        if not ret:
            # fix: restore the lost success path — return as soon as the
            # pull exits cleanly instead of falling through to the checks
            return

        if not any(pattern in err for pattern in ignorelist):
            raise RuntimeError('Failed command: %s' % cmd_str)

        # fix: removed the stray unmatched '"' that prefixed this message
        logger.info('%s failed transiently. Retrying. waiting %s seconds...'
                    % (cmd_str, sleep_secs))
        time.sleep(sleep_secs)

    raise RuntimeError('Failed command: %s: maximum retries reached' % cmd_str)
2718 ##################################
2722 def command_inspect_image():
2724 out
, err
, ret
= call_throws([
2725 container_path
, 'inspect',
2726 '--format', '{{.ID}},{{json .RepoDigests}}',
2730 info_from
= get_image_info_from_inspect(out
.strip(), args
.image
)
2732 ver
= CephContainer(args
.image
, 'ceph', ['--version']).run().strip()
2733 info_from
['ceph_version'] = ver
2735 print(json
.dumps(info_from
, indent
=4, sort_keys
=True))
def get_image_info_from_inspect(out, image):
    # type: (str, str) -> Dict[str, str]
    """Parse ``inspect --format '{{.ID}},{{json .RepoDigests}}'`` output.

    :param out: raw inspect output, ``<image_id>,<json digest list>``
    :param image: image name, used only for error reporting
    :return: dict with 'image_id' and, when available, 'repo_digest'
    :raises Error: if the inspect output is empty
    """
    # fix: check for empty output *before* splitting; splitting an empty
    # string cannot yield two fields, so the original order raised an
    # unpacking ValueError instead of the intended Error
    if not out:
        raise Error('inspect {}: empty result'.format(image))
    image_id, digests = out.split(',', 1)
    r = {
        'image_id': normalize_container_id(image_id)
    }
    if digests:
        json_digests = json.loads(digests)
        if json_digests:
            r['repo_digest'] = json_digests[0]
    return r
2754 ##################################
def unwrap_ipv6(address):
    # type: (str) -> str
    """Strip the surrounding square brackets from an IPv6 address.

    Addresses without brackets (IPv4, hostnames, bare IPv6) are returned
    unchanged.
    """
    if address.startswith('[') and address.endswith(']'):
        return address[1:-1]
    # fix: restore the lost fall-through return; without it the function
    # returned None for every unbracketed address
    return address
def wrap_ipv6(address):
    # type: (str) -> str
    """Wrap an IPv6 address in square brackets; return others unchanged."""
    # We cannot assume it's already wrapped or even an IPv6 address if
    # it's already wrapped it'll not pass (like if it's a hostname) and trigger
    # the ValueError
    try:
        if ipaddress.ip_address(unicode(address)).version == 6:
            return f"[{address}]"
    except ValueError:
        # not an IP address at all (e.g. a hostname) -- leave it alone
        pass
    # fix: restore the lost fallback return; without the try/except and this
    # line, hostnames raised ValueError and IPv4 addresses returned None
    return address
def is_ipv6(address):
    # type: (str) -> bool
    """Return True if *address* (optionally bracket-wrapped) parses as IPv6."""
    address = unwrap_ipv6(address)
    try:
        return ipaddress.ip_address(unicode(address)).version == 6
    except ValueError:
        # fix: restore the lost try/except/False path — non-IP strings
        # (hostnames etc.) must yield False, not an unhandled ValueError
        logger.warning("Address: {} isn't a valid IP address".format(address))
        return False
2790 def command_bootstrap():
2793 if not args
.output_config
:
2794 args
.output_config
= os
.path
.join(args
.output_dir
, 'ceph.conf')
2795 if not args
.output_keyring
:
2796 args
.output_keyring
= os
.path
.join(args
.output_dir
,
2797 'ceph.client.admin.keyring')
2798 if not args
.output_pub_ssh_key
:
2799 args
.output_pub_ssh_key
= os
.path
.join(args
.output_dir
, 'ceph.pub')
2801 # verify output files
2802 for f
in [args
.output_config
, args
.output_keyring
, args
.output_pub_ssh_key
]:
2803 if not args
.allow_overwrite
:
2804 if os
.path
.exists(f
):
2805 raise Error('%s already exists; delete or pass '
2806 '--allow-overwrite to overwrite' % f
)
2807 dirname
= os
.path
.dirname(f
)
2808 if dirname
and not os
.path
.exists(dirname
):
2809 fname
= os
.path
.basename(f
)
2810 logger
.info(f
"Creating directory {dirname} for {fname}")
2812 # use makedirs to create intermediate missing dirs
2813 os
.makedirs(dirname
, 0o755)
2814 except PermissionError
:
2815 raise Error(f
"Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.")
2818 if not args
.skip_prepare_host
:
2819 command_prepare_host()
2821 logger
.info('Skip prepare_host')
2824 fsid
= args
.fsid
or make_fsid()
2825 hostname
= get_hostname()
2826 if '.' in hostname
and not args
.allow_fqdn_hostname
:
2827 raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname
, hostname
.split('.')[0]))
2828 mon_id
= args
.mon_id
or hostname
2829 mgr_id
= args
.mgr_id
or generate_service_id()
2830 logger
.info('Cluster fsid: %s' % fsid
)
2837 r
= re
.compile(r
':(\d+)$')
2840 ipv6
= is_ipv6(args
.mon_ip
)
2842 args
.mon_ip
= wrap_ipv6(args
.mon_ip
)
2843 hasport
= r
.findall(args
.mon_ip
)
2845 port
= int(hasport
[0])
2847 addr_arg
= '[v1:%s]' % args
.mon_ip
2849 addr_arg
= '[v2:%s]' % args
.mon_ip
2851 logger
.warning('Using msgr2 protocol for unrecognized port %d' %
2853 addr_arg
= '[v2:%s]' % args
.mon_ip
2854 base_ip
= args
.mon_ip
[0:-(len(str(port
)))-1]
2855 check_ip_port(base_ip
, port
)
2857 base_ip
= args
.mon_ip
2858 addr_arg
= '[v2:%s:3300,v1:%s:6789]' % (args
.mon_ip
, args
.mon_ip
)
2859 check_ip_port(args
.mon_ip
, 3300)
2860 check_ip_port(args
.mon_ip
, 6789)
2861 elif args
.mon_addrv
:
2862 addr_arg
= args
.mon_addrv
2863 if addr_arg
[0] != '[' or addr_arg
[-1] != ']':
2864 raise Error('--mon-addrv value %s must use square backets' %
2866 ipv6
= addr_arg
.count('[') > 1
2867 for addr
in addr_arg
[1:-1].split(','):
2868 hasport
= r
.findall(addr
)
2870 raise Error('--mon-addrv value %s must include port number' %
2872 port
= int(hasport
[0])
2873 # strip off v1: or v2: prefix
2874 addr
= re
.sub(r
'^\w+:', '', addr
)
2875 base_ip
= addr
[0:-(len(str(port
)))-1]
2876 check_ip_port(base_ip
, port
)
2878 raise Error('must specify --mon-ip or --mon-addrv')
2879 logger
.debug('Base mon IP is %s, final addrv is %s' % (base_ip
, addr_arg
))
2882 if not args
.skip_mon_network
:
2883 # make sure IP is configured locally, and then figure out the
2885 for net
, ips
in list_networks().items():
2886 if ipaddress
.ip_address(unicode(unwrap_ipv6(base_ip
))) in \
2887 [ipaddress
.ip_address(unicode(ip
)) for ip
in ips
]:
2889 logger
.info('Mon IP %s is in CIDR network %s' % (base_ip
,
2893 raise Error('Failed to infer CIDR network for mon ip %s; pass '
2894 '--skip-mon-network to configure it later' % base_ip
)
2897 cp
= read_config(args
.config
)
2898 if not cp
.has_section('global'):
2899 cp
.add_section('global')
2900 cp
.set('global', 'fsid', fsid
);
2901 cp
.set('global', 'mon host', addr_arg
)
2902 cp
.set('global', 'container_image', args
.image
)
2905 config
= cpf
.getvalue()
2907 if args
.registry_json
or args
.registry_url
:
2908 command_registry_login()
2910 if not args
.skip_pull
:
2911 _pull_image(args
.image
)
2913 logger
.info('Extracting ceph user uid/gid from container image...')
2914 (uid
, gid
) = extract_uid_gid()
2916 # create some initial keys
2917 logger
.info('Creating initial keys...')
2918 mon_key
= CephContainer(
2920 entrypoint
='/usr/bin/ceph-authtool',
2921 args
=['--gen-print-key'],
2923 admin_key
= CephContainer(
2925 entrypoint
='/usr/bin/ceph-authtool',
2926 args
=['--gen-print-key'],
2928 mgr_key
= CephContainer(
2930 entrypoint
='/usr/bin/ceph-authtool',
2931 args
=['--gen-print-key'],
2934 keyring
= ('[mon.]\n'
2936 '\tcaps mon = allow *\n'
2939 '\tcaps mon = allow *\n'
2940 '\tcaps mds = allow *\n'
2941 '\tcaps mgr = allow *\n'
2942 '\tcaps osd = allow *\n'
2945 '\tcaps mon = profile mgr\n'
2946 '\tcaps mds = allow *\n'
2947 '\tcaps osd = allow *\n'
2948 % (mon_key
, admin_key
, mgr_id
, mgr_key
))
2951 tmp_bootstrap_keyring
= write_tmp(keyring
, uid
, gid
)
2953 # create initial monmap, tmp monmap file
2954 logger
.info('Creating initial monmap...')
2955 tmp_monmap
= write_tmp('', 0, 0)
2956 out
= CephContainer(
2958 entrypoint
='/usr/bin/monmaptool',
2962 '--addv', mon_id
, addr_arg
,
2966 tmp_monmap
.name
: '/tmp/monmap:z',
2970 # pass monmap file to ceph user for use by ceph-mon --mkfs below
2971 os
.fchown(tmp_monmap
.fileno(), uid
, gid
)
2974 logger
.info('Creating mon...')
2975 create_daemon_dirs(fsid
, 'mon', mon_id
, uid
, gid
)
2976 mon_dir
= get_data_dir(fsid
, 'mon', mon_id
)
2977 log_dir
= get_log_dir(fsid
)
2978 out
= CephContainer(
2980 entrypoint
='/usr/bin/ceph-mon',
2985 '--monmap', '/tmp/monmap',
2986 '--keyring', '/tmp/keyring',
2987 ] + get_daemon_args(fsid
, 'mon', mon_id
),
2989 log_dir
: '/var/log/ceph:z',
2990 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id
),
2991 tmp_bootstrap_keyring
.name
: '/tmp/keyring:z',
2992 tmp_monmap
.name
: '/tmp/monmap:z',
2996 with
open(mon_dir
+ '/config', 'w') as f
:
2997 os
.fchown(f
.fileno(), uid
, gid
)
2998 os
.fchmod(f
.fileno(), 0o600)
3001 make_var_run(fsid
, uid
, gid
)
3002 mon_c
= get_container(fsid
, 'mon', mon_id
)
3003 deploy_daemon(fsid
, 'mon', mon_id
, mon_c
, uid
, gid
,
3004 config
=None, keyring
=None)
3006 # client.admin key + config to issue various CLI commands
3007 tmp_admin_keyring
= write_tmp('[client.admin]\n'
3008 '\tkey = ' + admin_key
+ '\n',
3010 tmp_config
= write_tmp(config
, uid
, gid
)
3012 # a CLI helper to reduce our typing
3013 def cli(cmd
, extra_mounts
={}, timeout
=DEFAULT_TIMEOUT
):
3014 # type: (List[str], Dict[str, str], Optional[int]) -> str
3016 log_dir
: '/var/log/ceph:z',
3017 tmp_admin_keyring
.name
: '/etc/ceph/ceph.client.admin.keyring:z',
3018 tmp_config
.name
: '/etc/ceph/ceph.conf:z',
3020 for k
, v
in extra_mounts
.items():
3022 timeout
= timeout
or args
.timeout
3023 return CephContainer(
3025 entrypoint
='/usr/bin/ceph',
3027 volume_mounts
=mounts
,
3028 ).run(timeout
=timeout
)
3030 logger
.info('Waiting for mon to start...')
3033 entrypoint
='/usr/bin/ceph',
3037 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id
),
3038 tmp_admin_keyring
.name
: '/etc/ceph/ceph.client.admin.keyring:z',
3039 tmp_config
.name
: '/etc/ceph/ceph.conf:z',
3043 # wait for the service to become available
3044 def is_mon_available():
3046 timeout
=args
.timeout
if args
.timeout
else 60 # seconds
3047 out
, err
, ret
= call(c
.run_cmd(),
3051 is_available('mon', is_mon_available
)
3053 # assimilate and minimize config
3054 if not args
.no_minimize_config
:
3055 logger
.info('Assimilating anything we can from ceph.conf...')
3057 'config', 'assimilate-conf',
3058 '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
3060 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
3062 logger
.info('Generating new minimal ceph.conf...')
3064 'config', 'generate-minimal-conf',
3065 '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
3067 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
3069 # re-read our minimized config
3070 with
open(mon_dir
+ '/config', 'r') as f
:
3072 logger
.info('Restarting the monitor...')
3076 get_unit_name(fsid
, 'mon', mon_id
)
3080 logger
.info('Setting mon public_network...')
3081 cli(['config', 'set', 'mon', 'public_network', mon_network
])
3084 logger
.info('Enabling IPv6 (ms_bind_ipv6)')
3085 cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])
3088 logger
.info('Creating mgr...')
3089 mgr_keyring
= '[mgr.%s]\n\tkey = %s\n' % (mgr_id
, mgr_key
)
3090 mgr_c
= get_container(fsid
, 'mgr', mgr_id
)
3091 # Note:the default port used by the Prometheus node exporter is opened in fw
3092 deploy_daemon(fsid
, 'mgr', mgr_id
, mgr_c
, uid
, gid
,
3093 config
=config
, keyring
=mgr_keyring
, ports
=[9283])
3096 with
open(args
.output_keyring
, 'w') as f
:
3097 os
.fchmod(f
.fileno(), 0o600)
3098 f
.write('[client.admin]\n'
3099 '\tkey = ' + admin_key
+ '\n')
3100 logger
.info('Wrote keyring to %s' % args
.output_keyring
)
3102 with
open(args
.output_config
, 'w') as f
:
3104 logger
.info('Wrote config to %s' % args
.output_config
)
3106 # wait for the service to become available
3107 logger
.info('Waiting for mgr to start...')
3108 def is_mgr_available():
3110 timeout
=args
.timeout
if args
.timeout
else 60 # seconds
3112 out
= cli(['status', '-f', 'json-pretty'], timeout
=timeout
)
3114 return j
.get('mgrmap', {}).get('available', False)
3115 except Exception as e
:
3116 logger
.debug('status failed: %s' % e
)
3118 is_available('mgr', is_mgr_available
)
3120 # wait for mgr to restart (after enabling a module)
3121 def wait_for_mgr_restart():
3122 # first get latest mgrmap epoch from the mon
3123 out
= cli(['mgr', 'dump'])
3126 # wait for mgr to have it
3127 logger
.info('Waiting for the mgr to restart...')
3128 def mgr_has_latest_epoch():
3131 out
= cli(['tell', 'mgr', 'mgr_status'])
3133 return j
['mgrmap_epoch'] >= epoch
3134 except Exception as e
:
3135 logger
.debug('tell mgr mgr_status failed: %s' % e
)
3137 is_available('Mgr epoch %d' % epoch
, mgr_has_latest_epoch
)
3140 if not args
.skip_ssh
:
3141 cli(['config-key', 'set', 'mgr/cephadm/ssh_user', args
.ssh_user
])
3143 logger
.info('Enabling cephadm module...')
3144 cli(['mgr', 'module', 'enable', 'cephadm'])
3145 wait_for_mgr_restart()
3147 logger
.info('Setting orchestrator backend to cephadm...')
3148 cli(['orch', 'set', 'backend', 'cephadm'])
3151 logger
.info('Using provided ssh config...')
3153 pathify(args
.ssh_config
.name
): '/tmp/cephadm-ssh-config:z',
3155 cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts
=mounts
)
3157 if args
.ssh_private_key
and args
.ssh_public_key
:
3158 logger
.info('Using provided ssh keys...')
3160 pathify(args
.ssh_private_key
.name
): '/tmp/cephadm-ssh-key:z',
3161 pathify(args
.ssh_public_key
.name
): '/tmp/cephadm-ssh-key.pub:z'
3163 cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts
=mounts
)
3164 cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts
=mounts
)
3166 logger
.info('Generating ssh key...')
3167 cli(['cephadm', 'generate-key'])
3168 ssh_pub
= cli(['cephadm', 'get-pub-key'])
3170 with
open(args
.output_pub_ssh_key
, 'w') as f
:
3172 logger
.info('Wrote public SSH key to to %s' % args
.output_pub_ssh_key
)
3174 logger
.info('Adding key to %s@localhost\'s authorized_keys...' % args
.ssh_user
)
3176 s_pwd
= pwd
.getpwnam(args
.ssh_user
)
3177 except KeyError as e
:
3178 raise Error('Cannot find uid/gid for ssh-user: %s' % (args
.ssh_user
))
3179 ssh_uid
= s_pwd
.pw_uid
3180 ssh_gid
= s_pwd
.pw_gid
3181 ssh_dir
= os
.path
.join(s_pwd
.pw_dir
, '.ssh')
3183 if not os
.path
.exists(ssh_dir
):
3184 makedirs(ssh_dir
, ssh_uid
, ssh_gid
, 0o700)
3186 auth_keys_file
= '%s/authorized_keys' % ssh_dir
3189 if os
.path
.exists(auth_keys_file
):
3190 with
open(auth_keys_file
, 'r') as f
:
3191 f
.seek(0, os
.SEEK_END
)
3193 f
.seek(f
.tell()-1, os
.SEEK_SET
) # go to last char
3194 if f
.read() != '\n':
3197 with
open(auth_keys_file
, 'a') as f
:
3198 os
.fchown(f
.fileno(), ssh_uid
, ssh_gid
) # just in case we created it
3199 os
.fchmod(f
.fileno(), 0o600) # just in case we created it
3202 f
.write(ssh_pub
.strip() + '\n')
3204 host
= get_hostname()
3205 logger
.info('Adding host %s...' % host
)
3207 cli(['orch', 'host', 'add', host
])
3208 except RuntimeError as e
:
3209 raise Error('Failed to add host <%s>: %s' % (host
, e
))
3211 if not args
.orphan_initial_daemons
:
3212 for t
in ['mon', 'mgr', 'crash']:
3213 logger
.info('Deploying %s service with default placement...' % t
)
3214 cli(['orch', 'apply', t
])
3216 if not args
.skip_monitoring_stack
:
3217 logger
.info('Enabling mgr prometheus module...')
3218 cli(['mgr', 'module', 'enable', 'prometheus'])
3219 for t
in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
3220 logger
.info('Deploying %s service with default placement...' % t
)
3221 cli(['orch', 'apply', t
])
3223 if args
.registry_url
and args
.registry_username
and args
.registry_password
:
3224 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_url', args
.registry_url
, '--force'])
3225 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_username', args
.registry_username
, '--force'])
3226 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_password', args
.registry_password
, '--force'])
3228 if args
.container_init
:
3229 cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(args
.container_init
), '--force'])
3231 if not args
.skip_dashboard
:
3232 # Configure SSL port (cephadm only allows to configure dashboard SSL port)
3233 # if the user does not want to use SSL he can change this setting once the cluster is up
3234 cli(["config", "set", "mgr", "mgr/dashboard/ssl_server_port" , str(args
.ssl_dashboard_port
)])
3236 # configuring dashboard parameters
3237 logger
.info('Enabling the dashboard module...')
3238 cli(['mgr', 'module', 'enable', 'dashboard'])
3239 wait_for_mgr_restart()
3241 # dashboard crt and key
3242 if args
.dashboard_key
and args
.dashboard_crt
:
3243 logger
.info('Using provided dashboard certificate...')
3245 pathify(args
.dashboard_crt
.name
): '/tmp/dashboard.crt:z',
3246 pathify(args
.dashboard_key
.name
): '/tmp/dashboard.key:z'
3248 cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts
=mounts
)
3249 cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts
=mounts
)
3251 logger
.info('Generating a dashboard self-signed certificate...')
3252 cli(['dashboard', 'create-self-signed-cert'])
3254 logger
.info('Creating initial admin user...')
3255 password
= args
.initial_dashboard_password
or generate_password()
3256 cmd
= ['dashboard', 'ac-user-create', args
.initial_dashboard_user
, password
, 'administrator', '--force-password']
3257 if not args
.dashboard_password_noupdate
:
3258 cmd
.append('--pwd-update-required')
3260 logger
.info('Fetching dashboard port number...')
3261 out
= cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
3264 # Open dashboard port
3266 fw
.open_ports([port
])
3269 logger
.info('Ceph Dashboard is now available at:\n\n'
3270 '\t URL: https://%s:%s/\n'
3272 '\tPassword: %s\n' % (
3274 args
.initial_dashboard_user
,
3278 logger
.info('Applying %s to cluster' % args
.apply_spec
)
3280 with
open(args
.apply_spec
) as f
:
3282 if 'hostname:' in line
:
3283 line
= line
.replace('\n', '')
3284 split
= line
.split(': ')
3285 if split
[1] != host
:
3286 logger
.info('Adding ssh key to %s' % split
[1])
3288 ssh_key
= '/etc/ceph/ceph.pub'
3289 if args
.ssh_public_key
:
3290 ssh_key
= args
.ssh_public_key
.name
3291 out
, err
, code
= call_throws(['ssh-copy-id', '-f', '-i', ssh_key
, '%s@%s' % (args
.ssh_user
, split
[1])])
3294 mounts
[pathify(args
.apply_spec
)] = '/tmp/spec.yml:z'
3296 out
= cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts
=mounts
)
3299 logger
.info('You can access the Ceph CLI with:\n\n'
3300 '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
3304 args
.output_keyring
))
3305 logger
.info('Please consider enabling telemetry to help improve Ceph:\n\n'
3306 '\tceph telemetry on\n\n'
3307 'For more information see:\n\n'
3308 '\thttps://docs.ceph.com/docs/master/mgr/telemetry/\n')
3309 logger
.info('Bootstrap complete.')
3312 ##################################
3314 def command_registry_login():
3315 if args
.registry_json
:
3316 logger
.info("Pulling custom registry login info from %s." % args
.registry_json
)
3317 d
= get_parm(args
.registry_json
)
3318 if d
.get('url') and d
.get('username') and d
.get('password'):
3319 args
.registry_url
= d
.get('url')
3320 args
.registry_username
= d
.get('username')
3321 args
.registry_password
= d
.get('password')
3322 registry_login(args
.registry_url
, args
.registry_username
, args
.registry_password
)
3324 raise Error("json provided for custom registry login did not include all necessary fields. "
3325 "Please setup json file as\n"
3327 " \"url\": \"REGISTRY_URL\",\n"
3328 " \"username\": \"REGISTRY_USERNAME\",\n"
3329 " \"password\": \"REGISTRY_PASSWORD\"\n"
3331 elif args
.registry_url
and args
.registry_username
and args
.registry_password
:
3332 registry_login(args
.registry_url
, args
.registry_username
, args
.registry_password
)
3334 raise Error("Invalid custom registry arguments received. To login to a custom registry include "
3335 "--registry-url, --registry-username and --registry-password "
3336 "options or --registry-json option")
def registry_login(url, username, password):
    """Log the local container runtime into a custom image registry.

    :raises Error: if the `login` command fails for any reason.
    """
    logger.info("Logging into custom registry.")
    try:
        out, _, _ = call_throws([container_path, 'login',
                                 '-u', username,
                                 '-p', password,
                                 url])
    except Exception:  # narrowed from a bare except; still wraps any failure
        # fix: report the url/username actually passed to this function
        # rather than args.registry_url/args.registry_username, which may be
        # unset or different from the credentials used here
        raise Error("Failed to login to custom registry @ %s as %s with given password"
                    % (url, username))
3349 ##################################
def extract_uid_gid_monitoring(daemon_type):
    # type: (str) -> Tuple[int, int]
    """Return the (uid, gid) a monitoring-stack daemon should run as.

    For image-based daemons the ownership is read from a well-known path
    inside the container image; node-exporter is hard-coded to
    nobody/nogroup (65534).

    :raises Error: for daemon types with no known uid/gid mapping.
    """
    if daemon_type == 'prometheus':
        uid, gid = extract_uid_gid(file_path='/etc/prometheus')
    elif daemon_type == 'node-exporter':
        # nobody/nogroup; no need to inspect the image
        uid, gid = 65534, 65534
    elif daemon_type == 'grafana':
        uid, gid = extract_uid_gid(file_path='/var/lib/grafana')
    elif daemon_type == 'alertmanager':
        uid, gid = extract_uid_gid(file_path=['/etc/alertmanager', '/etc/prometheus'])
    else:
        raise Error("{} not implemented yet".format(daemon_type))
    # fix: restore the lost return statement; without it every branch's
    # result was discarded and the function returned None
    return uid, gid
3369 def command_deploy():
3371 daemon_type
, daemon_id
= args
.name
.split('.', 1)
3373 l
= FileLock(args
.fsid
)
3376 if daemon_type
not in get_supported_daemons():
3377 raise Error('daemon type %s not recognized' % daemon_type
)
3380 unit_name
= get_unit_name(args
.fsid
, daemon_type
, daemon_id
)
3381 (_
, state
, _
) = check_unit(unit_name
)
3382 if state
== 'running':
3386 logger
.info('%s daemon %s ...' % ('Reconfig', args
.name
))
3388 logger
.info('%s daemon %s ...' % ('Redeploy', args
.name
))
3390 logger
.info('%s daemon %s ...' % ('Deploy', args
.name
))
3392 # Get and check ports explicitly required to be opened
3393 daemon_ports
= [] # type: List[int]
3395 daemon_ports
= list(map(int, args
.tcp_ports
.split()))
3397 if daemon_type
in Ceph
.daemons
:
3398 config
, keyring
= get_config_and_keyring()
3399 uid
, gid
= extract_uid_gid()
3400 make_var_run(args
.fsid
, uid
, gid
)
3402 c
= get_container(args
.fsid
, daemon_type
, daemon_id
,
3403 ptrace
=args
.allow_ptrace
)
3404 deploy_daemon(args
.fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
3405 config
=config
, keyring
=keyring
,
3406 osd_fsid
=args
.osd_fsid
,
3407 reconfig
=args
.reconfig
,
3410 elif daemon_type
in Monitoring
.components
:
3411 # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
3413 if not args
.reconfig
and not redeploy
:
3414 daemon_ports
.extend(Monitoring
.port_map
[daemon_type
])
3416 # make sure provided config-json is sufficient
3417 config
= get_parm(args
.config_json
) # type: ignore
3418 required_files
= Monitoring
.components
[daemon_type
].get('config-json-files', list())
3419 required_args
= Monitoring
.components
[daemon_type
].get('config-json-args', list())
3421 if not config
or not all(c
in config
.get('files', {}).keys() for c
in required_files
): # type: ignore
3422 raise Error("{} deployment requires config-json which must "
3423 "contain file content for {}".format(daemon_type
.capitalize(), ', '.join(required_files
)))
3425 if not config
or not all(c
in config
.keys() for c
in required_args
): # type: ignore
3426 raise Error("{} deployment requires config-json which must "
3427 "contain arg for {}".format(daemon_type
.capitalize(), ', '.join(required_args
)))
3429 uid
, gid
= extract_uid_gid_monitoring(daemon_type
)
3430 c
= get_container(args
.fsid
, daemon_type
, daemon_id
)
3431 deploy_daemon(args
.fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
3432 reconfig
=args
.reconfig
,
3435 elif daemon_type
== NFSGanesha
.daemon_type
:
3436 if not args
.reconfig
and not redeploy
:
3437 daemon_ports
.extend(NFSGanesha
.port_map
.values())
3439 config
, keyring
= get_config_and_keyring()
3440 # TODO: extract ganesha uid/gid (997, 994) ?
3441 uid
, gid
= extract_uid_gid()
3442 c
= get_container(args
.fsid
, daemon_type
, daemon_id
)
3443 deploy_daemon(args
.fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
3444 config
=config
, keyring
=keyring
,
3445 reconfig
=args
.reconfig
,
3448 elif daemon_type
== CephIscsi
.daemon_type
:
3449 config
, keyring
= get_config_and_keyring()
3450 uid
, gid
= extract_uid_gid()
3451 c
= get_container(args
.fsid
, daemon_type
, daemon_id
)
3452 deploy_daemon(args
.fsid
, daemon_type
, daemon_id
, c
, uid
, gid
,
3453 config
=config
, keyring
=keyring
,
3454 reconfig
=args
.reconfig
,
3457 elif daemon_type
== CustomContainer
.daemon_type
:
3458 cc
= CustomContainer
.init(args
.fsid
, daemon_id
)
3459 if not args
.reconfig
and not redeploy
:
3460 daemon_ports
.extend(cc
.ports
)
3461 c
= get_container(args
.fsid
, daemon_type
, daemon_id
,
3462 privileged
=cc
.privileged
,
3463 ptrace
=args
.allow_ptrace
)
3464 deploy_daemon(args
.fsid
, daemon_type
, daemon_id
, c
,
3465 uid
=cc
.uid
, gid
=cc
.gid
, config
=None,
3466 keyring
=None, reconfig
=args
.reconfig
,
3470 raise Error('daemon type {} not implemented in command_deploy function'
3471 .format(daemon_type
))
3473 ##################################
3479 (daemon_type
, daemon_id
) = args
.name
.split('.', 1)
3480 c
= get_container(args
.fsid
, daemon_type
, daemon_id
)
3481 command
= c
.run_cmd()
3482 return call_timeout(command
, args
.timeout
)
3484 ##################################
3490 def command_shell():
3493 make_log_dir(args
.fsid
)
3495 if '.' in args
.name
:
3496 (daemon_type
, daemon_id
) = args
.name
.split('.', 1)
3498 daemon_type
= args
.name
3501 daemon_type
= 'osd' # get the most mounts
3504 if daemon_id
and not args
.fsid
:
3505 raise Error('must pass --fsid to specify cluster')
3507 # use /etc/ceph files by default, if present. we do this instead of
3508 # making these defaults in the arg parser because we don't want an error
3509 # if they don't exist.
3510 if not args
.keyring
and os
.path
.exists(SHELL_DEFAULT_KEYRING
):
3511 args
.keyring
= SHELL_DEFAULT_KEYRING
3513 container_args
= [] # type: List[str]
3514 mounts
= get_container_mounts(args
.fsid
, daemon_type
, daemon_id
,
3515 no_config
=True if args
.config
else False)
3516 binds
= get_container_binds(args
.fsid
, daemon_type
, daemon_id
)
3518 mounts
[pathify(args
.config
)] = '/etc/ceph/ceph.conf:z'
3520 mounts
[pathify(args
.keyring
)] = '/etc/ceph/ceph.keyring:z'
3522 for _mount
in args
.mount
:
3523 split_src_dst
= _mount
.split(':')
3524 mount
= pathify(split_src_dst
[0])
3525 filename
= os
.path
.basename(split_src_dst
[0])
3526 if len(split_src_dst
) > 1:
3527 dst
= split_src_dst
[1] + ':z' if len(split_src_dst
) == 3 else split_src_dst
[1]
3530 mounts
[mount
] = '/mnt/{}:z'.format(filename
)
3532 command
= args
.command
3538 '-e', "PS1=%s" % CUSTOM_PS1
,
3541 home
= os
.path
.join(args
.data_dir
, args
.fsid
, 'home')
3542 if not os
.path
.exists(home
):
3543 logger
.debug('Creating root home at %s' % home
)
3544 makedirs(home
, 0, 0, 0o660)
3545 if os
.path
.exists('/etc/skel'):
3546 for f
in os
.listdir('/etc/skel'):
3547 if f
.startswith('.bash'):
3548 shutil
.copyfile(os
.path
.join('/etc/skel', f
),
3549 os
.path
.join(home
, f
))
3550 mounts
[home
] = '/root'
3554 entrypoint
='doesnotmatter',
3556 container_args
=container_args
,
3557 volume_mounts
=mounts
,
3561 command
= c
.shell_cmd(command
)
3563 return call_timeout(command
, args
.timeout
)
3565 ##################################
3569 def command_enter():
3572 raise Error('must pass --fsid to specify cluster')
3573 (daemon_type
, daemon_id
) = args
.name
.split('.', 1)
3574 container_args
= [] # type: List[str]
3576 command
= args
.command
3582 '-e', "PS1=%s" % CUSTOM_PS1
,
3586 entrypoint
='doesnotmatter',
3587 container_args
=container_args
,
3588 cname
='ceph-%s-%s.%s' % (args
.fsid
, daemon_type
, daemon_id
),
3590 command
= c
.exec_cmd(command
)
3591 return call_timeout(command
, args
.timeout
)
3593 ##################################
3598 def command_ceph_volume():
3601 make_log_dir(args
.fsid
)
3603 l
= FileLock(args
.fsid
)
3606 (uid
, gid
) = (0, 0) # ceph-volume runs as root
3607 mounts
= get_container_mounts(args
.fsid
, 'osd', None)
3612 (config
, keyring
) = get_config_and_keyring()
3616 tmp_config
= write_tmp(config
, uid
, gid
)
3617 mounts
[tmp_config
.name
] = '/etc/ceph/ceph.conf:z'
3621 tmp_keyring
= write_tmp(keyring
, uid
, gid
)
3622 mounts
[tmp_keyring
.name
] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'
3626 entrypoint
='/usr/sbin/ceph-volume',
3630 volume_mounts
=mounts
,
3632 out
, err
, code
= call_throws(c
.run_cmd(), verbosity
=CallVerbosity
.VERBOSE
)
3636 ##################################
3643 raise Error('must pass --fsid to specify cluster')
3645 unit_name
= get_unit_name_by_daemon_name(args
.fsid
, args
.name
)
3651 verbosity
=CallVerbosity
.VERBOSE
,
3655 ##################################
3662 raise Error('must pass --fsid to specify cluster')
3664 unit_name
= get_unit_name_by_daemon_name(args
.fsid
, args
.name
)
3666 cmd
= [find_program('journalctl')]
3667 cmd
.extend(['-u', unit_name
])
3669 cmd
.extend(args
.command
)
3671 # call this directly, without our wrapper, so that we get an unmolested
3672 # stdout with logger prefixing.
3673 logger
.debug("Running command: %s" % ' '.join(cmd
))
3674 subprocess
.call(cmd
) # type: ignore
3676 ##################################
3679 def list_networks():
3680 # type: () -> Dict[str,List[str]]
3682 ## sadly, 18.04's iproute2 4.15.0-2ubun doesn't support the -j flag,
3683 ## so we'll need to use a regex to parse 'ip' command output.
3684 #out, _, _ = call_throws(['ip', '-j', 'route', 'ls'])
3685 #j = json.loads(out)
3688 res
= _list_ipv4_networks()
3689 res
.update(_list_ipv6_networks())
def _list_ipv4_networks():
    # type: () -> Dict[str,List[str]]
    """Return {network -> [local addresses]} parsed from `ip route ls`."""
    route_output, _, _ = call_throws([find_executable('ip'), 'route', 'ls'])
    return _parse_ipv4_route(route_output)
3698 def _parse_ipv4_route(out
):
3699 r
= {} # type: Dict[str,List[str]]
3700 p
= re
.compile(r
'^(\S+) (.*)scope link (.*)src (\S+)')
3701 for line
in out
.splitlines():
def _list_ipv6_networks():
    # type: () -> Dict[str,List[str]]
    """Return {network -> [local addresses]} for IPv6.

    Unlike the IPv4 case, the route table alone does not carry the local
    addresses, so the `ip -6 addr` listing is parsed alongside it.
    """
    route_output, _, _ = call_throws([find_executable('ip'), '-6', 'route', 'ls'])
    addr_output, _, _ = call_throws([find_executable('ip'), '-6', 'addr', 'ls'])
    return _parse_ipv6_route(route_output, addr_output)
3719 def _parse_ipv6_route(routes
, ips
):
3720 r
= {} # type: Dict[str,List[str]]
3721 route_p
= re
.compile(r
'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$')
3722 ip_p
= re
.compile(r
'^\s+inet6 (\S+)/(.*)scope (.*)$')
3723 for line
in routes
.splitlines():
3724 m
= route_p
.findall(line
)
3725 if not m
or m
[0][0].lower() == 'default':
3731 for line
in ips
.splitlines():
3732 m
= ip_p
.findall(line
)
3736 # find the network it belongs to
3737 net
= [n
for n
in r
.keys()
3738 if ipaddress
.ip_address(unicode(ip
)) in ipaddress
.ip_network(unicode(n
))]
3740 r
[net
[0]].append(ip
)
3745 def command_list_networks():
3748 print(json
.dumps(r
, indent
=4))
3750 ##################################
3756 ls
= list_daemons(detail
=not args
.no_detail
,
3757 legacy_dir
=args
.legacy_dir
)
3758 print(json
.dumps(ls
, indent
=4))
3761 def list_daemons(detail
=True, legacy_dir
=None):
3762 # type: (bool, Optional[str]) -> List[Dict[str, str]]
3766 data_dir
= args
.data_dir
3767 if legacy_dir
is not None:
3768 data_dir
= os
.path
.abspath(legacy_dir
+ data_dir
)
3770 # keep track of ceph versions we see
3771 seen_versions
= {} # type: Dict[str, Optional[str]]
3774 if os
.path
.exists(data_dir
):
3775 for i
in os
.listdir(data_dir
):
3776 if i
in ['mon', 'osd', 'mds', 'mgr']:
3778 for j
in os
.listdir(os
.path
.join(data_dir
, i
)):
3781 (cluster
, daemon_id
) = j
.split('-', 1)
3782 fsid
= get_legacy_daemon_fsid(
3783 cluster
, daemon_type
, daemon_id
,
3784 legacy_dir
=legacy_dir
)
3785 legacy_unit_name
= 'ceph-%s@%s' % (daemon_type
, daemon_id
)
3788 'name': '%s.%s' % (daemon_type
, daemon_id
),
3789 'fsid': fsid
if fsid
is not None else 'unknown',
3790 'systemd_unit': legacy_unit_name
,
3793 (i
['enabled'], i
['state'], _
) = check_unit(legacy_unit_name
)
3794 if not host_version
:
3796 out
, err
, code
= call(['ceph', '-v'])
3797 if not code
and out
.startswith('ceph version '):
3798 host_version
= out
.split(' ')[2]
3801 i
['host_version'] = host_version
3804 fsid
= str(i
) # convince mypy that fsid is a str here
3805 for j
in os
.listdir(os
.path
.join(data_dir
, i
)):
3808 (daemon_type
, daemon_id
) = j
.split('.', 1)
3809 unit_name
= get_unit_name(fsid
,
3815 'style': 'cephadm:v1',
3818 'systemd_unit': unit_name
,
3822 (i
['enabled'], i
['state'], _
) = check_unit(unit_name
)
3829 if 'podman' in container_path
and get_podman_version() < (1, 6, 2):
3830 image_field
= '.ImageID'
3832 image_field
= '.Image'
3834 out
, err
, code
= call(
3836 container_path
, 'inspect',
3837 '--format', '{{.Id}},{{.Config.Image}},{{%s}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}' % image_field
,
3838 'ceph-%s-%s' % (fsid
, j
)
3840 verbosity
=CallVerbosity
.DEBUG
)
3842 (container_id
, image_name
, image_id
, start
,
3843 version
) = out
.strip().split(',')
3844 image_id
= normalize_container_id(image_id
)
3845 daemon_type
= name
.split('.', 1)[0]
3846 start_stamp
= try_convert_datetime(start
)
3847 if not version
or '.' not in version
:
3848 version
= seen_versions
.get(image_id
, None)
3849 if daemon_type
== NFSGanesha
.daemon_type
:
3850 version
= NFSGanesha
.get_version(container_id
)
3851 if daemon_type
== CephIscsi
.daemon_type
:
3852 version
= CephIscsi
.get_version(container_id
)
3854 if daemon_type
in Ceph
.daemons
:
3855 out
, err
, code
= call(
3856 [container_path
, 'exec', container_id
,
3859 out
.startswith('ceph version '):
3860 version
= out
.split(' ')[2]
3861 seen_versions
[image_id
] = version
3862 elif daemon_type
== 'grafana':
3863 out
, err
, code
= call(
3864 [container_path
, 'exec', container_id
,
3865 'grafana-server', '-v'])
3867 out
.startswith('Version '):
3868 version
= out
.split(' ')[1]
3869 seen_versions
[image_id
] = version
3870 elif daemon_type
in ['prometheus',
3873 cmd
= daemon_type
.replace('-', '_')
3874 out
, err
, code
= call(
3875 [container_path
, 'exec', container_id
,
3878 err
.startswith('%s, version ' % cmd
):
3879 version
= err
.split(' ')[2]
3880 seen_versions
[image_id
] = version
3881 elif daemon_type
== CustomContainer
.daemon_type
:
3882 # Because a custom container can contain
3883 # everything, we do not know which command
3884 # to execute to get the version.
3887 logger
.warning('version for unknown daemon type %s' % daemon_type
)
3889 vfile
= os
.path
.join(data_dir
, fsid
, j
, 'unit.image') # type: ignore
3891 with
open(vfile
, 'r') as f
:
3892 image_name
= f
.read().strip() or None
3895 i
['container_id'] = container_id
3896 i
['container_image_name'] = image_name
3897 i
['container_image_id'] = image_id
3898 i
['version'] = version
3899 i
['started'] = start_stamp
3900 i
['created'] = get_file_timestamp(
3901 os
.path
.join(data_dir
, fsid
, j
, 'unit.created')
3903 i
['deployed'] = get_file_timestamp(
3904 os
.path
.join(data_dir
, fsid
, j
, 'unit.image'))
3905 i
['configured'] = get_file_timestamp(
3906 os
.path
.join(data_dir
, fsid
, j
, 'unit.configured'))
3913 def get_daemon_description(fsid
, name
, detail
=False, legacy_dir
=None):
3914 # type: (str, str, bool, Optional[str]) -> Dict[str, str]
3916 for d
in list_daemons(detail
=detail
, legacy_dir
=legacy_dir
):
3917 if d
['fsid'] != fsid
:
3919 if d
['name'] != name
:
3922 raise Error('Daemon not found: {}. See `cephadm ls`'.format(name
))
3925 ##################################
3928 def command_adopt():
3931 if not args
.skip_pull
:
3932 _pull_image(args
.image
)
3934 (daemon_type
, daemon_id
) = args
.name
.split('.', 1)
3937 if args
.style
!= 'legacy':
3938 raise Error('adoption of style %s not implemented' % args
.style
)
3941 fsid
= get_legacy_daemon_fsid(args
.cluster
,
3944 legacy_dir
=args
.legacy_dir
)
3946 raise Error('could not detect legacy fsid; set fsid in ceph.conf')
3950 # call correct adoption
3951 if daemon_type
in Ceph
.daemons
:
3952 command_adopt_ceph(daemon_type
, daemon_id
, fsid
);
3953 elif daemon_type
== 'prometheus':
3954 command_adopt_prometheus(daemon_id
, fsid
)
3955 elif daemon_type
== 'grafana':
3956 command_adopt_grafana(daemon_id
, fsid
)
3957 elif daemon_type
== 'node-exporter':
3958 raise Error('adoption of node-exporter not implemented')
3959 elif daemon_type
== 'alertmanager':
3960 command_adopt_alertmanager(daemon_id
, fsid
)
3962 raise Error('daemon type %s not recognized' % daemon_type
)
3965 class AdoptOsd(object):
3966 def __init__(self
, osd_data_dir
, osd_id
):
3967 # type: (str, str) -> None
3968 self
.osd_data_dir
= osd_data_dir
3969 self
.osd_id
= osd_id
3971 def check_online_osd(self
):
3972 # type: () -> Tuple[Optional[str], Optional[str]]
3974 osd_fsid
, osd_type
= None, None
3976 path
= os
.path
.join(self
.osd_data_dir
, 'fsid')
3978 with
open(path
, 'r') as f
:
3979 osd_fsid
= f
.read().strip()
3980 logger
.info("Found online OSD at %s" % path
)
3982 logger
.info('Unable to read OSD fsid from %s' % path
)
3983 if os
.path
.exists(os
.path
.join(self
.osd_data_dir
, 'type')):
3984 with
open(os
.path
.join(self
.osd_data_dir
, 'type')) as f
:
3985 osd_type
= f
.read().strip()
3987 logger
.info('"type" file missing for OSD data dir')
3989 return osd_fsid
, osd_type
3991 def check_offline_lvm_osd(self
):
3992 # type: () -> Tuple[Optional[str], Optional[str]]
3994 osd_fsid
, osd_type
= None, None
3998 entrypoint
='/usr/sbin/ceph-volume',
3999 args
=['lvm', 'list', '--format=json'],
4002 out
, err
, code
= call_throws(c
.run_cmd())
4005 js
= json
.loads(out
)
4006 if self
.osd_id
in js
:
4007 logger
.info("Found offline LVM OSD {}".format(self
.osd_id
))
4008 osd_fsid
= js
[self
.osd_id
][0]['tags']['ceph.osd_fsid']
4009 for device
in js
[self
.osd_id
]:
4010 if device
['tags']['ceph.type'] == 'block':
4011 osd_type
= 'bluestore'
4013 if device
['tags']['ceph.type'] == 'data':
4014 osd_type
= 'filestore'
4016 except ValueError as e
:
4017 logger
.info("Invalid JSON in ceph-volume lvm list: {}".format(e
))
4019 return osd_fsid
, osd_type
4021 def check_offline_simple_osd(self
):
4022 # type: () -> Tuple[Optional[str], Optional[str]]
4024 osd_fsid
, osd_type
= None, None
4026 osd_file
= glob("/etc/ceph/osd/{}-[a-f0-9-]*.json".format(self
.osd_id
))
4027 if len(osd_file
) == 1:
4028 with
open(osd_file
[0], 'r') as f
:
4030 js
= json
.loads(f
.read())
4031 logger
.info("Found offline simple OSD {}".format(self
.osd_id
))
4032 osd_fsid
= js
["fsid"]
4033 osd_type
= js
["type"]
4034 if osd_type
!= "filestore":
4035 # need this to be mounted for the adopt to work, as it
4036 # needs to move files from this directory
4037 call_throws(['mount', js
["data"]["path"], self
.osd_data_dir
])
4038 except ValueError as e
:
4039 logger
.info("Invalid JSON in {}: {}".format(osd_file
, e
))
4041 return osd_fsid
, osd_type
4044 def command_adopt_ceph(daemon_type
, daemon_id
, fsid
):
4045 # type: (str, str, str) -> None
4047 (uid
, gid
) = extract_uid_gid()
4049 data_dir_src
= ('/var/lib/ceph/%s/%s-%s' %
4050 (daemon_type
, args
.cluster
, daemon_id
))
4051 data_dir_src
= os
.path
.abspath(args
.legacy_dir
+ data_dir_src
)
4053 if not os
.path
.exists(data_dir_src
):
4054 raise Error("{}.{} data directory '{}' does not exist. "
4055 "Incorrect ID specified, or daemon alrady adopted?".format(
4056 daemon_type
, daemon_id
, data_dir_src
))
4059 if daemon_type
== 'osd':
4060 adopt_osd
= AdoptOsd(data_dir_src
, daemon_id
)
4061 osd_fsid
, osd_type
= adopt_osd
.check_online_osd()
4063 osd_fsid
, osd_type
= adopt_osd
.check_offline_lvm_osd()
4065 osd_fsid
, osd_type
= adopt_osd
.check_offline_simple_osd()
4067 raise Error('Unable to find OSD {}'.format(daemon_id
))
4068 logger
.info('objectstore_type is %s' % osd_type
)
4070 if osd_type
== 'filestore':
4071 raise Error('FileStore is not supported by cephadm')
4073 # NOTE: implicit assumption here that the units correspond to the
4074 # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
4076 unit_name
= 'ceph-%s@%s' % (daemon_type
, daemon_id
)
4077 (enabled
, state
, _
) = check_unit(unit_name
)
4078 if state
== 'running':
4079 logger
.info('Stopping old systemd unit %s...' % unit_name
)
4080 call_throws(['systemctl', 'stop', unit_name
])
4082 logger
.info('Disabling old systemd unit %s...' % unit_name
)
4083 call_throws(['systemctl', 'disable', unit_name
])
4086 logger
.info('Moving data...')
4087 data_dir_dst
= make_data_dir(fsid
, daemon_type
, daemon_id
,
4089 move_files(glob(os
.path
.join(data_dir_src
, '*')),
4092 logger
.debug('Remove dir \'%s\'' % (data_dir_src
))
4093 if os
.path
.ismount(data_dir_src
):
4094 call_throws(['umount', data_dir_src
])
4095 os
.rmdir(data_dir_src
)
4097 logger
.info('Chowning content...')
4098 call_throws(['chown', '-c', '-R', '%d.%d' % (uid
, gid
), data_dir_dst
])
4100 if daemon_type
== 'mon':
4101 # rename *.ldb -> *.sst, in case they are coming from ubuntu
4102 store
= os
.path
.join(data_dir_dst
, 'store.db')
4104 if os
.path
.exists(store
):
4105 for oldf
in os
.listdir(store
):
4106 if oldf
.endswith('.ldb'):
4107 newf
= oldf
.replace('.ldb', '.sst')
4108 oldp
= os
.path
.join(store
, oldf
)
4109 newp
= os
.path
.join(store
, newf
)
4110 logger
.debug('Renaming %s -> %s' % (oldp
, newp
))
4111 os
.rename(oldp
, newp
)
4113 logger
.info('Renamed %d leveldb *.ldb files to *.sst',
4115 if daemon_type
== 'osd':
4116 for n
in ['block', 'block.db', 'block.wal']:
4117 p
= os
.path
.join(data_dir_dst
, n
)
4118 if os
.path
.exists(p
):
4119 logger
.info('Chowning %s...' % p
)
4120 os
.chown(p
, uid
, gid
)
4121 # disable the ceph-volume 'simple' mode files on the host
4122 simple_fn
= os
.path
.join('/etc/ceph/osd',
4123 '%s-%s.json' % (daemon_id
, osd_fsid
))
4124 if os
.path
.exists(simple_fn
):
4125 new_fn
= simple_fn
+ '.adopted-by-cephadm'
4126 logger
.info('Renaming %s -> %s', simple_fn
, new_fn
)
4127 os
.rename(simple_fn
, new_fn
)
4128 logger
.info('Disabling host unit ceph-volume@ simple unit...')
4129 call(['systemctl', 'disable',
4130 'ceph-volume@simple-%s-%s.service' % (daemon_id
, osd_fsid
)])
4132 # assume this is an 'lvm' c-v for now, but don't error
4134 logger
.info('Disabling host unit ceph-volume@ lvm unit...')
4135 call(['systemctl', 'disable',
4136 'ceph-volume@lvm-%s-%s.service' % (daemon_id
, osd_fsid
)])
4139 config_src
= '/etc/ceph/%s.conf' % (args
.cluster
)
4140 config_src
= os
.path
.abspath(args
.legacy_dir
+ config_src
)
4141 config_dst
= os
.path
.join(data_dir_dst
, 'config')
4142 copy_files([config_src
], config_dst
, uid
=uid
, gid
=gid
)
4145 logger
.info('Moving logs...')
4146 log_dir_src
= ('/var/log/ceph/%s-%s.%s.log*' %
4147 (args
.cluster
, daemon_type
, daemon_id
))
4148 log_dir_src
= os
.path
.abspath(args
.legacy_dir
+ log_dir_src
)
4149 log_dir_dst
= make_log_dir(fsid
, uid
=uid
, gid
=gid
)
4150 move_files(glob(log_dir_src
),
4154 logger
.info('Creating new units...')
4155 make_var_run(fsid
, uid
, gid
)
4156 c
= get_container(fsid
, daemon_type
, daemon_id
)
4157 deploy_daemon_units(fsid
, uid
, gid
, daemon_type
, daemon_id
, c
,
4158 enable
=True, # unconditionally enable the new unit
4159 start
=(state
== 'running' or args
.force_start
),
4161 update_firewalld(daemon_type
)
4164 def command_adopt_prometheus(daemon_id
, fsid
):
4165 # type: (str, str) -> None
4167 daemon_type
= 'prometheus'
4168 (uid
, gid
) = extract_uid_gid_monitoring(daemon_type
)
4170 _stop_and_disable('prometheus')
4172 data_dir_dst
= make_data_dir(fsid
, daemon_type
, daemon_id
,
4176 config_src
= '/etc/prometheus/prometheus.yml'
4177 config_src
= os
.path
.abspath(args
.legacy_dir
+ config_src
)
4178 config_dst
= os
.path
.join(data_dir_dst
, 'etc/prometheus')
4179 makedirs(config_dst
, uid
, gid
, 0o755)
4180 copy_files([config_src
], config_dst
, uid
=uid
, gid
=gid
)
4183 data_src
= '/var/lib/prometheus/metrics/'
4184 data_src
= os
.path
.abspath(args
.legacy_dir
+ data_src
)
4185 data_dst
= os
.path
.join(data_dir_dst
, 'data')
4186 copy_tree([data_src
], data_dst
, uid
=uid
, gid
=gid
)
4188 make_var_run(fsid
, uid
, gid
)
4189 c
= get_container(fsid
, daemon_type
, daemon_id
)
4190 deploy_daemon(fsid
, daemon_type
, daemon_id
, c
, uid
, gid
)
4191 update_firewalld(daemon_type
)
4194 def command_adopt_grafana(daemon_id
, fsid
):
4195 # type: (str, str) -> None
4197 daemon_type
= 'grafana'
4198 (uid
, gid
) = extract_uid_gid_monitoring(daemon_type
)
4200 _stop_and_disable('grafana-server')
4202 data_dir_dst
= make_data_dir(fsid
, daemon_type
, daemon_id
,
4206 config_src
= '/etc/grafana/grafana.ini'
4207 config_src
= os
.path
.abspath(args
.legacy_dir
+ config_src
)
4208 config_dst
= os
.path
.join(data_dir_dst
, 'etc/grafana')
4209 makedirs(config_dst
, uid
, gid
, 0o755)
4210 copy_files([config_src
], config_dst
, uid
=uid
, gid
=gid
)
4212 prov_src
= '/etc/grafana/provisioning/'
4213 prov_src
= os
.path
.abspath(args
.legacy_dir
+ prov_src
)
4214 prov_dst
= os
.path
.join(data_dir_dst
, 'etc/grafana')
4215 copy_tree([prov_src
], prov_dst
, uid
=uid
, gid
=gid
)
4218 cert
= '/etc/grafana/grafana.crt'
4219 key
= '/etc/grafana/grafana.key'
4220 if os
.path
.exists(cert
) and os
.path
.exists(key
):
4221 cert_src
= '/etc/grafana/grafana.crt'
4222 cert_src
= os
.path
.abspath(args
.legacy_dir
+ cert_src
)
4223 makedirs(os
.path
.join(data_dir_dst
, 'etc/grafana/certs'), uid
, gid
, 0o755)
4224 cert_dst
= os
.path
.join(data_dir_dst
, 'etc/grafana/certs/cert_file')
4225 copy_files([cert_src
], cert_dst
, uid
=uid
, gid
=gid
)
4227 key_src
= '/etc/grafana/grafana.key'
4228 key_src
= os
.path
.abspath(args
.legacy_dir
+ key_src
)
4229 key_dst
= os
.path
.join(data_dir_dst
, 'etc/grafana/certs/cert_key')
4230 copy_files([key_src
], key_dst
, uid
=uid
, gid
=gid
)
4232 _adjust_grafana_ini(os
.path
.join(config_dst
, 'grafana.ini'))
4234 logger
.debug("Skipping ssl, missing cert {} or key {}".format(cert
, key
))
4236 # data - possible custom dashboards/plugins
4237 data_src
= '/var/lib/grafana/'
4238 data_src
= os
.path
.abspath(args
.legacy_dir
+ data_src
)
4239 data_dst
= os
.path
.join(data_dir_dst
, 'data')
4240 copy_tree([data_src
], data_dst
, uid
=uid
, gid
=gid
)
4242 make_var_run(fsid
, uid
, gid
)
4243 c
= get_container(fsid
, daemon_type
, daemon_id
)
4244 deploy_daemon(fsid
, daemon_type
, daemon_id
, c
, uid
, gid
)
4245 update_firewalld(daemon_type
)
4248 def command_adopt_alertmanager(daemon_id
, fsid
):
4249 # type: (str, str) -> None
4251 daemon_type
= 'alertmanager'
4252 (uid
, gid
) = extract_uid_gid_monitoring(daemon_type
)
4254 _stop_and_disable('prometheus-alertmanager')
4256 data_dir_dst
= make_data_dir(fsid
, daemon_type
, daemon_id
,
4260 config_src
= '/etc/prometheus/alertmanager.yml'
4261 config_src
= os
.path
.abspath(args
.legacy_dir
+ config_src
)
4262 config_dst
= os
.path
.join(data_dir_dst
, 'etc/alertmanager')
4263 makedirs(config_dst
, uid
, gid
, 0o755)
4264 copy_files([config_src
], config_dst
, uid
=uid
, gid
=gid
)
4267 data_src
= '/var/lib/prometheus/alertmanager/'
4268 data_src
= os
.path
.abspath(args
.legacy_dir
+ data_src
)
4269 data_dst
= os
.path
.join(data_dir_dst
, 'etc/alertmanager/data')
4270 copy_tree([data_src
], data_dst
, uid
=uid
, gid
=gid
)
4272 make_var_run(fsid
, uid
, gid
)
4273 c
= get_container(fsid
, daemon_type
, daemon_id
)
4274 deploy_daemon(fsid
, daemon_type
, daemon_id
, c
, uid
, gid
)
4275 update_firewalld(daemon_type
)
4278 def _adjust_grafana_ini(filename
):
4279 # type: (str) -> None
4281 # Update cert_file, cert_key pathnames in server section
4282 # ConfigParser does not preserve comments
4284 with
open(filename
, "r") as grafana_ini
:
4285 lines
= grafana_ini
.readlines()
4286 with
open("{}.new".format(filename
), "w") as grafana_ini
:
4287 server_section
=False
4289 if line
.startswith('['):
4290 server_section
=False
4291 if line
.startswith('[server]'):
4294 line
= re
.sub(r
'^cert_file.*',
4295 'cert_file = /etc/grafana/certs/cert_file', line
)
4296 line
= re
.sub(r
'^cert_key.*',
4297 'cert_key = /etc/grafana/certs/cert_key', line
)
4298 grafana_ini
.write(line
)
4299 os
.rename("{}.new".format(filename
), filename
)
4300 except OSError as err
:
4301 raise Error("Cannot update {}: {}".format(filename
, err
))
4304 def _stop_and_disable(unit_name
):
4305 # type: (str) -> None
4307 (enabled
, state
, _
) = check_unit(unit_name
)
4308 if state
== 'running':
4309 logger
.info('Stopping old systemd unit %s...' % unit_name
)
4310 call_throws(['systemctl', 'stop', unit_name
])
4312 logger
.info('Disabling old systemd unit %s...' % unit_name
)
4313 call_throws(['systemctl', 'disable', unit_name
])
4316 ##################################
4318 def command_rm_daemon():
4321 l
= FileLock(args
.fsid
)
4324 unit_name
= get_unit_name_by_daemon_name(args
.fsid
, args
.name
)
4326 (daemon_type
, daemon_id
) = args
.name
.split('.', 1)
4327 if daemon_type
in ['mon', 'osd'] and not args
.force
:
4328 raise Error('must pass --force to proceed: '
4329 'this command may destroy precious data!')
4331 call(['systemctl', 'stop', unit_name
],
4332 verbosity
=CallVerbosity
.DEBUG
)
4333 call(['systemctl', 'reset-failed', unit_name
],
4334 verbosity
=CallVerbosity
.DEBUG
)
4335 call(['systemctl', 'disable', unit_name
],
4336 verbosity
=CallVerbosity
.DEBUG
)
4337 data_dir
= get_data_dir(args
.fsid
, daemon_type
, daemon_id
)
4338 if daemon_type
in ['mon', 'osd', 'prometheus'] and \
4339 not args
.force_delete_data
:
4340 # rename it out of the way -- do not delete
4341 backup_dir
= os
.path
.join(args
.data_dir
, args
.fsid
, 'removed')
4342 if not os
.path
.exists(backup_dir
):
4343 makedirs(backup_dir
, 0, 0, DATA_DIR_MODE
)
4344 dirname
= '%s.%s_%s' % (daemon_type
, daemon_id
,
4345 datetime
.datetime
.utcnow().strftime(DATEFMT
))
4347 os
.path
.join(backup_dir
, dirname
))
4349 call_throws(['rm', '-rf', data_dir
])
4351 ##################################
4354 def command_rm_cluster():
4357 raise Error('must pass --force to proceed: '
4358 'this command may destroy precious data!')
4360 l
= FileLock(args
.fsid
)
4363 # stop + disable individual daemon units
4364 for d
in list_daemons(detail
=False):
4365 if d
['fsid'] != args
.fsid
:
4367 if d
['style'] != 'cephadm:v1':
4369 unit_name
= get_unit_name(args
.fsid
, d
['name'])
4370 call(['systemctl', 'stop', unit_name
],
4371 verbosity
=CallVerbosity
.DEBUG
)
4372 call(['systemctl', 'reset-failed', unit_name
],
4373 verbosity
=CallVerbosity
.DEBUG
)
4374 call(['systemctl', 'disable', unit_name
],
4375 verbosity
=CallVerbosity
.DEBUG
)
4378 for unit_name
in ['ceph-%s.target' % args
.fsid
]:
4379 call(['systemctl', 'stop', unit_name
],
4380 verbosity
=CallVerbosity
.DEBUG
)
4381 call(['systemctl', 'reset-failed', unit_name
],
4382 verbosity
=CallVerbosity
.DEBUG
)
4383 call(['systemctl', 'disable', unit_name
],
4384 verbosity
=CallVerbosity
.DEBUG
)
4386 slice_name
= 'system-%s.slice' % (('ceph-%s' % args
.fsid
).replace('-',
4388 call(['systemctl', 'stop', slice_name
],
4389 verbosity
=CallVerbosity
.DEBUG
)
4392 call_throws(['rm', '-f', args
.unit_dir
+
4393 '/ceph-%s@.service' % args
.fsid
])
4394 call_throws(['rm', '-f', args
.unit_dir
+
4395 '/ceph-%s.target' % args
.fsid
])
4396 call_throws(['rm', '-rf',
4397 args
.unit_dir
+ '/ceph-%s.target.wants' % args
.fsid
])
4399 call_throws(['rm', '-rf', args
.data_dir
+ '/' + args
.fsid
])
4401 call_throws(['rm', '-rf', args
.log_dir
+ '/' + args
.fsid
])
4402 call_throws(['rm', '-rf', args
.log_dir
+
4403 '/*.wants/ceph-%s@*' % args
.fsid
])
4404 # rm logrotate config
4405 call_throws(['rm', '-f', args
.logrotate_dir
+ '/ceph-%s' % args
.fsid
])
4407 # clean up config, keyring, and pub key files
4408 files
= ['/etc/ceph/ceph.conf', '/etc/ceph/ceph.pub', '/etc/ceph/ceph.client.admin.keyring']
4410 if os
.path
.exists(files
[0]):
4412 with
open(files
[0]) as f
:
4413 if args
.fsid
in f
.read():
4416 for n
in range(0, len(files
)):
4417 if os
.path
.exists(files
[n
]):
4421 ##################################
4423 def check_time_sync(enabler
=None):
4424 # type: (Optional[Packager]) -> bool
4426 'chrony.service', # 18.04 (at least)
4427 'chronyd.service', # el / opensuse
4428 'systemd-timesyncd.service',
4429 'ntpd.service', # el7 (at least)
4430 'ntp.service', # 18.04 (at least)
4431 'ntpsec.service', # 20.04 (at least) / buster
4433 if not check_units(units
, enabler
):
4434 logger
.warning('No time sync service is running; checked for %s' % units
)
4439 def command_check_host():
4441 global container_path
4444 commands
= ['systemctl', 'lvcreate']
4447 container_path
= find_program('docker')
4449 for i
in CONTAINER_PREFERENCE
:
4451 container_path
= find_program(i
)
4453 except Exception as e
:
4454 logger
.debug('Could not locate %s: %s' % (i
, e
))
4455 if not container_path
:
4456 errors
.append('Unable to locate any of %s' % CONTAINER_PREFERENCE
)
4458 logger
.info('podman|docker (%s) is present' % container_path
)
4460 for command
in commands
:
4462 find_program(command
)
4463 logger
.info('%s is present' % command
)
4465 errors
.append('%s binary does not appear to be installed' % command
)
4467 # check for configured+running chronyd or ntp
4468 if not check_time_sync():
4469 errors
.append('No time synchronization is active')
4471 if 'expect_hostname' in args
and args
.expect_hostname
:
4472 if get_hostname().lower() != args
.expect_hostname
.lower():
4473 errors
.append('hostname "%s" does not match expected hostname "%s"' % (
4474 get_hostname(), args
.expect_hostname
))
4475 logger
.info('Hostname "%s" matches what is expected.',
4476 args
.expect_hostname
)
4479 raise Error('\n'.join(errors
))
4481 logger
.info('Host looks OK')
4483 ##################################
4486 def command_prepare_host():
4488 logger
.info('Verifying podman|docker is present...')
4490 if not container_path
:
4492 pkg
= create_packager()
4493 pkg
.install_podman()
4495 logger
.info('Verifying lvm2 is present...')
4496 if not find_executable('lvcreate'):
4498 pkg
= create_packager()
4499 pkg
.install(['lvm2'])
4501 logger
.info('Verifying time synchronization is in place...')
4502 if not check_time_sync():
4504 pkg
= create_packager()
4505 pkg
.install(['chrony'])
4506 # check again, and this time try to enable
4508 check_time_sync(enabler
=pkg
)
4510 if 'expect_hostname' in args
and args
.expect_hostname
and args
.expect_hostname
!= get_hostname():
4511 logger
.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), args
.expect_hostname
))
4512 call_throws(['hostname', args
.expect_hostname
])
4513 with
open('/etc/hostname', 'w') as f
:
4514 f
.write(args
.expect_hostname
+ '\n')
4516 logger
.info('Repeating the final host check...')
4517 command_check_host()
4519 ##################################
4522 class CustomValidation(argparse
.Action
):
4524 def _check_name(self
, values
):
4526 (daemon_type
, daemon_id
) = values
.split('.', 1)
4528 raise argparse
.ArgumentError(self
,
4529 "must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com")
4531 daemons
= get_supported_daemons()
4532 if daemon_type
not in daemons
:
4533 raise argparse
.ArgumentError(self
,
4534 "name must declare the type of daemon e.g. "
4535 "{}".format(', '.join(daemons
)))
def __call__(self, parser, namespace, values, option_string=None):
        """Validate daemon-name arguments, then store the value.

        Runs the <type>.<id> check only for the ``name`` destination;
        afterwards behaves like the default argparse store action.
        """
        if self.dest == "name":
            self._check_name(values)
        setattr(namespace, self.dest, values)
4542 ##################################
4546 # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
4548 distro_version
= None
4549 distro_codename
= None
4550 with
open('/etc/os-release', 'r') as f
:
4551 for line
in f
.readlines():
4553 if '=' not in line
or line
.startswith('#'):
4555 (var
, val
) = line
.split('=', 1)
4556 if val
[0] == '"' and val
[-1] == '"':
4559 distro
= val
.lower()
4560 elif var
== 'VERSION_ID':
4561 distro_version
= val
.lower()
4562 elif var
== 'VERSION_CODENAME':
4563 distro_codename
= val
.lower()
4564 return distro
, distro_version
, distro_codename
class Packager(object):
    """Base class for distro package/repo management (Apt, YumDnf, Zypper).

    Exactly one build "source" may be selected: a stable release name, a
    specific version, or a dev branch (optionally pinned to a commit).
    """

    def __init__(self, stable=None, version=None, branch=None, commit=None):
        # Enforce the mutually-exclusive source selection described above.
        assert \
            (stable and not version and not branch and not commit) or \
            (not stable and version and not branch and not commit) or \
            (not stable and not version and branch) or \
            (not stable and not version and not branch and not commit)
        self.stable = stable
        self.version = version
        self.branch = branch
        self.commit = commit

    def add_repo(self):
        # Subclasses install the distro-appropriate repo definition.
        raise NotImplementedError

    def rm_repo(self):
        # Subclasses remove whatever add_repo() installed.
        raise NotImplementedError

    def query_shaman(self, distro, distro_version, branch, commit):
        """Resolve a dev build via shaman/chacra and return the repo file body.

        Raises Error when either service has no matching repository.
        """
        # query shaman
        logger.info('Fetching repo metadata from shaman and chacra...')
        shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
            distro=distro,
            distro_version=distro_version,
            branch=branch,
            sha1=commit or 'latest',
            arch=get_arch()  # NOTE(review): arch kwarg not visible in this view; helper presumed defined elsewhere in file — confirm
            )
        try:
            shaman_response = urlopen(shaman_url)
        except HTTPError as err:
            logger.error('repository not found in shaman (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, shaman_url))
        try:
            # shaman redirects to the concrete chacra repo URL
            chacra_url = shaman_response.geturl()
            chacra_response = urlopen(chacra_url)
        except HTTPError as err:
            logger.error('repository not found in chacra (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, chacra_url))
        return chacra_response.read().decode('utf-8')

    def repo_gpgkey(self):
        # type: () -> Tuple[str, str]
        # Release builds are signed with the release key; dev builds with the
        # autobuild key.  Returns (key URL, short name).
        if self.stable or self.version:
            return 'https://download.ceph.com/keys/release.asc', 'release'
        else:
            return 'https://download.ceph.com/keys/autobuild.asc', 'autobuild'

    def enable_service(self, service):
        # type: (str) -> None
        """
        Start and enable the service (typically using systemd).
        """
        call_throws(['systemctl', 'enable', '--now', service])
class Apt(Packager):
    """Packager for Debian-family distros (apt)."""

    DISTRO_NAMES = {
        'ubuntu': 'ubuntu',
        'debian': 'debian',
    }

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version, distro_codename):
        super(Apt, self).__init__(stable=stable, version=version,
                                  branch=branch, commit=commit)
        self.distro = self.DISTRO_NAMES[distro]
        self.distro_codename = distro_codename
        self.distro_version = distro_version

    def repo_path(self):
        # apt sources fragment owned by cephadm
        return '/etc/apt/sources.list.d/ceph.list'

    def add_repo(self):
        # Fetch and install the signing key, then write the sources entry.
        url, name = self.repo_gpgkey()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'w') as f:
            f.write(key)

        if self.version:
            content = 'deb %s/debian-%s/ %s main\n' % (
                args.repo_url, self.version, self.distro_codename)
        elif self.stable:
            content = 'deb %s/debian-%s/ %s main\n' % (
                args.repo_url, self.stable, self.distro_codename)
        else:
            # dev branch/commit: ask shaman for the repo definition
            content = self.query_shaman(self.distro, self.distro_codename, self.branch,
                                        self.commit)

        logger.info('Installing repo file at %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        # Remove both possible keys; we don't know which flavor was installed.
        for name in ['autobuild', 'release']:
            p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
            if os.path.exists(p):
                logger.info('Removing repo GPG key %s...' % p)
                os.unlink(p)
        if os.path.exists(self.repo_path()):
            logger.info('Removing repo at %s...' % self.repo_path())
            os.unlink(self.repo_path())

        if self.distro == 'ubuntu':
            self.rm_kubic_repo()

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws(['apt-get', 'install', '-y'] + ls)

    def install_podman(self):
        # Ubuntu needs the kubic repo for podman; fall back to docker.io on failure.
        if self.distro == 'ubuntu':
            logger.info('Setting up repo for podman...')
            self.add_kubic_repo()
            call_throws(['apt-get', 'update'])

        logger.info('Attempting podman install...')
        try:
            self.install(['podman'])
        except Error:
            logger.info('Podman did not work. Falling back to docker...')
            self.install(['docker.io'])

    def kubic_repo_url(self):
        return 'https://download.opensuse.org/repositories/devel:/kubic:/' \
               'libcontainers:/stable/xUbuntu_%s/' % self.distro_version

    def kubic_repo_path(self):
        return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list'

    # NOTE(review): "kubric" below is a long-standing typo for "kubic"; kept
    # as-is because other code may call these names.
    def kubric_repo_gpgkey_url(self):
        return '%s/Release.key' % self.kubic_repo_url()

    def kubric_repo_gpgkey_path(self):
        return '/etc/apt/trusted.gpg.d/kubic.release.gpg'

    def add_kubic_repo(self):
        # Install the kubic signing key and sources entry (podman on Ubuntu).
        url = self.kubric_repo_gpgkey_url()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        tmp_key = write_tmp(key, 0, 0)
        keyring = self.kubric_repo_gpgkey_path()
        call_throws(['apt-key', '--keyring', keyring, 'add', tmp_key.name])

        logger.info('Installing repo file at %s...' % self.kubic_repo_path())
        content = 'deb %s /\n' % self.kubic_repo_url()
        with open(self.kubic_repo_path(), 'w') as f:
            f.write(content)

    def rm_kubic_repo(self):
        keyring = self.kubric_repo_gpgkey_path()
        if os.path.exists(keyring):
            logger.info('Removing repo GPG key %s...' % keyring)
            os.unlink(keyring)

        p = self.kubic_repo_path()
        if os.path.exists(p):
            logger.info('Removing repo at %s...' % p)
            os.unlink(p)
class YumDnf(Packager):
    """Packager for yum/dnf-family distros (CentOS/RHEL, Scientific, Fedora)."""

    DISTRO_NAMES = {
        # distro id -> (shaman distro name, repo code prefix)
        'centos': ('centos', 'el'),
        'rhel': ('centos', 'el'),
        'scientific': ('centos', 'el'),
        'fedora': ('fedora', 'fc'),
    }

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version):
        super(YumDnf, self).__init__(stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.major = int(distro_version.split('.')[0])
        self.distro_normalized = self.DISTRO_NAMES[distro][0]
        self.distro_code = self.DISTRO_NAMES[distro][1] + str(self.major)
        # BUGFIX: the original compared self.distro_code (e.g. 'el8', 'fc32')
        # against the bare prefixes 'fc'/'el', which could never match, so the
        # tool silently stayed 'yum' on every distro.  Compare the prefix.
        code_prefix = self.DISTRO_NAMES[distro][1]
        if (code_prefix == 'fc' and self.major >= 30) or \
           (code_prefix == 'el' and self.major >= 8):
            self.tool = 'dnf'
        else:
            self.tool = 'yum'

    def custom_repo(self, **kw):
        """
        Repo files need special care in that a whole line should not be present
        if there is no value for it. Because we were using `format()` we could
        not conditionally add a line for a repo file. So the end result would
        contain a key with a missing value (say if we were passing `None`).

        For example, it could look like::

            [ceph repo]
            name= ceph repo
            proxy=
            gpgcheck=

        Which breaks. This function allows us to conditionally add lines,
        preserving an order and be more careful.

        Previously, and for historical purposes, this is how the template used
        to look::

            [{repo_name}]
            name={name}
            baseurl={baseurl}
            enabled={enabled}
            gpgcheck={gpgcheck}
            type={repo_type}
            gpgkey={gpgkey}
            proxy={proxy}
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self):
        return '/etc/yum.repos.d/ceph.repo'

    def repo_baseurl(self):
        # type: () -> str
        # Only valid for release/version installs (dev builds go via shaman).
        assert self.stable or self.version
        if self.version:
            return '%s/rpm-%s/%s' % (args.repo_url, self.version,
                                     self.distro_code)
        else:
            return '%s/rpm-%s/%s' % (args.repo_url, self.stable,
                                     self.distro_code)

    def add_repo(self):
        # Write a ceph.repo with arch, noarch and source sections, or the
        # shaman-provided repo file for dev builds.
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro_normalized, self.major,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        # ceph packages on EL need EPEL for some dependencies
        if self.distro_code.startswith('el'):
            logger.info('Enabling EPEL...')
            call_throws([self.tool, 'install', '-y', 'epel-release'])

    def rm_repo(self):
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws([self.tool, 'install', '-y'] + ls)

    def install_podman(self):
        self.install(['podman'])
class Zypper(Packager):
    """Packager for SUSE-family distros (zypper)."""

    DISTRO_NAMES = [
        'opensuse-tumbleweed',
        'opensuse-leap'
    ]

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version):
        super(Zypper, self).__init__(stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.tool = 'zypper'
        self.distro = 'opensuse'
        # default to Leap 15.1; tumbleweed keeps the default
        self.distro_version = '15.1'
        if 'tumbleweed' not in distro and distro_version is not None:
            self.distro_version = distro_version

    def custom_repo(self, **kw):
        """
        See YumDnf for format explanation.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self):
        return '/etc/zypp/repos.d/ceph.repo'

    def repo_baseurl(self):
        # type: () -> str
        assert self.stable or self.version
        if self.version:
            # BUGFIX: this branch used self.stable (copy/paste error), so a
            # --version install still produced the stable-release URL.
            return '%s/rpm-%s/%s' % (args.repo_url, self.version, self.distro)
        else:
            return '%s/rpm-%s/%s' % (args.repo_url, self.stable, self.distro)

    def add_repo(self):
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro, self.distro_version,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        # 'in' is zypper's install subcommand
        call_throws([self.tool, 'in', '-y'] + ls)

    def install_podman(self):
        self.install(['podman'])
def create_packager(stable=None, version=None, branch=None, commit=None):
    """Instantiate the Packager subclass that matches the local distribution.

    Raises Error when the detected distro is not supported.
    """
    distro, distro_version, distro_codename = get_distro()
    source = dict(stable=stable, version=version, branch=branch, commit=commit)
    if distro in YumDnf.DISTRO_NAMES:
        return YumDnf(distro=distro, distro_version=distro_version, **source)
    if distro in Apt.DISTRO_NAMES:
        return Apt(distro=distro, distro_version=distro_version,
                   distro_codename=distro_codename, **source)
    if distro in Zypper.DISTRO_NAMES:
        return Zypper(distro=distro, distro_version=distro_version, **source)
    raise Error('Distro %s version %s not supported' % (distro, distro_version))
def command_add_repo():
    """Validate the repo-selection CLI flags and install the matching repo."""
    if args.version and args.release:
        raise Error('you can specify either --release or --version but not both')
    if not (args.version or args.release or args.dev or args.dev_commit):
        raise Error('please supply a --release, --version, --dev or --dev-commit argument')
    if args.version:
        # a version must look like x.y.z
        if len(args.version.split('.')) != 3:
            raise Error('version must be in the form x.y.z (e.g., 15.2.0)')

    pkg = create_packager(stable=args.release,
                          version=args.version,
                          branch=args.dev,
                          commit=args.dev_commit)
    pkg.add_repo()
def command_rm_repo():
    """Remove the ceph repo definition installed by add-repo."""
    create_packager().rm_repo()
def command_install():
    """Install the packages named on the command line with the local packager."""
    installer = create_packager()
    installer.install(args.packages)
5009 ##################################
def get_ipv4_address(ifname):
    # type: (str) -> str
    """Return the IPv4 address of `ifname` in CIDR form ('a.b.c.d/len').

    Returns '' when the interface has no IPv4 address (or does not exist).
    """
    def _extract(sock, offset):
        # SIOCGIF* ioctls take a 256-byte ifreq struct; bytes 20:24 of the
        # reply hold the requested IPv4 value.
        return socket.inet_ntop(
                socket.AF_INET,
                fcntl.ioctl(
                    sock.fileno(),
                    offset,
                    struct.pack('256s', bytes(ifname[:15], 'utf-8'))
                )[20:24])

    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        addr = _extract(s, 35093)  # '0x8915' = SIOCGIFADDR
        dq_mask = _extract(s, 35099)  # 0x891b = SIOCGIFNETMASK
    except OSError:
        # interface does not have an ipv4 address
        return ''
    finally:
        # BUGFIX: the original leaked this socket; always close it
        s.close()
    # convert dotted-quad netmask to a prefix length by counting set bits
    dec_mask = sum([bin(int(i)).count('1')
                    for i in dq_mask.split('.')])
    return '{}/{}'.format(addr, dec_mask)
def get_ipv6_address(ifname):
    # type: (str) -> str
    """Return the IPv6 address of `ifname` as 'addr/scope', or '' if absent."""
    if not os.path.exists('/proc/net/if_inet6'):
        return ''

    raw = read_file(['/proc/net/if_inet6'])
    # based on docs @ https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/ch11s04.html
    # field 0 is ipv6, field 2 is scope
    for iface_setting in raw.splitlines():
        field = iface_setting.split()
        if field[-1] != ifname:
            continue
        ipv6_raw = field[0]
        # insert ':' every 4 hex digits so the ipaddress module can parse it
        ipv6_fmtd = ":".join([ipv6_raw[_p:_p + 4] for _p in range(0, len(field[0]), 4)])
        # apply naming rules using ipaddress module
        ipv6 = ipaddress.ip_address(ipv6_fmtd)
        return "{}/{}".format(str(ipv6), int('0x{}'.format(field[2]), 16))
    return ''
def bytes_to_human(num, mode='decimal'):
    # type: (float, str) -> str
    """Convert a bytes value into its human-readable form.

    :param num: number, in bytes, to convert
    :param mode: Either decimal (default) or binary to determine divisor
    :returns: string representing the bytes value in a more readable format
    """
    binary = (mode == 'binary')
    divisor = 1024.0 if binary else 1000.0
    yotta = "YiB" if binary else "YB"
    if binary:
        unit_list = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
    else:
        unit_list = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']

    for unit in unit_list:
        if abs(num) < divisor:
            return "%3.1f%s" % (num, unit)
        num /= divisor
    # fell off the end of the table: report in the largest unit
    return "%.1f%s" % (num, yotta)
def read_file(path_list, file_name=''):
    # type: (List[str], str) -> str
    """Return the stripped content of the first existing file in `path_list`.

    :param path_list: list of file paths to search
    :param file_name: optional file name joined onto each path
    :returns: content of the file, or 'Unknown' when nothing was readable
    """
    for base in path_list:
        candidate = os.path.join(base, file_name) if file_name else base
        if not os.path.exists(candidate):
            continue
        with open(candidate, 'r') as fh:
            try:
                text = fh.read().strip()
            except OSError:
                # sysfs may populate the file, but for devices like
                # virtio reads can fail
                return "Unknown"
            else:
                return text
    return "Unknown"
5105 ##################################
    # Search paths for host metadata; class-level so they can be overridden.
    _dmi_path_list = ['/sys/class/dmi/id']        # DMI/SMBIOS (vendor, model, BIOS)
    _nic_path_list = ['/sys/class/net']           # network interfaces in sysfs
    _selinux_path_list = ['/etc/selinux/config']  # SELinux configuration
    _apparmor_path_list = ['/etc/apparmor']       # AppArmor configuration
    # Some virtual disks report an opaque vendor id; map it to a friendly name.
    _disk_vendor_workarounds = {
        "0x1af4": "Virtio Block Device"
    }
    def __init__(self):
        # type: () -> None
        # Defaults used when /proc parsing cannot determine a value.
        self.cpu_model = 'Unknown'
        self.cpu_count = 0    # NOTE(review): line dropped from view — reconstructed; confirm
        self.cpu_cores = 0    # NOTE(review): line dropped from view — reconstructed; confirm
        self.cpu_threads = 0
        self.interfaces = {}

        # cached /proc/meminfo lines, consumed by _get_mem_data()
        self._meminfo = read_file(['/proc/meminfo']).splitlines()
        self._get_cpuinfo()
        self._process_nics()
        self.arch = platform.processor()
        self.kernel = platform.release()
    def _get_cpuinfo(self):
        # type: () -> None
        """Determine cpu information via /proc/cpuinfo"""
        raw = read_file(['/proc/cpuinfo'])
        output = raw.splitlines()
        cpu_set = set()

        for line in output:
            field = [l.strip() for l in line.split(':')]
            # "model name" repeats per logical cpu; the last one wins
            if "model name" in line:
                self.cpu_model = field[1]
            # number of distinct "physical id" values == number of sockets
            if "physical id" in line:
                cpu_set.add(field[1])
            if "siblings" in line:
                self.cpu_threads = int(field[1].strip())
            if "cpu cores" in line:
                self.cpu_cores = int(field[1].strip())
        self.cpu_count = len(cpu_set)
5148 def _get_block_devs(self
):
5149 # type: () -> List[str]
5150 """Determine the list of block devices by looking at /sys/block"""
5151 return [dev
for dev
in os
.listdir('/sys/block')
5152 if not dev
.startswith('dm')]
5154 def _get_devs_by_type(self
, rota
='0'):
5155 # type: (str) -> List[str]
5156 """Filter block devices by a given rotational attribute (0=flash, 1=spinner)"""
5158 for blk_dev
in self
._get
_block
_devs
():
5159 rot_path
= '/sys/block/{}/queue/rotational'.format(blk_dev
)
5160 rot_value
= read_file([rot_path
])
5161 if rot_value
== rota
:
5162 devs
.append(blk_dev
)
    @property
    def operating_system(self):
        # type: () -> str
        """Determine OS version"""
        raw_info = read_file(['/etc/os-release'])
        os_release = raw_info.splitlines()
        rel_str = 'Unknown'
        rel_dict = dict()

        for line in os_release:
            if "=" in line:
                var_name, var_value = line.split('=')
                rel_dict[var_name] = var_value.strip('"')

        # Would normally use PRETTY_NAME, but NAME and VERSION are more
        # consistent
        if all(_v in rel_dict for _v in ["NAME", "VERSION"]):
            rel_str = "{} {}".format(rel_dict['NAME'], rel_dict['VERSION'])
        return rel_str
    @property
    def hostname(self):
        # type: () -> str
        """Return the hostname"""
        return platform.node()
    @property
    def subscribed(self):
        # type: () -> str
        """Highlevel check to see if the host is subscribed to receive updates/support"""
        def _red_hat():
            # type: () -> str
            # RHEL: entitlement PEMs indicate an active subscription
            entitlements_dir = '/etc/pki/entitlement'
            if os.path.exists(entitlements_dir):
                pems = glob('{}/*.pem'.format(entitlements_dir))
                if len(pems) >= 2:
                    return "Yes"
            return "No"

        os_name = self.operating_system
        if os_name.upper().startswith("RED HAT"):
            return _red_hat()

        # only Red Hat subscriptions are detected here
        return "Unknown"
5213 def hdd_count(self
):
5215 """Return a count of HDDs (spinners)"""
5216 return len(self
._get
_devs
_by
_type
(rota
='1'))
5218 def _get_capacity(self
, dev
):
5219 # type: (str) -> int
5220 """Determine the size of a given device"""
5221 size_path
= os
.path
.join('/sys/block', dev
, 'size')
5222 size_blocks
= int(read_file([size_path
]))
5223 blk_path
= os
.path
.join('/sys/block', dev
, 'queue', 'logical_block_size')
5224 blk_count
= int(read_file([blk_path
]))
5225 return size_blocks
* blk_count
5227 def _get_capacity_by_type(self
, rota
='0'):
5228 # type: (str) -> int
5229 """Return the total capacity of a category of device (flash or hdd)"""
5230 devs
= self
._get
_devs
_by
_type
(rota
=rota
)
5233 capacity
+= self
._get
_capacity
(dev
)
    def _dev_list(self, dev_list):
        # type: (List[str]) -> List[Dict[str, object]]
        """Return a 'pretty' name list for each device in the `dev_list`"""
        disk_list = list()

        for dev in dev_list:
            disk_model = read_file(['/sys/block/{}/device/model'.format(dev)]).strip()
            disk_rev = read_file(['/sys/block/{}/device/rev'.format(dev)]).strip()
            disk_wwid = read_file(['/sys/block/{}/device/wwid'.format(dev)]).strip()
            vendor = read_file(['/sys/block/{}/device/vendor'.format(dev)]).strip()
            # map opaque virtual-device vendor ids to friendly names
            disk_vendor = HostFacts._disk_vendor_workarounds.get(vendor, vendor)
            disk_size_bytes = self._get_capacity(dev)
            disk_list.append({
                "description": "{} {} ({})".format(disk_vendor, disk_model, bytes_to_human(disk_size_bytes)),
                "vendor": disk_vendor,
                "model": disk_model,
                # NOTE(review): the following keys were dropped from this view
                # and reconstructed — confirm against upstream
                "rev": disk_rev,
                "wwid": disk_wwid,
                "dev_name": dev,
                "disk_size_bytes": disk_size_bytes,
                })
        return disk_list
5262 # type: () -> List[Dict[str, object]]
5263 """Return a list of devices that are HDDs (spinners)"""
5264 devs
= self
._get
_devs
_by
_type
(rota
='1')
5265 return self
._dev
_list
(devs
)
5268 def flash_list(self
):
5269 # type: () -> List[Dict[str, object]]
5270 """Return a list of devices that are flash based (SSD, NVMe)"""
5271 devs
= self
._get
_devs
_by
_type
(rota
='0')
5272 return self
._dev
_list
(devs
)
    @property
    def hdd_capacity_bytes(self):
        # type: () -> int
        """Return the total capacity for all HDD devices (bytes)"""
        return self._get_capacity_by_type(rota='1')
    @property
    def hdd_capacity(self):
        # type: () -> str
        """Return the total capacity for all HDD devices (human readable format)"""
        return bytes_to_human(self.hdd_capacity_bytes)
5288 # type: () -> Dict[str, float]
5289 """Return the cpu load average data for the host"""
5290 raw
= read_file(['/proc/loadavg']).strip()
5293 "1min": float(data
[0]),
5294 "5min": float(data
[1]),
5295 "15min": float(data
[2]),
    @property
    def flash_count(self):
        # type: () -> int
        """Return the number of flash devices in the system (SSD, NVMe)"""
        return len(self._get_devs_by_type(rota='0'))
    @property
    def flash_capacity_bytes(self):
        # type: () -> int
        """Return the total capacity for all flash devices (bytes)"""
        return self._get_capacity_by_type(rota='0')
    @property
    def flash_capacity(self):
        # type: () -> str
        """Return the total capacity for all Flash devices (human readable format)"""
        return bytes_to_human(self.flash_capacity_bytes)
    def _process_nics(self):
        # type: () -> None
        """Look at the NIC devices and extract network related metadata"""
        # from https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
        # NOTE(review): several lines of this method were dropped from the
        # extracted view and have been reconstructed — confirm against upstream.
        hw_lookup = {
            "1": "ethernet",
            "32": "infiniband",
            "772": "loopback",
        }

        for nic_path in HostFacts._nic_path_list:
            if not os.path.exists(nic_path):
                continue
            for iface in os.listdir(nic_path):

                # bond/bridge membership shows up as lower_*/upper_* symlinks
                lower_devs_list = [os.path.basename(link.replace("lower_", "")) for link in glob(os.path.join(nic_path, iface, "lower_*"))]
                upper_devs_list = [os.path.basename(link.replace("upper_", "")) for link in glob(os.path.join(nic_path, iface, "upper_*"))]

                try:
                    mtu = int(read_file([os.path.join(nic_path, iface, 'mtu')]))
                except ValueError:
                    mtu = 0

                operstate = read_file([os.path.join(nic_path, iface, 'operstate')])
                try:
                    speed = int(read_file([os.path.join(nic_path, iface, 'speed')]))
                except (OSError, ValueError):
                    # OSError : device doesn't support the ethtool get_link_ksettings
                    # ValueError : raised when the read fails, and returns Unknown
                    #
                    # Either way, we show a -1 when speed isn't available
                    speed = -1

                if os.path.exists(os.path.join(nic_path, iface, 'bridge')):
                    nic_type = "bridge"
                elif os.path.exists(os.path.join(nic_path, iface, 'bonding')):
                    nic_type = "bonding"
                else:
                    nic_type = hw_lookup.get(read_file([os.path.join(nic_path, iface, 'type')]), "Unknown")

                # a 'device' link means the NIC is backed by real hardware
                dev_link = os.path.join(nic_path, iface, 'device')
                if os.path.exists(dev_link):
                    iftype = 'physical'
                    driver_path = os.path.join(dev_link, 'driver')
                    if os.path.exists(driver_path):
                        driver = os.path.basename(
                            os.path.realpath(driver_path))
                    else:
                        driver = 'Unknown'
                else:
                    iftype = 'logical'
                    driver = ''

                self.interfaces[iface] = {
                    "mtu": mtu,
                    "upper_devs_list": upper_devs_list,
                    "lower_devs_list": lower_devs_list,
                    "operstate": operstate,
                    "iftype": iftype,
                    "nic_type": nic_type,
                    "driver": driver,
                    "speed": speed,
                    "ipv4_address": get_ipv4_address(iface),
                    "ipv6_address": get_ipv6_address(iface),
                }
5384 def nic_count(self
):
5386 """Return a total count of all physical NICs detected in the host"""
5388 for iface
in self
.interfaces
:
5389 if self
.interfaces
[iface
]["iftype"] == 'physical':
5390 phys_devs
.append(iface
)
5391 return len(phys_devs
)
    def _get_mem_data(self, field_name):
        # type: (str) -> int
        # Scan the cached /proc/meminfo lines for `field_name` and return its
        # numeric value (kB); 0 when the field is absent.
        for line in self._meminfo:
            if line.startswith(field_name):
                _d = line.split()
                return int(_d[1])
        return 0
    @property
    def memory_total_kb(self):
        # type: () -> int
        """Determine the memory installed (kb)"""
        return self._get_mem_data('MemTotal')
    @property
    def memory_free_kb(self):
        # type: () -> int
        """Determine the memory free (not cache, immediately usable)"""
        return self._get_mem_data('MemFree')
    @property
    def memory_available_kb(self):
        # type: () -> int
        """Determine the memory available to new applications without swapping"""
        return self._get_mem_data('MemAvailable')
    @property
    def vendor(self):
        # type: () -> str
        """Determine server vendor from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, "sys_vendor")
    @property
    def model(self):
        # type: () -> str
        """Determine server model information from DMI data in sysfs"""
        family = read_file(HostFacts._dmi_path_list, "product_family")
        product = read_file(HostFacts._dmi_path_list, "product_name")
        # some platforms only expose product_name; avoid "Unknown (x)" noise
        if family == 'Unknown' and product:
            return "{}".format(product)

        return "{} ({})".format(family, product)
    @property
    def bios_version(self):
        # type: () -> str
        """Determine server BIOS version from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, "bios_version")
    @property
    def bios_date(self):
        # type: () -> str
        """Determine server BIOS date from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, "bios_date")
    @property
    def timestamp(self):
        # type: () -> float
        """Return the current time as Epoch seconds"""
        return time.time()
5456 def system_uptime(self
):
5458 """Return the system uptime (in secs)"""
5459 raw_time
= read_file(['/proc/uptime'])
5460 up_secs
, _
= raw_time
.split()
5461 return float(up_secs
)
    def kernel_security(self):
        # type: () -> Dict[str, str]
        """Determine the security features enabled in the kernel - SELinux, AppArmor"""
        # NOTE(review): several lines of this method were dropped from the
        # extracted view and have been reconstructed — confirm against upstream.
        def _fetch_selinux():
            # type: () -> Dict[str, str]
            """Read the selinux config file to determine state"""
            security = {}
            for selinux_path in HostFacts._selinux_path_list:
                if os.path.exists(selinux_path):
                    selinux_config = read_file([selinux_path]).splitlines()
                    security['type'] = 'SELinux'
                    for line in selinux_config:
                        if line.strip().startswith('#'):
                            continue
                        k, v = line.split('=')
                        security[k] = v
                    if security['SELINUX'].lower() == "disabled":
                        security['description'] = "SELinux: Disabled"
                    else:
                        security['description'] = "SELinux: Enabled({}, {})".format(security['SELINUX'], security['SELINUXTYPE'])
            return security

        def _fetch_apparmor():
            # type: () -> Dict[str, str]
            """Read the apparmor profiles directly, returning an overview of AppArmor status"""
            security = {}
            for apparmor_path in HostFacts._apparmor_path_list:
                if os.path.exists(apparmor_path):
                    security['type'] = "AppArmor"
                    security['description'] = "AppArmor: Enabled"
                    try:
                        profiles = read_file(['/sys/kernel/security/apparmor/profiles'])
                    except OSError:
                        pass
                    else:
                        # count profiles per enforcement mode, e.g. "5 enforce,2 complain"
                        summary = {}  # type: Dict[str, int]
                        for line in profiles.split('\n'):
                            item, mode = line.split(' ')
                            mode = mode.strip('()')
                            if mode in summary:
                                summary[mode] += 1
                            else:
                                summary[mode] = 0
                        summary_str = ",".join(["{} {}".format(v, k) for k, v in summary.items()])
                        security = {**security, **summary}  # type: ignore
                        security['description'] += "({})".format(summary_str)
            return security

        if os.path.exists('/sys/kernel/security/lsm'):
            lsm = read_file(['/sys/kernel/security/lsm']).strip()
            if 'selinux' in lsm:
                return _fetch_selinux()
            elif 'apparmor' in lsm:
                return _fetch_apparmor()
            else:
                return {
                    "type": "Unknown",
                    "description": "Linux Security Module framework is active, but is not using SELinux or AppArmor"
                }

        return {
            "type": "None",
            "description": "Linux Security Module framework is not available"
        }
    def kernel_parameters(self):
        # type: () -> Dict[str, str]
        """Get kernel parameters required/used in Ceph clusters"""

        k_param = {}
        out, _, _ = call_throws(['sysctl', '-a'], verbosity=CallVerbosity.SILENT)
        if out:
            param_list = out.split('\n')
            param_dict = {param.split(" = ")[0]: param.split(" = ")[-1] for param in param_list}

            # return only desired parameters
            if 'net.ipv4.ip_nonlocal_bind' in param_dict:
                k_param['net.ipv4.ip_nonlocal_bind'] = param_dict['net.ipv4.ip_nonlocal_bind']

        return k_param
    def dump(self):
        # type: () -> str
        """Return the attributes of this HostFacts object as json"""
        # Only public attributes/properties with JSON-friendly types are dumped;
        # note that dir()+getattr evaluates every property.
        data = {k: getattr(self, k) for k in dir(self)
                if not k.startswith('_') and
                isinstance(getattr(self, k),
                           (float, int, str, list, dict, tuple))
                }
        return json.dumps(data, indent=2, sort_keys=True)
5554 ##################################
def command_gather_facts():
    """Collect host-related metadata and print it as JSON for the caller."""
    print(HostFacts().dump())
5562 ##################################
5566 # type: () -> argparse.ArgumentParser
5567 parser
= argparse
.ArgumentParser(
5568 description
='Bootstrap Ceph daemons with systemd and containers.',
5569 formatter_class
=argparse
.ArgumentDefaultsHelpFormatter
)
5570 parser
.add_argument(
5572 help='container image. Can also be set via the "CEPHADM_IMAGE" '
5574 parser
.add_argument(
5576 action
='store_true',
5577 help='use docker instead of podman')
5578 parser
.add_argument(
5581 help='base directory for daemon data')
5582 parser
.add_argument(
5585 help='base directory for daemon logs')
5586 parser
.add_argument(
5588 default
=LOGROTATE_DIR
,
5589 help='location of logrotate configuration files')
5590 parser
.add_argument(
5593 help='base directory for systemd units')
5594 parser
.add_argument(
5596 action
='store_true',
5597 help='Show debug-level log messages')
5598 parser
.add_argument(
5601 default
=DEFAULT_TIMEOUT
,
5602 help='timeout in seconds')
5603 parser
.add_argument(
5606 default
=DEFAULT_RETRY
,
5607 help='max number of retries')
5608 parser
.add_argument(
5612 help='set environment variable')
5614 subparsers
= parser
.add_subparsers(help='sub-command')
5616 parser_version
= subparsers
.add_parser(
5617 'version', help='get ceph version from container')
5618 parser_version
.set_defaults(func
=command_version
)
5620 parser_pull
= subparsers
.add_parser(
5621 'pull', help='pull latest image version')
5622 parser_pull
.set_defaults(func
=command_pull
)
5624 parser_inspect_image
= subparsers
.add_parser(
5625 'inspect-image', help='inspect local container image')
5626 parser_inspect_image
.set_defaults(func
=command_inspect_image
)
5628 parser_ls
= subparsers
.add_parser(
5629 'ls', help='list daemon instances on this host')
5630 parser_ls
.set_defaults(func
=command_ls
)
5631 parser_ls
.add_argument(
5633 action
='store_true',
5634 help='Do not include daemon status')
5635 parser_ls
.add_argument(
5638 help='base directory for legacy daemon data')
5640 parser_list_networks
= subparsers
.add_parser(
5641 'list-networks', help='list IP networks')
5642 parser_list_networks
.set_defaults(func
=command_list_networks
)
5644 parser_adopt
= subparsers
.add_parser(
5645 'adopt', help='adopt daemon deployed with a different tool')
5646 parser_adopt
.set_defaults(func
=command_adopt
)
5647 parser_adopt
.add_argument(
5650 help='daemon name (type.id)')
5651 parser_adopt
.add_argument(
5654 help='deployment style (legacy, ...)')
5655 parser_adopt
.add_argument(
5658 help='cluster name')
5659 parser_adopt
.add_argument(
5662 help='base directory for legacy daemon data')
5663 parser_adopt
.add_argument(
5665 help='Additional configuration information in JSON format')
5666 parser_adopt
.add_argument(
5668 action
='store_true',
5669 help='Do not configure firewalld')
5670 parser_adopt
.add_argument(
5672 action
='store_true',
5673 help='do not pull the latest image before adopting')
5674 parser_adopt
.add_argument(
5676 action
='store_true',
5677 help="start newly adoped daemon, even if it wasn't running previously")
5678 parser_adopt
.add_argument(
5680 action
='store_true',
5681 help='Run podman/docker with `--init`')
5683 parser_rm_daemon
= subparsers
.add_parser(
5684 'rm-daemon', help='remove daemon instance')
5685 parser_rm_daemon
.set_defaults(func
=command_rm_daemon
)
5686 parser_rm_daemon
.add_argument(
5689 action
=CustomValidation
,
5690 help='daemon name (type.id)')
5691 parser_rm_daemon
.add_argument(
5694 help='cluster FSID')
5695 parser_rm_daemon
.add_argument(
5697 action
='store_true',
5698 help='proceed, even though this may destroy valuable data')
5699 parser_rm_daemon
.add_argument(
5700 '--force-delete-data',
5701 action
='store_true',
5702 help='delete valuable daemon data instead of making a backup')
5704 parser_rm_cluster
= subparsers
.add_parser(
5705 'rm-cluster', help='remove all daemons for a cluster')
5706 parser_rm_cluster
.set_defaults(func
=command_rm_cluster
)
5707 parser_rm_cluster
.add_argument(
5710 help='cluster FSID')
5711 parser_rm_cluster
.add_argument(
5713 action
='store_true',
5714 help='proceed, even though this may destroy valuable data')
5716 parser_run
= subparsers
.add_parser(
5717 'run', help='run a ceph daemon, in a container, in the foreground')
5718 parser_run
.set_defaults(func
=command_run
)
5719 parser_run
.add_argument(
5722 help='daemon name (type.id)')
5723 parser_run
.add_argument(
5726 help='cluster FSID')
5728 parser_shell
= subparsers
.add_parser(
5729 'shell', help='run an interactive shell inside a daemon container')
5730 parser_shell
.set_defaults(func
=command_shell
)
5731 parser_shell
.add_argument(
5733 help='cluster FSID')
5734 parser_shell
.add_argument(
5736 help='daemon name (type.id)')
5737 parser_shell
.add_argument(
5739 help='ceph.conf to pass through to the container')
5740 parser_shell
.add_argument(
5742 help='ceph.keyring to pass through to the container')
5743 parser_shell
.add_argument(
5745 help=("mount a file or directory in the container. "
5746 "Support multiple mounts. "
5747 "ie: `--mount /foo /bar:/bar`. "
5748 "When no destination is passed, default is /mnt"),
5750 parser_shell
.add_argument(
5754 help='set environment variable')
5755 parser_shell
.add_argument(
5756 'command', nargs
=argparse
.REMAINDER
,
5757 help='command (optional)')
5759 parser_enter
= subparsers
.add_parser(
5760 'enter', help='run an interactive shell inside a running daemon container')
5761 parser_enter
.set_defaults(func
=command_enter
)
5762 parser_enter
.add_argument(
5764 help='cluster FSID')
5765 parser_enter
.add_argument(
5768 help='daemon name (type.id)')
5769 parser_enter
.add_argument(
5770 'command', nargs
=argparse
.REMAINDER
,
5773 parser_ceph_volume
= subparsers
.add_parser(
5774 'ceph-volume', help='run ceph-volume inside a container')
5775 parser_ceph_volume
.set_defaults(func
=command_ceph_volume
)
5776 parser_ceph_volume
.add_argument(
5778 help='cluster FSID')
5779 parser_ceph_volume
.add_argument(
5781 help='JSON file with config and (client.bootrap-osd) key')
5782 parser_ceph_volume
.add_argument(
5784 help='ceph conf file')
5785 parser_ceph_volume
.add_argument(
5787 help='ceph.keyring to pass through to the container')
5788 parser_ceph_volume
.add_argument(
5789 'command', nargs
=argparse
.REMAINDER
,
5792 parser_unit
= subparsers
.add_parser(
5793 'unit', help='operate on the daemon\'s systemd unit')
5794 parser_unit
.set_defaults(func
=command_unit
)
5795 parser_unit
.add_argument(
5797 help='systemd command (start, stop, restart, enable, disable, ...)')
5798 parser_unit
.add_argument(
5800 help='cluster FSID')
5801 parser_unit
.add_argument(
5804 help='daemon name (type.id)')
5806 parser_logs
= subparsers
.add_parser(
5807 'logs', help='print journald logs for a daemon container')
5808 parser_logs
.set_defaults(func
=command_logs
)
5809 parser_logs
.add_argument(
5811 help='cluster FSID')
5812 parser_logs
.add_argument(
5815 help='daemon name (type.id)')
5816 parser_logs
.add_argument(
5817 'command', nargs
='*',
5818 help='additional journalctl args')
5820 parser_bootstrap
= subparsers
.add_parser(
5821 'bootstrap', help='bootstrap a cluster (mon + mgr daemons)')
5822 parser_bootstrap
.set_defaults(func
=command_bootstrap
)
5823 parser_bootstrap
.add_argument(
5825 help='ceph conf file to incorporate')
5826 parser_bootstrap
.add_argument(
5829 help='mon id (default: local hostname)')
5830 parser_bootstrap
.add_argument(
5832 help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])')
5833 parser_bootstrap
.add_argument(
5836 parser_bootstrap
.add_argument(
5839 help='mgr id (default: randomly generated)')
5840 parser_bootstrap
.add_argument(
5842 help='cluster FSID')
5843 parser_bootstrap
.add_argument(
5845 default
='/etc/ceph',
5846 help='directory to write config, keyring, and pub key files')
5847 parser_bootstrap
.add_argument(
5849 help='location to write keyring file with new cluster admin and mon keys')
5850 parser_bootstrap
.add_argument(
5852 help='location to write conf file to connect to new cluster')
5853 parser_bootstrap
.add_argument(
5854 '--output-pub-ssh-key',
5855 help='location to write the cluster\'s public SSH key')
5856 parser_bootstrap
.add_argument(
5858 action
='store_true',
5859 help='skip setup of ssh key on local host')
5860 parser_bootstrap
.add_argument(
5861 '--initial-dashboard-user',
5863 help='Initial user for the dashboard')
5864 parser_bootstrap
.add_argument(
5865 '--initial-dashboard-password',
5866 help='Initial password for the initial dashboard user')
5867 parser_bootstrap
.add_argument(
5868 '--ssl-dashboard-port',
5871 help='Port number used to connect with dashboard using SSL')
5872 parser_bootstrap
.add_argument(
5874 type=argparse
.FileType('r'),
5875 help='Dashboard key')
5876 parser_bootstrap
.add_argument(
5878 type=argparse
.FileType('r'),
5879 help='Dashboard certificate')
5881 parser_bootstrap
.add_argument(
5883 type=argparse
.FileType('r'),
5885 parser_bootstrap
.add_argument(
5886 '--ssh-private-key',
5887 type=argparse
.FileType('r'),
5888 help='SSH private key')
5889 parser_bootstrap
.add_argument(
5891 type=argparse
.FileType('r'),
5892 help='SSH public key')
5893 parser_bootstrap
.add_argument(
5896 help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users')
5898 parser_bootstrap
.add_argument(
5899 '--skip-mon-network',
5900 action
='store_true',
5901 help='set mon public_network based on bootstrap mon ip')
5902 parser_bootstrap
.add_argument(
5904 action
='store_true',
5905 help='do not enable the Ceph Dashboard')
5906 parser_bootstrap
.add_argument(
5907 '--dashboard-password-noupdate',
5908 action
='store_true',
5909 help='stop forced dashboard password change')
5910 parser_bootstrap
.add_argument(
5911 '--no-minimize-config',
5912 action
='store_true',
5913 help='do not assimilate and minimize the config file')
5914 parser_bootstrap
.add_argument(
5915 '--skip-ping-check',
5916 action
='store_true',
5917 help='do not verify that mon IP is pingable')
5918 parser_bootstrap
.add_argument(
5920 action
='store_true',
5921 help='do not pull the latest image before bootstrapping')
5922 parser_bootstrap
.add_argument(
5924 action
='store_true',
5925 help='Do not configure firewalld')
5926 parser_bootstrap
.add_argument(
5927 '--allow-overwrite',
5928 action
='store_true',
5929 help='allow overwrite of existing --output-* config/keyring/ssh files')
5930 parser_bootstrap
.add_argument(
5931 '--allow-fqdn-hostname',
5932 action
='store_true',
5933 help='allow hostname that is fully-qualified (contains ".")')
5934 parser_bootstrap
.add_argument(
5935 '--skip-prepare-host',
5936 action
='store_true',
5937 help='Do not prepare host')
5938 parser_bootstrap
.add_argument(
5939 '--orphan-initial-daemons',
5940 action
='store_true',
5941 help='Do not create initial mon, mgr, and crash service specs')
5942 parser_bootstrap
.add_argument(
5943 '--skip-monitoring-stack',
5944 action
='store_true',
5945 help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
5946 parser_bootstrap
.add_argument(
5948 help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
5950 parser_bootstrap
.add_argument(
5951 '--shared_ceph_folder',
5952 metavar
='CEPH_SOURCE_FOLDER',
5953 help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
5955 parser_bootstrap
.add_argument(
5957 help='url for custom registry')
5958 parser_bootstrap
.add_argument(
5959 '--registry-username',
5960 help='username for custom registry')
5961 parser_bootstrap
.add_argument(
5962 '--registry-password',
5963 help='password for custom registry')
5964 parser_bootstrap
.add_argument(
5966 help='json file with custom registry login info (URL, Username, Password)')
5967 parser_bootstrap
.add_argument(
5969 action
='store_true',
5970 help='Run podman/docker with `--init`')
5972 parser_deploy
= subparsers
.add_parser(
5973 'deploy', help='deploy a daemon')
5974 parser_deploy
.set_defaults(func
=command_deploy
)
5975 parser_deploy
.add_argument(
5978 action
=CustomValidation
,
5979 help='daemon name (type.id)')
5980 parser_deploy
.add_argument(
5983 help='cluster FSID')
5984 parser_deploy
.add_argument(
5986 help='config file for new daemon')
5987 parser_deploy
.add_argument(
5989 help='Additional configuration information in JSON format')
5990 parser_deploy
.add_argument(
5992 help='keyring for new daemon')
5993 parser_deploy
.add_argument(
5995 help='key for new daemon')
5996 parser_deploy
.add_argument(
5998 help='OSD uuid, if creating an OSD container')
5999 parser_deploy
.add_argument(
6001 action
='store_true',
6002 help='Do not configure firewalld')
6003 parser_deploy
.add_argument(
6005 help='List of tcp ports to open in the host firewall')
6006 parser_deploy
.add_argument(
6008 action
='store_true',
6009 help='Reconfigure a previously deployed daemon')
6010 parser_deploy
.add_argument(
6012 action
='store_true',
6013 help='Allow SYS_PTRACE on daemon container')
6014 parser_deploy
.add_argument(
6016 action
='store_true',
6017 help='Run podman/docker with `--init`')
6019 parser_check_host
= subparsers
.add_parser(
6020 'check-host', help='check host configuration')
6021 parser_check_host
.set_defaults(func
=command_check_host
)
6022 parser_check_host
.add_argument(
6023 '--expect-hostname',
6024 help='Check that hostname matches an expected value')
6026 parser_prepare_host
= subparsers
.add_parser(
6027 'prepare-host', help='prepare a host for cephadm use')
6028 parser_prepare_host
.set_defaults(func
=command_prepare_host
)
6029 parser_prepare_host
.add_argument(
6030 '--expect-hostname',
6031 help='Set hostname')
6033 parser_add_repo
= subparsers
.add_parser(
6034 'add-repo', help='configure package repository')
6035 parser_add_repo
.set_defaults(func
=command_add_repo
)
6036 parser_add_repo
.add_argument(
6038 help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE
))
6039 parser_add_repo
.add_argument(
6041 help='use specific upstream version (x.y.z)')
6042 parser_add_repo
.add_argument(
6044 help='use specified bleeding edge build from git branch or tag')
6045 parser_add_repo
.add_argument(
6047 help='use specified bleeding edge build from git commit')
6048 parser_add_repo
.add_argument(
6050 help='specify alternative GPG key location')
6051 parser_add_repo
.add_argument(
6053 default
='https://download.ceph.com',
6054 help='specify alternative repo location')
6057 parser_rm_repo
= subparsers
.add_parser(
6058 'rm-repo', help='remove package repository configuration')
6059 parser_rm_repo
.set_defaults(func
=command_rm_repo
)
6061 parser_install
= subparsers
.add_parser(
6062 'install', help='install ceph package(s)')
6063 parser_install
.set_defaults(func
=command_install
)
6064 parser_install
.add_argument(
6065 'packages', nargs
='*',
6066 default
=['cephadm'],
6069 parser_registry_login
= subparsers
.add_parser(
6070 'registry-login', help='log host into authenticated registry')
6071 parser_registry_login
.set_defaults(func
=command_registry_login
)
6072 parser_registry_login
.add_argument(
6074 help='url for custom registry')
6075 parser_registry_login
.add_argument(
6076 '--registry-username',
6077 help='username for custom registry')
6078 parser_registry_login
.add_argument(
6079 '--registry-password',
6080 help='password for custom registry')
6081 parser_registry_login
.add_argument(
6083 help='json file with custom registry login info (URL, Username, Password)')
6084 parser_registry_login
.add_argument(
6086 help='cluster FSID')
6088 parser_gather_facts
= subparsers
.add_parser(
6089 'gather-facts', help='gather and return host related information (JSON format)')
6090 parser_gather_facts
.set_defaults(func
=command_gather_facts
)
def _parse_args(av):
    # type: (List[str]) -> argparse.Namespace
    """Parse the cephadm command line and return the argparse namespace.

    :param av: argument vector (normally ``sys.argv[1:]``, or the
               ``injected_argv`` list when the script is piped to python3).
    :return: parsed ``argparse.Namespace``; for subcommands that take a
             REMAINDER ``command``, any leading ``--`` separator is stripped.
    """
    parser = _get_parser()
    args = parser.parse_args(av)
    # argparse.REMAINDER keeps a leading "--" separator in the captured
    # list; drop it so the daemon command starts at the real argument.
    if 'command' in args and args.command and args.command[0] == "--":
        args.command.pop(0)
    return args
if __name__ == "__main__":

    # cephadm manages /var/lib/ceph, systemd units, and containers: root only.
    if os.geteuid() != 0:
        sys.stderr.write('ERROR: cephadm should be run as root\n')
        sys.exit(1)

    # Logger configuration: the RotatingFileHandler in logging_config needs
    # LOG_DIR to exist before dictConfig() opens the log file.
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    dictConfig(logging_config)
    logger = logging.getLogger()

    # allow argv to be injected: a caller piping this script to python3 may
    # prepend an ``injected_argv = [...]`` line (see the module docstring).
    try:
        av = injected_argv  # type: ignore
    except NameError:
        av = sys.argv[1:]
    logger.debug("%s\ncephadm %s" % ("-" * 80, av))
    args = _parse_args(av)

    # More verbose console output when requested; the file handler always
    # logs at DEBUG, only the console handler's level is raised here.
    if args.verbose:
        for handler in logger.handlers:
            if handler.name == "console":
                handler.setLevel(logging.DEBUG)

    if 'func' not in args:
        sys.stderr.write('No command specified; pass -h or --help for usage\n')
        sys.exit(1)

    # Locate a container engine (podman preferred over docker) for every
    # command except check-host, which must work without one.
    if args.func != command_check_host:
        if args.docker:
            container_path = find_program('docker')
        else:
            for i in CONTAINER_PREFERENCE:
                try:
                    container_path = find_program(i)
                    break
                except Exception as e:
                    logger.debug('Could not locate %s: %s' % (i, e))
        # prepare-host and add-repo are allowed to run before any engine
        # is installed; everything else needs one.
        if not container_path and args.func != command_prepare_host\
                and args.func != command_add_repo:
            sys.stderr.write('Unable to locate any of %s\n' % CONTAINER_PREFERENCE)
            sys.exit(1)

    # Dispatch to the selected subcommand; cephadm's own Error is reported
    # as a one-line message unless --verbose asks for the full traceback.
    try:
        r = args.func()
    except Error as e:
        if args.verbose:
            raise
        sys.stderr.write('ERROR: %s\n' % e)
        sys.exit(1)
    if not r:
        r = 0
    sys.exit(r)