3 DEFAULT_IMAGE
='docker.io/ceph/ceph:v15'
4 DEFAULT_IMAGE_IS_MASTER
=False
5 LATEST_STABLE_RELEASE
='octopus'
6 DATA_DIR
='/var/lib/ceph'
7 LOG_DIR
='/var/log/ceph'
8 LOCK_DIR
='/run/cephadm'
9 LOGROTATE_DIR
='/etc/logrotate.d'
10 UNIT_DIR
='/etc/systemd/system'
13 CONTAINER_PREFERENCE
= ['podman', 'docker'] # prefer podman to docker
14 CUSTOM_PS1
=r
'[ceph: \u@\h \W]\$ '
15 DEFAULT_TIMEOUT
=None # in seconds
17 SHELL_DEFAULT_CONF
='/etc/ceph/ceph.conf'
18 SHELL_DEFAULT_KEYRING
='/etc/ceph/ceph.client.admin.keyring'
21 You can invoke cephadm in two ways:
23 1. The normal way, at the command line.
25 2. By piping the script to the python3 binary. In this latter case, you should
26 prepend one or more lines to the beginning of the script.
34 injected_argv = ['ls']
36 For reading stdin from the '--config-json -' argument,
38 injected_stdin = '...'
60 from typing
import Dict
, List
, Tuple
, Optional
, Union
, Any
, NoReturn
, Callable
65 from functools
import wraps
67 from threading
import Thread
69 if sys
.version_info
>= (3, 0):
70 from io
import StringIO
72 from StringIO
import StringIO
74 if sys
.version_info
>= (3, 2):
75 from configparser
import ConfigParser
77 from ConfigParser
import SafeConfigParser
79 if sys
.version_info
>= (3, 0):
80 from urllib
.request
import urlopen
81 from urllib
.error
import HTTPError
83 from urllib2
import urlopen
, HTTPError
88 DATEFMT
= '%Y-%m-%dT%H:%M:%S.%f'
96 class Error(Exception):
99 class TimeoutExpired(Error
):
102 ##################################
105 daemons
= ('mon', 'mgr', 'mds', 'osd', 'rgw', 'rbd-mirror',
108 ##################################
110 class Monitoring(object):
111 """Define the configs for the monitoring containers"""
114 "prometheus": [9095], # Avoid default 9090, due to conflict with cockpit UI
115 "node-exporter": [9100],
117 "alertmanager": [9093, 9094],
122 "image": "prom/prometheus:latest",
126 "--config.file=/etc/prometheus/prometheus.yml",
127 "--storage.tsdb.path=/prometheus",
128 "--web.listen-address=:{}".format(port_map
['prometheus'][0]),
130 "config-json-files": [
135 "image": "prom/node-exporter",
139 "--no-collector.timex",
143 "image": "ceph/ceph-grafana:latest",
147 "config-json-files": [
149 "provisioning/datasources/ceph-dashboard.yml",
155 "image": "prom/alertmanager",
159 "config-json-files": [
162 "config-json-args": [
168 ##################################
170 class NFSGanesha(object):
171 """Defines a NFS-Ganesha container"""
174 entrypoint
= '/usr/bin/ganesha.nfsd'
175 daemon_args
= ['-F', '-L', 'STDERR']
177 required_files
= ['ganesha.conf']
187 image
=DEFAULT_IMAGE
):
188 # type: (str, Union[int, str], Dict, str) -> None
190 self
.daemon_id
= daemon_id
193 def json_get(key
, default
=None, require
=False):
194 if require
and not key
in config_json
.keys():
195 raise Error('{} missing from config-json'.format(key
))
196 return config_json
.get(key
, default
)
198 # config-json options
199 self
.pool
= json_get('pool', require
=True)
200 self
.namespace
= json_get('namespace')
201 self
.userid
= json_get('userid')
202 self
.extra_args
= json_get('extra_args', [])
203 self
.files
= json_get('files', {})
205 # validate the supplied args
209 def init(cls
, fsid
, daemon_id
):
210 # type: (str, Union[int, str]) -> NFSGanesha
211 return cls(fsid
, daemon_id
, get_parm(args
.config_json
), args
.image
)
216 for (srv
, port
) in NFSGanesha
.port_map
.items():
217 if port_in_use(port
):
218 msg
= 'TCP port {} required for {} is already in use'.format(port
, srv
)
222 def get_container_mounts(data_dir
):
223 # type: (str) -> Dict[str, str]
225 mounts
[os
.path
.join(data_dir
, 'config')] = '/etc/ceph/ceph.conf:z'
226 mounts
[os
.path
.join(data_dir
, 'keyring')] = '/etc/ceph/keyring:z'
227 mounts
[os
.path
.join(data_dir
, 'etc/ganesha')] = '/etc/ganesha:z'
231 def get_container_envs():
232 # type: () -> List[str]
234 'CEPH_CONF=%s' % ('/etc/ceph/ceph.conf')
239 def get_version(container_id
):
240 # type: (str) -> Optional[str]
242 out
, err
, code
= call(
243 [container_path
, 'exec', container_id
,
244 NFSGanesha
.entrypoint
, '-v'])
246 match
= re
.search(r
'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out
)
248 version
= match
.group(1)
253 if not is_fsid(self
.fsid
):
254 raise Error('not an fsid: %s' % self
.fsid
)
255 if not self
.daemon_id
:
256 raise Error('invalid daemon_id: %s' % self
.daemon_id
)
258 raise Error('invalid image: %s' % self
.image
)
260 # check for the required files
261 if self
.required_files
:
262 for fname
in self
.required_files
:
263 if fname
not in self
.files
:
264 raise Error('required file missing from config-json: %s' % fname
)
266 def get_daemon_name(self
):
268 return '%s.%s' % (self
.daemon_type
, self
.daemon_id
)
270 def get_container_name(self
, desc
=None):
271 # type: (Optional[str]) -> str
272 cname
= 'ceph-%s-%s' % (self
.fsid
, self
.get_daemon_name())
274 cname
= '%s-%s' % (cname
, desc
)
277 def get_daemon_args(self
):
278 # type: () -> List[str]
279 return self
.daemon_args
+ self
.extra_args
281 def get_file_content(self
, fname
):
283 """Normalize the json file content into a string"""
284 content
= self
.files
.get(fname
)
285 if isinstance(content
, list):
286 content
= '\n'.join(content
)
289 def create_daemon_dirs(self
, data_dir
, uid
, gid
):
290 # type: (str, int, int) -> None
291 """Create files under the container data dir"""
292 if not os
.path
.isdir(data_dir
):
293 raise OSError('data_dir is not a directory: %s' % (data_dir
))
295 logger
.info('Creating ganesha config...')
297 # create the ganesha conf dir
298 config_dir
= os
.path
.join(data_dir
, 'etc/ganesha')
299 makedirs(config_dir
, uid
, gid
, 0o755)
301 # populate files from the config-json
302 for fname
in self
.files
:
303 config_file
= os
.path
.join(config_dir
, fname
)
304 config_content
= self
.get_file_content(fname
)
305 logger
.info('Write file: %s' % (config_file
))
306 with
open(config_file
, 'w') as f
:
307 os
.fchown(f
.fileno(), uid
, gid
)
308 os
.fchmod(f
.fileno(), 0o600)
309 f
.write(config_content
)
311 def get_rados_grace_container(self
, action
):
312 # type: (str) -> CephContainer
313 """Container for a ganesha action on the grace db"""
314 entrypoint
= '/usr/bin/ganesha-rados-grace'
317 args
=['--pool', self
.pool
]
319 args
+= ['--ns', self
.namespace
]
321 args
+= ['--userid', self
.userid
]
322 args
+= [action
, self
.get_daemon_name()]
324 data_dir
= get_data_dir(self
.fsid
, self
.daemon_type
, self
.daemon_id
)
325 volume_mounts
= self
.get_container_mounts(data_dir
)
326 envs
= self
.get_container_envs()
328 logger
.info('Creating RADOS grace for action: %s' % (action
))
331 entrypoint
=entrypoint
,
333 volume_mounts
=volume_mounts
,
334 cname
=self
.get_container_name(desc
='grace-%s' % (action
)),
339 ##################################
341 class CephIscsi(object):
342 """Defines a Ceph-Iscsi container"""
344 daemon_type
= 'iscsi'
345 entrypoint
= '/usr/bin/rbd-target-api'
347 required_files
= ['iscsi-gateway.cfg']
353 image
=DEFAULT_IMAGE
):
354 # type: (str, Union[int, str], Dict, str) -> None
356 self
.daemon_id
= daemon_id
359 def json_get(key
, default
=None, require
=False):
360 if require
and not key
in config_json
.keys():
361 raise Error('{} missing from config-json'.format(key
))
362 return config_json
.get(key
, default
)
364 # config-json options
365 self
.files
= json_get('files', {})
367 # validate the supplied args
371 def init(cls
, fsid
, daemon_id
):
372 # type: (str, Union[int, str]) -> CephIscsi
373 return cls(fsid
, daemon_id
, get_parm(args
.config_json
), args
.image
)
376 def get_container_mounts(data_dir
, log_dir
):
377 # type: (str, str) -> Dict[str, str]
379 mounts
[os
.path
.join(data_dir
, 'config')] = '/etc/ceph/ceph.conf:z'
380 mounts
[os
.path
.join(data_dir
, 'keyring')] = '/etc/ceph/keyring:z'
381 mounts
[os
.path
.join(data_dir
, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
382 mounts
[os
.path
.join(data_dir
, 'configfs')] = '/sys/kernel/config:z'
383 mounts
[log_dir
] = '/var/log/rbd-target-api:z'
384 mounts
['/dev/log'] = '/dev/log:z'
388 def get_version(container_id
):
389 # type: (str) -> Optional[str]
391 out
, err
, code
= call(
392 [container_path
, 'exec', container_id
,
393 '/usr/bin/python3', '-c', "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"])
400 if not is_fsid(self
.fsid
):
401 raise Error('not an fsid: %s' % self
.fsid
)
402 if not self
.daemon_id
:
403 raise Error('invalid daemon_id: %s' % self
.daemon_id
)
405 raise Error('invalid image: %s' % self
.image
)
407 # check for the required files
408 if self
.required_files
:
409 for fname
in self
.required_files
:
410 if fname
not in self
.files
:
411 raise Error('required file missing from config-json: %s' % fname
)
413 def get_daemon_name(self
):
415 return '%s.%s' % (self
.daemon_type
, self
.daemon_id
)
417 def get_container_name(self
, desc
=None):
418 # type: (Optional[str]) -> str
419 cname
= 'ceph-%s-%s' % (self
.fsid
, self
.get_daemon_name())
421 cname
= '%s-%s' % (cname
, desc
)
424 def get_file_content(self
, fname
):
426 """Normalize the json file content into a string"""
427 content
= self
.files
.get(fname
)
428 if isinstance(content
, list):
429 content
= '\n'.join(content
)
432 def create_daemon_dirs(self
, data_dir
, uid
, gid
):
433 # type: (str, int, int) -> None
434 """Create files under the container data dir"""
435 if not os
.path
.isdir(data_dir
):
436 raise OSError('data_dir is not a directory: %s' % (data_dir
))
438 logger
.info('Creating ceph-iscsi config...')
439 configfs_dir
= os
.path
.join(data_dir
, 'configfs')
440 makedirs(configfs_dir
, uid
, gid
, 0o755)
442 # populate files from the config-json
443 for fname
in self
.files
:
444 config_file
= os
.path
.join(data_dir
, fname
)
445 config_content
= self
.get_file_content(fname
)
446 logger
.info('Write file: %s' % (config_file
))
447 with
open(config_file
, 'w') as f
:
448 os
.fchown(f
.fileno(), uid
, gid
)
449 os
.fchmod(f
.fileno(), 0o600)
450 f
.write(config_content
)
453 def configfs_mount_umount(data_dir
, mount
=True):
454 # type: (str, bool) -> List[str]
455 mount_path
= os
.path
.join(data_dir
, 'configfs')
457 cmd
= "if ! grep -qs {0} /proc/mounts; then " \
458 "mount -t configfs none {0}; fi".format(mount_path
)
460 cmd
= "if grep -qs {0} /proc/mounts; then " \
461 "umount {0}; fi".format(mount_path
)
464 ##################################
def get_supported_daemons():
    # type: () -> List[str]
    """Return every daemon type this script knows how to deploy.

    Covers core ceph daemons, the monitoring stack components, and the
    NFS-Ganesha / iSCSI gateway containers.
    """
    supported = list(Ceph.daemons)
    supported += Monitoring.components
    supported += [NFSGanesha.daemon_type, CephIscsi.daemon_type]
    # daemon type names must be globally unique
    assert len(supported) == len(set(supported))
    return supported
475 ##################################
477 def attempt_bind(s
, address
, port
):
478 # type: (socket.socket, str, int) -> None
480 s
.setsockopt(socket
.SOL_SOCKET
, socket
.SO_REUSEADDR
, 1)
481 s
.bind((address
, port
))
482 except (socket
.error
, OSError) as e
: # py2 and py3
483 msg
= 'Cannot bind to IP %s port %d: %s' % (address
, port
, e
)
485 if e
.errno
== errno
.EADDRINUSE
:
487 elif e
.errno
== errno
.EADDRNOTAVAIL
:
492 def port_in_use(port_num
):
493 # type: (int) -> bool
494 """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
495 logger
.info('Verifying port %d ...' % port_num
)
497 s
= socket
.socket(socket
.AF_INET
, socket
.SOCK_STREAM
)
498 attempt_bind(s
, '0.0.0.0', port_num
)
500 s
= socket
.socket(socket
.AF_INET6
, socket
.SOCK_STREAM
)
501 attempt_bind(s
, '::', port_num
)
507 def check_ip_port(ip
, port
):
508 # type: (str, int) -> None
509 if not args
.skip_ping_check
:
510 logger
.info('Verifying IP %s port %d ...' % (ip
, port
))
511 if ip
.startswith('[') or '::' in ip
:
512 s
= socket
.socket(socket
.AF_INET6
, socket
.SOCK_STREAM
)
513 if ip
.startswith('[') and ip
.endswith(']'):
516 s
= socket
.socket(socket
.AF_INET
, socket
.SOCK_STREAM
)
518 attempt_bind(s
, ip
, port
)
522 ##################################
524 # this is an abbreviated version of
525 # https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py
526 # that drops all of the compatibility (this is Unix/Linux only).
531 TimeoutError
= OSError
533 class Timeout(TimeoutError
):
535 Raised when the lock could not be acquired in *timeout*
539 def __init__(self
, lock_file
):
542 #: The path of the file lock.
543 self
.lock_file
= lock_file
547 temp
= "The file lock '{}' could not be acquired."\
548 .format(self
.lock_file
)
552 class _Acquire_ReturnProxy(object):
553 def __init__(self
, lock
):
560 def __exit__(self
, exc_type
, exc_value
, traceback
):
565 class FileLock(object):
566 def __init__(self
, name
, timeout
= -1):
567 if not os
.path
.exists(LOCK_DIR
):
568 os
.mkdir(LOCK_DIR
, 0o700)
569 self
._lock
_file
= os
.path
.join(LOCK_DIR
, name
+ '.lock')
571 # The file descriptor for the *_lock_file* as it is returned by the
572 # os.open() function.
573 # This file lock is only NOT None, if the object currently holds the
575 self
._lock
_file
_fd
= None
576 self
.timeout
= timeout
577 # The lock counter is used for implementing the nested locking
578 # mechanism. Whenever the lock is acquired, the counter is increased and
579 # the lock is only released, when this value is 0 again.
580 self
._lock
_counter
= 0
585 return self
._lock
_file
_fd
is not None
587 def acquire(self
, timeout
=None, poll_intervall
=0.05):
589 Acquires the file lock or fails with a :exc:`Timeout` error.
590 .. code-block:: python
591 # You can use this method in the context manager (recommended)
594 # Or use an equivalent try-finally construct:
601 The maximum time waited for the file lock.
602 If ``timeout < 0``, there is no timeout and this method will
603 block until the lock could be acquired.
604 If ``timeout`` is None, the default :attr:`~timeout` is used.
605 :arg float poll_intervall:
606 We check once in *poll_intervall* seconds if we can acquire the
609 if the lock could not be acquired in *timeout* seconds.
610 .. versionchanged:: 2.0.0
611 This method returns now a *proxy* object instead of *self*,
612 so that it can be used in a with statement without side effects.
614 # Use the default timeout, if no timeout is provided.
616 timeout
= self
.timeout
618 # Increment the number right at the beginning.
619 # We can still undo it, if something fails.
620 self
._lock
_counter
+= 1
623 lock_filename
= self
._lock
_file
624 start_time
= time
.time()
627 if not self
.is_locked
:
628 logger
.debug('Acquiring lock %s on %s', lock_id
,
633 logger
.debug('Lock %s acquired on %s', lock_id
,
636 elif timeout
>= 0 and time
.time() - start_time
> timeout
:
637 logger
.warning('Timeout acquiring lock %s on %s', lock_id
,
639 raise Timeout(self
._lock
_file
)
642 'Lock %s not acquired on %s, waiting %s seconds ...',
643 lock_id
, lock_filename
, poll_intervall
645 time
.sleep(poll_intervall
)
647 # Something did go wrong, so decrement the counter.
648 self
._lock
_counter
= max(0, self
._lock
_counter
- 1)
651 return _Acquire_ReturnProxy(lock
= self
)
653 def release(self
, force
= False):
655 Releases the file lock.
656 Please note, that the lock is only completly released, if the lock
658 Also note, that the lock file itself is not automatically deleted.
660 If true, the lock counter is ignored and the lock is released in
664 self
._lock
_counter
-= 1
666 if self
._lock
_counter
== 0 or force
:
668 lock_filename
= self
._lock
_file
670 logger
.debug('Releasing lock %s on %s', lock_id
, lock_filename
)
672 self
._lock
_counter
= 0
673 logger
.debug('Lock %s released on %s', lock_id
, lock_filename
)
681 def __exit__(self
, exc_type
, exc_value
, traceback
):
686 self
.release(force
= True)
691 open_mode
= os
.O_RDWR | os
.O_CREAT | os
.O_TRUNC
692 fd
= os
.open(self
._lock
_file
, open_mode
)
695 fcntl
.flock(fd
, fcntl
.LOCK_EX | fcntl
.LOCK_NB
)
696 except (IOError, OSError):
699 self
._lock
_file
_fd
= fd
703 # Do not remove the lockfile:
705 # https://github.com/benediktschmitt/py-filelock/issues/31
706 # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
707 fd
= self
._lock
_file
_fd
708 self
._lock
_file
_fd
= None
709 fcntl
.flock(fd
, fcntl
.LOCK_UN
)
714 ##################################
715 # Popen wrappers, lifted from ceph-volume
717 def call(command
, # type: List[str]
718 desc
=None, # type: Optional[str]
719 verbose
=False, # type: bool
720 verbose_on_failure
=True, # type: bool
721 timeout
=DEFAULT_TIMEOUT
, # type: Optional[int]
724 Wrap subprocess.Popen to
726 - log stdout/stderr to a logger,
728 - cleanly return out, err, returncode
730 If verbose=True, log at info (instead of debug) level.
732 :param verbose_on_failure: On a non-zero exit status, it will forcefully set
733 logging ON for the terminal
734 :param timeout: timeout in seconds
738 timeout
= timeout
or args
.timeout
740 logger
.debug("Running command: %s" % ' '.join(command
))
741 process
= subprocess
.Popen(
743 stdout
=subprocess
.PIPE
,
744 stderr
=subprocess
.PIPE
,
748 # get current p.stdout flags, add O_NONBLOCK
749 assert process
.stdout
is not None
750 assert process
.stderr
is not None
751 stdout_flags
= fcntl
.fcntl(process
.stdout
, fcntl
.F_GETFL
)
752 stderr_flags
= fcntl
.fcntl(process
.stderr
, fcntl
.F_GETFL
)
753 fcntl
.fcntl(process
.stdout
, fcntl
.F_SETFL
, stdout_flags | os
.O_NONBLOCK
)
754 fcntl
.fcntl(process
.stderr
, fcntl
.F_SETFL
, stderr_flags | os
.O_NONBLOCK
)
760 out_buffer
= '' # partial line (no newline yet)
761 err_buffer
= '' # partial line (no newline yet)
762 start_time
= time
.time()
765 end_time
= start_time
+ timeout
767 if end_time
and (time
.time() >= end_time
):
768 logger
.info(desc
+ ':timeout after %s seconds' % timeout
)
771 if reads
and process
.poll() is not None:
772 # we want to stop, but first read off anything remaining
776 reads
, _
, _
= select
.select(
777 [process
.stdout
.fileno(), process
.stderr
.fileno()],
782 message_b
= os
.read(fd
, 1024)
783 if isinstance(message_b
, bytes
):
784 message
= message_b
.decode('utf-8')
785 if isinstance(message_b
, str):
788 # process has terminated, but have more to read still, so not stopping yet
789 # (os.read returns '' when it encounters EOF)
793 if fd
== process
.stdout
.fileno():
795 message
= out_buffer
+ message
796 lines
= message
.split('\n')
797 out_buffer
= lines
.pop()
800 logger
.info(desc
+ ':stdout ' + line
)
802 logger
.debug(desc
+ ':stdout ' + line
)
803 elif fd
== process
.stderr
.fileno():
805 message
= err_buffer
+ message
806 lines
= message
.split('\n')
807 err_buffer
= lines
.pop()
810 logger
.info(desc
+ ':stderr ' + line
)
812 logger
.debug(desc
+ ':stderr ' + line
)
815 except (IOError, OSError):
818 returncode
= process
.wait()
822 logger
.info(desc
+ ':stdout ' + out_buffer
)
824 logger
.debug(desc
+ ':stdout ' + out_buffer
)
827 logger
.info(desc
+ ':stderr ' + err_buffer
)
829 logger
.debug(desc
+ ':stderr ' + err_buffer
)
831 if returncode
!= 0 and verbose_on_failure
and not verbose
:
832 # dump stdout + stderr
833 logger
.info('Non-zero exit code %d from %s' % (returncode
, ' '.join(command
)))
834 for line
in out
.splitlines():
835 logger
.info(desc
+ ':stdout ' + line
)
836 for line
in err
.splitlines():
837 logger
.info(desc
+ ':stderr ' + line
)
839 return out
, err
, returncode
842 def call_throws(command
, **kwargs
):
843 # type: (List[str], Any) -> Tuple[str, str, int]
844 out
, err
, ret
= call(command
, **kwargs
)
846 raise RuntimeError('Failed command: %s' % ' '.join(command
))
850 def call_timeout(command
, timeout
):
851 # type: (List[str], int) -> int
853 logger
.debug('Running command (timeout=%s): %s'
854 % (timeout
, ' '.join(command
)))
856 def raise_timeout(command
, timeout
):
857 # type: (List[str], int) -> NoReturn
858 msg
= 'Command \'%s\' timed out after %s seconds' % (command
, timeout
)
860 raise TimeoutExpired(msg
)
862 def call_timeout_py2(command
, timeout
):
863 # type: (List[str], int) -> int
864 proc
= subprocess
.Popen(command
)
865 thread
= Thread(target
=proc
.wait
)
868 if thread
.is_alive():
871 raise_timeout(command
, timeout
)
872 return proc
.returncode
874 def call_timeout_py3(command
, timeout
):
875 # type: (List[str], int) -> int
877 return subprocess
.call(command
, timeout
=timeout
)
878 except subprocess
.TimeoutExpired
as e
:
879 raise_timeout(command
, timeout
)
882 if sys
.version_info
>= (3, 3):
883 ret
= call_timeout_py3(command
, timeout
)
885 # py2 subprocess has no timeout arg
886 ret
= call_timeout_py2(command
, timeout
)
889 ##################################
891 def is_available(what
, func
):
892 # type: (str, Callable[[], bool]) -> None
894 Wait for a service to become available
896 :param what: the name of the service
897 :param func: the callable object that determines availability
900 logger
.info('Waiting for %s...' % (what
))
904 logger
.info('%s is available'
908 raise Error('%s not available after %s tries'
911 logger
.info('%s not available, waiting (%s/%s)...'
912 % (what
, num
, retry
))
919 # type: (Optional[str]) -> ConfigParser
920 # bend over backwards here because py2's ConfigParser doesn't like
921 # whitespace before config option names (e.g., '\n foo = bar\n').
923 if sys
.version_info
>= (3, 2):
926 cp
= SafeConfigParser()
929 with
open(fn
, 'r') as f
:
931 nice_conf
= re
.sub(r
'\n(\s)+', r
'\n', raw_conf
)
932 s_io
= StringIO(nice_conf
)
933 if sys
.version_info
>= (3, 2):
942 p
= os
.path
.expanduser(p
)
943 return os
.path
.abspath(p
)
945 def get_file_timestamp(fn
):
946 # type: (str) -> Optional[str]
948 mt
= os
.path
.getmtime(fn
)
949 return datetime
.datetime
.fromtimestamp(
950 mt
, tz
=datetime
.timezone
.utc
952 except Exception as e
:
955 def try_convert_datetime(s
):
956 # type: (str) -> Optional[str]
957 # This is super irritating because
958 # 1) podman and docker use different formats
959 # 2) python's strptime can't parse either one
962 # docker 18.09.7: 2020-03-03T09:21:43.636153304Z
963 # podman 1.7.0: 2020-03-03T15:52:30.136257504-06:00
964 # 2020-03-03 15:52:30.136257504 -0600 CST
965 # (In the podman case, there is a different string format for
966 # 'inspect' and 'inspect --format {{.Created}}'!!)
968 # In *all* cases, the 9 digit second precision is too much for
969 # python's strptime. Shorten it to 6 digits.
970 p
= re
.compile(r
'(\.[\d]{6})[\d]*')
973 # replace trailling Z with -0000, since (on python 3.6.8) it won't parse
974 if s
and s
[-1] == 'Z':
977 # cut off the redundnat 'CST' part that strptime can't parse, if
982 # try parsing with several format strings
984 '%Y-%m-%dT%H:%M:%S.%f%z',
985 '%Y-%m-%d %H:%M:%S.%f %z',
989 # return timestamp normalized to UTC, rendered as DATEFMT.
990 return datetime
.datetime
.strptime(s
, f
).astimezone(tz
=datetime
.timezone
.utc
).strftime(DATEFMT
)
def get_podman_version():
    # type: () -> Tuple[int, ...]
    """Return the podman version as an int tuple; error if we're on docker."""
    if 'podman' not in container_path:
        raise ValueError('not using podman')
    version_out, _, _ = call_throws([container_path, '--version'])
    return _parse_podman_version(version_out)
1002 def _parse_podman_version(out
):
1003 # type: (str) -> Tuple[int, ...]
1004 _
, _
, version_str
= out
.strip().split()
1006 def to_int(val
, org_e
=None):
1007 if not val
and org_e
:
1011 except ValueError as e
:
1012 return to_int(val
[0:-1], org_e
or e
)
1014 return tuple(map(to_int
, version_str
.split('.')))
1019 return socket
.gethostname()
1023 return socket
.getfqdn() or socket
.gethostname()
1027 return platform
.uname().machine
1029 def generate_service_id():
1031 return get_hostname() + '.' + ''.join(random
.choice(string
.ascii_lowercase
)
1034 def generate_password():
1036 return ''.join(random
.choice(string
.ascii_lowercase
+ string
.digits
)
1039 def normalize_container_id(i
):
1040 # type: (str) -> str
1041 # docker adds the sha256: prefix, but AFAICS both
1042 # docker (18.09.7 in bionic at least) and podman
1043 # both always use sha256, so leave off the prefix
1046 if i
.startswith(prefix
):
1052 return str(uuid
.uuid1())
1055 # type: (str) -> bool
1062 def infer_fsid(func
):
1064 If we only find a single fsid in /var/lib/ceph/*, use that
1069 logger
.debug('Using specified fsid: %s' % args
.fsid
)
1073 daemon_list
= list_daemons(detail
=False)
1074 for daemon
in daemon_list
:
1075 if 'name' not in args
or not args
.name
:
1076 fsids
.add(daemon
['fsid'])
1077 elif daemon
['name'] == args
.name
:
1078 fsids
.add(daemon
['fsid'])
1082 # some commands do not always require an fsid
1084 elif len(fsids
) == 1:
1085 logger
.info('Inferring fsid %s' % fsids
[0])
1086 args
.fsid
= fsids
[0]
1088 raise Error('Cannot infer an fsid, one must be specified: %s' % fsids
)
1093 def infer_config(func
):
1095 If we find a MON daemon, use the config from that container
1098 def _infer_config():
1100 logger
.debug('Using specified config: %s' % args
.config
)
1106 daemon_list
= list_daemons(detail
=False)
1107 for daemon
in daemon_list
:
1108 if daemon
['name'].startswith('mon.'):
1109 name
= daemon
['name']
1112 config
= '/var/lib/ceph/{}/{}/config'.format(args
.fsid
, name
)
1114 logger
.info('Inferring config %s' % config
)
1115 args
.config
= config
1116 elif os
.path
.exists(SHELL_DEFAULT_CONF
):
1117 logger
.debug('Using default config: %s' % SHELL_DEFAULT_CONF
)
1118 args
.config
= SHELL_DEFAULT_CONF
1121 return _infer_config
1123 def _get_default_image():
1124 if DEFAULT_IMAGE_IS_MASTER
:
1125 warn
= '''This is a development version of cephadm.
1126 For information regarding the latest stable release:
1127 https://docs.ceph.com/docs/{}/cephadm/install
1128 '''.format(LATEST_STABLE_RELEASE
)
1129 for line
in warn
.splitlines():
1130 logger
.warning('{}{}{}'.format(termcolor
.yellow
, line
, termcolor
.end
))
1131 return DEFAULT_IMAGE
1133 def infer_image(func
):
1135 Use the most recent ceph image
1140 args
.image
= os
.environ
.get('CEPHADM_IMAGE')
1142 args
.image
= get_last_local_ceph_image()
1144 args
.image
= _get_default_image()
1149 def default_image(func
):
1151 def _default_image():
1153 if 'name' in args
and args
.name
:
1154 type_
= args
.name
.split('.', 1)[0]
1155 if type_
in Monitoring
.components
:
1156 args
.image
= Monitoring
.components
[type_
]['image']
1158 args
.image
= os
.environ
.get('CEPHADM_IMAGE')
1160 args
.image
= _get_default_image()
1164 return _default_image
1166 def get_last_local_ceph_image():
1168 :return: The most recent local ceph image (already pulled)
1170 out
, _
, _
= call_throws(
1171 [container_path
, 'images',
1172 '--filter', 'label=ceph=True',
1173 '--format', '{{.Repository}} {{.Tag}}'])
1174 out_lines
= out
.splitlines()
1175 if len(out_lines
) > 0:
1176 repository
, tag
= out_lines
[0].split()
1177 r
= '{}:{}'.format(repository
, tag
)
1178 logger
.info('Using recent ceph image %s' % r
)
1182 def write_tmp(s
, uid
, gid
):
1183 # type: (str, int, int) -> Any
1184 tmp_f
= tempfile
.NamedTemporaryFile(mode
='w',
1186 os
.fchown(tmp_f
.fileno(), uid
, gid
)
1192 def makedirs(dir, uid
, gid
, mode
):
1193 # type: (str, int, int, int) -> None
1194 if not os
.path
.exists(dir):
1195 os
.makedirs(dir, mode
=mode
)
1198 os
.chown(dir, uid
, gid
)
1199 os
.chmod(dir, mode
) # the above is masked by umask...
def get_data_dir(fsid, t, n):
    # type: (str, str, Union[int, str]) -> str
    """Path of the data directory for daemon <t>.<n> of cluster <fsid>."""
    daemon = '%s.%s' % (t, n)
    return os.path.join(args.data_dir, fsid, daemon)
def get_log_dir(fsid):
    # type: (str) -> str
    """Path of the per-cluster log directory for <fsid>."""
    base = args.log_dir
    return os.path.join(base, fsid)
1209 def make_data_dir_base(fsid
, uid
, gid
):
1210 # type: (str, int, int) -> str
1211 data_dir_base
= os
.path
.join(args
.data_dir
, fsid
)
1212 makedirs(data_dir_base
, uid
, gid
, DATA_DIR_MODE
)
1213 makedirs(os
.path
.join(data_dir_base
, 'crash'), uid
, gid
, DATA_DIR_MODE
)
1214 makedirs(os
.path
.join(data_dir_base
, 'crash', 'posted'), uid
, gid
,
1216 return data_dir_base
1218 def make_data_dir(fsid
, daemon_type
, daemon_id
, uid
=None, gid
=None):
1219 # type: (str, str, Union[int, str], int, int) -> str
1220 if not uid
or not gid
:
1221 (uid
, gid
) = extract_uid_gid()
1222 make_data_dir_base(fsid
, uid
, gid
)
1223 data_dir
= get_data_dir(fsid
, daemon_type
, daemon_id
)
1224 makedirs(data_dir
, uid
, gid
, DATA_DIR_MODE
)
1227 def make_log_dir(fsid
, uid
=None, gid
=None):
1228 # type: (str, int, int) -> str
1229 if not uid
or not gid
:
1230 (uid
, gid
) = extract_uid_gid()
1231 log_dir
= get_log_dir(fsid
)
1232 makedirs(log_dir
, uid
, gid
, LOG_DIR_MODE
)
def make_var_run(fsid, uid, gid):
    # type: (str, int, int) -> None
    """Create /var/run/ceph/<fsid> owned by uid:gid with mode 0770."""
    # 'install -d' creates the dir with owner/group/mode in one step
    run_dir = '/var/run/ceph/%s' % fsid
    call_throws(['install', '-d', '-m0770', '-o', str(uid), '-g', str(gid),
                 run_dir])
1240 def copy_tree(src
, dst
, uid
=None, gid
=None):
1241 # type: (List[str], str, int, int) -> None
1243 Copy a directory tree from src to dst
1245 if not uid
or not gid
:
1246 (uid
, gid
) = extract_uid_gid()
1250 if os
.path
.isdir(dst
):
1251 dst_dir
= os
.path
.join(dst
, os
.path
.basename(src_dir
))
1253 logger
.debug('copy directory \'%s\' -> \'%s\'' % (src_dir
, dst_dir
))
1254 shutil
.rmtree(dst_dir
, ignore_errors
=True)
1255 shutil
.copytree(src_dir
, dst_dir
) # dirs_exist_ok needs python 3.8
1257 for dirpath
, dirnames
, filenames
in os
.walk(dst_dir
):
1258 logger
.debug('chown %s:%s \'%s\'' % (uid
, gid
, dirpath
))
1259 os
.chown(dirpath
, uid
, gid
)
1260 for filename
in filenames
:
1261 logger
.debug('chown %s:%s \'%s\'' % (uid
, gid
, filename
))
1262 os
.chown(os
.path
.join(dirpath
, filename
), uid
, gid
)
1265 def copy_files(src
, dst
, uid
=None, gid
=None):
1266 # type: (List[str], str, int, int) -> None
1268 Copy a files from src to dst
1270 if not uid
or not gid
:
1271 (uid
, gid
) = extract_uid_gid()
1273 for src_file
in src
:
1275 if os
.path
.isdir(dst
):
1276 dst_file
= os
.path
.join(dst
, os
.path
.basename(src_file
))
1278 logger
.debug('copy file \'%s\' -> \'%s\'' % (src_file
, dst_file
))
1279 shutil
.copyfile(src_file
, dst_file
)
1281 logger
.debug('chown %s:%s \'%s\'' % (uid
, gid
, dst_file
))
1282 os
.chown(dst_file
, uid
, gid
)
1284 def move_files(src
, dst
, uid
=None, gid
=None):
1285 # type: (List[str], str, int, int) -> None
1287 Move files from src to dst
1289 if not uid
or not gid
:
1290 (uid
, gid
) = extract_uid_gid()
1292 for src_file
in src
:
1294 if os
.path
.isdir(dst
):
1295 dst_file
= os
.path
.join(dst
, os
.path
.basename(src_file
))
1297 if os
.path
.islink(src_file
):
1298 # shutil.move() in py2 does not handle symlinks correctly
1299 src_rl
= os
.readlink(src_file
)
1300 logger
.debug("symlink '%s' -> '%s'" % (dst_file
, src_rl
))
1301 os
.symlink(src_rl
, dst_file
)
1304 logger
.debug("move file '%s' -> '%s'" % (src_file
, dst_file
))
1305 shutil
.move(src_file
, dst_file
)
1306 logger
.debug('chown %s:%s \'%s\'' % (uid
, gid
, dst_file
))
1307 os
.chown(dst_file
, uid
, gid
)
1309 ## copied from distutils ##
1310 def find_executable(executable
, path
=None):
1311 """Tries to find 'executable' in the directories listed in 'path'.
1312 A string listing directories separated by 'os.pathsep'; defaults to
1313 os.environ['PATH']. Returns the complete filename or None if not found.
1315 _
, ext
= os
.path
.splitext(executable
)
1316 if (sys
.platform
== 'win32') and (ext
!= '.exe'):
1317 executable
= executable
+ '.exe'
1319 if os
.path
.isfile(executable
):
1323 path
= os
.environ
.get('PATH', None)
1326 path
= os
.confstr("CS_PATH")
1327 except (AttributeError, ValueError):
1328 # os.confstr() or CS_PATH is not available
1330 # bpo-35755: Don't use os.defpath if the PATH environment variable is
1331 # set to an empty string
1333 # PATH='' doesn't match, whereas PATH=':' looks in the current directory
1337 paths
= path
.split(os
.pathsep
)
1339 f
= os
.path
.join(p
, executable
)
1340 if os
.path
.isfile(f
):
1341 # the file exists, we have a shot at spawn working
def find_program(filename):
    # type: (str) -> str
    """Locate `filename` on PATH, raising ValueError if it is absent."""
    path = find_executable(filename)
    if path is None:
        raise ValueError('%s not found' % filename)
    return path
def get_unit_name(fsid, daemon_type, daemon_id=None):
    # type: (str, str, Optional[Union[int, str]]) -> str
    """Return the systemd unit name for a daemon.

    Accepts either a bare daemon type, or a type plus id; note that a
    daemon_id of 0 is valid, so the check is against None, not truthiness.
    """
    if daemon_id is None:
        return 'ceph-%s@%s' % (fsid, daemon_type)
    return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
def get_unit_name_by_daemon_name(fsid, name):
    # type: (str, str) -> str
    """Look up the systemd unit for an already-deployed daemon by name."""
    daemon = get_daemon_description(fsid, name)
    try:
        return daemon['systemd_unit']
    except KeyError:
        raise Error('Failed to get unit name for {}'.format(daemon))
def check_unit(unit_name):
    # type: (str) -> Tuple[bool, str, bool]
    """Query systemd for a unit's enabled/active/installed status.

    Returns (enabled, state, installed) where state is one of
    'running', 'stopped', 'error', 'unknown'.
    """
    # NOTE: we ignore the exit code here because systemctl outputs
    # various exit codes based on the state of the service, but the
    # string result is more explicit (and sufficient).
    enabled = False
    installed = False
    try:
        out, err, code = call(['systemctl', 'is-enabled', unit_name],
                              verbose_on_failure=False)
        if code == 0:
            enabled = True
            installed = True
        elif "disabled" in out:
            installed = True
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        enabled = False
        installed = False

    state = 'unknown'
    try:
        out, err, code = call(['systemctl', 'is-active', unit_name],
                              verbose_on_failure=False)
        out = out.strip()
        if out in ['active']:
            state = 'running'
        elif out in ['inactive']:
            state = 'stopped'
        elif out in ['failed', 'auto-restart']:
            state = 'error'
        else:
            state = 'unknown'
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        state = 'unknown'
    return (enabled, state, installed)
def check_units(units, enabler=None):
    # type: (List[str], Optional[Packager]) -> bool
    """Return True if any of `units` is enabled and running.

    When `enabler` is given, installed-but-stopped units are enabled via
    enabler.enable_service() as a side effect.
    """
    for u in units:
        (enabled, state, installed) = check_unit(u)
        if enabled and state == 'running':
            logger.info('Unit %s is enabled and running' % u)
            return True
        if enabler is not None:
            if installed:
                logger.info('Enabling unit %s' % u)
                enabler.enable_service(u)
    return False
def get_legacy_config_fsid(cluster, legacy_dir=None):
    # type: (str, Optional[str]) -> Optional[str]
    """Read the fsid from a pre-cephadm /etc/ceph/<cluster>.conf, if any."""
    config_file = '/etc/ceph/%s.conf' % cluster
    if legacy_dir is not None:
        # legacy_dir lets tests/adoption point at a fake root
        config_file = os.path.abspath(legacy_dir + config_file)

    if os.path.exists(config_file):
        config = read_config(config_file)
        if config.has_section('global') and config.has_option('global', 'fsid'):
            return config.get('global', 'fsid')
    return None
def get_legacy_daemon_fsid(cluster, daemon_type, daemon_id, legacy_dir=None):
    # type: (str, str, Union[int, str], Optional[str]) -> Optional[str]
    """Determine the fsid of a legacy (pre-cephadm) daemon.

    OSDs keep their fsid in a ceph_fsid file in their data dir; other
    daemons fall back to the cluster config file.
    NOTE(review): some interior lines were lost in extraction; body follows
    upstream cephadm (octopus) -- verify.
    """
    fsid = None
    if daemon_type == 'osd':
        try:
            fsid_file = os.path.join(args.data_dir,
                                     daemon_type,
                                     'ceph-%s' % daemon_id,
                                     'ceph_fsid')
            if legacy_dir is not None:
                fsid_file = os.path.abspath(legacy_dir + fsid_file)
            with open(fsid_file, 'r') as f:
                fsid = f.read().strip()
        except IOError:
            pass
    if not fsid:
        fsid = get_legacy_config_fsid(cluster, legacy_dir=legacy_dir)
    return fsid
def get_daemon_args(fsid, daemon_type, daemon_id):
    # type: (str, str, Union[int, str]) -> List[str]
    """Build the daemon-specific command-line arguments for a container."""
    r = list()  # type: List[str]

    if daemon_type in Ceph.daemons and daemon_type != 'crash':
        # all ceph daemons log to stderr so the container runtime captures it
        r += [
            '--setuser', 'ceph',
            '--setgroup', 'ceph',
            '--default-log-to-file=false',
            '--default-log-to-stderr=true',
            '--default-log-stderr-prefix="debug "',
        ]
        if daemon_type == 'mon':
            r += [
                '--default-mon-cluster-log-to-file=false',
                '--default-mon-cluster-log-to-stderr=true',
            ]
    elif daemon_type in Monitoring.components:
        metadata = Monitoring.components[daemon_type]
        r += metadata.get('args', list())
        if daemon_type == 'alertmanager':
            # alertmanager needs its peer list for clustering
            config = get_parm(args.config_json)
            peers = config.get('peers', list())  # type: ignore
            for peer in peers:
                r += ["--cluster.peer={}".format(peer)]
    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
        r += nfs_ganesha.get_daemon_args()

    return r
def create_daemon_dirs(fsid, daemon_type, daemon_id, uid, gid,
                       config=None, keyring=None):
    # type: (str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None
    """Create the on-host data/log directories and seed config/keyring files.

    NOTE(review): interior lines were lost in extraction; body follows
    upstream cephadm (octopus) -- verify before merging.
    """
    data_dir = make_data_dir(fsid, daemon_type, daemon_id, uid=uid, gid=gid)
    make_log_dir(fsid, uid=uid, gid=gid)

    if config:
        config_path = os.path.join(data_dir, 'config')
        with open(config_path, 'w') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)  # config may contain secrets
            f.write(config)
    if keyring:
        keyring_path = os.path.join(data_dir, 'keyring')
        with open(keyring_path, 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write(keyring)

    if daemon_type in Monitoring.components.keys():
        config = get_parm(args.config_json)  # type: ignore
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())

        # Set up directories specific to the monitoring component
        config_dir = ''
        if daemon_type == 'prometheus':
            data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
            config_dir = 'etc/prometheus'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'alerting'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
        elif daemon_type == 'grafana':
            data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
            config_dir = 'etc/grafana'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
        elif daemon_type == 'alertmanager':
            data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
            config_dir = 'etc/alertmanager'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755)

        # populate the config directory for the component from the config-json
        for fname in required_files:
            if 'files' in config:  # type: ignore
                # a list of lines is joined; a plain string is used verbatim
                if isinstance(config['files'][fname], list):  # type: ignore
                    content = '\n'.join(config['files'][fname])  # type: ignore
                else:
                    content = config['files'][fname]  # type: ignore

                with open(os.path.join(data_dir_root, config_dir, fname), 'w') as f:
                    os.fchown(f.fileno(), uid, gid)
                    os.fchmod(f.fileno(), 0o600)
                    f.write(content)

    if daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
        nfs_ganesha.create_daemon_dirs(data_dir, uid, gid)

    if daemon_type == CephIscsi.daemon_type:
        ceph_iscsi = CephIscsi.init(fsid, daemon_id)
        ceph_iscsi.create_daemon_dirs(data_dir, uid, gid)
def get_parm(option):
    # type: (str) -> Dict[str, str]
    """Parse a --config-json style option into a dict.

    Accepts '-' (read stdin once, cached), an inline '{...}' JSON string,
    or a path to a JSON file.  Raises Error on missing file / bad JSON.
    """
    if not option:
        return dict()

    global cached_stdin
    if option == '-':
        if cached_stdin is not None:
            j = cached_stdin
        else:
            try:
                # support the piped-script invocation mode (see module docs)
                j = injected_stdin  # type: ignore
            except NameError:
                j = sys.stdin.read()
            cached_stdin = j
    else:
        # inline json string
        if option[0] == '{' and option[-1] == '}':
            j = option
        # json file
        elif os.path.exists(option):
            with open(option, 'r') as f:
                j = f.read()
        else:
            raise Error("Config file {} not found".format(option))

    try:
        js = json.loads(j)
    except ValueError as e:
        raise Error("Invalid JSON in {}: {}".format(option, e))
    else:
        return js
def get_config_and_keyring():
    # type: () -> Tuple[Optional[str], Optional[str]]
    """Resolve (config, keyring) text from the CLI args.

    Precedence: --config-json supplies both; --config overrides the config;
    --key (with --name) or --keyring overrides the keyring.
    """
    config = None
    keyring = None

    if 'config_json' in args and args.config_json:
        d = get_parm(args.config_json)
        config = d.get('config')
        keyring = d.get('keyring')

    if 'config' in args and args.config:
        with open(args.config, 'r') as f:
            config = f.read()

    if 'key' in args and args.key:
        keyring = '[%s]\n\tkey = %s\n' % (args.name, args.key)
    elif 'keyring' in args and args.keyring:
        with open(args.keyring, 'r') as f:
            keyring = f.read()

    return (config, keyring)
def get_container_mounts(fsid, daemon_type, daemon_id,
                         no_config=False):
    # type: (str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str]
    """Map host paths -> container paths for a daemon's bind mounts.

    NOTE(review): interior lines were lost in extraction; body follows
    upstream cephadm (octopus) -- verify before merging.
    """
    mounts = dict()

    if daemon_type in Ceph.daemons:
        if fsid:
            run_path = os.path.join('/var/run/ceph', fsid)
            if os.path.exists(run_path):
                mounts[run_path] = '/var/run/ceph:z'
            log_dir = get_log_dir(fsid)
            mounts[log_dir] = '/var/log/ceph:z'
            crash_dir = '/var/lib/ceph/%s/crash' % fsid
            if os.path.exists(crash_dir):
                mounts[crash_dir] = '/var/lib/ceph/crash:z'

    if daemon_type in Ceph.daemons and daemon_id:
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        if daemon_type == 'rgw':
            cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id)
        else:
            cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id)
        if daemon_type != 'crash':
            mounts[data_dir] = cdata_dir + ':z'
        if not no_config:
            mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
        if daemon_type == 'rbd-mirror' or daemon_type == 'crash':
            # these do not search for their keyrings in a data directory
            mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id)

    if daemon_type in ['mon', 'osd']:
        mounts['/dev'] = '/dev'  # FIXME: narrow this down?
        mounts['/run/udev'] = '/run/udev'
    if daemon_type == 'osd':
        mounts['/sys'] = '/sys'  # for numa.cc, pick_address, cgroups, ...
        mounts['/run/lvm'] = '/run/lvm'
        mounts['/run/lock/lvm'] = '/run/lock/lvm'

    try:
        if args.shared_ceph_folder:  # make easy manager modules/ceph-volume development
            ceph_folder = pathify(args.shared_ceph_folder)
            if os.path.exists(ceph_folder):
                mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
                mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
                mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
                mounts[ceph_folder + '/monitoring/grafana/dashboards'] = '/etc/grafana/dashboards/ceph-dashboard'
                mounts[ceph_folder + '/monitoring/prometheus/alerts'] = '/etc/prometheus/ceph'
            else:
                logger.error('{}{}{}'.format(termcolor.red,
                                             'Ceph shared source folder does not exist.',
                                             termcolor.end))
    except AttributeError:
        # not all subcommands define args.shared_ceph_folder
        pass

    if daemon_type in Monitoring.components and daemon_id:
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        if daemon_type == 'prometheus':
            mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z'
            mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z'
        elif daemon_type == 'node-exporter':
            mounts['/proc'] = '/host/proc:ro'
            mounts['/sys'] = '/host/sys:ro'
            mounts['/'] = '/rootfs:ro'
        elif daemon_type == "grafana":
            mounts[os.path.join(data_dir, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
        elif daemon_type == 'alertmanager':
            mounts[os.path.join(data_dir, 'etc/alertmanager')] = '/alertmanager:Z'

    if daemon_type == NFSGanesha.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        mounts.update(NFSGanesha.get_container_mounts(data_dir))

    if daemon_type == CephIscsi.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        log_dir = get_log_dir(fsid)
        mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir))

    return mounts
def get_container(fsid, daemon_type, daemon_id,
                  privileged=False,
                  ptrace=False,
                  container_args=None):
    # type: (str, str, Union[int, str], bool, bool, Optional[List[str]]) -> CephContainer
    """Construct the CephContainer used to run a daemon.

    NOTE(review): interior lines were lost in extraction; body follows
    upstream cephadm (octopus) -- verify before merging.

    BUGFIX: container_args previously defaulted to a shared mutable list
    ([]); the .extend() for monitoring daemons then leaked '--user' args
    into every later call that used the default.  Default to None and
    work on a private copy instead (backward-compatible for callers).
    """
    container_args = list(container_args) if container_args else []

    if daemon_type in ['mon', 'osd']:
        # mon and osd need privileged in order for libudev to query devices
        privileged = True
    if daemon_type == 'rgw':
        entrypoint = '/usr/bin/radosgw'
        name = 'client.rgw.%s' % daemon_id
    elif daemon_type == 'rbd-mirror':
        entrypoint = '/usr/bin/rbd-mirror'
        name = 'client.rbd-mirror.%s' % daemon_id
    elif daemon_type == 'crash':
        entrypoint = '/usr/bin/ceph-crash'
        name = 'client.crash.%s' % daemon_id
    elif daemon_type in ['mon', 'mgr', 'mds', 'osd']:
        entrypoint = '/usr/bin/ceph-' + daemon_type
        name = '%s.%s' % (daemon_type, daemon_id)
    elif daemon_type in Monitoring.components:
        entrypoint = ''
        name = ''
    elif daemon_type == NFSGanesha.daemon_type:
        entrypoint = NFSGanesha.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
    elif daemon_type == CephIscsi.daemon_type:
        entrypoint = CephIscsi.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        # So the container can modprobe iscsi_target_mod and have write perms
        # to configfs we need to make this a privileged container.
        privileged = True
    else:
        entrypoint = ''
        name = ''

    ceph_args = []  # type: List[str]
    if daemon_type in Monitoring.components:
        uid, gid = extract_uid_gid_monitoring(daemon_type)
        m = Monitoring.components[daemon_type]  # type: ignore
        metadata = m.get('image', dict())  # type: ignore
        monitoring_args = [
            '--user',
            str(uid),
            # FIXME: disable cpu/memory limits for the time being (not supported
            # by ubuntu 18.04 kernel!)
            #'--cpus',
            #metadata.get('cpus', '2'),
            #'--memory',
            #metadata.get('memory', '4GB')
        ]
        container_args.extend(monitoring_args)
    elif daemon_type == 'crash':
        ceph_args = ['-n', name]
    elif daemon_type in Ceph.daemons:
        ceph_args = ['-n', name, '-f']

    envs = []  # type: List[str]
    if daemon_type == NFSGanesha.daemon_type:
        envs.extend(NFSGanesha.get_container_envs())

    return CephContainer(
        image=args.image,
        entrypoint=entrypoint,
        args=ceph_args + get_daemon_args(fsid, daemon_type, daemon_id),
        container_args=container_args,
        volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
        cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id),
        envs=envs,
        privileged=privileged,
        ptrace=ptrace,
    )
def extract_uid_gid(img='', file_path='/var/lib/ceph'):
    # type: (str, str) -> Tuple[int, int]
    """Discover the uid/gid of the ceph user inside the container image
    by stat-ing a path the image owns as ceph (default /var/lib/ceph)."""
    if not img:
        img = args.image

    out = CephContainer(
        image=img,
        entrypoint='stat',
        args=['-c', '%u %g', file_path]
    ).run()
    uid, gid = out.split(' ')
    return (int(uid), int(gid))
def deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid,
                  config=None, keyring=None,
                  osd_fsid=None,
                  reconfig=False):
    # type: (str, str, Union[int, str], CephContainer, int, int, Optional[str], Optional[str], Optional[str], Optional[bool]) -> None
    """Create a daemon's dirs/files and (unless reconfiguring) its units.

    NOTE(review): interior lines were lost in extraction; body follows
    upstream cephadm (octopus) -- verify before merging.
    """
    data_dir = get_data_dir(fsid, daemon_type, daemon_id)
    if reconfig and not os.path.exists(data_dir):
        raise Error('cannot reconfig, data path %s does not exist' % data_dir)
    if daemon_type == 'mon' and not os.path.exists(data_dir):
        assert config
        assert keyring

        # tmp keyring + config files for the --mkfs run
        tmp_keyring = write_tmp(keyring, uid, gid)
        tmp_config = write_tmp(config, uid, gid)

        # --mkfs
        create_daemon_dirs(fsid, daemon_type, daemon_id, uid, gid)
        mon_dir = get_data_dir(fsid, 'mon', daemon_id)
        log_dir = get_log_dir(fsid)
        out = CephContainer(
            image=args.image,
            entrypoint='/usr/bin/ceph-mon',
            args=['--mkfs',
                  '-i', str(daemon_id),
                  '--fsid', fsid,
                  '-c', '/tmp/config',
                  '--keyring', '/tmp/keyring',
            ] + get_daemon_args(fsid, 'mon', daemon_id),
            volume_mounts={
                log_dir: '/var/log/ceph:z',
                mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id),
                tmp_keyring.name: '/tmp/keyring:z',
                tmp_config.name: '/tmp/config:z',
            },
        ).run()

        # write conf
        with open(mon_dir + '/config', 'w') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config)
    else:
        # dirs, conf, keyring
        create_daemon_dirs(
            fsid, daemon_type, daemon_id,
            uid, gid,
            config, keyring)

    if not reconfig:
        deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
                            osd_fsid=osd_fsid)

    if not os.path.exists(data_dir + '/unit.created'):
        with open(data_dir + '/unit.created', 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write('mtime is time the daemon deployment was created\n')

    with open(data_dir + '/unit.configured', 'w') as f:
        f.write('mtime is time we were last configured\n')
        os.fchmod(f.fileno(), 0o600)
        os.fchown(f.fileno(), uid, gid)

    update_firewalld(daemon_type)

    if reconfig and daemon_type not in Ceph.daemons:
        # ceph daemons do not need a restart; others (presumably) do to pick
        # up the new config
        call_throws(['systemctl', 'reset-failed',
                     get_unit_name(fsid, daemon_type, daemon_id)])
        call_throws(['systemctl', 'restart',
                     get_unit_name(fsid, daemon_type, daemon_id)])
def deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True, start=True,
                        osd_fsid=None):
    # type: (str, int, int, str, Union[int, str], CephContainer, bool, bool, Optional[str]) -> None
    """Write unit.run/unit.poststop/unit.image and wire up systemd units.

    Files are written to *.new and renamed into place so a crash mid-write
    never leaves a truncated script behind.
    NOTE(review): interior lines were lost in extraction; body follows
    upstream cephadm (octopus) -- verify before merging.
    """
    data_dir = get_data_dir(fsid, daemon_type, daemon_id)

    # run script (pre-start steps + the container run command)
    with open(data_dir + '/unit.run.new', 'w') as f:
        if daemon_type == 'osd':
            # osds have a pre-start step
            assert osd_fsid
            f.write('# Simple OSDs need chown on startup:\n')
            for n in ['block', 'block.db', 'block.wal']:
                p = os.path.join(data_dir, n)
                f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
            f.write('# LVM OSDs use ceph-volume lvm activate:\n')
            prestart = CephContainer(
                image=args.image,
                entrypoint='/usr/sbin/ceph-volume',
                args=[
                    'lvm', 'activate',
                    str(daemon_id), osd_fsid,
                    '--no-systemd'
                ],
                privileged=True,
                volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
                cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
            )
            f.write(' '.join(prestart.run_cmd()) + '\n')
        elif daemon_type == NFSGanesha.daemon_type:
            # add nfs to the rados grace db
            nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
            prestart = nfs_ganesha.get_rados_grace_container('add')
            f.write(' '.join(prestart.run_cmd()) + '\n')
        elif daemon_type == CephIscsi.daemon_type:
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')

        if daemon_type in Ceph.daemons:
            install_path = find_program('install')
            f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))

        # container run command
        f.write(' '.join(c.run_cmd()) + '\n')
        os.fchmod(f.fileno(), 0o600)
    os.rename(data_dir + '/unit.run.new',
              data_dir + '/unit.run')

    # post-stop command(s)
    with open(data_dir + '/unit.poststop.new', 'w') as f:
        if daemon_type == 'osd':
            assert osd_fsid
            poststop = CephContainer(
                image=args.image,
                entrypoint='/usr/sbin/ceph-volume',
                args=[
                    'lvm', 'deactivate',
                    str(daemon_id), osd_fsid,
                ],
                privileged=True,
                volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
                cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type,
                                                    daemon_id),
            )
            f.write(' '.join(poststop.run_cmd()) + '\n')
        elif daemon_type == NFSGanesha.daemon_type:
            # remove nfs from the rados grace db
            nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
            poststop = nfs_ganesha.get_rados_grace_container('remove')
            f.write(' '.join(poststop.run_cmd()) + '\n')
        elif daemon_type == CephIscsi.daemon_type:
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
        os.fchmod(f.fileno(), 0o600)
    os.rename(data_dir + '/unit.poststop.new',
              data_dir + '/unit.poststop')

    # record the image the daemon was deployed with
    with open(data_dir + '/unit.image.new', 'w') as f:
        f.write(c.image + '\n')
        os.fchmod(f.fileno(), 0o600)
    os.rename(data_dir + '/unit.image.new',
              data_dir + '/unit.image')

    # systemd
    install_base_units(fsid)
    unit = get_unit_file(fsid)
    unit_file = 'ceph-%s@.service' % (fsid)
    with open(args.unit_dir + '/' + unit_file + '.new', 'w') as f:
        f.write(unit)
        os.rename(args.unit_dir + '/' + unit_file + '.new',
                  args.unit_dir + '/' + unit_file)
    call_throws(['systemctl', 'daemon-reload'])

    unit_name = get_unit_name(fsid, daemon_type, daemon_id)
    call(['systemctl', 'stop', unit_name],
         verbose_on_failure=False)
    call(['systemctl', 'reset-failed', unit_name],
         verbose_on_failure=False)
    if enable:
        call_throws(['systemctl', 'enable', unit_name])
    if start:
        call_throws(['systemctl', 'start', unit_name])
def update_firewalld(daemon_type):
    # type: (str) -> None
    """Open the firewalld services/ports a daemon type needs, if firewalld
    is present and enabled (no-op otherwise, or with --skip-firewalld)."""
    if args.skip_firewalld:
        return
    cmd = find_executable('firewall-cmd')
    if not cmd:
        logger.debug('firewalld does not appear to be present')
        return
    (enabled, state, _) = check_unit('firewalld.service')
    if not enabled:
        logger.debug('firewalld.service is not enabled')
        return

    fw_services = []
    fw_ports = []
    if daemon_type == 'mon':
        fw_services.append('ceph-mon')
    elif daemon_type in ['mgr', 'mds', 'osd']:
        fw_services.append('ceph')
    if daemon_type == 'mgr':
        fw_ports.append(8080)  # dashboard
        fw_ports.append(8443)  # dashboard
        fw_ports.append(9283)  # mgr/prometheus exporter
    elif daemon_type in Monitoring.port_map.keys():
        fw_ports.extend(Monitoring.port_map[daemon_type])  # prometheus etc
    elif daemon_type == NFSGanesha.daemon_type:
        fw_services.append('nfs')

    for svc in fw_services:
        # only add if not already present (query returns nonzero if absent)
        out, err, ret = call([cmd, '--permanent', '--query-service', svc])
        if ret:
            logger.info('Enabling firewalld service %s in current zone...' % svc)
            out, err, ret = call([cmd, '--permanent', '--add-service', svc])
            if ret:
                raise RuntimeError(
                    'unable to add service %s to current zone: %s' % (svc, err))
        else:
            logger.debug('firewalld service %s is enabled in current zone' % svc)
    for port in fw_ports:
        tcp_port = str(port) + '/tcp'
        out, err, ret = call([cmd, '--permanent', '--query-port', tcp_port])
        if ret:
            logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
            out, err, ret = call([cmd, '--permanent', '--add-port', tcp_port])
            if ret:
                raise RuntimeError('unable to add port %s to current zone: %s' %
                                   (tcp_port, err))
        else:
            logger.debug('firewalld port %s is enabled in current zone' % tcp_port)
    call_throws([cmd, '--reload'])
def install_base_units(fsid):
    # type: (str) -> None
    """
    Set up ceph.target and ceph-$fsid.target units.

    NOTE(review): interior lines were lost in extraction; body follows
    upstream cephadm (octopus) -- verify before merging.
    """
    # global unit
    existed = os.path.exists(args.unit_dir + '/ceph.target')
    with open(args.unit_dir + '/ceph.target.new', 'w') as f:
        f.write('[Unit]\n'
                'Description=All Ceph clusters and services\n'
                '\n'
                '[Install]\n'
                'WantedBy=multi-user.target\n')
        os.rename(args.unit_dir + '/ceph.target.new',
                  args.unit_dir + '/ceph.target')
    if not existed:
        # we disable before enable in case a different ceph.target
        # (from the traditional package) is present; while newer
        # systemd is smart enough to disable the old
        # (/lib/systemd/...) and enable the new (/etc/systemd/...),
        # some older versions of systemd error out with EEXIST.
        call_throws(['systemctl', 'disable', 'ceph.target'])
        call_throws(['systemctl', 'enable', 'ceph.target'])
        call_throws(['systemctl', 'start', 'ceph.target'])

    # cluster unit
    existed = os.path.exists(args.unit_dir + '/ceph-%s.target' % fsid)
    with open(args.unit_dir + '/ceph-%s.target.new' % fsid, 'w') as f:
        f.write('[Unit]\n'
                'Description=Ceph cluster {fsid}\n'
                'PartOf=ceph.target\n'
                'Before=ceph.target\n'
                '\n'
                '[Install]\n'
                'WantedBy=multi-user.target ceph.target\n'.format(
                    fsid=fsid)
                )
        os.rename(args.unit_dir + '/ceph-%s.target.new' % fsid,
                  args.unit_dir + '/ceph-%s.target' % fsid)
    if not existed:
        call_throws(['systemctl', 'enable', 'ceph-%s.target' % fsid])
        call_throws(['systemctl', 'start', 'ceph-%s.target' % fsid])

    # logrotate for the cluster
    with open(args.logrotate_dir + '/ceph-%s' % fsid, 'w') as f:
        # This is a bit sloppy in that the killall/pkill will touch all ceph daemons
        # in all containers, but I don't see an elegant way to send SIGHUP *just* to
        # the daemons for this cluster.  (1) systemd kill -s will get the signal to
        # podman, but podman will exit.  (2) podman kill will get the signal to the
        # first child (bash), but that isn't the ceph daemon.  This is simpler and
        # should be harmless.
        f.write("""# created by cephadm
/var/log/ceph/%s/*.log {
    rotate 7
    daily
    compress
    sharedscripts
    postrotate
        killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror || pkill -1 -x "ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror" || true
    endscript
    missingok
    notifempty
    su root root
}
""" % fsid)
def get_unit_file(fsid):
    # type: (str) -> str
    """Render the ceph-<fsid>@.service systemd unit template.

    NOTE(review): parts of the template were lost in extraction; body
    follows upstream cephadm (octopus) -- verify before merging.
    """
    u = """# generated by cephadm
[Unit]
Description=Ceph %i for {fsid}

# According to:
#   http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
# these can be removed once ceph-mon will dynamically change network
# configuration.
After=network-online.target local-fs.target time-sync.target
Wants=network-online.target local-fs.target time-sync.target

PartOf=ceph-{fsid}.target
Before=ceph-{fsid}.target

[Service]
LimitNOFILE=1048576
LimitNPROC=1048576
EnvironmentFile=-/etc/environment
ExecStartPre=-{container_path} rm ceph-{fsid}-%i
ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
ExecStop=-{container_path} stop ceph-{fsid}-%i
ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
KillMode=none
Restart=on-failure
RestartSec=10s
TimeoutStartSec=120
TimeoutStopSec=15
StartLimitInterval=30min
StartLimitBurst=5

[Install]
WantedBy=ceph-{fsid}.target
""".format(
        container_path=container_path,
        fsid=fsid,
        data_dir=args.data_dir)
    return u
2107 ##################################
class CephContainer:
    """A podman/docker invocation builder for one ceph container.

    NOTE(review): interior lines (including the __init__ signature) were
    lost in extraction; body follows upstream cephadm (octopus) -- verify
    before merging.
    """
    def __init__(self,
                 image,
                 entrypoint,
                 args=[],
                 volume_mounts={},
                 cname='',
                 container_args=[],
                 envs=None,
                 privileged=False,
                 ptrace=False):
        # type: (str, str, List[str], Dict[str, str], str, List[str], Optional[List[str]], bool, bool) -> None
        self.image = image
        self.entrypoint = entrypoint
        self.args = args
        self.volume_mounts = volume_mounts
        self.cname = cname
        self.container_args = container_args
        self.envs = envs
        self.privileged = privileged
        self.ptrace = ptrace

    def run_cmd(self):
        # type: () -> List[str]
        """Build the `<runtime> run ...` argv for this container."""
        vols = []  # type: List[str]
        envs = []  # type: List[str]
        cname = []  # type: List[str]
        entrypoint = []  # type: List[str]
        if self.entrypoint:
            entrypoint = ['--entrypoint', self.entrypoint]

        priv = []  # type: List[str]
        if self.privileged:
            priv = ['--privileged',
                    # let OSD etc read block devs that haven't been chowned
                    '--group-add=disk']
        if self.ptrace:
            priv.append('--cap-add=SYS_PTRACE')
        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        envs = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        if self.envs:
            for e in self.envs:
                envs.extend(['-e', e])
        cname = ['--name', self.cname] if self.cname else []
        return [
            str(container_path),
            'run',
            '--rm',
            '--net=host',
            '--ipc=host',
        ] + self.container_args + priv + \
            cname + envs + \
            vols + entrypoint + \
            [
                self.image
            ] + self.args  # type: ignore

    def shell_cmd(self, cmd):
        # type: (List[str]) -> List[str]
        """Build an argv that runs `cmd` inside a fresh container."""
        priv = []  # type: List[str]
        if self.privileged:
            priv = ['--privileged',
                    # let OSD etc read block devs that haven't been chowned
                    '--group-add=disk']
        vols = []  # type: List[str]
        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        envs = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        if self.envs:
            for e in self.envs:
                envs.extend(['-e', e])
        cmd_args = []  # type: List[str]
        if cmd:
            cmd_args = ['-c'] + cmd
        return [
            str(container_path),
            'run',
            '--rm',
            '--net=host',
            '--ipc=host',
        ] + self.container_args + priv + envs + vols + [
            '--entrypoint', cmd[0],
            self.image,
        ] + cmd[1:]

    def exec_cmd(self, cmd):
        # type: (List[str]) -> List[str]
        """Build an argv that execs `cmd` in the running named container."""
        return [
            str(container_path),
            'exec',
        ] + self.container_args + [
            self.cname,
        ] + cmd

    def run(self, timeout=DEFAULT_TIMEOUT):
        # type: (Optional[int]) -> str
        """Run the container to completion and return its stdout."""
        logger.debug(self.run_cmd())
        out, _, _ = call_throws(
            self.run_cmd(), desc=self.entrypoint, timeout=timeout)
        return out
2219 ##################################
def command_version():
    # type: () -> int
    """Print the ceph version reported by the configured container image."""
    out = CephContainer(args.image, 'ceph', ['--version']).run()
    print(out.strip())
    return 0
2228 ##################################
def command_pull():
    # type: () -> int
    """Pull the configured container image, then report on it.

    NOTE(review): the def line was lost in extraction; reconstructed from
    the visible body -- verify against upstream cephadm.
    """
    logger.info('Pulling latest %s...' % args.image)
    call_throws([container_path, 'pull', args.image])
    return command_inspect_image()
2237 ##################################
def command_inspect_image():
    # type: () -> int
    """Print (as JSON) the id and ceph version of the configured image."""
    out, err, ret = call_throws([
        container_path, 'inspect',
        '--format', '{{.Id}}',
        args.image])
    if ret:
        return errno.ENOENT
    image_id = normalize_container_id(out.strip())
    ver = CephContainer(args.image, 'ceph', ['--version']).run().strip()
    r = {
        'image_id': image_id,
        'ceph_version': ver,
    }
    print(json.dumps(r, indent=4, sort_keys=True))
    return 0
2257 ##################################
2260 def command_bootstrap():
2263 if not args
.output_config
:
2264 args
.output_config
= os
.path
.join(args
.output_dir
, 'ceph.conf')
2265 if not args
.output_keyring
:
2266 args
.output_keyring
= os
.path
.join(args
.output_dir
,
2267 'ceph.client.admin.keyring')
2268 if not args
.output_pub_ssh_key
:
2269 args
.output_pub_ssh_key
= os
.path
.join(args
.output_dir
, 'ceph.pub')
2271 # verify output files
2272 for f
in [args
.output_config
, args
.output_keyring
, args
.output_pub_ssh_key
]:
2273 if not args
.allow_overwrite
:
2274 if os
.path
.exists(f
):
2275 raise Error('%s already exists; delete or pass '
2276 '--allow-overwrite to overwrite' % f
)
2277 dirname
= os
.path
.dirname(f
)
2278 if dirname
and not os
.path
.exists(dirname
):
2279 raise Error('%s directory %s does not exist' % (f
, dirname
))
2281 if not args
.skip_prepare_host
:
2282 command_prepare_host()
2284 logger
.info('Skip prepare_host')
2287 fsid
= args
.fsid
or make_fsid()
2288 hostname
= get_hostname()
2289 if '.' in hostname
and not args
.allow_fqdn_hostname
:
2290 raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname
, hostname
.split('.')[0]))
2291 mon_id
= args
.mon_id
or hostname
2292 mgr_id
= args
.mgr_id
or generate_service_id()
2293 logging
.info('Cluster fsid: %s' % fsid
)
2299 r
= re
.compile(r
':(\d+)$')
2302 hasport
= r
.findall(args
.mon_ip
)
2304 port
= int(hasport
[0])
2306 addr_arg
= '[v1:%s]' % args
.mon_ip
2308 addr_arg
= '[v2:%s]' % args
.mon_ip
2310 logger
.warning('Using msgr2 protocol for unrecognized port %d' %
2312 addr_arg
= '[v2:%s]' % args
.mon_ip
2313 base_ip
= args
.mon_ip
[0:-(len(str(port
)))-1]
2314 check_ip_port(base_ip
, port
)
2316 base_ip
= args
.mon_ip
2317 addr_arg
= '[v2:%s:3300,v1:%s:6789]' % (args
.mon_ip
, args
.mon_ip
)
2318 check_ip_port(args
.mon_ip
, 3300)
2319 check_ip_port(args
.mon_ip
, 6789)
2320 elif args
.mon_addrv
:
2321 addr_arg
= args
.mon_addrv
2322 if addr_arg
[0] != '[' or addr_arg
[-1] != ']':
2323 raise Error('--mon-addrv value %s must use square backets' %
2325 for addr
in addr_arg
[1:-1].split(','):
2326 hasport
= r
.findall(addr
)
2328 raise Error('--mon-addrv value %s must include port number' %
2330 port
= int(hasport
[0])
2331 # strip off v1: or v2: prefix
2332 addr
= re
.sub(r
'^\w+:', '', addr
)
2333 base_ip
= addr
[0:-(len(str(port
)))-1]
2334 check_ip_port(base_ip
, port
)
2336 raise Error('must specify --mon-ip or --mon-addrv')
2337 logger
.debug('Base mon IP is %s, final addrv is %s' % (base_ip
, addr_arg
))
2340 if not args
.skip_mon_network
:
2341 # make sure IP is configured locally, and then figure out the
2343 for net
, ips
in list_networks().items():
2346 logger
.info('Mon IP %s is in CIDR network %s' % (base_ip
,
2350 raise Error('Failed to infer CIDR network for mon ip %s; pass '
2351 '--skip-mon-network to configure it later' % base_ip
)
2354 cp
= read_config(args
.config
)
2355 if not cp
.has_section('global'):
2356 cp
.add_section('global')
2357 cp
.set('global', 'fsid', fsid
);
2358 cp
.set('global', 'mon host', addr_arg
)
2359 cp
.set('global', 'container_image', args
.image
)
2362 config
= cpf
.getvalue()
2364 if not args
.skip_pull
:
2365 logger
.info('Pulling latest %s container...' % args
.image
)
2366 call_throws([container_path
, 'pull', args
.image
])
2368 logger
.info('Extracting ceph user uid/gid from container image...')
2369 (uid
, gid
) = extract_uid_gid()
2371 # create some initial keys
2372 logger
.info('Creating initial keys...')
2373 mon_key
= CephContainer(
2375 entrypoint
='/usr/bin/ceph-authtool',
2376 args
=['--gen-print-key'],
2378 admin_key
= CephContainer(
2380 entrypoint
='/usr/bin/ceph-authtool',
2381 args
=['--gen-print-key'],
2383 mgr_key
= CephContainer(
2385 entrypoint
='/usr/bin/ceph-authtool',
2386 args
=['--gen-print-key'],
2389 keyring
= ('[mon.]\n'
2391 '\tcaps mon = allow *\n'
2394 '\tcaps mon = allow *\n'
2395 '\tcaps mds = allow *\n'
2396 '\tcaps mgr = allow *\n'
2397 '\tcaps osd = allow *\n'
2400 '\tcaps mon = profile mgr\n'
2401 '\tcaps mds = allow *\n'
2402 '\tcaps osd = allow *\n'
2403 % (mon_key
, admin_key
, mgr_id
, mgr_key
))
2406 tmp_bootstrap_keyring
= write_tmp(keyring
, uid
, gid
)
2408 # create initial monmap, tmp monmap file
2409 logger
.info('Creating initial monmap...')
2410 tmp_monmap
= write_tmp('', 0, 0)
2411 out
= CephContainer(
2413 entrypoint
='/usr/bin/monmaptool',
2417 '--addv', mon_id
, addr_arg
,
2421 tmp_monmap
.name
: '/tmp/monmap:z',
2425 # pass monmap file to ceph user for use by ceph-mon --mkfs below
2426 os
.fchown(tmp_monmap
.fileno(), uid
, gid
)
2429 logger
.info('Creating mon...')
2430 create_daemon_dirs(fsid
, 'mon', mon_id
, uid
, gid
)
2431 mon_dir
= get_data_dir(fsid
, 'mon', mon_id
)
2432 log_dir
= get_log_dir(fsid
)
2433 out
= CephContainer(
2435 entrypoint
='/usr/bin/ceph-mon',
2440 '--monmap', '/tmp/monmap',
2441 '--keyring', '/tmp/keyring',
2442 ] + get_daemon_args(fsid
, 'mon', mon_id
),
2444 log_dir
: '/var/log/ceph:z',
2445 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id
),
2446 tmp_bootstrap_keyring
.name
: '/tmp/keyring:z',
2447 tmp_monmap
.name
: '/tmp/monmap:z',
2451 with
open(mon_dir
+ '/config', 'w') as f
:
2452 os
.fchown(f
.fileno(), uid
, gid
)
2453 os
.fchmod(f
.fileno(), 0o600)
2456 make_var_run(fsid
, uid
, gid
)
2457 mon_c
= get_container(fsid
, 'mon', mon_id
)
2458 deploy_daemon(fsid
, 'mon', mon_id
, mon_c
, uid
, gid
,
2459 config
=None, keyring
=None)
2461 # client.admin key + config to issue various CLI commands
2462 tmp_admin_keyring
= write_tmp('[client.admin]\n'
2463 '\tkey = ' + admin_key
+ '\n',
2465 tmp_config
= write_tmp(config
, uid
, gid
)
2467 # a CLI helper to reduce our typing
2468 def cli(cmd
, extra_mounts
={}, timeout
=DEFAULT_TIMEOUT
):
2469 # type: (List[str], Dict[str, str], Optional[int]) -> str
2471 log_dir
: '/var/log/ceph:z',
2472 tmp_admin_keyring
.name
: '/etc/ceph/ceph.client.admin.keyring:z',
2473 tmp_config
.name
: '/etc/ceph/ceph.conf:z',
2475 for k
, v
in extra_mounts
.items():
2477 timeout
= timeout
or args
.timeout
2478 return CephContainer(
2480 entrypoint
='/usr/bin/ceph',
2482 volume_mounts
=mounts
,
2483 ).run(timeout
=timeout
)
2485 logger
.info('Waiting for mon to start...')
2488 entrypoint
='/usr/bin/ceph',
2492 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id
),
2493 tmp_admin_keyring
.name
: '/etc/ceph/ceph.client.admin.keyring:z',
2494 tmp_config
.name
: '/etc/ceph/ceph.conf:z',
2498 # wait for the service to become available
2499 def is_mon_available():
2501 timeout
=args
.timeout
if args
.timeout
else 30 # seconds
2502 out
, err
, ret
= call(c
.run_cmd(),
2506 is_available('mon', is_mon_available
)
2508 # assimilate and minimize config
2509 if not args
.no_minimize_config
:
2510 logger
.info('Assimilating anything we can from ceph.conf...')
2512 'config', 'assimilate-conf',
2513 '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
2515 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
2517 logger
.info('Generating new minimal ceph.conf...')
2519 'config', 'generate-minimal-conf',
2520 '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
2522 mon_dir
: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
2524 # re-read our minimized config
2525 with
open(mon_dir
+ '/config', 'r') as f
:
2527 logger
.info('Restarting the monitor...')
2531 get_unit_name(fsid
, 'mon', mon_id
)
2535 logger
.info('Setting mon public_network...')
2536 cli(['config', 'set', 'mon', 'public_network', mon_network
])
2539 logger
.info('Creating mgr...')
2540 mgr_keyring
= '[mgr.%s]\n\tkey = %s\n' % (mgr_id
, mgr_key
)
2541 mgr_c
= get_container(fsid
, 'mgr', mgr_id
)
2542 deploy_daemon(fsid
, 'mgr', mgr_id
, mgr_c
, uid
, gid
,
2543 config
=config
, keyring
=mgr_keyring
)
2546 with
open(args
.output_keyring
, 'w') as f
:
2547 os
.fchmod(f
.fileno(), 0o600)
2548 f
.write('[client.admin]\n'
2549 '\tkey = ' + admin_key
+ '\n')
2550 logger
.info('Wrote keyring to %s' % args
.output_keyring
)
2552 with
open(args
.output_config
, 'w') as f
:
2554 logger
.info('Wrote config to %s' % args
.output_config
)
2556 # wait for the service to become available
2557 logger
.info('Waiting for mgr to start...')
2558 def is_mgr_available():
2560 timeout
=args
.timeout
if args
.timeout
else 30 # seconds
2562 out
= cli(['status', '-f', 'json-pretty'], timeout
=timeout
)
2564 return j
.get('mgrmap', {}).get('available', False)
2565 except Exception as e
:
2566 logger
.debug('status failed: %s' % e
)
2568 is_available('mgr', is_mgr_available
)
2570 # wait for mgr to restart (after enabling a module)
2571 def wait_for_mgr_restart():
2572 # first get latest mgrmap epoch from the mon
2573 out
= cli(['mgr', 'dump'])
2576 # wait for mgr to have it
2577 logger
.info('Waiting for the mgr to restart...')
2578 def mgr_has_latest_epoch():
2581 out
= cli(['tell', 'mgr', 'mgr_status'])
2583 return j
['mgrmap_epoch'] >= epoch
2584 except Exception as e
:
2585 logger
.debug('tell mgr mgr_status failed: %s' % e
)
2587 is_available('Mgr epoch %d' % epoch
, mgr_has_latest_epoch
)
2590 if not args
.skip_ssh
:
2591 logger
.info('Enabling cephadm module...')
2592 cli(['mgr', 'module', 'enable', 'cephadm'])
2593 wait_for_mgr_restart()
2595 logger
.info('Setting orchestrator backend to cephadm...')
2596 cli(['orch', 'set', 'backend', 'cephadm'])
2599 logger
.info('Using provided ssh config...')
2601 pathify(args
.ssh_config
.name
): '/tmp/cephadm-ssh-config:z',
2603 cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts
=mounts
)
2605 if args
.ssh_private_key
and args
.ssh_public_key
:
2606 logger
.info('Using provided ssh keys...')
2608 pathify(args
.ssh_private_key
.name
): '/tmp/cephadm-ssh-key:z',
2609 pathify(args
.ssh_public_key
.name
): '/tmp/cephadm-ssh-key.pub:z'
2611 cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts
=mounts
)
2612 cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts
=mounts
)
2614 logger
.info('Generating ssh key...')
2615 cli(['cephadm', 'generate-key'])
2616 ssh_pub
= cli(['cephadm', 'get-pub-key'])
2618 with
open(args
.output_pub_ssh_key
, 'w') as f
:
2620 logger
.info('Wrote public SSH key to to %s' % args
.output_pub_ssh_key
)
2622 logger
.info('Adding key to root@localhost\'s authorized_keys...')
2623 if not os
.path
.exists('/root/.ssh'):
2624 os
.mkdir('/root/.ssh', 0o700)
2625 auth_keys_file
= '/root/.ssh/authorized_keys'
2627 if os
.path
.exists(auth_keys_file
):
2628 with
open(auth_keys_file
, 'r') as f
:
2629 f
.seek(0, os
.SEEK_END
)
2631 f
.seek(f
.tell()-1, os
.SEEK_SET
) # go to last char
2632 if f
.read() != '\n':
2634 with
open(auth_keys_file
, 'a') as f
:
2635 os
.fchmod(f
.fileno(), 0o600) # just in case we created it
2638 f
.write(ssh_pub
.strip() + '\n')
2640 host
= get_hostname()
2641 logger
.info('Adding host %s...' % host
)
2642 cli(['orch', 'host', 'add', host
])
2644 if not args
.orphan_initial_daemons
:
2645 for t
in ['mon', 'mgr', 'crash']:
2646 logger
.info('Deploying %s service with default placement...' % t
)
2647 cli(['orch', 'apply', t
])
2649 if not args
.skip_monitoring_stack
:
2650 logger
.info('Enabling mgr prometheus module...')
2651 cli(['mgr', 'module', 'enable', 'prometheus'])
2652 for t
in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
2653 logger
.info('Deploying %s service with default placement...' % t
)
2654 cli(['orch', 'apply', t
])
2656 if not args
.skip_dashboard
:
2657 logger
.info('Enabling the dashboard module...')
2658 cli(['mgr', 'module', 'enable', 'dashboard'])
2659 wait_for_mgr_restart()
2661 # dashboard crt and key
2662 if args
.dashboard_key
and args
.dashboard_crt
:
2663 logger
.info('Using provided dashboard certificate...')
2665 pathify(args
.dashboard_crt
.name
): '/tmp/dashboard.crt:z',
2666 pathify(args
.dashboard_key
.name
): '/tmp/dashboard.key:z'
2668 cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts
=mounts
)
2669 cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts
=mounts
)
2671 logger
.info('Generating a dashboard self-signed certificate...')
2672 cli(['dashboard', 'create-self-signed-cert'])
2674 logger
.info('Creating initial admin user...')
2675 password
= args
.initial_dashboard_password
or generate_password()
2676 cmd
= ['dashboard', 'ac-user-create', args
.initial_dashboard_user
, password
, 'administrator', '--force-password']
2677 if not args
.dashboard_password_noupdate
:
2678 cmd
.append('--pwd-update-required')
2680 logger
.info('Fetching dashboard port number...')
2681 out
= cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
2684 logger
.info('Ceph Dashboard is now available at:\n\n'
2685 '\t URL: https://%s:%s/\n'
2687 '\tPassword: %s\n' % (
2689 args
.initial_dashboard_user
,
2693 logger
.info('Applying %s to cluster' % args
.apply_spec
)
2695 with
open(args
.apply_spec
) as f
:
2697 if 'hostname:' in line
:
2698 line
= line
.replace('\n', '')
2699 split
= line
.split(': ')
2700 if split
[1] != host
:
2701 logger
.info('Adding ssh key to %s' % split
[1])
2703 ssh_key
= '/etc/ceph/ceph.pub'
2704 if args
.ssh_public_key
:
2705 ssh_key
= args
.ssh_public_key
.name
2706 out
, err
, code
= call_throws(['ssh-copy-id', '-f', '-i', ssh_key
, 'root@%s' % split
[1]])
2709 mounts
[pathify(args
.apply_spec
)] = '/tmp/spec.yml:z'
2711 out
= cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts
=mounts
)
2714 logger
.info('You can access the Ceph CLI with:\n\n'
2715 '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
2719 args
.output_keyring
))
2720 logger
.info('Please consider enabling telemetry to help improve Ceph:\n\n'
2721 '\tceph telemetry on\n\n'
2722 'For more information see:\n\n'
2723 '\thttps://docs.ceph.com/docs/master/mgr/telemetry/\n')
2724 logger
.info('Bootstrap complete.')
2727 ##################################
def extract_uid_gid_monitoring(daemon_type):
    # type: (str) -> Tuple[int, int]
    """Return the (uid, gid) a monitoring-stack daemon should run as.

    prometheus/grafana/alertmanager probe the container image for the owner
    of a well-known config/data path; node-exporter uses the conventional
    'nobody' ids.

    Raises Error for any daemon type without a known uid/gid mapping.
    NOTE(review): reconstructed from a garbled listing; the trailing
    'return uid, gid' was missing from the fragment — verify against upstream.
    """
    if daemon_type == 'prometheus':
        uid, gid = extract_uid_gid(file_path='/etc/prometheus')
    elif daemon_type == 'node-exporter':
        # 65534 is the traditional 'nobody'/'nogroup' id
        uid, gid = 65534, 65534
    elif daemon_type == 'grafana':
        uid, gid = extract_uid_gid(file_path='/var/lib/grafana')
    elif daemon_type == 'alertmanager':
        uid, gid = extract_uid_gid(file_path='/etc/alertmanager')
    else:
        raise Error("{} not implemented yet".format(daemon_type))
    return uid, gid
def command_deploy():
    # type: () -> None
    """Deploy (or reconfig/redeploy) a single daemon on this host.

    Dispatches on daemon type: core Ceph daemons, monitoring-stack
    containers, NFS-Ganesha, and iSCSI each have their own uid/gid and
    config/keyring handling before calling deploy_daemon().

    NOTE(review): reconstructed from a garbled listing (missing lock/else
    lines restored) — verify against upstream cephadm.
    """
    daemon_type, daemon_id = args.name.split('.', 1)

    l = FileLock(args.fsid)
    l.acquire()

    if daemon_type not in get_supported_daemons():
        raise Error('daemon type %s not recognized' % daemon_type)

    # a daemon whose unit is already running is being redeployed, not
    # freshly deployed; this relaxes e.g. the port-in-use checks below
    redeploy = False
    unit_name = get_unit_name(args.fsid, daemon_type, daemon_id)
    (_, state, _) = check_unit(unit_name)
    if state == 'running':
        redeploy = True

    if args.reconfig:
        logger.info('%s daemon %s ...' % ('Reconfig', args.name))
    elif redeploy:
        logger.info('%s daemon %s ...' % ('Redeploy', args.name))
    else:
        logger.info('%s daemon %s ...' % ('Deploy', args.name))

    if daemon_type in Ceph.daemons:
        config, keyring = get_config_and_keyring()
        uid, gid = extract_uid_gid()
        make_var_run(args.fsid, uid, gid)
        c = get_container(args.fsid, daemon_type, daemon_id,
                          ptrace=args.allow_ptrace)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      osd_fsid=args.osd_fsid,
                      reconfig=args.reconfig)

    elif daemon_type in Monitoring.components:
        # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
        # only check ports on a fresh deploy; a reconfig/redeploy will
        # already be holding them
        if not args.reconfig and not redeploy:
            daemon_ports = Monitoring.port_map[daemon_type]  # type: List[int]
            if any([port_in_use(port) for port in daemon_ports]):
                raise Error("TCP Port(s) '{}' required for {} is already in use".format(",".join(map(str, daemon_ports)), daemon_type))

        # make sure provided config-json is sufficient
        config = get_parm(args.config_json)  # type: ignore
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())
        required_args = Monitoring.components[daemon_type].get('config-json-args', list())
        if not config or not all(c in config.get('files', {}).keys() for c in required_files):  # type: ignore
            raise Error("{} deployment requires config-json which must "
                        "contain file content for {}".format(daemon_type.capitalize(), ', '.join(required_files)))
        if not config or not all(c in config.keys() for c in required_args):  # type: ignore
            raise Error("{} deployment requires config-json which must "
                        "contain arg for {}".format(daemon_type.capitalize(), ', '.join(required_args)))

        uid, gid = extract_uid_gid_monitoring(daemon_type)
        c = get_container(args.fsid, daemon_type, daemon_id)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=args.reconfig)

    elif daemon_type == NFSGanesha.daemon_type:
        if not args.reconfig and not redeploy:
            NFSGanesha.port_in_use()
        config, keyring = get_config_and_keyring()
        # TODO: extract ganesha uid/gid (997, 994) ?
        uid, gid = extract_uid_gid()
        c = get_container(args.fsid, daemon_type, daemon_id)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=args.reconfig)

    elif daemon_type == CephIscsi.daemon_type:
        config, keyring = get_config_and_keyring()
        uid, gid = extract_uid_gid()
        c = get_container(args.fsid, daemon_type, daemon_id)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=args.reconfig)
    else:
        raise Error("{} not implemented in command_deploy function".format(daemon_type))
2827 ##################################
def command_run():
    # type: () -> int
    """Run a daemon's container in the foreground; return its exit code.

    NOTE(review): the 'def' line was missing from the garbled listing and
    has been restored — verify the name against upstream cephadm.
    """
    (daemon_type, daemon_id) = args.name.split('.', 1)
    c = get_container(args.fsid, daemon_type, daemon_id)
    command = c.run_cmd()
    return call_timeout(command, args.timeout)
2837 ##################################
def command_shell():
    # type: () -> int
    """Open an interactive shell (or run a command) in a ceph container
    with the cluster's config, keyring and daemon mounts available.

    NOTE(review): reconstructed from a garbled listing; the branch
    scaffolding around name/config/keyring defaults was missing and has
    been restored — verify against upstream cephadm.
    """
    if args.fsid:
        make_log_dir(args.fsid)
    if args.name:
        if '.' in args.name:
            (daemon_type, daemon_id) = args.name.split('.', 1)
        else:
            daemon_type = args.name
            daemon_id = None
    else:
        daemon_type = 'osd'  # get the most mounts
        daemon_id = None

    if daemon_id and not args.fsid:
        raise Error('must pass --fsid to specify cluster')

    # use /etc/ceph files by default, if present. we do this instead of
    # making these defaults in the arg parser because we don't want an error
    # if they don't exist.
    if not args.config and os.path.exists(SHELL_DEFAULT_CONF):
        args.config = SHELL_DEFAULT_CONF
    if not args.keyring and os.path.exists(SHELL_DEFAULT_KEYRING):
        args.keyring = SHELL_DEFAULT_KEYRING

    container_args = []  # type: List[str]
    mounts = get_container_mounts(args.fsid, daemon_type, daemon_id,
                                  no_config=True if args.config else False)
    if args.config:
        mounts[pathify(args.config)] = '/etc/ceph/ceph.conf:z'
    if args.keyring:
        mounts[pathify(args.keyring)] = '/etc/ceph/ceph.keyring:z'
    if args.mount:
        mount = pathify(args.mount)
        filename = os.path.basename(mount)
        mounts[mount] = '/mnt/{}:z'.format(filename)
    if args.command:
        command = args.command
    else:
        command = ['bash']
        container_args += [
            '-it',
            '-e', 'LANG=C',
            '-e', "PS1=%s" % CUSTOM_PS1,
        ]
        if args.fsid:
            # set up a persistent root home inside the cluster data dir so
            # shell history etc. survive across invocations
            home = os.path.join(args.data_dir, args.fsid, 'home')
            if not os.path.exists(home):
                logger.debug('Creating root home at %s' % home)
                makedirs(home, 0, 0, 0o660)
                if os.path.exists('/etc/skel'):
                    for f in os.listdir('/etc/skel'):
                        if f.startswith('.bash'):
                            shutil.copyfile(os.path.join('/etc/skel', f),
                                            os.path.join(home, f))
            mounts[home] = '/root'

    c = CephContainer(
        image=args.image,
        entrypoint='doesnotmatter',
        args=[],
        container_args=container_args,
        volume_mounts=mounts)
    command = c.shell_cmd(command)

    return call_timeout(command, args.timeout)
2909 ##################################
def command_enter():
    # type: () -> int
    """Exec a shell (or command) inside a running daemon's container.

    NOTE(review): reconstructed from a garbled listing — verify against
    upstream cephadm.
    """
    if not args.fsid:
        raise Error('must pass --fsid to specify cluster')
    (daemon_type, daemon_id) = args.name.split('.', 1)
    container_args = []  # type: List[str]
    if args.command:
        command = args.command
    else:
        command = ['sh']
        container_args += [
            '-it',
            '-e', 'LANG=C',
            '-e', "PS1=%s" % CUSTOM_PS1,
        ]
    c = CephContainer(
        image=args.image,
        entrypoint='doesnotmatter',
        container_args=container_args,
        cname='ceph-%s-%s.%s' % (args.fsid, daemon_type, daemon_id),
    )
    command = c.exec_cmd(command)
    return call_timeout(command, args.timeout)
2936 ##################################
def command_ceph_volume():
    # type: () -> None
    """Run ceph-volume inside a privileged container, mounting temp
    config/keyring files when a config-json is provided.

    NOTE(review): reconstructed from a garbled listing; the lock and the
    config-json conditional scaffolding were missing and have been
    restored — verify against upstream cephadm.
    """
    if args.fsid:
        make_log_dir(args.fsid)

        l = FileLock(args.fsid)
        l.acquire()

    (uid, gid) = (0, 0)  # ceph-volume runs as root
    mounts = get_container_mounts(args.fsid, 'osd', None)

    tmp_config = None
    tmp_keyring = None

    if args.config_json:
        # note: the temp files are kept alive by these locals for the
        # duration of the container run; their .name paths are bind-mounted
        (config, keyring) = get_config_and_keyring()

        # tmp config file
        tmp_config = write_tmp(config, uid, gid)
        mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'

        # tmp keyring file
        tmp_keyring = write_tmp(keyring, uid, gid)
        mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'

    c = CephContainer(
        image=args.image,
        entrypoint='/usr/sbin/ceph-volume',
        args=args.command,
        privileged=True,
        volume_mounts=mounts,
    )
    out, err, code = call_throws(c.run_cmd(), verbose=True)
    if not code:
        print(out)
2978 ##################################
def command_unit():
    # type: () -> None
    """Pass a systemctl subcommand (start/stop/...) through to the
    daemon's systemd unit.

    NOTE(review): the 'def' line and the systemctl invocation were missing
    from the garbled listing and have been restored — verify against
    upstream cephadm.
    """
    if not args.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(args.fsid, args.name)

    call_throws([
        'systemctl',
        args.command,
        unit_name])
2993 ##################################
def command_logs():
    # type: () -> None
    """Show journalctl output for a daemon's systemd unit, passing any
    extra args (e.g. -f, -n) straight through to journalctl.

    NOTE(review): the 'def' line was missing from the garbled listing and
    has been restored — verify against upstream cephadm.
    """
    if not args.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(args.fsid, args.name)

    cmd = [find_program('journalctl')]
    cmd.extend(['-u', unit_name])
    if args.command:
        cmd.extend(args.command)

    # call this directly, without our wrapper, so that we get an unmolested
    # stdout with logger prefixing.
    logger.debug("Running command: %s" % ' '.join(cmd))
    subprocess.call(cmd)  # type: ignore
3013 ##################################
def list_networks():
    # type: () -> Dict[str,List[str]]
    """Map each locally configured CIDR network to the local IPs on it,
    by parsing `ip route ls` output.
    """

    ## sadly, 18.04's iproute2 4.15.0-2ubun doesn't support the -j flag,
    ## so we'll need to use a regex to parse 'ip' command output.
    #out, _, _ = call_throws(['ip', '-j', 'route', 'ls'])
    #j = json.loads(out)

    out, _, _ = call_throws([find_executable('ip'), 'route', 'ls'])
    return _parse_ip_route(out)
3027 def _parse_ip_route(out
):
3028 r
= {} # type: Dict[str,List[str]]
3029 p
= re
.compile(r
'^(\S+) (.*)scope link (.*)src (\S+)')
3030 for line
in out
.splitlines():
def command_list_networks():
    # type: () -> None
    """Print the locally configured networks/IPs as pretty JSON."""
    r = list_networks()
    print(json.dumps(r, indent=4))
3046 ##################################
def command_ls():
    # type: () -> None
    """Print all daemons on this host (cephadm and legacy) as JSON.

    NOTE(review): the 'def' line was missing from the garbled listing and
    has been restored — verify the name against upstream cephadm.
    """
    ls = list_daemons(detail=not args.no_detail,
                      legacy_dir=args.legacy_dir)
    print(json.dumps(ls, indent=4))
def list_daemons(detail=True, legacy_dir=None):
    # type: (bool, Optional[str]) -> List[Dict[str, str]]
    """Enumerate daemons under the data dir, both legacy ('ceph-<id>'
    dirs under mon/osd/mds/mgr) and cephadm-style (per-fsid dirs).

    With detail=True also gathers unit state, container ids and daemon
    versions (probing each container at most once per image via
    seen_versions).

    NOTE(review): reconstructed from a heavily garbled listing; control
    flow around version probing and the inspect-failure fallback was
    missing and has been restored from the cephadm octopus layout —
    verify against upstream before relying on it.
    """
    host_version = None
    ls = []

    data_dir = args.data_dir
    if legacy_dir is not None:
        data_dir = os.path.abspath(legacy_dir + data_dir)

    # keep track of ceph versions we see
    seen_versions = {}  # type: Dict[str, Optional[str]]

    # /var/lib/ceph
    if os.path.exists(data_dir):
        for i in os.listdir(data_dir):
            if i in ['mon', 'osd', 'mds', 'mgr']:
                daemon_type = i
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '-' not in j:
                        continue
                    (cluster, daemon_id) = j.split('-', 1)
                    fsid = get_legacy_daemon_fsid(
                        cluster, daemon_type, daemon_id,
                        legacy_dir=legacy_dir)
                    legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
                    i = {
                        'style': 'legacy',
                        'name': '%s.%s' % (daemon_type, daemon_id),
                        'fsid': fsid if fsid is not None else 'unknown',
                        'systemd_unit': legacy_unit_name,
                    }
                    if detail:
                        (i['enabled'], i['state'], _) = check_unit(legacy_unit_name)
                        if not host_version:
                            try:
                                out, err, code = call(['ceph', '-v'])
                                if not code and out.startswith('ceph version '):
                                    host_version = out.split(' ')[2]
                            except Exception:
                                pass
                        i['host_version'] = host_version
                    ls.append(i)
            elif is_fsid(i):
                fsid = str(i)  # convince mypy that fsid is a str here
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '.' in j:
                        name = j
                        (daemon_type, daemon_id) = j.split('.', 1)
                        unit_name = get_unit_name(fsid,
                                                  daemon_type,
                                                  daemon_id)
                    else:
                        continue
                    i = {
                        'style': 'cephadm:v1',
                        'name': name,
                        'fsid': fsid,
                        'systemd_unit': unit_name,
                    }
                    if detail:
                        # get daemon state
                        (i['enabled'], i['state'], _) = check_unit(unit_name)

                        # get container id
                        container_id = None
                        image_name = None
                        image_id = None
                        version = None
                        start_stamp = None

                        # podman < 1.6.2 reports the image id under a
                        # different template field
                        if 'podman' in container_path and get_podman_version() < (1, 6, 2):
                            image_field = '.ImageID'
                        else:
                            image_field = '.Image'

                        out, err, code = call(
                            [
                                container_path, 'inspect',
                                '--format', '{{.Id}},{{.Config.Image}},{{%s}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}' % image_field,
                                'ceph-%s-%s' % (fsid, j)
                            ],
                            verbose_on_failure=False)
                        if not code:
                            (container_id, image_name, image_id, start,
                             version) = out.strip().split(',')
                            image_id = normalize_container_id(image_id)
                            daemon_type = name.split('.', 1)[0]
                            start_stamp = try_convert_datetime(start)
                            # label version can be bogus; fall back to a
                            # cached probe of this image, then probe live
                            if not version or '.' not in version:
                                version = seen_versions.get(image_id, None)
                            if daemon_type == NFSGanesha.daemon_type:
                                version = NFSGanesha.get_version(container_id)
                            if daemon_type == CephIscsi.daemon_type:
                                version = CephIscsi.get_version(container_id)
                            elif not version:
                                if daemon_type in Ceph.daemons:
                                    out, err, code = call(
                                        [container_path, 'exec', container_id,
                                         'ceph', '-v'])
                                    if not code and \
                                       out.startswith('ceph version '):
                                        version = out.split(' ')[2]
                                        seen_versions[image_id] = version
                                elif daemon_type == 'grafana':
                                    out, err, code = call(
                                        [container_path, 'exec', container_id,
                                         'grafana-server', '-v'])
                                    if not code and \
                                       out.startswith('Version '):
                                        version = out.split(' ')[1]
                                        seen_versions[image_id] = version
                                elif daemon_type in ['prometheus',
                                                     'alertmanager',
                                                     'node-exporter']:
                                    cmd = daemon_type.replace('-', '_')
                                    out, err, code = call(
                                        [container_path, 'exec', container_id,
                                         cmd, '--version'])
                                    # these tools print their version to stderr
                                    if not code and \
                                       err.startswith('%s, version ' % cmd):
                                        version = err.split(' ')[2]
                                        seen_versions[image_id] = version
                                else:
                                    logging.warning('version for unknown daemon type %s' % daemon_type)
                        else:
                            # container not present/running: best-effort read
                            # of the recorded image name
                            vfile = os.path.join(data_dir, fsid, j, 'unit.image')  # type: ignore
                            try:
                                with open(vfile, 'r') as f:
                                    image_name = f.read().strip() or None
                            except IOError:
                                pass
                        i['container_id'] = container_id
                        i['container_image_name'] = image_name
                        i['container_image_id'] = image_id
                        i['version'] = version
                        i['started'] = start_stamp
                        i['created'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.created'))
                        i['deployed'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.image'))
                        i['configured'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.configured'))

                    ls.append(i)

    return ls
def get_daemon_description(fsid, name, detail=False, legacy_dir=None):
    # type: (str, str, bool, Optional[str]) -> Dict[str, str]
    """Return the list_daemons() entry matching (fsid, name).

    Raises Error when no such daemon exists on this host.
    NOTE(review): the 'return d' line was missing from the garbled
    listing and has been restored — verify against upstream cephadm.
    """
    for d in list_daemons(detail=detail, legacy_dir=legacy_dir):
        if d['fsid'] != fsid:
            continue
        if d['name'] != name:
            continue
        return d
    raise Error('Daemon not found: {}. See `cephadm ls`'.format(name))
3213 ##################################
def command_adopt():
    # type: () -> None
    """Adopt a legacy (non-containerized) daemon into cephadm management.

    Detects the legacy cluster fsid, takes the per-fsid lock, then
    dispatches to a per-daemon-type adoption routine.

    NOTE(review): reconstructed from a garbled listing; the FileLock
    acquisition lines were missing and have been restored — verify
    against upstream cephadm.
    """
    if not args.skip_pull:
        logger.info('Pulling latest %s container...' % args.image)
        call_throws([container_path, 'pull', args.image])

    (daemon_type, daemon_id) = args.name.split('.', 1)

    # legacy check
    if args.style != 'legacy':
        raise Error('adoption of style %s not implemented' % args.style)

    # lock
    fsid = get_legacy_daemon_fsid(args.cluster,
                                  daemon_type,
                                  daemon_id,
                                  legacy_dir=args.legacy_dir)
    if not fsid:
        raise Error('could not detect legacy fsid; set fsid in ceph.conf')
    l = FileLock(fsid)
    l.acquire()

    # call correct adoption
    if daemon_type in Ceph.daemons:
        command_adopt_ceph(daemon_type, daemon_id, fsid)
    elif daemon_type == 'prometheus':
        command_adopt_prometheus(daemon_id, fsid)
    elif daemon_type == 'grafana':
        command_adopt_grafana(daemon_id, fsid)
    elif daemon_type == 'node-exporter':
        raise Error('adoption of node-exporter not implemented')
    elif daemon_type == 'alertmanager':
        command_adopt_alertmanager(daemon_id, fsid)
    else:
        raise Error('daemon type %s not recognized' % daemon_type)
class AdoptOsd(object):
    """Probes an OSD's data dir (and ceph-volume metadata) to discover its
    fsid and objectstore type during legacy adoption.

    NOTE(review): reconstructed from a garbled listing; try/except
    scaffolding was missing and has been restored — verify against
    upstream cephadm.
    """

    def __init__(self, osd_data_dir, osd_id):
        # type: (str, str) -> None
        self.osd_data_dir = osd_data_dir
        self.osd_id = osd_id

    def check_online_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Read fsid/type files from a mounted (online) OSD data dir."""

        osd_fsid, osd_type = None, None

        path = os.path.join(self.osd_data_dir, 'fsid')
        try:
            with open(path, 'r') as f:
                osd_fsid = f.read().strip()
            logger.info("Found online OSD at %s" % path)
        except IOError:
            logger.info('Unable to read OSD fsid from %s' % path)
        if os.path.exists(os.path.join(self.osd_data_dir, 'type')):
            with open(os.path.join(self.osd_data_dir, 'type')) as f:
                osd_type = f.read().strip()
        else:
            logger.info('"type" file missing for OSD data dir')

        return osd_fsid, osd_type

    def check_offline_lvm_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Query `ceph-volume lvm list` (in a container) for this OSD."""

        osd_fsid, osd_type = None, None

        c = CephContainer(
            image=args.image,
            entrypoint='/usr/sbin/ceph-volume',
            args=['lvm', 'list', '--format=json'],
            privileged=True
        )
        out, err, code = call_throws(c.run_cmd(), verbose=False)
        if not code:
            try:
                js = json.loads(out)
                if self.osd_id in js:
                    logger.info("Found offline LVM OSD {}".format(self.osd_id))
                    osd_fsid = js[self.osd_id][0]['tags']['ceph.osd_fsid']
                    for device in js[self.osd_id]:
                        if device['tags']['ceph.type'] == 'block':
                            osd_type = 'bluestore'
                            break
                        if device['tags']['ceph.type'] == 'data':
                            osd_type = 'filestore'
                            break
            except ValueError as e:
                logger.info("Invalid JSON in ceph-volume lvm list: {}".format(e))

        return osd_fsid, osd_type

    def check_offline_simple_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Look for a ceph-volume 'simple' scan JSON for this OSD and, if
        found, mount its data path so adoption can move the files."""

        osd_fsid, osd_type = None, None

        osd_file = glob("/etc/ceph/osd/{}-[a-f0-9-]*.json".format(self.osd_id))
        if len(osd_file) == 1:
            with open(osd_file[0], 'r') as f:
                try:
                    js = json.loads(f.read())
                    logger.info("Found offline simple OSD {}".format(self.osd_id))
                    osd_fsid = js["fsid"]
                    osd_type = js["type"]
                    if osd_type != "filestore":
                        # need this to be mounted for the adopt to work, as it
                        # needs to move files from this directory
                        call_throws(['mount', js["data"]["path"], self.osd_data_dir])
                except ValueError as e:
                    logger.info("Invalid JSON in {}: {}".format(osd_file, e))

        return osd_fsid, osd_type
def command_adopt_ceph(daemon_type, daemon_id, fsid):
    # type: (str, str, str) -> None
    """Adopt one legacy Ceph daemon: stop/disable its old unit, move its
    data and logs into the cephadm layout, fix ownership, and create new
    containerized systemd units.

    NOTE(review): reconstructed from a heavily garbled listing; several
    scaffolding lines (osd probing fallbacks, rename counter, else
    branches) were missing and have been restored from the cephadm
    octopus layout — verify against upstream before relying on it.
    """
    (uid, gid) = extract_uid_gid()

    data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
                    (daemon_type, args.cluster, daemon_id))
    data_dir_src = os.path.abspath(args.legacy_dir + data_dir_src)

    if not os.path.exists(data_dir_src):
        raise Error("{}.{} data directory '{}' does not exist. "
                    "Incorrect ID specified, or daemon alrady adopted?".format(
                        daemon_type, daemon_id, data_dir_src))

    osd_fsid = None
    if daemon_type == 'osd':
        # try each discovery method in turn: online, offline-lvm,
        # offline-simple
        adopt_osd = AdoptOsd(data_dir_src, daemon_id)
        osd_fsid, osd_type = adopt_osd.check_online_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
        if not osd_fsid:
            raise Error('Unable to find OSD {}'.format(daemon_id))
        logger.info('objectstore_type is %s' % osd_type)
        if osd_type == 'filestore':
            raise Error('FileStore is not supported by cephadm')

    # NOTE: implicit assumption here that the units correspond to the
    # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
    # CLUSTER field.
    unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
    (enabled, state, _) = check_unit(unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(['systemctl', 'disable', unit_name])

    # data
    logger.info('Moving data...')
    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)
    move_files(glob(os.path.join(data_dir_src, '*')),
               data_dir_dst,
               uid=uid, gid=gid)
    logger.debug('Remove dir \'%s\'' % (data_dir_src))
    if os.path.ismount(data_dir_src):
        call_throws(['umount', data_dir_src])
    os.rmdir(data_dir_src)

    logger.info('Chowning content...')
    call_throws(['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])

    if daemon_type == 'mon':
        # rename *.ldb -> *.sst, in case they are coming from ubuntu
        store = os.path.join(data_dir_dst, 'store.db')
        num_renamed = 0
        if os.path.exists(store):
            for oldf in os.listdir(store):
                if oldf.endswith('.ldb'):
                    newf = oldf.replace('.ldb', '.sst')
                    oldp = os.path.join(store, oldf)
                    newp = os.path.join(store, newf)
                    logger.debug('Renaming %s -> %s' % (oldp, newp))
                    os.rename(oldp, newp)
                    num_renamed += 1
        if num_renamed:
            logger.info('Renamed %d leveldb *.ldb files to *.sst',
                        num_renamed)
    if daemon_type == 'osd':
        for n in ['block', 'block.db', 'block.wal']:
            p = os.path.join(data_dir_dst, n)
            if os.path.exists(p):
                logger.info('Chowning %s...' % p)
                os.chown(p, uid, gid)
        # disable the ceph-volume 'simple' mode files on the host
        simple_fn = os.path.join('/etc/ceph/osd',
                                 '%s-%s.json' % (daemon_id, osd_fsid))
        if os.path.exists(simple_fn):
            new_fn = simple_fn + '.adopted-by-cephadm'
            logger.info('Renaming %s -> %s', simple_fn, new_fn)
            os.rename(simple_fn, new_fn)
            logger.info('Disabling host unit ceph-volume@ simple unit...')
            call(['systemctl', 'disable',
                  'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
        else:
            # assume this is an 'lvm' c-v for now, but don't error
            # out if it's not
            logger.info('Disabling host unit ceph-volume@ lvm unit...')
            call(['systemctl', 'disable',
                  'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])

    # config
    config_src = '/etc/ceph/%s.conf' % (args.cluster)
    config_src = os.path.abspath(args.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'config')
    copy_files([config_src], config_dst, uid=uid, gid=gid)

    # logs
    logger.info('Moving logs...')
    log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
                   (args.cluster, daemon_type, daemon_id))
    log_dir_src = os.path.abspath(args.legacy_dir + log_dir_src)
    log_dir_dst = make_log_dir(fsid, uid=uid, gid=gid)
    move_files(glob(log_dir_src),
               log_dir_dst,
               uid=uid, gid=gid)

    logger.info('Creating new units...')
    make_var_run(fsid, uid, gid)
    c = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True,  # unconditionally enable the new unit
                        start=(state == 'running' or args.force_start),
                        osd_fsid=osd_fsid)
    update_firewalld(daemon_type)
def command_adopt_prometheus(daemon_id, fsid):
    # type: (str, str) -> None
    """Adopt a host-installed prometheus: stop/disable the old service,
    copy its config and metrics into the cephadm layout, and deploy the
    containerized daemon.

    NOTE(review): reconstructed from a garbled listing — verify against
    upstream cephadm.
    """
    daemon_type = 'prometheus'
    (uid, gid) = extract_uid_gid_monitoring(daemon_type)

    _stop_and_disable('prometheus')

    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = '/etc/prometheus/prometheus.yml'
    config_src = os.path.abspath(args.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/prometheus')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files([config_src], config_dst, uid=uid, gid=gid)

    # data
    data_src = '/var/lib/prometheus/metrics/'
    data_src = os.path.abspath(args.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree([data_src], data_dst, uid=uid, gid=gid)

    make_var_run(fsid, uid, gid)
    c = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(daemon_type)
def command_adopt_grafana(daemon_id, fsid):
    # type: (str, str) -> None
    """Adopt a host-installed grafana: stop/disable the old service, copy
    config/provisioning/ssl certs/data into the cephadm layout, and
    deploy the containerized daemon.

    NOTE(review): reconstructed from a garbled listing; the ssl if/else
    scaffolding was missing and has been restored — verify against
    upstream cephadm.
    """
    daemon_type = 'grafana'
    (uid, gid) = extract_uid_gid_monitoring(daemon_type)

    _stop_and_disable('grafana-server')

    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = '/etc/grafana/grafana.ini'
    config_src = os.path.abspath(args.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/grafana')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files([config_src], config_dst, uid=uid, gid=gid)

    prov_src = '/etc/grafana/provisioning/'
    prov_src = os.path.abspath(args.legacy_dir + prov_src)
    prov_dst = os.path.join(data_dir_dst, 'etc/grafana')
    copy_tree([prov_src], prov_dst, uid=uid, gid=gid)

    # ssl
    cert = '/etc/grafana/grafana.crt'
    key = '/etc/grafana/grafana.key'
    if os.path.exists(cert) and os.path.exists(key):
        cert_src = '/etc/grafana/grafana.crt'
        cert_src = os.path.abspath(args.legacy_dir + cert_src)
        makedirs(os.path.join(data_dir_dst, 'etc/grafana/certs'), uid, gid, 0o755)
        cert_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_file')
        copy_files([cert_src], cert_dst, uid=uid, gid=gid)

        key_src = '/etc/grafana/grafana.key'
        key_src = os.path.abspath(args.legacy_dir + key_src)
        key_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_key')
        copy_files([key_src], key_dst, uid=uid, gid=gid)

        # point the copied grafana.ini at the relocated cert paths
        _adjust_grafana_ini(os.path.join(config_dst, 'grafana.ini'))
    else:
        logger.debug("Skipping ssl, missing cert {} or key {}".format(cert, key))

    # data - possible custom dashboards/plugins
    data_src = '/var/lib/grafana/'
    data_src = os.path.abspath(args.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree([data_src], data_dst, uid=uid, gid=gid)

    make_var_run(fsid, uid, gid)
    c = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(daemon_type)
def command_adopt_alertmanager(daemon_id, fsid):
    # type: (str, str) -> None
    """Adopt a legacy prometheus-alertmanager deployment into cephadm.

    Stops/disables the old unit, copies its config and data into the
    cephadm data dir, then deploys the containerized daemon.
    """
    daemon_type = 'alertmanager'
    (uid, gid) = extract_uid_gid_monitoring(daemon_type)

    _stop_and_disable('prometheus-alertmanager')

    # NOTE(review): uid/gid keyword args reconstructed from the sibling
    # adopt helpers -- confirm against make_data_dir's signature.
    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = '/etc/prometheus/alertmanager.yml'
    config_src = os.path.abspath(args.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/alertmanager')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files([config_src], config_dst, uid=uid, gid=gid)

    # data (silence/notification state)
    data_src = '/var/lib/prometheus/alertmanager/'
    data_src = os.path.abspath(args.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'etc/alertmanager/data')
    copy_tree([data_src], data_dst, uid=uid, gid=gid)

    make_var_run(fsid, uid, gid)
    c = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(daemon_type)
def _adjust_grafana_ini(filename):
    # type: (str) -> None
    """Rewrite cert_file/cert_key paths in the [server] section of a
    grafana.ini file to point at the cephadm-managed cert locations.

    A line-by-line rewrite is used (not ConfigParser) because
    ConfigParser does not preserve comments.  The file is rewritten to
    <filename>.new and then renamed over the original.

    Raises Error if the file cannot be read, written, or renamed.
    """
    try:
        with open(filename, "r") as grafana_ini:
            lines = grafana_ini.readlines()
        with open("{}.new".format(filename), "w") as grafana_ini:
            # track whether we are currently inside the [server] section
            server_section = False
            for line in lines:
                if line.startswith('['):
                    server_section = False
                if line.startswith('[server]'):
                    server_section = True
                if server_section:
                    line = re.sub(r'^cert_file.*',
                                  'cert_file = /etc/grafana/certs/cert_file', line)
                    line = re.sub(r'^cert_key.*',
                                  'cert_key = /etc/grafana/certs/cert_key', line)
                grafana_ini.write(line)
        os.rename("{}.new".format(filename), filename)
    except OSError as err:
        raise Error("Cannot update {}: {}".format(filename, err))
def _stop_and_disable(unit_name):
    # type: (str) -> None
    """Stop (if running) and disable (if enabled) a legacy systemd unit."""
    (enabled, state, _) = check_unit(unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(['systemctl', 'disable', unit_name])
3603 ##################################
def command_rm_daemon():
    # type: () -> None
    """Remove a single daemon instance from this host.

    Stops and disables the daemon's systemd unit, then either backs up
    its data directory (default for mon/osd/prometheus) or deletes it.
    Requires --force for mon/osd since removal can destroy data.
    """
    l = FileLock(args.fsid)
    l.acquire()

    unit_name = get_unit_name_by_daemon_name(args.fsid, args.name)

    (daemon_type, daemon_id) = args.name.split('.', 1)
    if daemon_type in ['mon', 'osd'] and not args.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    # best-effort unit teardown; failures are not fatal
    call(['systemctl', 'stop', unit_name],
         verbose_on_failure=False)
    call(['systemctl', 'reset-failed', unit_name],
         verbose_on_failure=False)
    call(['systemctl', 'disable', unit_name],
         verbose_on_failure=False)

    data_dir = get_data_dir(args.fsid, daemon_type, daemon_id)
    if daemon_type in ['mon', 'osd', 'prometheus'] and \
       not args.force_delete_data:
        # rename it out of the way -- do not delete
        backup_dir = os.path.join(args.data_dir, args.fsid, 'removed')
        if not os.path.exists(backup_dir):
            makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
        dirname = '%s.%s_%s' % (daemon_type, daemon_id,
                                datetime.datetime.utcnow().strftime(DATEFMT))
        os.rename(data_dir,
                  os.path.join(backup_dir, dirname))
    else:
        call_throws(['rm', '-rf', data_dir])
3638 ##################################
def command_rm_cluster():
    # type: () -> None
    """Remove ALL daemons, units, data, and logs for a cluster fsid.

    Destructive: requires --force.  Also removes /etc/ceph config,
    keyring, and pub key files when they belong to this fsid.
    """
    if not args.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    l = FileLock(args.fsid)
    l.acquire()

    # stop + disable individual daemon units
    for d in list_daemons(detail=False):
        if d['fsid'] != args.fsid:
            continue
        if d['style'] != 'cephadm:v1':
            continue
        unit_name = get_unit_name(args.fsid, d['name'])
        call(['systemctl', 'stop', unit_name],
             verbose_on_failure=False)
        call(['systemctl', 'reset-failed', unit_name],
             verbose_on_failure=False)
        call(['systemctl', 'disable', unit_name],
             verbose_on_failure=False)

    # cluster-wide target unit
    for unit_name in ['ceph-%s.target' % args.fsid]:
        call(['systemctl', 'stop', unit_name],
             verbose_on_failure=False)
        call(['systemctl', 'reset-failed', unit_name],
             verbose_on_failure=False)
        call(['systemctl', 'disable', unit_name],
             verbose_on_failure=False)

    # systemd escapes '-' in slice names as \x2d
    slice_name = 'system-%s.slice' % (('ceph-%s' % args.fsid).replace('-',
                                                                      '\\x2d'))
    call(['systemctl', 'stop', slice_name],
         verbose_on_failure=False)

    # rm systemd unit files
    call_throws(['rm', '-f', args.unit_dir +
                 '/ceph-%s@.service' % args.fsid])
    call_throws(['rm', '-f', args.unit_dir +
                 '/ceph-%s.target' % args.fsid])
    call_throws(['rm', '-rf',
                 args.unit_dir + '/ceph-%s.target.wants' % args.fsid])
    # rm data dir
    call_throws(['rm', '-rf', args.data_dir + '/' + args.fsid])
    # rm logs
    call_throws(['rm', '-rf', args.log_dir + '/' + args.fsid])
    call_throws(['rm', '-rf', args.log_dir +
                 '/*.wants/ceph-%s@*' % args.fsid])
    # rm logrotate config
    call_throws(['rm', '-f', args.logrotate_dir + '/ceph-%s' % args.fsid])

    # clean up config, keyring, and pub key files
    files = ['/etc/ceph/ceph.conf', '/etc/ceph/ceph.pub', '/etc/ceph/ceph.client.admin.keyring']

    if os.path.exists(files[0]):
        # only remove these shared files when ceph.conf references this fsid
        valid_fsid = False
        with open(files[0]) as f:
            if args.fsid in f.read():
                valid_fsid = True
        if valid_fsid:
            for n in range(0, len(files)):
                if os.path.exists(files[n]):
                    os.unlink(files[n])
3707 ##################################
def check_time_sync(enabler=None):
    # type: (Optional[Packager]) -> bool
    """Return True if a known time-sync service unit is active.

    When *enabler* is given, check_units may try to enable a unit via
    the packager.  Logs a warning and returns False when none of the
    known services is running.
    """
    units = [
        'chrony.service',  # 18.04 (at least)
        'chronyd.service',  # el / opensuse
        'systemd-timesyncd.service',
        'ntpd.service',  # el7 (at least)
        'ntp.service',  # 18.04 (at least)
    ]
    if not check_units(units, enabler):
        logger.warning('No time sync service is running; checked for %s' % units)
        return False
    return True
def command_check_host():
    # type: () -> None
    """Verify this host has the prerequisites to run cephadm daemons.

    Checks for a container runtime (podman/docker), required binaries,
    an active time-sync service, and (optionally) the expected hostname.
    Raises Error listing every failed check; logs 'Host looks OK'
    otherwise.
    """
    errors = []
    commands = ['systemctl', 'lvcreate']

    if args.docker:
        container_path = find_program('docker')
    else:
        container_path = None
        for i in CONTAINER_PREFERENCE:
            try:
                container_path = find_program(i)
                break
            except Exception as e:
                logger.debug('Could not locate %s: %s' % (i, e))
    if not container_path:
        errors.append('Unable to locate any of %s' % CONTAINER_PREFERENCE)
    else:
        logger.info('podman|docker (%s) is present' % container_path)

    for command in commands:
        try:
            find_program(command)
            logger.info('%s is present' % command)
        except ValueError:
            errors.append('%s binary does not appear to be installed' % command)

    # check for configured+running chronyd or ntp
    if not check_time_sync():
        errors.append('No time synchronization is active')

    if 'expect_hostname' in args and args.expect_hostname:
        if get_hostname().lower() != args.expect_hostname.lower():
            errors.append('hostname "%s" does not match expected hostname "%s"' % (
                get_hostname(), args.expect_hostname))
        else:
            logger.info('Hostname "%s" matches what is expected.',
                        args.expect_hostname)

    if errors:
        raise Error('\n'.join(errors))

    logger.info('Host looks OK')
3765 ##################################
def command_prepare_host():
    # type: () -> None
    """Install missing prerequisites (container runtime, lvm2, chrony)
    and adjust the hostname if --expect-hostname is set, then re-run
    the host check."""
    logger.info('Verifying podman|docker is present...')
    if not container_path:
        pkg = create_packager()
        pkg.install_podman()

    logger.info('Verifying lvm2 is present...')
    if not find_executable('lvcreate'):
        pkg = create_packager()
        pkg.install(['lvm2'])

    logger.info('Verifying time synchronization is in place...')
    if not check_time_sync():
        pkg = create_packager()
        pkg.install(['chrony'])
        # check again, and this time try to enable
        # the service via the packager
        check_time_sync(enabler=pkg)

    if 'expect_hostname' in args and args.expect_hostname and args.expect_hostname != get_hostname():
        logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), args.expect_hostname))
        call_throws(['hostname', args.expect_hostname])
        with open('/etc/hostname', 'w') as f:
            f.write(args.expect_hostname + '\n')

    logger.info('Repeating the final host check...')
    command_check_host()
3800 ##################################
class CustomValidation(argparse.Action):
    """argparse Action that validates daemon names of the form <type>.<id>
    against the set of supported daemon types."""

    def _check_name(self, values):
        # a daemon name must split into exactly type and id
        try:
            (daemon_type, daemon_id) = values.split('.', 1)
        except ValueError:
            raise argparse.ArgumentError(self,
                "must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com")

        daemons = get_supported_daemons()
        if daemon_type not in daemons:
            raise argparse.ArgumentError(self,
                "name must declare the type of daemon e.g. "
                "{}".format(', '.join(daemons)))

    def __call__(self, parser, namespace, values, option_string=None):
        # only the --name/name argument gets the extra validation
        if self.dest == "name":
            self._check_name(values)
        setattr(namespace, self.dest, values)
3822 ##################################
def get_distro():
    # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
    """Parse /etc/os-release and return (id, version_id, codename),
    lower-cased, with surrounding double quotes stripped.  Any field
    missing from the file is returned as None."""
    distro = None
    distro_version = None
    distro_codename = None
    with open('/etc/os-release', 'r') as f:
        for line in f.readlines():
            line = line.strip()
            # skip blank/comment/malformed lines
            if '=' not in line or line.startswith('#'):
                continue
            (var, val) = line.split('=', 1)
            if val[0] == '"' and val[-1] == '"':
                val = val[1:-1]
            if var == 'ID':
                distro = val.lower()
            elif var == 'VERSION_ID':
                distro_version = val.lower()
            elif var == 'VERSION_CODENAME':
                distro_codename = val.lower()
    return distro, distro_version, distro_codename
class Packager(object):
    """Base class for distro package-repo management.

    Exactly one release selector may be provided: a stable release name,
    a specific version, or a dev branch (optionally with a commit).
    Subclasses implement add_repo/rm_repo/install for their distro.
    """

    def __init__(self, stable=None, version=None, branch=None, commit=None):
        # enforce mutually-exclusive release selectors
        assert \
            (stable and not version and not branch and not commit) or \
            (not stable and version and not branch and not commit) or \
            (not stable and not version and branch) or \
            (not stable and not version and not branch and not commit)
        self.stable = stable
        self.version = version
        self.branch = branch
        self.commit = commit

    def add_repo(self):
        raise NotImplementedError

    def rm_repo(self):
        raise NotImplementedError

    def query_shaman(self, distro, distro_version, branch, commit):
        """Resolve a dev branch/commit to a repo file via shaman+chacra.

        Raises Error when either service does not know the build.
        """
        logging.info('Fetching repo metadata from shaman and chacra...')
        shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
            distro=distro,
            distro_version=distro_version,
            branch=branch,
            sha1=commit or 'latest',
            arch='x86_64',  # NOTE(review): arch source not visible here -- confirm
        )
        try:
            shaman_response = urlopen(shaman_url)
        except HTTPError as err:
            logging.error('repository not found in shaman (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, shaman_url))
        try:
            chacra_url = shaman_response.geturl()
            chacra_response = urlopen(chacra_url)
        except HTTPError as err:
            logging.error('repository not found in chacra (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, chacra_url))
        return chacra_response.read().decode('utf-8')

    def repo_gpgkey(self):
        """Return (key_url, key_name): release key for stable/version
        installs, autobuild key for dev builds."""
        if self.stable or self.version:
            return 'https://download.ceph.com/keys/release.asc', 'release'
        else:
            return 'https://download.ceph.com/keys/autobuild.asc', 'autobuild'

    def enable_service(self, service):
        """
        Start and enable the service (typically using systemd).
        """
        call_throws(['systemctl', 'enable', '--now', service])
class Apt(Packager):
    """Packager for Debian/Ubuntu hosts (apt)."""

    # map os-release ID -> download.ceph.com distro directory name
    # NOTE(review): dict contents reconstructed -- confirm upstream values
    DISTRO_NAMES = {
        'ubuntu': 'ubuntu',
        'debian': 'debian',
    }

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version, distro_codename):
        super(Apt, self).__init__(stable=stable, version=version,
                                  branch=branch, commit=commit)
        self.distro = self.DISTRO_NAMES[distro]
        self.distro_codename = distro_codename

    def repo_path(self):
        return '/etc/apt/sources.list.d/ceph.list'

    def add_repo(self):
        """Install the repo GPG key and write the ceph.list repo file."""
        url, name = self.repo_gpgkey()
        logging.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logging.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'w') as f:
            f.write(key)

        if self.version:
            content = 'deb %s/debian-%s/ %s main\n' % (
                args.repo_url, self.version, self.distro_codename)
        elif self.stable:
            content = 'deb %s/debian-%s/ %s main\n' % (
                args.repo_url, self.stable, self.distro_codename)
        else:
            # dev build: ask shaman/chacra for the repo file
            content = self.query_shaman(self.distro, self.distro_codename, self.branch,
                                        self.commit)

        logging.info('Installing repo file at %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        """Remove any installed ceph GPG keys and the repo file."""
        for name in ['autobuild', 'release']:
            p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
            if os.path.exists(p):
                logging.info('Removing repo GPG key %s...' % p)
                os.unlink(p)
        if os.path.exists(self.repo_path()):
            logging.info('Removing repo at %s...' % self.repo_path())
            os.unlink(self.repo_path())

    def install(self, ls):
        logging.info('Installing packages %s...' % ls)
        call_throws(['apt', 'install', '-y'] + ls)

    def install_podman(self):
        if self.distro == 'ubuntu':
            # BUGFIX: log message typo 'pdoman' -> 'podman'
            logging.info('Setting up repo for podman...')
            self.install(['software-properties-common'])
            call_throws(['add-apt-repository', '-y', 'ppa:projectatomic/ppa'])
            call_throws(['apt', 'update'])

        logging.info('Attempting podman install...')
        try:
            self.install(['podman'])
        except Error:
            # podman unavailable on this release; docker.io is the fallback
            logging.info('Podman did not work. Falling back to docker...')
            self.install(['docker.io'])
class YumDnf(Packager):
    """Packager for RPM-based hosts (yum on older, dnf on newer)."""

    # os-release ID -> (shaman distro name, repo code prefix)
    DISTRO_NAMES = {
        'centos': ('centos', 'el'),
        'rhel': ('centos', 'el'),
        'scientific': ('centos', 'el'),
        'fedora': ('fedora', 'fc'),
    }

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version):
        super(YumDnf, self).__init__(stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.major = int(distro_version.split('.')[0])
        self.distro_normalized = self.DISTRO_NAMES[distro][0]
        self.distro_code = self.DISTRO_NAMES[distro][1] + str(self.major)
        # dnf replaced yum on fc30+/el8+
        if (self.distro_code == 'fc' and self.major >= 30) or \
           (self.distro_code == 'el' and self.major >= 8):
            self.tool = 'dnf'
        else:
            self.tool = 'yum'

    def custom_repo(self, **kw):
        """
        Repo files need special care in that a whole line should not be present
        if there is no value for it.  Because we were using `format()` we could
        not conditionally add a line for a repo file, so the end result would
        contain a key with a missing value (say if we were passing `None`),
        which breaks.  This function allows us to conditionally add lines,
        preserving an order and being more careful.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self):
        return '/etc/yum.repos.d/ceph.repo'

    def repo_baseurl(self):
        assert self.stable or self.version
        if self.version:
            return '%s/rpm-%s/%s' % (args.repo_url, self.version,
                                     self.distro_code)
        else:
            return '%s/rpm-%s/%s' % (args.repo_url, self.stable,
                                     self.distro_code)

    def add_repo(self):
        """Write /etc/yum.repos.d/ceph.repo and enable EPEL (+copr on el8)."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                # NOTE(review): name/enabled/gpgcheck kwargs reconstructed --
                # confirm against upstream cephadm
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro_normalized, self.major,
                                        self.branch,
                                        self.commit)

        logging.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        if self.distro_code.startswith('el'):
            logger.info('Enabling EPEL...')
            call_throws([self.tool, 'install', '-y', 'epel-release'])
        if self.distro_code == 'el8':
            # we also need Ken's copr repo, at least for now
            logger.info('Enabling supplementary copr repo ktdreyer/ceph-el8...')
            call_throws(['dnf', 'copr', 'enable', '-y', 'ktdreyer/ceph-el8'])

    def rm_repo(self):
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())
        if self.distro_code == 'el8':
            logger.info('Disabling supplementary copr repo ktdreyer/ceph-el8...')
            call_throws(['dnf', 'copr', 'disable', '-y', 'ktdreyer/ceph-el8'])

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws([self.tool, 'install', '-y'] + ls)

    def install_podman(self):
        self.install(['podman'])
class Zypper(Packager):
    """Packager for openSUSE hosts (zypper)."""

    DISTRO_NAMES = [
        'opensuse-tumbleweed',
        'opensuse-leap',
    ]

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version):
        super(Zypper, self).__init__(stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.tool = 'zypper'
        self.distro = 'opensuse'
        # default to Leap 15.1; tumbleweed keeps the default
        self.distro_version = '15.1'
        if 'tumbleweed' not in distro and distro_version is not None:
            self.distro_version = distro_version

    def custom_repo(self, **kw):
        """
        See YumDnf for format explanation.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self):
        return '/etc/zypp/repos.d/ceph.repo'

    def repo_baseurl(self):
        assert self.stable or self.version
        # BUGFIX: the version branch previously returned the identical
        # expression with self.stable; use self.version when it is set.
        if self.version:
            return '%s/rpm-%s/%s' % (args.repo_url, self.version, self.distro)
        else:
            return '%s/rpm-%s/%s' % (args.repo_url, self.stable, self.distro)

    def add_repo(self):
        """Write /etc/zypp/repos.d/ceph.repo for stable/version/dev builds."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                # NOTE(review): name/enabled/gpgcheck kwargs reconstructed --
                # confirm against upstream cephadm
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro, self.distro_version,
                                        self.branch,
                                        self.commit)

        logging.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws([self.tool, 'in', '-y'] + ls)

    def install_podman(self):
        self.install(['podman'])
def create_packager(stable=None, version=None, branch=None, commit=None):
    """Detect the host distro and return the matching Packager subclass.

    Raises Error for distros none of Apt/YumDnf/Zypper supports.
    """
    distro, distro_version, distro_codename = get_distro()
    if distro in YumDnf.DISTRO_NAMES:
        return YumDnf(stable=stable, version=version,
                      branch=branch, commit=commit,
                      distro=distro, distro_version=distro_version)
    elif distro in Apt.DISTRO_NAMES:
        return Apt(stable=stable, version=version,
                   branch=branch, commit=commit,
                   distro=distro, distro_version=distro_version,
                   distro_codename=distro_codename)
    elif distro in Zypper.DISTRO_NAMES:
        return Zypper(stable=stable, version=version,
                      branch=branch, commit=commit,
                      distro=distro, distro_version=distro_version)
    raise Error('Distro %s version %s not supported' % (distro, distro_version))
def command_add_repo():
    """Validate the release selector flags and install the ceph repo."""
    if args.version and args.release:
        raise Error('you can specify either --release or --version but not both')
    if not args.version and not args.release and not args.dev and not args.dev_commit:
        raise Error('please supply a --release, --version, --dev or --dev-commit argument')
    if args.version:
        # versions must look like x.y.z
        try:
            (x, y, z) = args.version.split('.')
        except Exception as e:
            raise Error('version must be in the form x.y.z (e.g., 15.2.0)')

    pkg = create_packager(stable=args.release,
                          version=args.version,
                          branch=args.dev,
                          commit=args.dev_commit)
    pkg.add_repo()
def command_rm_repo():
    """Remove the previously installed ceph package repo."""
    pkg = create_packager()
    pkg.rm_repo()
def command_install():
    """Install the packages listed in args.packages via the distro packager."""
    pkg = create_packager()
    pkg.install(args.packages)
4245 ##################################
4248 # type: () -> argparse.ArgumentParser
4249 parser
= argparse
.ArgumentParser(
4250 description
='Bootstrap Ceph daemons with systemd and containers.',
4251 formatter_class
=argparse
.ArgumentDefaultsHelpFormatter
)
4252 parser
.add_argument(
4254 help='container image. Can also be set via the "CEPHADM_IMAGE" '
4256 parser
.add_argument(
4258 action
='store_true',
4259 help='use docker instead of podman')
4260 parser
.add_argument(
4263 help='base directory for daemon data')
4264 parser
.add_argument(
4267 help='base directory for daemon logs')
4268 parser
.add_argument(
4270 default
=LOGROTATE_DIR
,
4271 help='location of logrotate configuration files')
4272 parser
.add_argument(
4275 help='base directory for systemd units')
4276 parser
.add_argument(
4278 action
='store_true',
4279 help='Show debug-level log messages')
4280 parser
.add_argument(
4283 default
=DEFAULT_TIMEOUT
,
4284 help='timeout in seconds')
4285 parser
.add_argument(
4288 default
=DEFAULT_RETRY
,
4289 help='max number of retries')
4290 parser
.add_argument(
4294 help='set environment variable')
4296 subparsers
= parser
.add_subparsers(help='sub-command')
4298 parser_version
= subparsers
.add_parser(
4299 'version', help='get ceph version from container')
4300 parser_version
.set_defaults(func
=command_version
)
4302 parser_pull
= subparsers
.add_parser(
4303 'pull', help='pull latest image version')
4304 parser_pull
.set_defaults(func
=command_pull
)
4306 parser_inspect_image
= subparsers
.add_parser(
4307 'inspect-image', help='inspect local container image')
4308 parser_inspect_image
.set_defaults(func
=command_inspect_image
)
4310 parser_ls
= subparsers
.add_parser(
4311 'ls', help='list daemon instances on this host')
4312 parser_ls
.set_defaults(func
=command_ls
)
4313 parser_ls
.add_argument(
4315 action
='store_true',
4316 help='Do not include daemon status')
4317 parser_ls
.add_argument(
4320 help='base directory for legacy daemon data')
4322 parser_list_networks
= subparsers
.add_parser(
4323 'list-networks', help='list IP networks')
4324 parser_list_networks
.set_defaults(func
=command_list_networks
)
4326 parser_adopt
= subparsers
.add_parser(
4327 'adopt', help='adopt daemon deployed with a different tool')
4328 parser_adopt
.set_defaults(func
=command_adopt
)
4329 parser_adopt
.add_argument(
4332 help='daemon name (type.id)')
4333 parser_adopt
.add_argument(
4336 help='deployment style (legacy, ...)')
4337 parser_adopt
.add_argument(
4340 help='cluster name')
4341 parser_adopt
.add_argument(
4344 help='base directory for legacy daemon data')
4345 parser_adopt
.add_argument(
4347 help='Additional configuration information in JSON format')
4348 parser_adopt
.add_argument(
4350 action
='store_true',
4351 help='Do not configure firewalld')
4352 parser_adopt
.add_argument(
4354 action
='store_true',
4355 help='do not pull the latest image before adopting')
4356 parser_adopt
.add_argument(
4358 action
='store_true',
4359 help="start newly adoped daemon, even if it wasn't running previously")
4361 parser_rm_daemon
= subparsers
.add_parser(
4362 'rm-daemon', help='remove daemon instance')
4363 parser_rm_daemon
.set_defaults(func
=command_rm_daemon
)
4364 parser_rm_daemon
.add_argument(
4367 action
=CustomValidation
,
4368 help='daemon name (type.id)')
4369 parser_rm_daemon
.add_argument(
4372 help='cluster FSID')
4373 parser_rm_daemon
.add_argument(
4375 action
='store_true',
4376 help='proceed, even though this may destroy valuable data')
4377 parser_rm_daemon
.add_argument(
4378 '--force-delete-data',
4379 action
='store_true',
4380 help='delete valuable daemon data instead of making a backup')
4382 parser_rm_cluster
= subparsers
.add_parser(
4383 'rm-cluster', help='remove all daemons for a cluster')
4384 parser_rm_cluster
.set_defaults(func
=command_rm_cluster
)
4385 parser_rm_cluster
.add_argument(
4388 help='cluster FSID')
4389 parser_rm_cluster
.add_argument(
4391 action
='store_true',
4392 help='proceed, even though this may destroy valuable data')
4394 parser_run
= subparsers
.add_parser(
4395 'run', help='run a ceph daemon, in a container, in the foreground')
4396 parser_run
.set_defaults(func
=command_run
)
4397 parser_run
.add_argument(
4400 help='daemon name (type.id)')
4401 parser_run
.add_argument(
4404 help='cluster FSID')
4406 parser_shell
= subparsers
.add_parser(
4407 'shell', help='run an interactive shell inside a daemon container')
4408 parser_shell
.set_defaults(func
=command_shell
)
4409 parser_shell
.add_argument(
4411 help='cluster FSID')
4412 parser_shell
.add_argument(
4414 help='daemon name (type.id)')
4415 parser_shell
.add_argument(
4417 help='ceph.conf to pass through to the container')
4418 parser_shell
.add_argument(
4420 help='ceph.keyring to pass through to the container')
4421 parser_shell
.add_argument(
4423 help='file or directory path that will be mounted in container /mnt')
4424 parser_shell
.add_argument(
4428 help='set environment variable')
4429 parser_shell
.add_argument(
4430 'command', nargs
=argparse
.REMAINDER
,
4431 help='command (optional)')
4433 parser_enter
= subparsers
.add_parser(
4434 'enter', help='run an interactive shell inside a running daemon container')
4435 parser_enter
.set_defaults(func
=command_enter
)
4436 parser_enter
.add_argument(
4438 help='cluster FSID')
4439 parser_enter
.add_argument(
4442 help='daemon name (type.id)')
4443 parser_enter
.add_argument(
4444 'command', nargs
=argparse
.REMAINDER
,
4447 parser_ceph_volume
= subparsers
.add_parser(
4448 'ceph-volume', help='run ceph-volume inside a container')
4449 parser_ceph_volume
.set_defaults(func
=command_ceph_volume
)
4450 parser_ceph_volume
.add_argument(
4452 help='cluster FSID')
4453 parser_ceph_volume
.add_argument(
4455 help='JSON file with config and (client.bootrap-osd) key')
4456 parser_ceph_volume
.add_argument(
4458 help='ceph conf file')
4459 parser_ceph_volume
.add_argument(
4461 help='ceph.keyring to pass through to the container')
4462 parser_ceph_volume
.add_argument(
4463 'command', nargs
=argparse
.REMAINDER
,
4466 parser_unit
= subparsers
.add_parser(
4467 'unit', help='operate on the daemon\'s systemd unit')
4468 parser_unit
.set_defaults(func
=command_unit
)
4469 parser_unit
.add_argument(
4471 help='systemd command (start, stop, restart, enable, disable, ...)')
4472 parser_unit
.add_argument(
4474 help='cluster FSID')
4475 parser_unit
.add_argument(
4478 help='daemon name (type.id)')
4480 parser_logs
= subparsers
.add_parser(
4481 'logs', help='print journald logs for a daemon container')
4482 parser_logs
.set_defaults(func
=command_logs
)
4483 parser_logs
.add_argument(
4485 help='cluster FSID')
4486 parser_logs
.add_argument(
4489 help='daemon name (type.id)')
4490 parser_logs
.add_argument(
4491 'command', nargs
='*',
4492 help='additional journalctl args')
4494 parser_bootstrap
= subparsers
.add_parser(
4495 'bootstrap', help='bootstrap a cluster (mon + mgr daemons)')
4496 parser_bootstrap
.set_defaults(func
=command_bootstrap
)
4497 parser_bootstrap
.add_argument(
4499 help='ceph conf file to incorporate')
4500 parser_bootstrap
.add_argument(
4503 help='mon id (default: local hostname)')
4504 parser_bootstrap
.add_argument(
4506 help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])')
4507 parser_bootstrap
.add_argument(
4510 parser_bootstrap
.add_argument(
4513 help='mgr id (default: randomly generated)')
4514 parser_bootstrap
.add_argument(
4516 help='cluster FSID')
4517 parser_bootstrap
.add_argument(
4519 default
='/etc/ceph',
4520 help='directory to write config, keyring, and pub key files')
4521 parser_bootstrap
.add_argument(
4523 help='location to write keyring file with new cluster admin and mon keys')
4524 parser_bootstrap
.add_argument(
4526 help='location to write conf file to connect to new cluster')
4527 parser_bootstrap
.add_argument(
4528 '--output-pub-ssh-key',
4529 help='location to write the cluster\'s public SSH key')
4530 parser_bootstrap
.add_argument(
4532 action
='store_true',
4533 help='skip setup of ssh key on local host')
4534 parser_bootstrap
.add_argument(
4535 '--initial-dashboard-user',
4537 help='Initial user for the dashboard')
4538 parser_bootstrap
.add_argument(
4539 '--initial-dashboard-password',
4540 help='Initial password for the initial dashboard user')
4542 parser_bootstrap
.add_argument(
4544 type=argparse
.FileType('r'),
4545 help='Dashboard key')
4546 parser_bootstrap
.add_argument(
4548 type=argparse
.FileType('r'),
4549 help='Dashboard certificate')
4551 parser_bootstrap
.add_argument(
4553 type=argparse
.FileType('r'),
4555 parser_bootstrap
.add_argument(
4556 '--ssh-private-key',
4557 type=argparse
.FileType('r'),
4558 help='SSH private key')
4559 parser_bootstrap
.add_argument(
4561 type=argparse
.FileType('r'),
4562 help='SSH public key')
4564 parser_bootstrap
.add_argument(
4565 '--skip-mon-network',
4566 action
='store_true',
4567 help='set mon public_network based on bootstrap mon ip')
4568 parser_bootstrap
.add_argument(
4570 action
='store_true',
4571 help='do not enable the Ceph Dashboard')
4572 parser_bootstrap
.add_argument(
4573 '--dashboard-password-noupdate',
4574 action
='store_true',
4575 help='stop forced dashboard password change')
4576 parser_bootstrap
.add_argument(
4577 '--no-minimize-config',
4578 action
='store_true',
4579 help='do not assimilate and minimize the config file')
4580 parser_bootstrap
.add_argument(
4581 '--skip-ping-check',
4582 action
='store_true',
4583 help='do not verify that mon IP is pingable')
4584 parser_bootstrap
.add_argument(
4586 action
='store_true',
4587 help='do not pull the latest image before bootstrapping')
4588 parser_bootstrap
.add_argument(
4590 action
='store_true',
4591 help='Do not configure firewalld')
4592 parser_bootstrap
.add_argument(
4593 '--allow-overwrite',
4594 action
='store_true',
4595 help='allow overwrite of existing --output-* config/keyring/ssh files')
4596 parser_bootstrap
.add_argument(
4597 '--allow-fqdn-hostname',
4598 action
='store_true',
4599 help='allow hostname that is fully-qualified (contains ".")')
4600 parser_bootstrap
.add_argument(
4601 '--skip-prepare-host',
4602 action
='store_true',
4603 help='Do not prepare host')
4604 parser_bootstrap
.add_argument(
4605 '--orphan-initial-daemons',
4606 action
='store_true',
4607 help='Do not create initial mon, mgr, and crash service specs')
4608 parser_bootstrap
.add_argument(
4609 '--skip-monitoring-stack',
4610 action
='store_true',
4611 help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
4612 parser_bootstrap
.add_argument(
4614 help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
4617 parser_bootstrap
.add_argument(
4618 '--shared_ceph_folder',
4619 metavar
='CEPH_SOURCE_FOLDER',
4620 help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
4622 parser_deploy
= subparsers
.add_parser(
4623 'deploy', help='deploy a daemon')
4624 parser_deploy
.set_defaults(func
=command_deploy
)
4625 parser_deploy
.add_argument(
4628 action
=CustomValidation
,
4629 help='daemon name (type.id)')
4630 parser_deploy
.add_argument(
4633 help='cluster FSID')
4634 parser_deploy
.add_argument(
4636 help='config file for new daemon')
4637 parser_deploy
.add_argument(
4639 help='Additional configuration information in JSON format')
4640 parser_deploy
.add_argument(
4642 help='keyring for new daemon')
4643 parser_deploy
.add_argument(
4645 help='key for new daemon')
4646 parser_deploy
.add_argument(
4648 help='OSD uuid, if creating an OSD container')
4649 parser_deploy
.add_argument(
4651 action
='store_true',
4652 help='Do not configure firewalld')
4653 parser_deploy
.add_argument(
4655 action
='store_true',
4656 help='Reconfigure a previously deployed daemon')
4657 parser_deploy
.add_argument(
4659 action
='store_true',
4660 help='Allow SYS_PTRACE on daemon container')
4662 parser_check_host
= subparsers
.add_parser(
4663 'check-host', help='check host configuration')
4664 parser_check_host
.set_defaults(func
=command_check_host
)
4665 parser_check_host
.add_argument(
4666 '--expect-hostname',
4667 help='Check that hostname matches an expected value')
4669 parser_prepare_host
= subparsers
.add_parser(
4670 'prepare-host', help='prepare a host for cephadm use')
4671 parser_prepare_host
.set_defaults(func
=command_prepare_host
)
4672 parser_prepare_host
.add_argument(
4673 '--expect-hostname',
4674 help='Set hostname')
4676 parser_add_repo
= subparsers
.add_parser(
4677 'add-repo', help='configure package repository')
4678 parser_add_repo
.set_defaults(func
=command_add_repo
)
4679 parser_add_repo
.add_argument(
4681 help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE
))
4682 parser_add_repo
.add_argument(
4684 help='use specific upstream version (x.y.z)')
4685 parser_add_repo
.add_argument(
4687 help='use specified bleeding edge build from git branch or tag')
4688 parser_add_repo
.add_argument(
4690 help='use specified bleeding edge build from git commit')
4691 parser_add_repo
.add_argument(
4693 help='specify alternative GPG key location')
4694 parser_add_repo
.add_argument(
4696 default
='https://download.ceph.com',
4697 help='specify alternative repo location')
4700 parser_rm_repo
= subparsers
.add_parser(
4701 'rm-repo', help='remove package repository configuration')
4702 parser_rm_repo
.set_defaults(func
=command_rm_repo
)
4704 parser_install
= subparsers
.add_parser(
4705 'install', help='install ceph package(s)')
4706 parser_install
.set_defaults(func
=command_install
)
4707 parser_install
.add_argument(
4708 'packages', nargs
='*',
4709 default
=['cephadm'],
def _parse_args(av):
    # type: (List[str]) -> argparse.Namespace
    """Parse the cephadm command line.

    :param av: argument vector to parse (normally ``sys.argv[1:]``,
               or an injected list when the script is piped to python3).
    :return: the parsed ``argparse.Namespace``.
    """
    parser = _get_parser()
    args = parser.parse_args(av)
    # NOTE(review): the extraction lost the body of this branch; the
    # visible condition checks for a literal leading "--" in the wrapped
    # sub-command, which argparse leaves in place when it is used as the
    # separator between cephadm's own options and the wrapped command's
    # options.  Strip it so it is not forwarded as a literal token.
    if 'command' in args and args.command and args.command[0] == "--":
        args.command.pop(0)
    return args
4721 if __name__
== "__main__":
4722 # allow argv to be injected
4724 av
= injected_argv
# type: ignore
4727 args
= _parse_args(av
)
4730 logging
.basicConfig(level
=logging
.DEBUG
)
4732 logging
.basicConfig(level
=logging
.INFO
)
4733 logger
= logging
.getLogger('cephadm')
4736 if os
.geteuid() != 0:
4737 sys
.stderr
.write('ERROR: cephadm should be run as root\n')
4740 if 'func' not in args
:
4741 sys
.stderr
.write('No command specified; pass -h or --help for usage\n')
4745 if args
.func
!= command_check_host
:
4747 container_path
= find_program('docker')
4749 for i
in CONTAINER_PREFERENCE
:
4751 container_path
= find_program(i
)
4753 except Exception as e
:
4754 logger
.debug('Could not locate %s: %s' % (i
, e
))
4755 if not container_path
and args
.func
!= command_prepare_host\
4756 and args
.func
!= command_add_repo
:
4757 sys
.stderr
.write('Unable to locate any of %s\n' % CONTAINER_PREFERENCE
)
4765 sys
.stderr
.write('ERROR: %s\n' % e
)