]> git.proxmox.com Git - ceph.git/blame - ceph/src/cephadm/cephadm
import 15.2.4
[ceph.git] / ceph / src / cephadm / cephadm
CommitLineData
9f95a23c
TL
1#!/usr/bin/python3
2
3DEFAULT_IMAGE='docker.io/ceph/ceph:v15'
1911f103
TL
4DEFAULT_IMAGE_IS_MASTER=False
5LATEST_STABLE_RELEASE='octopus'
9f95a23c
TL
6DATA_DIR='/var/lib/ceph'
7LOG_DIR='/var/log/ceph'
8LOCK_DIR='/run/cephadm'
9LOGROTATE_DIR='/etc/logrotate.d'
10UNIT_DIR='/etc/systemd/system'
11LOG_DIR_MODE=0o770
12DATA_DIR_MODE=0o700
13CONTAINER_PREFERENCE = ['podman', 'docker'] # prefer podman to docker
14CUSTOM_PS1=r'[ceph: \u@\h \W]\$ '
15DEFAULT_TIMEOUT=None # in seconds
16DEFAULT_RETRY=10
17SHELL_DEFAULT_CONF='/etc/ceph/ceph.conf'
18SHELL_DEFAULT_KEYRING='/etc/ceph/ceph.client.admin.keyring'
19
20"""
21You can invoke cephadm in two ways:
22
231. The normal way, at the command line.
24
252. By piping the script to the python3 binary. In this latter case, you should
26 prepend one or more lines to the beginning of the script.
27
28 For arguments,
29
30 injected_argv = [...]
31
32 e.g.,
33
34 injected_argv = ['ls']
35
36 For reading stdin from the '--config-json -' argument,
37
38 injected_stdin = '...'
39"""
40
41import argparse
42import datetime
43import fcntl
44import json
45import logging
46import os
47import platform
48import random
49import re
50import select
51import shutil
52import socket
53import string
54import subprocess
55import sys
56import tempfile
57import time
58import errno
59try:
60 from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, Callable
61except ImportError:
62 pass
63import uuid
64
65from functools import wraps
66from glob import glob
67from threading import Thread
68
69if sys.version_info >= (3, 0):
70 from io import StringIO
71else:
72 from StringIO import StringIO
73
74if sys.version_info >= (3, 2):
75 from configparser import ConfigParser
76else:
77 from ConfigParser import SafeConfigParser
78
79if sys.version_info >= (3, 0):
80 from urllib.request import urlopen
81 from urllib.error import HTTPError
82else:
83 from urllib2 import urlopen, HTTPError
84
85container_path = ''
86cached_stdin = None
87
88DATEFMT = '%Y-%m-%dT%H:%M:%S.%f'
89
e306af50
TL
90
class termcolor:
    # ANSI escape codes used to colorize warning output on the terminal.
    yellow = '\033[93m'
    red = '\033[31m'
    end = '\033[0m'  # reset back to default attributes
95
9f95a23c
TL
class Error(Exception):
    """Base class for all errors raised by cephadm."""
    pass
98
class TimeoutExpired(Error):
    """Raised when an external command exceeds its timeout (see call_timeout)."""
    pass
101
102##################################
103
class Ceph(object):
    # Daemon types deployed from the main ceph container image
    # (the monitoring stack and gateways below use their own images).
    daemons = ('mon', 'mgr', 'mds', 'osd', 'rgw', 'rbd-mirror',
               'crash')
107
108##################################
109
class Monitoring(object):
    """Define the configs for the monitoring containers"""

    # Daemon type -> list of TCP ports that daemon listens on.
    port_map = {
        "prometheus": [9095],  # Avoid default 9090, due to conflict with cockpit UI
        "node-exporter": [9100],
        "grafana": [3000],
        "alertmanager": [9093, 9094],
    }

    # Daemon type -> container image, resource hints, daemon arguments,
    # and the config files/args expected in its config-json payload.
    components = {
        "prometheus": {
            "image": "prom/prometheus:latest",
            "cpus": '2',
            "memory": '4GB',
            "args": [
                "--config.file=/etc/prometheus/prometheus.yml",
                "--storage.tsdb.path=/prometheus",
                "--web.listen-address=:{}".format(port_map['prometheus'][0]),
            ],
            "config-json-files": [
                "prometheus.yml",
            ],
        },
        "node-exporter": {
            "image": "prom/node-exporter",
            "cpus": "1",
            "memory": "1GB",
            "args": [
                "--no-collector.timex",
            ],
        },
        "grafana": {
            "image": "ceph/ceph-grafana:latest",
            "cpus": "2",
            "memory": "4GB",
            "args": [],
            "config-json-files": [
                "grafana.ini",
                "provisioning/datasources/ceph-dashboard.yml",
                "certs/cert_file",
                "certs/cert_key",
            ],
        },
        "alertmanager": {
            "image": "prom/alertmanager",
            "cpus": "2",
            "memory": "2GB",
            "args": [],
            "config-json-files": [
                "alertmanager.yml",
            ],
            "config-json-args": [
                "peers",
            ],
        },
    }  # type: ignore
167
168##################################
169
class NFSGanesha(object):
    """Defines a NFS-Ganesha container"""

    daemon_type = 'nfs'
    entrypoint = '/usr/bin/ganesha.nfsd'
    # always passed to the daemon ('-L STDERR' sends the log to stderr)
    daemon_args = ['-F', '-L', 'STDERR']

    # files that must be present in the config-json 'files' dict
    required_files = ['ganesha.conf']

    # service name -> TCP port the daemon requires
    port_map = {
        "nfs" : 2049,
    }

    def __init__(self,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (str, Union[int, str], Dict, str) -> None
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # helper: fetch a key from config_json, optionally requiring it
        def json_get(key, default=None, require=False):
            if require and not key in config_json.keys():
                raise Error('{} missing from config-json'.format(key))
            return config_json.get(key, default)

        # config-json options
        self.pool = json_get('pool', require=True)
        self.namespace = json_get('namespace')
        self.userid = json_get('userid')
        self.extra_args = json_get('extra_args', [])
        self.files = json_get('files', {})

        # validate the supplied args
        self.validate()

    @classmethod
    def init(cls, fsid, daemon_id):
        # type: (str, Union[int, str]) -> NFSGanesha
        """Construct from the global command-line args (config-json, image)."""
        return cls(fsid, daemon_id, get_parm(args.config_json), args.image)

    @staticmethod
    def port_in_use():
        # type: () -> None
        """Raise Error if any port ganesha needs is already bound locally."""
        for (srv, port) in NFSGanesha.port_map.items():
            if port_in_use(port):
                msg = 'TCP port {} required for {} is already in use'.format(port, srv)
                raise Error(msg)

    @staticmethod
    def get_container_mounts(data_dir):
        # type: (str) -> Dict[str, str]
        """Map host paths under *data_dir* to in-container mount points."""
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z'
        return mounts

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        """Environment variables to pass to the ganesha container."""
        envs = [
            'CEPH_CONF=%s' % ('/etc/ceph/ceph.conf')
        ]
        return envs

    @staticmethod
    def get_version(container_id):
        # type: (str) -> Optional[str]
        """Return the ganesha version reported inside the container, or None."""
        version = None
        out, err, code = call(
            [container_path, 'exec', container_id,
             NFSGanesha.entrypoint, '-v'])
        if code == 0:
            match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
            if match:
                version = match.group(1)
        return version

    def validate(self):
        # type: () -> None
        """Raise Error on bad fsid/daemon_id/image or missing required files."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        """Return '<type>.<id>', e.g. 'nfs.a'."""
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        """Return the container name, optionally suffixed with *desc*."""
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def get_daemon_args(self):
        # type: () -> List[str]
        """Fixed daemon args plus any extra_args from the config-json."""
        return self.daemon_args + self.extra_args

    def get_file_content(self, fname):
        # type: (str) -> str
        """Normalize the json file content into a string"""
        content = self.files.get(fname)
        if isinstance(content, list):
            content = '\n'.join(content)
        return content

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ganesha config...')

        # create the ganesha conf dir
        config_dir = os.path.join(data_dir, 'etc/ganesha')
        makedirs(config_dir, uid, gid, 0o755)

        # populate files from the config-json
        for fname in self.files:
            config_file = os.path.join(config_dir, fname)
            config_content = self.get_file_content(fname)
            logger.info('Write file: %s' % (config_file))
            with open(config_file, 'w') as f:
                os.fchown(f.fileno(), uid, gid)
                # owner-only access for the daemon's config files
                os.fchmod(f.fileno(), 0o600)
                f.write(config_content)

    def get_rados_grace_container(self, action):
        # type: (str) -> CephContainer
        """Container for a ganesha action on the grace db"""
        entrypoint = '/usr/bin/ganesha-rados-grace'

        assert self.pool
        args=['--pool', self.pool]
        if self.namespace:
            args += ['--ns', self.namespace]
        if self.userid:
            args += ['--userid', self.userid]
        args += [action, self.get_daemon_name()]

        data_dir = get_data_dir(self.fsid, self.daemon_type, self.daemon_id)
        volume_mounts = self.get_container_mounts(data_dir)
        envs = self.get_container_envs()

        logger.info('Creating RADOS grace for action: %s' % (action))
        c = CephContainer(
            image=self.image,
            entrypoint=entrypoint,
            args=args,
            volume_mounts=volume_mounts,
            cname=self.get_container_name(desc='grace-%s' % (action)),
            envs=envs
        )
        return c
338
339##################################
340
1911f103
TL
class CephIscsi(object):
    """Defines a Ceph-Iscsi container"""

    daemon_type = 'iscsi'
    entrypoint = '/usr/bin/rbd-target-api'

    # files that must be present in the config-json 'files' dict
    required_files = ['iscsi-gateway.cfg']

    def __init__(self,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (str, Union[int, str], Dict, str) -> None
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # helper: fetch a key from config_json, optionally requiring it
        def json_get(key, default=None, require=False):
            if require and not key in config_json.keys():
                raise Error('{} missing from config-json'.format(key))
            return config_json.get(key, default)

        # config-json options
        self.files = json_get('files', {})

        # validate the supplied args
        self.validate()

    @classmethod
    def init(cls, fsid, daemon_id):
        # type: (str, Union[int, str]) -> CephIscsi
        """Construct from the global command-line args (config-json, image)."""
        return cls(fsid, daemon_id, get_parm(args.config_json), args.image)

    @staticmethod
    def get_container_mounts(data_dir, log_dir):
        # type: (str, str) -> Dict[str, str]
        """Map host paths under *data_dir*/*log_dir* to container mounts."""
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
        mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config:z'
        mounts[log_dir] = '/var/log/rbd-target-api:z'
        mounts['/dev/log'] = '/dev/log:z'
        return mounts

    @staticmethod
    def get_version(container_id):
        # type: (str) -> Optional[str]
        """Return the ceph_iscsi package version inside the container, or None."""
        version = None
        out, err, code = call(
            [container_path, 'exec', container_id,
             '/usr/bin/python3', '-c', "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"])
        if code == 0:
            version = out
        return version

    def validate(self):
        # type: () -> None
        """Raise Error on bad fsid/daemon_id/image or missing required files."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        """Return '<type>.<id>', e.g. 'iscsi.a'."""
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        """Return the container name, optionally suffixed with *desc*."""
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def get_file_content(self, fname):
        # type: (str) -> str
        """Normalize the json file content into a string"""
        content = self.files.get(fname)
        if isinstance(content, list):
            content = '\n'.join(content)
        return content

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ceph-iscsi config...')
        configfs_dir = os.path.join(data_dir, 'configfs')
        makedirs(configfs_dir, uid, gid, 0o755)

        # populate files from the config-json
        for fname in self.files:
            config_file = os.path.join(data_dir, fname)
            config_content = self.get_file_content(fname)
            logger.info('Write file: %s' % (config_file))
            with open(config_file, 'w') as f:
                os.fchown(f.fileno(), uid, gid)
                # owner-only access for the daemon's config files
                os.fchmod(f.fileno(), 0o600)
                f.write(config_content)

    @staticmethod
    def configfs_mount_umount(data_dir, mount=True):
        # type: (str, bool) -> List[str]
        """Shell command (as a whitespace-split token list) that mounts or
        unmounts the configfs under *data_dir* only if needed.

        NOTE(review): the command is returned via cmd.split(), which is only
        safe because mount_path contains no spaces; presumably the caller
        re-joins/executes this through a shell -- verify at the call site.
        """
        mount_path = os.path.join(data_dir, 'configfs')
        if mount:
            cmd = "if ! grep -qs {0} /proc/mounts; then " \
                  "mount -t configfs none {0}; fi".format(mount_path)
        else:
            cmd = "if grep -qs {0} /proc/mounts; then " \
                  "umount {0}; fi".format(mount_path)
        return cmd.split()
463
464##################################
465
def get_supported_daemons():
    # type: () -> List[str]
    """Return every daemon type this script knows how to deploy."""
    supported = list(Ceph.daemons) + list(Monitoring.components)
    supported += [NFSGanesha.daemon_type, CephIscsi.daemon_type]
    # daemon type names must be globally unique across all groups
    assert len(supported) == len(set(supported))
    return supported
474
475##################################
476
def attempt_bind(s, address, port):
    # type: (socket.socket, str, int) -> None
    """Try to bind socket *s* to (address, port); always close the socket.

    :raises OSError: if the port is already in use, or on any other bind
        failure (e.g. EACCES for a privileged port).  EADDRNOTAVAIL is
        deliberately ignored, since the address may simply not be
        configured on this host.
    """
    try:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((address, port))
    except (socket.error, OSError) as e:  # py2 and py3
        msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
        logger.warning(msg)
        if e.errno == errno.EADDRINUSE:
            raise OSError(msg)
        elif e.errno == errno.EADDRNOTAVAIL:
            pass
        else:
            # Previously any other errno was silently swallowed, which made
            # port_in_use()/check_ip_port() report success on e.g. permission
            # errors.  Propagate the failure instead.
            raise
    finally:
        # the socket is single-use: close it whether or not the bind worked
        s.close()
491
def port_in_use(port_num):
    # type: (int) -> bool
    """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
    logger.info('Verifying port %d ...' % port_num)
    try:
        # probe both address families; attempt_bind() closes each socket
        for family, wildcard in ((socket.AF_INET, '0.0.0.0'),
                                 (socket.AF_INET6, '::')):
            probe = socket.socket(family, socket.SOCK_STREAM)
            attempt_bind(probe, wildcard, port_num)
    except OSError:
        return True
    return False
506
def check_ip_port(ip, port):
    # type: (str, int) -> None
    """Verify that *ip*:*port* can be bound locally; raise Error if not.

    Does nothing when --skip-ping-check was given (args.skip_ping_check).
    """
    if not args.skip_ping_check:
        logger.info('Verifying IP %s port %d ...' % (ip, port))
        if ip.startswith('[') or '::' in ip:
            s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
            # strip brackets from an IPv6 literal like '[::1]'
            if ip.startswith('[') and ip.endswith(']'):
                ip = ip[1:-1]
        else:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            attempt_bind(s, ip, port)
        except OSError as e:
            raise Error(e)
521
522##################################
523
524# this is an abbreviated version of
525# https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py
526# that drops all of the compatibility (this is Unix/Linux only).
527
528try:
529 TimeoutError
530except NameError:
531 TimeoutError = OSError
532
class Timeout(TimeoutError):
    """
    Raised when the lock could not be acquired in *timeout*
    seconds.
    """

    def __init__(self, lock_file):
        # Path of the lock file that could not be acquired.
        self.lock_file = lock_file

    def __str__(self):
        return "The file lock '{}' could not be acquired.".format(
            self.lock_file)
550
551
552class _Acquire_ReturnProxy(object):
553 def __init__(self, lock):
554 self.lock = lock
555 return None
556
557 def __enter__(self):
558 return self.lock
559
560 def __exit__(self, exc_type, exc_value, traceback):
561 self.lock.release()
562 return None
563
564
class FileLock(object):
    """Advisory flock(2)-based lock file under LOCK_DIR (Unix/Linux only).

    Re-entrant: nested acquire() calls increment a counter and the lock is
    only dropped when the counter returns to zero (or release(force=True)).
    """

    def __init__(self, name, timeout = -1):
        if not os.path.exists(LOCK_DIR):
            os.mkdir(LOCK_DIR, 0o700)
        self._lock_file = os.path.join(LOCK_DIR, name + '.lock')

        # The file descriptor for the *_lock_file* as it is returned by the
        # os.open() function.
        # This file lock is only NOT None, if the object currently holds the
        # lock.
        self._lock_file_fd = None
        # default timeout (seconds) used by acquire() when none is given;
        # negative means wait forever
        self.timeout = timeout
        # The lock counter is used for implementing the nested locking
        # mechanism. Whenever the lock is acquired, the counter is increased and
        # the lock is only released, when this value is 0 again.
        self._lock_counter = 0
        return None

    @property
    def is_locked(self):
        # True while this object currently holds the flock
        return self._lock_file_fd is not None

    def acquire(self, timeout=None, poll_intervall=0.05):
        """
        Acquires the file lock or fails with a :exc:`Timeout` error.
        .. code-block:: python
            # You can use this method in the context manager (recommended)
            with lock.acquire():
                pass
            # Or use an equivalent try-finally construct:
            lock.acquire()
            try:
                pass
            finally:
                lock.release()
        :arg float timeout:
            The maximum time waited for the file lock.
            If ``timeout < 0``, there is no timeout and this method will
            block until the lock could be acquired.
            If ``timeout`` is None, the default :attr:`~timeout` is used.
        :arg float poll_intervall:
            We check once in *poll_intervall* seconds if we can acquire the
            file lock.
        :raises Timeout:
            if the lock could not be acquired in *timeout* seconds.
        .. versionchanged:: 2.0.0
            This method returns now a *proxy* object instead of *self*,
            so that it can be used in a with statement without side effects.
        """
        # Use the default timeout, if no timeout is provided.
        if timeout is None:
            timeout = self.timeout

        # Increment the number right at the beginning.
        # We can still undo it, if something fails.
        self._lock_counter += 1

        lock_id = id(self)
        lock_filename = self._lock_file
        start_time = time.time()
        try:
            while True:
                if not self.is_locked:
                    logger.debug('Acquiring lock %s on %s', lock_id,
                                 lock_filename)
                    self._acquire()

                if self.is_locked:
                    logger.debug('Lock %s acquired on %s', lock_id,
                                 lock_filename)
                    break
                elif timeout >= 0 and time.time() - start_time > timeout:
                    logger.warning('Timeout acquiring lock %s on %s', lock_id,
                                   lock_filename)
                    raise Timeout(self._lock_file)
                else:
                    logger.debug(
                        'Lock %s not acquired on %s, waiting %s seconds ...',
                        lock_id, lock_filename, poll_intervall
                    )
                    time.sleep(poll_intervall)
        except:
            # Something did go wrong, so decrement the counter.
            self._lock_counter = max(0, self._lock_counter - 1)

            raise
        return _Acquire_ReturnProxy(lock = self)

    def release(self, force = False):
        """
        Releases the file lock.
        Please note, that the lock is only completly released, if the lock
        counter is 0.
        Also note, that the lock file itself is not automatically deleted.
        :arg bool force:
            If true, the lock counter is ignored and the lock is released in
            every case.
        """
        if self.is_locked:
            self._lock_counter -= 1

            if self._lock_counter == 0 or force:
                lock_id = id(self)
                lock_filename = self._lock_file

                logger.debug('Releasing lock %s on %s', lock_id, lock_filename)
                self._release()
                self._lock_counter = 0
                logger.debug('Lock %s released on %s', lock_id, lock_filename)

        return None

    def __enter__(self):
        self.acquire()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.release()
        return None

    def __del__(self):
        # best-effort safety net: drop the flock when the object is collected
        self.release(force = True)
        return None

    def _acquire(self):
        # Open (creating/truncating) the lock file and try a non-blocking
        # exclusive flock; on contention we close the fd and stay unlocked.
        open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
        fd = os.open(self._lock_file, open_mode)

        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except (IOError, OSError):
            os.close(fd)
        else:
            self._lock_file_fd = fd
        return None

    def _release(self):
        # Do not remove the lockfile:
        #
        # https://github.com/benediktschmitt/py-filelock/issues/31
        # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
        fd = self._lock_file_fd
        self._lock_file_fd = None
        fcntl.flock(fd, fcntl.LOCK_UN)
        os.close(fd)
        return None
712
713
714##################################
715# Popen wrappers, lifted from ceph-volume
716
def call(command,  # type: List[str]
         desc=None,  # type: Optional[str]
         verbose=False,  # type: bool
         verbose_on_failure=True,  # type: bool
         timeout=DEFAULT_TIMEOUT,  # type: Optional[int]
         **kwargs):
    """
    Wrap subprocess.Popen to

    - log stdout/stderr to a logger,
    - decode utf-8
    - cleanly return out, err, returncode

    If verbose=True, log at info (instead of debug) level.

    :param verbose_on_failure: On a non-zero exit status, it will forcefully set
                               logging ON for the terminal
    :param timeout: timeout in seconds
    """
    if not desc:
        desc = command[0]
    timeout = timeout or args.timeout

    logger.debug("Running command: %s" % ' '.join(command))
    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        close_fds=True,
        **kwargs
    )
    # get current p.stdout flags, add O_NONBLOCK so os.read() below never
    # blocks when a pipe is empty
    assert process.stdout is not None
    assert process.stderr is not None
    stdout_flags = fcntl.fcntl(process.stdout, fcntl.F_GETFL)
    stderr_flags = fcntl.fcntl(process.stderr, fcntl.F_GETFL)
    fcntl.fcntl(process.stdout, fcntl.F_SETFL, stdout_flags | os.O_NONBLOCK)
    fcntl.fcntl(process.stderr, fcntl.F_SETFL, stderr_flags | os.O_NONBLOCK)

    out = ''
    err = ''
    reads = None
    stop = False
    out_buffer = ''  # partial line (no newline yet)
    err_buffer = ''  # partial line (no newline yet)
    start_time = time.time()
    end_time = None
    if timeout:
        end_time = start_time + timeout
    # main loop: multiplex stdout/stderr with select(), streaming each
    # complete line to the logger as it arrives
    while not stop:
        if end_time and (time.time() >= end_time):
            logger.info(desc + ':timeout after %s seconds' % timeout)
            stop = True
            process.kill()
        if reads and process.poll() is not None:
            # we want to stop, but first read off anything remaining
            # on stdout/stderr
            stop = True
        else:
            reads, _, _ = select.select(
                [process.stdout.fileno(), process.stderr.fileno()],
                [], [], timeout
            )
        for fd in reads:
            try:
                message_b = os.read(fd, 1024)
                if isinstance(message_b, bytes):
                    message = message_b.decode('utf-8')
                if isinstance(message_b, str):
                    message = message_b
                if stop and message:
                    # process has terminated, but have more to read still, so not stopping yet
                    # (os.read returns '' when it encounters EOF)
                    stop = False
                if not message:
                    continue
                if fd == process.stdout.fileno():
                    out += message
                    # log only complete lines; keep the trailing partial line
                    message = out_buffer + message
                    lines = message.split('\n')
                    out_buffer = lines.pop()
                    for line in lines:
                        if verbose:
                            logger.info(desc + ':stdout ' + line)
                        else:
                            logger.debug(desc + ':stdout ' + line)
                elif fd == process.stderr.fileno():
                    err += message
                    message = err_buffer + message
                    lines = message.split('\n')
                    err_buffer = lines.pop()
                    for line in lines:
                        if verbose:
                            logger.info(desc + ':stderr ' + line)
                        else:
                            logger.debug(desc + ':stderr ' + line)
                else:
                    assert False
            except (IOError, OSError):
                # non-blocking read may fail spuriously (e.g. EAGAIN); retry
                # on the next select() round
                pass

    returncode = process.wait()

    # flush any unterminated final lines to the logger
    if out_buffer != '':
        if verbose:
            logger.info(desc + ':stdout ' + out_buffer)
        else:
            logger.debug(desc + ':stdout ' + out_buffer)
    if err_buffer != '':
        if verbose:
            logger.info(desc + ':stderr ' + err_buffer)
        else:
            logger.debug(desc + ':stderr ' + err_buffer)

    if returncode != 0 and verbose_on_failure and not verbose:
        # dump stdout + stderr
        logger.info('Non-zero exit code %d from %s' % (returncode, ' '.join(command)))
        for line in out.splitlines():
            logger.info(desc + ':stdout ' + line)
        for line in err.splitlines():
            logger.info(desc + ':stderr ' + line)

    return out, err, returncode
840
841
def call_throws(command, **kwargs):
    # type: (List[str], Any) -> Tuple[str, str, int]
    """Run *command* via call(); raise RuntimeError on a non-zero exit."""
    out, err, ret = call(command, **kwargs)
    if ret != 0:
        raise RuntimeError('Failed command: %s' % ' '.join(command))
    return out, err, ret
848
849
def call_timeout(command, timeout):
    # type: (List[str], int) -> int
    """Run *command* with a hard *timeout* (seconds) and return its exit code.

    :raises TimeoutExpired: if the command does not finish in time.
    """
    logger.debug('Running command (timeout=%s): %s'
                 % (timeout, ' '.join(command)))

    def raise_timeout(command, timeout):
        # type: (List[str], int) -> NoReturn
        msg = 'Command \'%s\' timed out after %s seconds' % (command, timeout)
        logger.debug(msg)
        raise TimeoutExpired(msg)

    if sys.version_info >= (3, 3):
        # py3.3+: subprocess.call supports timeout natively
        try:
            return subprocess.call(command, timeout=timeout)
        except subprocess.TimeoutExpired:
            raise_timeout(command, timeout)

    # py2 subprocess has no timeout arg: emulate it with a watcher thread
    proc = subprocess.Popen(command)
    waiter = Thread(target=proc.wait)
    waiter.start()
    waiter.join(timeout)
    if waiter.is_alive():
        proc.kill()
        waiter.join()
        raise_timeout(command, timeout)
    return proc.returncode
888
889##################################
890
def is_available(what, func):
    # type: (str, Callable[[], bool]) -> None
    """
    Wait for a service to become available

    :param what: the name of the service
    :param func: the callable object that determines availability
    :raises Error: if still unavailable after args.retry attempts
    """
    retry = args.retry
    logger.info('Waiting for %s...' % (what))
    num = 1
    while True:
        if func():
            logger.info('%s is available'
                        % (what))
            break
        elif num > retry:
            raise Error('%s not available after %s tries'
                        % (what, retry))

        logger.info('%s not available, waiting (%s/%s)...'
                    % (what, num, retry))

        num += 1
        # poll once per second
        time.sleep(1)
917
def read_config(fn):
    # type: (Optional[str]) -> ConfigParser
    """Parse an ini-style config file; return an empty parser if fn is None.

    Whitespace preceding option names is stripped up front, because py2's
    ConfigParser rejects it (e.g. '\n foo = bar\n').  Yeesh!
    """
    if sys.version_info >= (3, 2):
        parser = ConfigParser()
    else:
        parser = SafeConfigParser()

    if not fn:
        return parser

    with open(fn, 'r') as f:
        nice_conf = re.sub(r'\n(\s)+', r'\n', f.read())
    buf = StringIO(nice_conf)
    if sys.version_info >= (3, 2):
        parser.read_file(buf)
    else:
        parser.readfp(buf)
    return parser
939
def pathify(p):
    # type: (str) -> str
    """Expand a leading '~' and return the absolute form of path *p*."""
    return os.path.abspath(os.path.expanduser(p))
9f95a23c
TL
944
def get_file_timestamp(fn):
    # type: (str) -> Optional[str]
    """Return fn's mtime as a UTC DATEFMT string, or None if unreadable."""
    try:
        mtime = os.path.getmtime(fn)
        return datetime.datetime.fromtimestamp(
            mtime, tz=datetime.timezone.utc
        ).strftime(DATEFMT)
    except Exception:
        # missing/inaccessible file (or bad timestamp): report no timestamp
        return None
954
def try_convert_datetime(s):
    # type: (str) -> Optional[str]
    """Best-effort: normalize a container-runtime timestamp string to a UTC
    DATEFMT string; return None if no known format matches."""
    # This is super irritating because
    # 1) podman and docker use different formats
    # 2) python's strptime can't parse either one
    #
    # I've seen:
    #  docker 18.09.7:  2020-03-03T09:21:43.636153304Z
    #  podman 1.7.0:    2020-03-03T15:52:30.136257504-06:00
    #                   2020-03-03 15:52:30.136257504 -0600 CST
    # (In the podman case, there is a different string format for
    # 'inspect' and 'inspect --format {{.Created}}'!!)

    # In *all* cases, the 9 digit second precision is too much for
    # python's strptime.  Shorten it to 6 digits.
    p = re.compile(r'(\.[\d]{6})[\d]*')
    s = p.sub(r'\1', s)

    # replace trailling Z with -0000, since (on python 3.6.8) it won't parse
    if s and s[-1] == 'Z':
        s = s[:-1] + '-0000'

    # cut off the redundnat 'CST' part that strptime can't parse, if
    # present.
    v = s.split(' ')
    s = ' '.join(v[0:3])

    # try parsing with several format strings
    fmts = [
        '%Y-%m-%dT%H:%M:%S.%f%z',
        '%Y-%m-%d %H:%M:%S.%f %z',
    ]
    for f in fmts:
        try:
            # return timestamp normalized to UTC, rendered as DATEFMT.
            return datetime.datetime.strptime(s, f).astimezone(tz=datetime.timezone.utc).strftime(DATEFMT)
        except ValueError:
            pass
    return None
994
def get_podman_version():
    # type: () -> Tuple[int, ...]
    """Return the installed podman version as a tuple of ints.

    :raises ValueError: if the selected container engine is not podman.
    """
    if 'podman' not in container_path:
        raise ValueError('not using podman')
    version_out, _, _ = call_throws([container_path, '--version'])
    return _parse_podman_version(version_out)
1001
1002def _parse_podman_version(out):
1003 # type: (str) -> Tuple[int, ...]
1004 _, _, version_str = out.strip().split()
1005
1006 def to_int(val, org_e=None):
1007 if not val and org_e:
1008 raise org_e
1009 try:
1010 return int(val)
1011 except ValueError as e:
1012 return to_int(val[0:-1], org_e or e)
1013
1014 return tuple(map(to_int, version_str.split('.')))
1015
1016
def get_hostname():
    # type: () -> str
    """Return this machine's (short) hostname."""
    return socket.gethostname()
1020
def get_fqdn():
    # type: () -> str
    """Return the FQDN, falling back to the short hostname if unavailable."""
    fqdn = socket.getfqdn()
    return fqdn if fqdn else socket.gethostname()
1024
def get_arch():
    # type: () -> str
    """Return the machine architecture string (e.g. 'x86_64')."""
    return platform.uname().machine
1028
def generate_service_id():
    # type: () -> str
    """Return '<hostname>.<6 random lowercase letters>'."""
    suffix = ''.join(random.choice(string.ascii_lowercase) for _ in range(6))
    return get_hostname() + '.' + suffix
1033
def generate_password():
    # type: () -> str
    """Return a random 10-character lowercase-alphanumeric password.

    NOTE: uses random (not secrets) to retain py2 compatibility, in line
    with the rest of this script.
    """
    alphabet = string.ascii_lowercase + string.digits
    return ''.join(random.choice(alphabet) for _ in range(10))
1038
def normalize_container_id(i):
    # type: (str) -> str
    """Strip a leading 'sha256:' prefix from a container id.

    docker (18.09.7 in bionic at least) prints ids with the prefix while
    podman does not, but both always use sha256 -- so drop the prefix for
    consistency.
    """
    prefix = 'sha256:'
    return i[len(prefix):] if i.startswith(prefix) else i
1049
def make_fsid():
    # type: () -> str
    """Generate a fresh cluster fsid (uuid1) as a string."""
    return str(uuid.uuid1())
1053
def is_fsid(s):
    # type: (str) -> bool
    """Return True if *s* parses as a UUID."""
    try:
        uuid.UUID(s)
        return True
    except ValueError:
        return False
1061
def infer_fsid(func):
    """
    If we only find a single fsid in /var/lib/ceph/*, use that
    """
    @wraps(func)
    def _infer_fsid():
        if args.fsid:
            logger.debug('Using specified fsid: %s' % args.fsid)
            return func()

        fsids = set()
        daemon_list = list_daemons(detail=False)
        for daemon in daemon_list:
            if 'name' not in args or not args.name:
                # no daemon name given: consider every deployed daemon
                fsids.add(daemon['fsid'])
            elif daemon['name'] == args.name:
                # only consider daemons matching the requested name
                fsids.add(daemon['fsid'])
        fsids = list(fsids)

        if not fsids:
            # some commands do not always require an fsid
            pass
        elif len(fsids) == 1:
            logger.info('Inferring fsid %s' % fsids[0])
            args.fsid = fsids[0]
        else:
            # multiple clusters present: refuse to guess
            raise Error('Cannot infer an fsid, one must be specified: %s' % fsids)
        return func()

    return _infer_fsid
1092
e306af50
TL
def infer_config(func):
    """
    If we find a MON daemon, use the config from that container
    """
    @wraps(func)
    def _infer_config():
        if args.config:
            logger.debug('Using specified config: %s' % args.config)
            return func()
        config = None
        if args.fsid:
            # prefer the config of the named daemon, else any local mon
            name = args.name
            if not name:
                daemon_list = list_daemons(detail=False)
                for daemon in daemon_list:
                    if daemon['name'].startswith('mon.'):
                        name = daemon['name']
                        break
            if name:
                config = '/var/lib/ceph/{}/{}/config'.format(args.fsid, name)
        if config:
            logger.info('Inferring config %s' % config)
            args.config = config
        elif os.path.exists(SHELL_DEFAULT_CONF):
            # fall back to the host's /etc/ceph/ceph.conf if present
            logger.debug('Using default config: %s' % SHELL_DEFAULT_CONF)
            args.config = SHELL_DEFAULT_CONF
        return func()

    return _infer_config
1122
1911f103
TL
def _get_default_image():
    """Return the default ceph container image.

    When this cephadm was built from the master branch, first emit a
    yellow warning pointing users at the latest stable release docs.
    """
    if not DEFAULT_IMAGE_IS_MASTER:
        return DEFAULT_IMAGE
    warn = '''This is a development version of cephadm.
For information regarding the latest stable release:
  https://docs.ceph.com/docs/{}/cephadm/install
'''.format(LATEST_STABLE_RELEASE)
    for line in warn.splitlines():
        logger.warning('{}{}{}'.format(termcolor.yellow, line, termcolor.end))
    return DEFAULT_IMAGE
1132
9f95a23c
TL
def infer_image(func):
    """
    Decorator: fill in args.image when unset.

    Preference order: the CEPHADM_IMAGE environment variable, the most
    recently pulled local ceph image, then the built-in default.
    """
    @wraps(func)
    def _infer_image():
        if not args.image:
            args.image = (os.environ.get('CEPHADM_IMAGE')
                          or get_last_local_ceph_image()
                          or _get_default_image())
        return func()

    return _infer_image
1148
def default_image(func):
    """
    Decorator: fill in args.image when unset, monitoring-aware.

    A daemon name like 'prometheus.foo' selects that monitoring
    component's image; otherwise fall back to the CEPHADM_IMAGE
    environment variable, then the built-in default.
    """
    @wraps(func)
    def _default_image():
        if not args.image:
            daemon_name = getattr(args, 'name', None)
            if daemon_name:
                type_ = daemon_name.split('.', 1)[0]
                if type_ in Monitoring.components:
                    args.image = Monitoring.components[type_]['image']
            args.image = (args.image
                          or os.environ.get('CEPHADM_IMAGE')
                          or _get_default_image())

        return func()

    return _default_image
1165
def get_last_local_ceph_image():
    """
    Return the most recent locally available ceph image as 'repo:tag',
    or None when no image labelled ceph=True has been pulled yet.
    """
    out, _, _ = call_throws(
        [container_path, 'images',
         '--filter', 'label=ceph=True',
         '--format', '{{.Repository}} {{.Tag}}'])
    lines = out.splitlines()
    if not lines:
        return None
    repository, tag = lines[0].split()
    image = '{}:{}'.format(repository, tag)
    logger.info('Using recent ceph image %s' % image)
    return image
1181
def write_tmp(s, uid, gid):
    # type: (str, int, int) -> Any
    """Write *s* into a new NamedTemporaryFile owned by uid:gid.

    The still-open file object is returned; the file disappears when the
    last reference is dropped, so callers must keep it alive while needed.
    """
    tmp = tempfile.NamedTemporaryFile(mode='w', prefix='ceph-tmp')
    os.fchown(tmp.fileno(), uid, gid)
    tmp.write(s)
    tmp.flush()
    return tmp
1191
def makedirs(dir, uid, gid, mode):
    # type: (str, int, int, int) -> None
    """Create *dir* if missing, then force its ownership and permissions."""
    if os.path.exists(dir):
        os.chmod(dir, mode)
    else:
        os.makedirs(dir, mode=mode)
    os.chown(dir, uid, gid)
    os.chmod(dir, mode)  # the mode passed to makedirs is masked by umask
1200
def get_data_dir(fsid, t, n):
    # type: (str, str, Union[int, str]) -> str
    """Return the data dir for daemon type *t*, id *n*: <args.data_dir>/<fsid>/<t>.<n>."""
    return os.path.join(args.data_dir, fsid, '%s.%s' % (t, n))
1204
def get_log_dir(fsid):
    # type: (str) -> str
    """Return the per-cluster log directory: <args.log_dir>/<fsid>."""
    return os.path.join(args.log_dir, fsid)
1208
def make_data_dir_base(fsid, uid, gid):
    # type: (str, int, int) -> str
    """Create the per-cluster data dir skeleton (including the crash
    and crash/posted subdirs) and return the base path."""
    data_dir_base = os.path.join(args.data_dir, fsid)
    for d in (data_dir_base,
              os.path.join(data_dir_base, 'crash'),
              os.path.join(data_dir_base, 'crash', 'posted')):
        makedirs(d, uid, gid, DATA_DIR_MODE)
    return data_dir_base
1217
def make_data_dir(fsid, daemon_type, daemon_id, uid=None, gid=None):
    # type: (str, str, Union[int, str], int, int) -> str
    """Create the daemon's data directory (plus the cluster base dirs).

    uid/gid default to the ownership extracted from the container image.
    Returns the daemon data dir path.
    """
    if not uid or not gid:
        (uid, gid) = extract_uid_gid()
    make_data_dir_base(fsid, uid, gid)
    data_dir = get_data_dir(fsid, daemon_type, daemon_id)
    makedirs(data_dir, uid, gid, DATA_DIR_MODE)
    return data_dir
1226
def make_log_dir(fsid, uid=None, gid=None):
    # type: (str, int, int) -> str
    """Create the per-cluster log directory and return its path.

    uid/gid default to the ownership extracted from the container image.
    """
    if not uid or not gid:
        (uid, gid) = extract_uid_gid()
    log_dir = get_log_dir(fsid)
    makedirs(log_dir, uid, gid, LOG_DIR_MODE)
    return log_dir
1234
def make_var_run(fsid, uid, gid):
    # type: (str, int, int) -> None
    """Create /var/run/ceph/<fsid> (mode 0770) owned by uid:gid."""
    call_throws(['install', '-d', '-m0770', '-o', str(uid), '-g', str(gid),
                 '/var/run/ceph/%s' % fsid])
1239
def copy_tree(src, dst, uid=None, gid=None):
    # type: (List[str], str, int, int) -> None
    """
    Copy each directory in *src* into *dst*, then chown every copied
    path to uid:gid (defaulting to the container image's ownership).

    Pre-existing destination directories are removed first, because
    copytree's dirs_exist_ok needs python 3.8 which we cannot assume.
    """
    if not uid or not gid:
        (uid, gid) = extract_uid_gid()

    for src_dir in src:
        if os.path.isdir(dst):
            dst_dir = os.path.join(dst, os.path.basename(src_dir))
        else:
            dst_dir = dst

        logger.debug('copy directory \'%s\' -> \'%s\'' % (src_dir, dst_dir))
        shutil.rmtree(dst_dir, ignore_errors=True)
        shutil.copytree(src_dir, dst_dir)

        # fix up ownership of everything we just copied
        for dirpath, dirnames, filenames in os.walk(dst_dir):
            logger.debug('chown %s:%s \'%s\'' % (uid, gid, dirpath))
            os.chown(dirpath, uid, gid)
            for filename in filenames:
                logger.debug('chown %s:%s \'%s\'' % (uid, gid, filename))
                os.chown(os.path.join(dirpath, filename), uid, gid)
1264
def copy_files(src, dst, uid=None, gid=None):
    # type: (List[str], str, int, int) -> None
    """
    Copy each file in *src* to *dst*, chowning the copies to uid:gid
    (defaulting to the container image's ownership).
    """
    if not uid or not gid:
        (uid, gid) = extract_uid_gid()

    for src_file in src:
        if os.path.isdir(dst):
            dst_file = os.path.join(dst, os.path.basename(src_file))
        else:
            dst_file = dst

        logger.debug('copy file \'%s\' -> \'%s\'' % (src_file, dst_file))
        shutil.copyfile(src_file, dst_file)

        logger.debug('chown %s:%s \'%s\'' % (uid, gid, dst_file))
        os.chown(dst_file, uid, gid)
1283
def move_files(src, dst, uid=None, gid=None):
    # type: (List[str], str, int, int) -> None
    """
    Move each file in *src* to *dst* and chown the result to uid:gid
    (defaulting to the container image's ownership).  Symlinks are
    recreated at the destination rather than moved.
    """
    if not uid or not gid:
        (uid, gid) = extract_uid_gid()

    for src_file in src:
        if os.path.isdir(dst):
            dst_file = os.path.join(dst, os.path.basename(src_file))
        else:
            dst_file = dst

        if os.path.islink(src_file):
            # shutil.move() in py2 does not handle symlinks correctly:
            # recreate the link at the destination, then drop the original
            src_rl = os.readlink(src_file)
            logger.debug("symlink '%s' -> '%s'" % (dst_file, src_rl))
            os.symlink(src_rl, dst_file)
            os.unlink(src_file)
        else:
            logger.debug("move file '%s' -> '%s'" % (src_file, dst_file))
            shutil.move(src_file, dst_file)
        logger.debug('chown %s:%s \'%s\'' % (uid, gid, dst_file))
        os.chown(dst_file, uid, gid)
1308
## copied from distutils ##
def find_executable(executable, path=None):
    """Locate *executable* in the directories listed in *path*.

    *path* is a string of directories separated by os.pathsep and
    defaults to os.environ['PATH'].  Returns the complete filename, or
    None when it cannot be found.
    """
    root, ext = os.path.splitext(executable)
    if sys.platform == 'win32' and ext != '.exe':
        executable += '.exe'

    if os.path.isfile(executable):
        return executable

    if path is None:
        path = os.environ.get('PATH', None)
        if path is None:
            try:
                # POSIX fallback: the system default utility path
                path = os.confstr("CS_PATH")
            except (AttributeError, ValueError):
                # os.confstr() or CS_PATH is not available
                path = os.defpath
    # bpo-35755: do not fall back to os.defpath when PATH is set to an
    # empty string; PATH='' matches nothing, whereas PATH=':' searches
    # the current directory
    if not path:
        return None

    for candidate_dir in path.split(os.pathsep):
        candidate = os.path.join(candidate_dir, executable)
        if os.path.isfile(candidate):
            # the file exists; we have a shot at spawn working
            return candidate
    return None
1344
def find_program(filename):
    # type: (str) -> str
    """Like find_executable, but raise ValueError when *filename* is absent."""
    name = find_executable(filename)
    if name is None:
        raise ValueError('%s not found' % filename)
    return name
1351
def get_unit_name(fsid, daemon_type, daemon_id=None):
    # type: (str, str, Optional[Union[int, str]]) -> str
    """Build the systemd unit name for a daemon instance (type + id),
    or for the whole daemon type when no id is given."""
    if daemon_id is None:
        return 'ceph-%s@%s' % (fsid, daemon_type)
    return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
1359
e306af50
TL
def get_unit_name_by_daemon_name(fsid, name):
    """Return the daemon's recorded systemd unit name.

    Raises Error when the daemon description carries no systemd_unit.
    """
    daemon = get_daemon_description(fsid, name)
    try:
        return daemon['systemd_unit']
    except KeyError:
        raise Error('Failed to get unit name for {}'.format(daemon))
1366
9f95a23c
TL
def check_unit(unit_name):
    # type: (str) -> Tuple[bool, str, bool]
    """Query systemd for a unit's status.

    Returns (enabled, state, installed), with state one of 'running',
    'stopped', 'error' or 'unknown'.  systemctl's exit codes encode
    service state in non-obvious ways, so the textual output is used as
    the more explicit (and sufficient) signal.
    """
    enabled = False
    installed = False
    try:
        out, err, code = call(['systemctl', 'is-enabled', unit_name],
                              verbose_on_failure=False)
        if code == 0:
            enabled = True
            installed = True
        elif "disabled" in out:
            installed = True
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        enabled = False
        installed = False

    state = 'unknown'
    try:
        out, err, code = call(['systemctl', 'is-active', unit_name],
                              verbose_on_failure=False)
        out = out.strip()
        # map systemd's is-active vocabulary onto ours
        state = {
            'active': 'running',
            'inactive': 'stopped',
            'failed': 'error',
            'auto-restart': 'error',
        }.get(out, 'unknown')
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        state = 'unknown'
    return (enabled, state, installed)
1404
def check_units(units, enabler=None):
    # type: (List[str], Optional[Packager]) -> bool
    """Return True as soon as any unit in *units* is enabled and running.

    Otherwise return False; when *enabler* is given, each installed (but
    not running) unit is enabled along the way.
    """
    for unit in units:
        enabled, state, installed = check_unit(unit)
        if enabled and state == 'running':
            logger.info('Unit %s is enabled and running' % unit)
            return True
        if enabler is not None and installed:
            logger.info('Enabling unit %s' % unit)
            enabler.enable_service(unit)
    return False
1417
def get_legacy_config_fsid(cluster, legacy_dir=None):
    # type: (str, str) -> Optional[str]
    """Read the fsid out of a legacy /etc/ceph/<cluster>.conf, if any.

    *legacy_dir* optionally re-roots the path (used for testing and for
    adopting containers).  Returns None when the file or option is absent.
    """
    config_file = '/etc/ceph/%s.conf' % cluster
    if legacy_dir is not None:
        config_file = os.path.abspath(legacy_dir + config_file)

    if not os.path.exists(config_file):
        return None
    config = read_config(config_file)
    if config.has_section('global') and config.has_option('global', 'fsid'):
        return config.get('global', 'fsid')
    return None
1429
def get_legacy_daemon_fsid(cluster, daemon_type, daemon_id, legacy_dir=None):
    # type: (str, str, Union[int, str], str) -> Optional[str]
    """Determine the fsid of a legacy (pre-cephadm) daemon.

    For OSDs, prefer the ceph_fsid file in the OSD data dir; otherwise
    (or when unreadable) fall back to the fsid in the cluster config.
    """
    fsid = None
    if daemon_type == 'osd':
        fsid_file = os.path.join(args.data_dir,
                                 daemon_type,
                                 'ceph-%s' % daemon_id,
                                 'ceph_fsid')
        if legacy_dir is not None:
            fsid_file = os.path.abspath(legacy_dir + fsid_file)
        try:
            with open(fsid_file, 'r') as f:
                fsid = f.read().strip()
        except IOError:
            pass
    return fsid or get_legacy_config_fsid(cluster, legacy_dir=legacy_dir)
1448
def get_daemon_args(fsid, daemon_type, daemon_id):
    # type: (str, str, Union[int, str]) -> List[str]
    """Assemble the extra command-line arguments for one daemon process."""
    r = []  # type: List[str]

    if daemon_type in Ceph.daemons and daemon_type != 'crash':
        # log to stderr (captured by the container runtime), not to files
        r += [
            '--setuser', 'ceph',
            '--setgroup', 'ceph',
            '--default-log-to-file=false',
            '--default-log-to-stderr=true',
            '--default-log-stderr-prefix="debug "',
        ]
        if daemon_type == 'mon':
            r += [
                '--default-mon-cluster-log-to-file=false',
                '--default-mon-cluster-log-to-stderr=true',
            ]
    elif daemon_type in Monitoring.components:
        r += Monitoring.components[daemon_type].get('args', list())
        if daemon_type == 'alertmanager':
            # cluster the alertmanagers with any configured peers
            config = get_parm(args.config_json)
            for peer in config.get('peers', list()):  # type: ignore
                r += ['--cluster.peer={}'.format(peer)]
    elif daemon_type == NFSGanesha.daemon_type:
        r += NFSGanesha.init(fsid, daemon_id).get_daemon_args()

    return r
1479
def create_daemon_dirs(fsid, daemon_type, daemon_id, uid, gid,
                       config=None, keyring=None):
    # type: (str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None
    """Create the data/log directories and config files for one daemon.

    When *config*/*keyring* text is given it is written (mode 0600,
    owned uid:gid) into the daemon's data dir.  Monitoring components
    additionally get their directory skeleton plus the files carried in
    args.config_json; NFS-Ganesha and iSCSI delegate to their helpers.
    """
    data_dir = make_data_dir(fsid, daemon_type, daemon_id, uid=uid, gid=gid)
    make_log_dir(fsid, uid=uid, gid=gid)

    if config:
        config_path = os.path.join(data_dir, 'config')
        with open(config_path, 'w') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)  # config may carry sensitive options
            f.write(config)
    if keyring:
        keyring_path = os.path.join(data_dir, 'keyring')
        with open(keyring_path, 'w') as f:
            os.fchmod(f.fileno(), 0o600)  # keyring is a secret
            os.fchown(f.fileno(), uid, gid)
            f.write(keyring)

    if daemon_type in Monitoring.components.keys():
        config = get_parm(args.config_json)  # type: ignore
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())

        # Set up directories specific to the monitoring component
        config_dir = ''
        if daemon_type == 'prometheus':
            data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
            config_dir = 'etc/prometheus'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'alerting'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
        elif daemon_type == 'grafana':
            data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
            config_dir = 'etc/grafana'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
        elif daemon_type == 'alertmanager':
            data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
            config_dir = 'etc/alertmanager'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755)


        # populate the config directory for the component from the config-json
        for fname in required_files:
            if 'files' in config:  # type: ignore
                # list values are joined into one newline-separated file
                if isinstance(config['files'][fname], list):  # type: ignore
                    content = '\n'.join(config['files'][fname])  # type: ignore
                else:
                    content = config['files'][fname]  # type: ignore

                # NOTE(review): data_dir_root is only bound by the branches
                # above; presumably only prometheus/grafana/alertmanager
                # declare config-json-files -- confirm against Monitoring
                with open(os.path.join(data_dir_root, config_dir, fname), 'w') as f:
                    os.fchown(f.fileno(), uid, gid)
                    os.fchmod(f.fileno(), 0o600)
                    f.write(content)

    if daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
        nfs_ganesha.create_daemon_dirs(data_dir, uid, gid)

    if daemon_type == CephIscsi.daemon_type:
        ceph_iscsi = CephIscsi.init(fsid, daemon_id)
        ceph_iscsi.create_daemon_dirs(data_dir, uid, gid)
1545
9f95a23c
TL
def get_parm(option):
    # type: (str) -> Dict[str, str]
    """Parse a --config-json style option into a dict.

    *option* may be '-' (read stdin once, cached across calls in the
    module-global cached_stdin), an inline '{...}' JSON string, or a
    path to a JSON file.  An empty option yields {}; a missing file or
    invalid JSON raises Error.
    """
    if not option:
        return dict()

    global cached_stdin
    if option == '-':
        if cached_stdin is not None:
            j = cached_stdin
        else:
            try:
                # support the "piped to python3" invocation mode
                j = injected_stdin  # type: ignore
            except NameError:
                j = sys.stdin.read()
            cached_stdin = j
    elif option.startswith('{') and option.endswith('}'):
        # inline json string
        j = option
    elif os.path.exists(option):
        # json file
        with open(option, 'r') as f:
            j = f.read()
    else:
        raise Error("Config file {} not found".format(option))

    try:
        return json.loads(j)
    except ValueError as e:
        raise Error("Invalid JSON in {}: {}".format(option, e))
1579
def get_config_and_keyring():
    # type: () -> Tuple[Optional[str], Optional[str]]
    """Resolve (config, keyring) text from the parsed arguments.

    args.config_json supplies defaults; explicit --config, --key and
    --keyring options override them.  Either element may be None.
    """
    config = None
    keyring = None

    if getattr(args, 'config_json', None):
        d = get_parm(args.config_json)
        config = d.get('config')
        keyring = d.get('keyring')

    if getattr(args, 'config', None):
        with open(args.config, 'r') as f:
            config = f.read()

    if getattr(args, 'key', None):
        keyring = '[%s]\n\tkey = %s\n' % (args.name, args.key)
    elif getattr(args, 'keyring', None):
        with open(args.keyring, 'r') as f:
            keyring = f.read()

    return (config, keyring)
1601
def get_container_mounts(fsid, daemon_type, daemon_id,
                         no_config=False):
    # type: (str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str]
    """Return the host->container bind-mount map for one daemon.

    Keys are host paths, values are container paths, optionally with
    ':z'/':Z'/':ro' mount options appended.
    """
    mounts = dict()

    if daemon_type in Ceph.daemons:
        if fsid:
            run_path = os.path.join('/var/run/ceph', fsid);
            if os.path.exists(run_path):
                mounts[run_path] = '/var/run/ceph:z'
            log_dir = get_log_dir(fsid)
            mounts[log_dir] = '/var/log/ceph:z'
            crash_dir = '/var/lib/ceph/%s/crash' % fsid
            if os.path.exists(crash_dir):
                mounts[crash_dir] = '/var/lib/ceph/crash:z'

    if daemon_type in Ceph.daemons and daemon_id:
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        # rgw uses a different data dir layout inside the container
        if daemon_type == 'rgw':
            cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id)
        else:
            cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id)
        if daemon_type != 'crash':
            mounts[data_dir] = cdata_dir + ':z'
        if not no_config:
            mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
        if daemon_type == 'rbd-mirror' or daemon_type == 'crash':
            # these do not search for their keyrings in a data directory
            mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id)

    if daemon_type in ['mon', 'osd']:
        mounts['/dev'] = '/dev'  # FIXME: narrow this down?
        mounts['/run/udev'] = '/run/udev'
    if daemon_type == 'osd':
        mounts['/sys'] = '/sys'  # for numa.cc, pick_address, cgroups, ...
        mounts['/run/lvm'] = '/run/lvm'
        mounts['/run/lock/lvm'] = '/run/lock/lvm'

    try:
        if args.shared_ceph_folder:  # make easy manager modules/ceph-volume development
            ceph_folder = pathify(args.shared_ceph_folder)
            if os.path.exists(ceph_folder):
                # overlay pieces of the host source checkout into the container
                mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
                mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
                mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
                mounts[ceph_folder + '/monitoring/grafana/dashboards'] = '/etc/grafana/dashboards/ceph-dashboard'
                mounts[ceph_folder + '/monitoring/prometheus/alerts'] = '/etc/prometheus/ceph'
            else:
                logger.error('{}{}{}'.format(termcolor.red,
                             'Ceph shared source folder does not exist.',
                             termcolor.end))
    except AttributeError:
        # not all subcommands define --shared-ceph-folder
        pass

    if daemon_type in Monitoring.components and daemon_id:
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        if daemon_type == 'prometheus':
            mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z'
            mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z'
        elif daemon_type == 'node-exporter':
            # node-exporter scrapes the host, mounted read-only
            mounts['/proc'] = '/host/proc:ro'
            mounts['/sys'] = '/host/sys:ro'
            mounts['/'] = '/rootfs:ro'
        elif daemon_type == "grafana":
            mounts[os.path.join(data_dir, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
        elif daemon_type == 'alertmanager':
            mounts[os.path.join(data_dir, 'etc/alertmanager')] = '/alertmanager:Z'

    if daemon_type == NFSGanesha.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        mounts.update(NFSGanesha.get_container_mounts(data_dir))

    if daemon_type == CephIscsi.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        log_dir = get_log_dir(fsid)
        mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir))

    return mounts
1684
def get_container(fsid, daemon_type, daemon_id,
                  privileged=False,
                  ptrace=False,
                  container_args=[]):
    # type: (str, str, Union[int, str], bool, bool, List[str]) -> CephContainer
    """Build the CephContainer object that runs one daemon.

    Selects the entrypoint, client name, env vars, privilege level and
    mounts appropriate for the daemon type.

    Bug fix: the original extended *container_args* in place, which both
    mutated a caller-supplied list and — since the default is a mutable
    list evaluated once — accumulated monitoring args across successive
    calls.  Work on a private copy instead.
    """
    container_args = list(container_args)

    if daemon_type in ['mon', 'osd']:
        # mon and osd need privileged in order for libudev to query devices
        privileged = True
    if daemon_type == 'rgw':
        entrypoint = '/usr/bin/radosgw'
        name = 'client.rgw.%s' % daemon_id
    elif daemon_type == 'rbd-mirror':
        entrypoint = '/usr/bin/rbd-mirror'
        name = 'client.rbd-mirror.%s' % daemon_id
    elif daemon_type == 'crash':
        entrypoint = '/usr/bin/ceph-crash'
        name = 'client.crash.%s' % daemon_id
    elif daemon_type in ['mon', 'mgr', 'mds', 'osd']:
        entrypoint = '/usr/bin/ceph-' + daemon_type
        name = '%s.%s' % (daemon_type, daemon_id)
    elif daemon_type in Monitoring.components:
        entrypoint = ''
        name = ''
    elif daemon_type == NFSGanesha.daemon_type:
        entrypoint = NFSGanesha.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
    elif daemon_type == CephIscsi.daemon_type:
        entrypoint = CephIscsi.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        # So the container can modprobe iscsi_target_mod and have write perms
        # to configfs we need to make this a privileged container.
        privileged = True
    else:
        entrypoint = ''
        name = ''

    ceph_args = []  # type: List[str]
    if daemon_type in Monitoring.components:
        uid, gid = extract_uid_gid_monitoring(daemon_type)
        m = Monitoring.components[daemon_type]  # type: ignore
        metadata = m.get('image', dict())  # type: ignore
        monitoring_args = [
            '--user',
            str(uid),
            # FIXME: disable cpu/memory limits for the time being (not supported
            # by ubuntu 18.04 kernel!)
            #'--cpus',
            #metadata.get('cpus', '2'),
            #'--memory',
            #metadata.get('memory', '4GB')
        ]
        # safe now: container_args is our private copy
        container_args.extend(monitoring_args)
    elif daemon_type == 'crash':
        ceph_args = ['-n', name]
    elif daemon_type in Ceph.daemons:
        ceph_args = ['-n', name, '-f']

    envs = []  # type: List[str]
    if daemon_type == NFSGanesha.daemon_type:
        envs.extend(NFSGanesha.get_container_envs())

    return CephContainer(
        image=args.image,
        entrypoint=entrypoint,
        args=ceph_args + get_daemon_args(fsid, daemon_type, daemon_id),
        container_args=container_args,
        volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
        cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id),
        envs=envs,
        privileged=privileged,
        ptrace=ptrace,
    )
1757
def extract_uid_gid(img='', file_path='/var/lib/ceph'):
    # type: (str, str) -> Tuple[int, int]
    """Ask the container image which uid:gid owns *file_path* inside it.

    *img* defaults to args.image.  Returns a (uid, gid) tuple of ints.
    """
    out = CephContainer(
        image=img or args.image,
        entrypoint='stat',
        args=['-c', '%u %g', file_path]
    ).run()
    uid, gid = out.split(' ')
    return (int(uid), int(gid))
1771
def deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid,
                  config=None, keyring=None,
                  osd_fsid=None,
                  reconfig=False):
    # type: (str, str, Union[int, str], CephContainer, int, int, Optional[str], Optional[str], Optional[str], Optional[bool]) -> None
    """Deploy (or, with *reconfig*, reconfigure) one daemon on this host.

    Creates the daemon's dirs and config/keyring files, runs
    'ceph-mon --mkfs' for a brand-new monitor, installs the systemd
    units (skipped on reconfig), stamps the unit.created/unit.configured
    marker files, opens firewall ports, and restarts non-ceph daemons on
    reconfig so they pick up the new config.
    """
    data_dir = get_data_dir(fsid, daemon_type, daemon_id)
    if reconfig and not os.path.exists(data_dir):
        raise Error('cannot reconfig, data path %s does not exist' % data_dir)
    if daemon_type == 'mon' and not os.path.exists(data_dir):
        # brand-new monitor: mkfs it before anything else
        assert config
        assert keyring
        # tmp keyring file (kept alive until --mkfs has run)
        tmp_keyring = write_tmp(keyring, uid, gid)

        # tmp config file
        tmp_config = write_tmp(config, uid, gid)

        # --mkfs
        create_daemon_dirs(fsid, daemon_type, daemon_id, uid, gid)
        mon_dir = get_data_dir(fsid, 'mon', daemon_id)
        log_dir = get_log_dir(fsid)
        out = CephContainer(
            image=args.image,
            entrypoint='/usr/bin/ceph-mon',
            args=['--mkfs',
                  '-i', str(daemon_id),
                  '--fsid', fsid,
                  '-c', '/tmp/config',
                  '--keyring', '/tmp/keyring',
            ] + get_daemon_args(fsid, 'mon', daemon_id),
            volume_mounts={
                log_dir: '/var/log/ceph:z',
                mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id),
                tmp_keyring.name: '/tmp/keyring:z',
                tmp_config.name: '/tmp/config:z',
            },
        ).run()

        # write conf
        with open(mon_dir + '/config', 'w') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config)
    else:
        # dirs, conf, keyring
        create_daemon_dirs(
            fsid, daemon_type, daemon_id,
            uid, gid,
            config, keyring)

    if not reconfig:
        deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
                            osd_fsid=osd_fsid)

    # marker files: mtimes record when the deployment was created and
    # when it was last (re)configured
    if not os.path.exists(data_dir + '/unit.created'):
        with open(data_dir + '/unit.created', 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write('mtime is time the daemon deployment was created\n')

    with open(data_dir + '/unit.configured', 'w') as f:
        f.write('mtime is time we were last configured\n')
        os.fchmod(f.fileno(), 0o600)
        os.fchown(f.fileno(), uid, gid)

    update_firewalld(daemon_type)

    if reconfig and daemon_type not in Ceph.daemons:
        # ceph daemons do not need a restart; others (presumably) do to pick
        # up the new config
        call_throws(['systemctl', 'reset-failed',
                     get_unit_name(fsid, daemon_type, daemon_id)])
        call_throws(['systemctl', 'restart',
                     get_unit_name(fsid, daemon_type, daemon_id)])
1846
def deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True, start=True,
                        osd_fsid=None):
    # type: (str, int, int, str, Union[int, str], CephContainer, bool, bool, Optional[str]) -> None
    """Write the unit.run/unit.poststop scripts and systemd units for a daemon.

    Each file is written to a '.new' name and renamed into place so a
    partial write never leaves a truncated script behind.  The unit is
    enabled and/or started per *enable*/*start*.
    """
    # cmd
    data_dir = get_data_dir(fsid, daemon_type, daemon_id)
    with open(data_dir + '/unit.run.new', 'w') as f:
        # pre-start cmd(s)
        if daemon_type == 'osd':
            # osds have a pre-start step
            assert osd_fsid
            f.write('# Simple OSDs need chown on startup:\n')
            for n in ['block', 'block.db', 'block.wal']:
                p = os.path.join(data_dir, n)
                f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
            f.write('# LVM OSDs use ceph-volume lvm activate:\n')
            prestart = CephContainer(
                image=args.image,
                entrypoint='/usr/sbin/ceph-volume',
                args=[
                    'lvm', 'activate',
                    str(daemon_id), osd_fsid,
                    '--no-systemd'
                ],
                privileged=True,
                volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
                cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
            )
            f.write(' '.join(prestart.run_cmd()) + '\n')
        elif daemon_type == NFSGanesha.daemon_type:
            # add nfs to the rados grace db
            nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
            prestart = nfs_ganesha.get_rados_grace_container('add')
            f.write(' '.join(prestart.run_cmd()) + '\n')
        elif daemon_type == CephIscsi.daemon_type:
            # mount configfs before the daemon container starts
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')

        if daemon_type in Ceph.daemons:
            install_path = find_program('install')
            f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))

        # container run command
        f.write(' '.join(c.run_cmd()) + '\n')
        os.fchmod(f.fileno(), 0o600)
    os.rename(data_dir + '/unit.run.new',
              data_dir + '/unit.run')

    # post-stop command(s)
    with open(data_dir + '/unit.poststop.new', 'w') as f:
        if daemon_type == 'osd':
            assert osd_fsid
            poststop = CephContainer(
                image=args.image,
                entrypoint='/usr/sbin/ceph-volume',
                args=[
                    'lvm', 'deactivate',
                    str(daemon_id), osd_fsid,
                ],
                privileged=True,
                volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
                cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type,
                                                    daemon_id),
            )
            f.write(' '.join(poststop.run_cmd()) + '\n')
        elif daemon_type == NFSGanesha.daemon_type:
            # remove nfs from the rados grace db
            nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
            poststop = nfs_ganesha.get_rados_grace_container('remove')
            f.write(' '.join(poststop.run_cmd()) + '\n')
        elif daemon_type == CephIscsi.daemon_type:
            # unmount configfs after the daemon container stops
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
        os.fchmod(f.fileno(), 0o600)
    os.rename(data_dir + '/unit.poststop.new',
              data_dir + '/unit.poststop')

    # record which image this unit was deployed from
    with open(data_dir + '/unit.image.new', 'w') as f:
        f.write(c.image + '\n')
        os.fchmod(f.fileno(), 0o600)
    os.rename(data_dir + '/unit.image.new',
              data_dir + '/unit.image')

    # systemd
    install_base_units(fsid)
    unit = get_unit_file(fsid)
    unit_file = 'ceph-%s@.service' % (fsid)
    with open(args.unit_dir + '/' + unit_file + '.new', 'w') as f:
        f.write(unit)
    os.rename(args.unit_dir + '/' + unit_file + '.new',
              args.unit_dir + '/' + unit_file)
    call_throws(['systemctl', 'daemon-reload'])

    unit_name = get_unit_name(fsid, daemon_type, daemon_id)
    # stop + reset-failed first so a previously failed unit restarts cleanly
    call(['systemctl', 'stop', unit_name],
         verbose_on_failure=False)
    call(['systemctl', 'reset-failed', unit_name],
         verbose_on_failure=False)
    if enable:
        call_throws(['systemctl', 'enable', unit_name])
    if start:
        call_throws(['systemctl', 'start', unit_name])
1947
def update_firewalld(daemon_type):
    # type: (str) -> None
    """Open the firewalld services/ports this daemon type needs.

    No-op when --skip-firewalld was given, firewall-cmd is not
    installed, or firewalld.service is not enabled.  Raises RuntimeError
    when firewall-cmd refuses to add a required service or port.
    """
    if args.skip_firewalld:
        return
    cmd = find_executable('firewall-cmd')
    if not cmd:
        logger.debug('firewalld does not appear to be present')
        return
    (enabled, state, _) = check_unit('firewalld.service')
    if not enabled:
        logger.debug('firewalld.service is not enabled')
        return

    fw_services = []
    fw_ports = []
    if daemon_type == 'mon':
        fw_services.append('ceph-mon')
    elif daemon_type in ['mgr', 'mds', 'osd']:
        fw_services.append('ceph')
        if daemon_type == 'mgr':
            fw_ports.append(8080)  # dashboard
            fw_ports.append(8443)  # dashboard
            fw_ports.append(9283)  # mgr/prometheus exporter
    elif daemon_type in Monitoring.port_map.keys():
        fw_ports.extend(Monitoring.port_map[daemon_type])  # prometheus etc
    elif daemon_type == NFSGanesha.daemon_type:
        fw_services.append('nfs')

    for svc in fw_services:
        # query first so we only log and add when actually missing
        out, err, ret = call([cmd, '--permanent', '--query-service', svc])
        if ret:
            logger.info('Enabling firewalld service %s in current zone...' % svc)
            out, err, ret = call([cmd, '--permanent', '--add-service', svc])
            if ret:
                raise RuntimeError(
                    'unable to add service %s to current zone: %s' % (svc, err))
        else:
            logger.debug('firewalld service %s is enabled in current zone' % svc)
    for port in fw_ports:
        tcp_port = str(port) + '/tcp'
        out, err, ret = call([cmd, '--permanent', '--query-port', tcp_port])
        if ret:
            logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
            out, err, ret = call([cmd, '--permanent', '--add-port', tcp_port])
            if ret:
                raise RuntimeError('unable to add port %s to current zone: %s' %
                                   (tcp_port, err))
        else:
            logger.debug('firewalld port %s is enabled in current zone' % tcp_port)
    # --permanent changes need a reload to take effect now
    call_throws([cmd, '--reload'])
1998
def install_base_units(fsid):
    # type: (str) -> None
    """
    Set up ceph.target and ceph-$fsid.target units.

    Also installs a logrotate configuration for the cluster's log
    directory.  Unit files are written to a .new path and renamed into
    place so an existing unit is replaced atomically; enable/start is
    only attempted the first time a unit appears.
    """
    # global unit
    existed = os.path.exists(args.unit_dir + '/ceph.target')
    with open(args.unit_dir + '/ceph.target.new', 'w') as f:
        f.write('[Unit]\n'
                'Description=All Ceph clusters and services\n'
                '\n'
                '[Install]\n'
                'WantedBy=multi-user.target\n')
        os.rename(args.unit_dir + '/ceph.target.new',
                  args.unit_dir + '/ceph.target')
    if not existed:
        # we disable before enable in case a different ceph.target
        # (from the traditional package) is present; while newer
        # systemd is smart enough to disable the old
        # (/lib/systemd/...) and enable the new (/etc/systemd/...),
        # some older versions of systemd error out with EEXIST.
        call_throws(['systemctl', 'disable', 'ceph.target'])
        call_throws(['systemctl', 'enable', 'ceph.target'])
        call_throws(['systemctl', 'start', 'ceph.target'])

    # cluster unit (PartOf/Before tie it to the global ceph.target)
    existed = os.path.exists(args.unit_dir + '/ceph-%s.target' % fsid)
    with open(args.unit_dir + '/ceph-%s.target.new' % fsid, 'w') as f:
        f.write('[Unit]\n'
                'Description=Ceph cluster {fsid}\n'
                'PartOf=ceph.target\n'
                'Before=ceph.target\n'
                '\n'
                '[Install]\n'
                'WantedBy=multi-user.target ceph.target\n'.format(
                    fsid=fsid)
                )
        os.rename(args.unit_dir + '/ceph-%s.target.new' % fsid,
                  args.unit_dir + '/ceph-%s.target' % fsid)
    if not existed:
        call_throws(['systemctl', 'enable', 'ceph-%s.target' % fsid])
        call_throws(['systemctl', 'start', 'ceph-%s.target' % fsid])

    # logrotate for the cluster
    with open(args.logrotate_dir + '/ceph-%s' % fsid, 'w') as f:
        """
        This is a bit sloppy in that the killall/pkill will touch all ceph daemons
        in all containers, but I don't see an elegant way to send SIGHUP *just* to
        the daemons for this cluster.  (1) systemd kill -s will get the signal to
        podman, but podman will exit.  (2) podman kill will get the signal to the
        first child (bash), but that isn't the ceph daemon.  This is simpler and
        should be harmless.
        """
        f.write("""# created by cephadm
/var/log/ceph/%s/*.log {
    rotate 7
    daily
    compress
    sharedscripts
    postrotate
        killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror || pkill -1 -x "ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror" || true
    endscript
    missingok
    notifempty
    su root root
}
""" % fsid)
2066
def get_unit_file(fsid):
    # type: (str) -> str
    """Return the contents of the templated systemd unit for this cluster.

    Installed as ceph-{fsid}@.service; systemd expands %i to the daemon
    name (e.g. mon.myhost).  The unit delegates the actual container
    start/stop to the per-daemon unit.run / unit.poststop scripts under
    {data_dir}/{fsid}/%i/.
    """
    u = """# generated by cephadm
[Unit]
Description=Ceph %i for {fsid}

# According to:
#   http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
# these can be removed once ceph-mon will dynamically change network
# configuration.
After=network-online.target local-fs.target time-sync.target
Wants=network-online.target local-fs.target time-sync.target

PartOf=ceph-{fsid}.target
Before=ceph-{fsid}.target

[Service]
LimitNOFILE=1048576
LimitNPROC=1048576
EnvironmentFile=-/etc/environment
ExecStartPre=-{container_path} rm ceph-{fsid}-%i
ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
ExecStop=-{container_path} stop ceph-{fsid}-%i
ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
KillMode=none
Restart=on-failure
RestartSec=10s
TimeoutStartSec=120
TimeoutStopSec=120
StartLimitInterval=30min
StartLimitBurst=5

[Install]
WantedBy=ceph-{fsid}.target
""".format(
        container_path=container_path,
        fsid=fsid,
        data_dir=args.data_dir)
    return u
2106
2107##################################
2108
class CephContainer:
    """Assembles podman/docker command lines for running a Ceph container,
    running an ad-hoc command in a fresh container, or exec'ing into a
    running one.  Does not itself execute anything; callers run the
    returned argv lists."""

    def __init__(self,
                 image,
                 entrypoint,
                 args=None,
                 volume_mounts=None,
                 cname='',
                 container_args=None,
                 envs=None,
                 privileged=False,
                 ptrace=False):
        # type: (str, str, Optional[List[str]], Optional[Dict[str, str]], str, Optional[List[str]], Optional[List[str]], bool, bool) -> None
        self.image = image
        self.entrypoint = entrypoint
        # Fix: the previous defaults (args=[], volume_mounts={},
        # container_args=[]) were mutable default arguments shared by every
        # instance that omitted them; use None sentinels instead.
        self.args = args if args is not None else []
        self.volume_mounts = volume_mounts if volume_mounts is not None else {}
        self.cname = cname
        self.container_args = container_args if container_args is not None else []
        self.envs = envs
        self.privileged = privileged
        self.ptrace = ptrace

    def run_cmd(self):
        # type: () -> List[str]
        """Return the argv for `<engine> run` with this container's
        entrypoint, mounts, env and name."""
        entrypoint = []  # type: List[str]
        if self.entrypoint:
            entrypoint = ['--entrypoint', self.entrypoint]

        priv = []  # type: List[str]
        if self.privileged:
            priv = ['--privileged',
                    # let OSD etc read block devs that haven't been chowned
                    '--group-add=disk']
        if self.ptrace:
            priv.append('--cap-add=SYS_PTRACE')
        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        envs = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        if self.envs:
            for e in self.envs:
                envs.extend(['-e', e])
        cname = ['--name', self.cname] if self.cname else []
        return [
            str(container_path),
            'run',
            '--rm',
            '--net=host',
            '--ipc=host',
        ] + self.container_args + priv + \
            cname + envs + \
            vols + entrypoint + \
            [
                self.image
            ] + self.args  # type: ignore

    def shell_cmd(self, cmd):
        # type: (List[str]) -> List[str]
        """Return the argv for running *cmd* in a fresh container;
        cmd[0] becomes the entrypoint, so cmd must be non-empty."""
        priv = []  # type: List[str]
        if self.privileged:
            priv = ['--privileged',
                    # let OSD etc read block devs that haven't been chowned
                    '--group-add=disk']
        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        envs = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        if self.envs:
            for e in self.envs:
                envs.extend(['-e', e])
        # (removed dead code: a cmd_args list was built here but never used)
        return [
            str(container_path),
            'run',
            '--rm',
            '--net=host',
            '--ipc=host',
        ] + self.container_args + priv + envs + vols + [
            '--entrypoint', cmd[0],
            self.image
        ] + cmd[1:]

    def exec_cmd(self, cmd):
        # type: (List[str]) -> List[str]
        """Return the argv for exec'ing *cmd* inside this (named, already
        running) container."""
        return [
            str(container_path),
            'exec',
        ] + self.container_args + [
            self.cname,
        ] + cmd

    def run(self, timeout=DEFAULT_TIMEOUT):
        # type: (Optional[int]) -> str
        """Run the container to completion and return its stdout; raises
        (via call_throws) on failure."""
        logger.debug(self.run_cmd())
        out, _, _ = call_throws(
            self.run_cmd(), desc=self.entrypoint, timeout=timeout)
        return out
2218
2219##################################
2220
@infer_image
def command_version():
    # type: () -> int
    """Print the Ceph version reported by the configured container image."""
    version_text = CephContainer(args.image, 'ceph', ['--version']).run()
    print(version_text.strip())
    return 0
2227
2228##################################
2229
@infer_image
def command_pull():
    # type: () -> int
    """Pull the latest target image, then print its id and ceph version."""
    logger.info('Pulling latest %s...' % args.image)
    pull_cmd = [container_path, 'pull', args.image]
    call_throws(pull_cmd)
    return command_inspect_image()
2236
2237##################################
2238
@infer_image
def command_inspect_image():
    # type: () -> int
    """Inspect the configured container image and print its id and the
    Ceph version it provides as JSON.

    Returns 0 on success, errno.ENOENT if the image cannot be inspected
    (e.g. it is not present locally).
    """
    # Fix: this used call_throws(), which raises on a non-zero exit status,
    # making the `if ret:` ENOENT path below unreachable.  Use call() so a
    # failed inspect is reported through the return code as intended.
    out, err, ret = call([
        container_path, 'inspect',
        '--format', '{{.Id}}',
        args.image])
    if ret:
        return errno.ENOENT
    image_id = normalize_container_id(out.strip())
    ver = CephContainer(args.image, 'ceph', ['--version']).run().strip()
    r = {
        'image_id': image_id,
        'ceph_version': ver,
    }
    print(json.dumps(r, indent=4, sort_keys=True))
    return 0
2256
2257##################################
2258
@default_image
def command_bootstrap():
    # type: () -> int
    """Bootstrap a new Ceph cluster on this host.

    Creates the first mon and mgr, writes the admin keyring, a minimal
    ceph.conf and the cluster SSH public key to the output paths, and
    (unless skipped) wires up the cephadm orchestrator, monitoring stack
    and dashboard.  Returns 0 on success; raises Error on bad arguments.
    """

    # default output file locations under --output-dir
    if not args.output_config:
        args.output_config = os.path.join(args.output_dir, 'ceph.conf')
    if not args.output_keyring:
        args.output_keyring = os.path.join(args.output_dir,
                                           'ceph.client.admin.keyring')
    if not args.output_pub_ssh_key:
        args.output_pub_ssh_key = os.path.join(args.output_dir, 'ceph.pub')

    # verify output files
    for f in [args.output_config, args.output_keyring, args.output_pub_ssh_key]:
        if not args.allow_overwrite:
            if os.path.exists(f):
                raise Error('%s already exists; delete or pass '
                            '--allow-overwrite to overwrite' % f)
        dirname = os.path.dirname(f)
        if dirname and not os.path.exists(dirname):
            raise Error('%s directory %s does not exist' % (f, dirname))

    if not args.skip_prepare_host:
        command_prepare_host()
    else:
        logger.info('Skip prepare_host')

    # initial vars
    fsid = args.fsid or make_fsid()
    hostname = get_hostname()
    if '.' in hostname and not args.allow_fqdn_hostname:
        raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0]))
    mon_id = args.mon_id or hostname
    mgr_id = args.mgr_id or generate_service_id()
    logging.info('Cluster fsid: %s' % fsid)

    # serialize all cephadm operations on this cluster
    l = FileLock(fsid)
    l.acquire()

    # ip: derive the mon addrv string ([v1:...]/[v2:...]) and base IP
    # from either --mon-ip (port optional) or an explicit --mon-addrv
    r = re.compile(r':(\d+)$')
    base_ip = None
    if args.mon_ip:
        hasport = r.findall(args.mon_ip)
        if hasport:
            port = int(hasport[0])
            # 6789 implies msgr1, 3300 implies msgr2; anything else is
            # assumed to be msgr2
            if port == 6789:
                addr_arg = '[v1:%s]' % args.mon_ip
            elif port == 3300:
                addr_arg = '[v2:%s]' % args.mon_ip
            else:
                logger.warning('Using msgr2 protocol for unrecognized port %d' %
                               port)
                addr_arg = '[v2:%s]' % args.mon_ip
            base_ip = args.mon_ip[0:-(len(str(port)))-1]
            check_ip_port(base_ip, port)
        else:
            # no port given: advertise both protocols on default ports
            base_ip = args.mon_ip
            addr_arg = '[v2:%s:3300,v1:%s:6789]' % (args.mon_ip, args.mon_ip)
            check_ip_port(args.mon_ip, 3300)
            check_ip_port(args.mon_ip, 6789)
    elif args.mon_addrv:
        addr_arg = args.mon_addrv
        if addr_arg[0] != '[' or addr_arg[-1] != ']':
            raise Error('--mon-addrv value %s must use square backets' %
                        addr_arg)
        for addr in addr_arg[1:-1].split(','):
            hasport = r.findall(addr)
            if not hasport:
                raise Error('--mon-addrv value %s must include port number' %
                            addr_arg)
            port = int(hasport[0])
            # strip off v1: or v2: prefix
            addr = re.sub(r'^\w+:', '', addr)
            base_ip = addr[0:-(len(str(port)))-1]
            check_ip_port(base_ip, port)
    else:
        raise Error('must specify --mon-ip or --mon-addrv')
    logger.debug('Base mon IP is %s, final addrv is %s' % (base_ip, addr_arg))

    mon_network = None
    if not args.skip_mon_network:
        # make sure IP is configured locally, and then figure out the
        # CIDR network
        for net, ips in list_networks().items():
            if base_ip in ips:
                mon_network = net
                logger.info('Mon IP %s is in CIDR network %s' % (base_ip,
                                                                 mon_network))
                break
        if not mon_network:
            raise Error('Failed to infer CIDR network for mon ip %s; pass '
                        '--skip-mon-network to configure it later' % base_ip)

    # config: seed from --config (if any) and inject the cluster identity
    cp = read_config(args.config)
    if not cp.has_section('global'):
        cp.add_section('global')
    cp.set('global', 'fsid', fsid);
    cp.set('global', 'mon host', addr_arg)
    cp.set('global', 'container_image', args.image)
    cpf = StringIO()
    cp.write(cpf)
    config = cpf.getvalue()

    if not args.skip_pull:
        logger.info('Pulling latest %s container...' % args.image)
        call_throws([container_path, 'pull', args.image])

    logger.info('Extracting ceph user uid/gid from container image...')
    (uid, gid) = extract_uid_gid()

    # create some initial keys
    logger.info('Creating initial keys...')
    mon_key = CephContainer(
        image=args.image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()
    admin_key = CephContainer(
        image=args.image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()
    mgr_key = CephContainer(
        image=args.image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()

    keyring = ('[mon.]\n'
               '\tkey = %s\n'
               '\tcaps mon = allow *\n'
               '[client.admin]\n'
               '\tkey = %s\n'
               '\tcaps mon = allow *\n'
               '\tcaps mds = allow *\n'
               '\tcaps mgr = allow *\n'
               '\tcaps osd = allow *\n'
               '[mgr.%s]\n'
               '\tkey = %s\n'
               '\tcaps mon = profile mgr\n'
               '\tcaps mds = allow *\n'
               '\tcaps osd = allow *\n'
               % (mon_key, admin_key, mgr_id, mgr_key))

    # tmp keyring file
    tmp_bootstrap_keyring = write_tmp(keyring, uid, gid)

    # create initial monmap, tmp monmap file
    logger.info('Creating initial monmap...')
    tmp_monmap = write_tmp('', 0, 0)
    out = CephContainer(
        image=args.image,
        entrypoint='/usr/bin/monmaptool',
        args=['--create',
              '--clobber',
              '--fsid', fsid,
              '--addv', mon_id, addr_arg,
              '/tmp/monmap'
        ],
        volume_mounts={
            tmp_monmap.name: '/tmp/monmap:z',
        },
    ).run()

    # pass monmap file to ceph user for use by ceph-mon --mkfs below
    os.fchown(tmp_monmap.fileno(), uid, gid)

    # create mon
    logger.info('Creating mon...')
    create_daemon_dirs(fsid, 'mon', mon_id, uid, gid)
    mon_dir = get_data_dir(fsid, 'mon', mon_id)
    log_dir = get_log_dir(fsid)
    out = CephContainer(
        image=args.image,
        entrypoint='/usr/bin/ceph-mon',
        args=['--mkfs',
              '-i', mon_id,
              '--fsid', fsid,
              '-c', '/dev/null',
              '--monmap', '/tmp/monmap',
              '--keyring', '/tmp/keyring',
        ] + get_daemon_args(fsid, 'mon', mon_id),
        volume_mounts={
            log_dir: '/var/log/ceph:z',
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
            tmp_bootstrap_keyring.name: '/tmp/keyring:z',
            tmp_monmap.name: '/tmp/monmap:z',
        },
    ).run()

    # write the (pre-minimization) config into the mon data dir
    with open(mon_dir + '/config', 'w') as f:
        os.fchown(f.fileno(), uid, gid)
        os.fchmod(f.fileno(), 0o600)
        f.write(config)

    make_var_run(fsid, uid, gid)
    mon_c = get_container(fsid, 'mon', mon_id)
    deploy_daemon(fsid, 'mon', mon_id, mon_c, uid, gid,
                  config=None, keyring=None)

    # client.admin key + config to issue various CLI commands
    tmp_admin_keyring = write_tmp('[client.admin]\n'
                                  '\tkey = ' + admin_key + '\n',
                                  uid, gid)
    tmp_config = write_tmp(config, uid, gid)

    # a CLI helper to reduce our typing
    def cli(cmd, extra_mounts={}, timeout=DEFAULT_TIMEOUT):
        # type: (List[str], Dict[str, str], Optional[int]) -> str
        # runs `ceph <cmd>` in a throwaway container with the admin
        # keyring and config mounted; returns its stdout
        mounts = {
            log_dir: '/var/log/ceph:z',
            tmp_admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
            tmp_config.name: '/etc/ceph/ceph.conf:z',
        }
        for k, v in extra_mounts.items():
            mounts[k] = v
        timeout = timeout or args.timeout
        return CephContainer(
            image=args.image,
            entrypoint='/usr/bin/ceph',
            args=cmd,
            volume_mounts=mounts,
        ).run(timeout=timeout)

    logger.info('Waiting for mon to start...')
    c = CephContainer(
        image=args.image,
        entrypoint='/usr/bin/ceph',
        args=[
            'status'],
        volume_mounts={
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
            tmp_admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
            tmp_config.name: '/etc/ceph/ceph.conf:z',
        },
    )

    # wait for the service to become available
    def is_mon_available():
        # type: () -> bool
        timeout=args.timeout if args.timeout else 30  # seconds
        out, err, ret = call(c.run_cmd(),
                             desc=c.entrypoint,
                             timeout=timeout)
        return ret == 0
    is_available('mon', is_mon_available)

    # assimilate and minimize config
    if not args.no_minimize_config:
        logger.info('Assimilating anything we can from ceph.conf...')
        cli([
            'config', 'assimilate-conf',
            '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
        ], {
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
        })
        logger.info('Generating new minimal ceph.conf...')
        cli([
            'config', 'generate-minimal-conf',
            '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
        ], {
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
        })
        # re-read our minimized config
        with open(mon_dir + '/config', 'r') as f:
            config = f.read()
        logger.info('Restarting the monitor...')
        call_throws([
            'systemctl',
            'restart',
            get_unit_name(fsid, 'mon', mon_id)
        ])

    if mon_network:
        logger.info('Setting mon public_network...')
        cli(['config', 'set', 'mon', 'public_network', mon_network])

    # create mgr
    logger.info('Creating mgr...')
    mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key)
    mgr_c = get_container(fsid, 'mgr', mgr_id)
    deploy_daemon(fsid, 'mgr', mgr_id, mgr_c, uid, gid,
                  config=config, keyring=mgr_keyring)

    # output files
    with open(args.output_keyring, 'w') as f:
        os.fchmod(f.fileno(), 0o600)
        f.write('[client.admin]\n'
                '\tkey = ' + admin_key + '\n')
    logger.info('Wrote keyring to %s' % args.output_keyring)

    with open(args.output_config, 'w') as f:
        f.write(config)
    logger.info('Wrote config to %s' % args.output_config)

    # wait for the service to become available
    logger.info('Waiting for mgr to start...')
    def is_mgr_available():
        # type: () -> bool
        timeout=args.timeout if args.timeout else 30  # seconds
        try:
            out = cli(['status', '-f', 'json-pretty'], timeout=timeout)
            j = json.loads(out)
            return j.get('mgrmap', {}).get('available', False)
        except Exception as e:
            logger.debug('status failed: %s' % e)
            return False
    is_available('mgr', is_mgr_available)

    # wait for mgr to restart (after enabling a module)
    def wait_for_mgr_restart():
        # first get latest mgrmap epoch from the mon
        out = cli(['mgr', 'dump'])
        j = json.loads(out)
        epoch = j['epoch']
        # wait for mgr to have it
        logger.info('Waiting for the mgr to restart...')
        def mgr_has_latest_epoch():
            # type: () -> bool
            try:
                out = cli(['tell', 'mgr', 'mgr_status'])
                j = json.loads(out)
                return j['mgrmap_epoch'] >= epoch
            except Exception as e:
                logger.debug('tell mgr mgr_status failed: %s' % e)
                return False
        is_available('Mgr epoch %d' % epoch, mgr_has_latest_epoch)

    # ssh: set up the orchestrator's SSH identity and add this host
    if not args.skip_ssh:
        logger.info('Enabling cephadm module...')
        cli(['mgr', 'module', 'enable', 'cephadm'])
        wait_for_mgr_restart()

        logger.info('Setting orchestrator backend to cephadm...')
        cli(['orch', 'set', 'backend', 'cephadm'])

        if args.ssh_config:
            logger.info('Using provided ssh config...')
            mounts = {
                pathify(args.ssh_config.name): '/tmp/cephadm-ssh-config:z',
            }
            cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts=mounts)

        if args.ssh_private_key and args.ssh_public_key:
            logger.info('Using provided ssh keys...')
            mounts = {
                pathify(args.ssh_private_key.name): '/tmp/cephadm-ssh-key:z',
                pathify(args.ssh_public_key.name): '/tmp/cephadm-ssh-key.pub:z'
            }
            cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts)
            cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts=mounts)
        else:
            logger.info('Generating ssh key...')
            cli(['cephadm', 'generate-key'])
        ssh_pub = cli(['cephadm', 'get-pub-key'])

        with open(args.output_pub_ssh_key, 'w') as f:
            f.write(ssh_pub)
        logger.info('Wrote public SSH key to to %s' % args.output_pub_ssh_key)

        logger.info('Adding key to root@localhost\'s authorized_keys...')
        if not os.path.exists('/root/.ssh'):
            os.mkdir('/root/.ssh', 0o700)
        auth_keys_file = '/root/.ssh/authorized_keys'
        add_newline = False
        # only append a separating newline if the file exists, is
        # non-empty, and does not already end with one
        if os.path.exists(auth_keys_file):
            with open(auth_keys_file, 'r') as f:
                f.seek(0, os.SEEK_END)
                if f.tell() > 0:
                    f.seek(f.tell()-1, os.SEEK_SET)  # go to last char
                    if f.read() != '\n':
                        add_newline = True
        with open(auth_keys_file, 'a') as f:
            os.fchmod(f.fileno(), 0o600)  # just in case we created it
            if add_newline:
                f.write('\n')
            f.write(ssh_pub.strip() + '\n')

        host = get_hostname()
        logger.info('Adding host %s...' % host)
        cli(['orch', 'host', 'add', host])

        if not args.orphan_initial_daemons:
            for t in ['mon', 'mgr', 'crash']:
                logger.info('Deploying %s service with default placement...' % t)
                cli(['orch', 'apply', t])

        if not args.skip_monitoring_stack:
            logger.info('Enabling mgr prometheus module...')
            cli(['mgr', 'module', 'enable', 'prometheus'])
            for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
                logger.info('Deploying %s service with default placement...' % t)
                cli(['orch', 'apply', t])

    if not args.skip_dashboard:
        logger.info('Enabling the dashboard module...')
        cli(['mgr', 'module', 'enable', 'dashboard'])
        wait_for_mgr_restart()

        # dashboard crt and key
        if args.dashboard_key and args.dashboard_crt:
            logger.info('Using provided dashboard certificate...')
            mounts = {
                pathify(args.dashboard_crt.name): '/tmp/dashboard.crt:z',
                pathify(args.dashboard_key.name): '/tmp/dashboard.key:z'
            }
            cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts)
            cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts)
        else:
            logger.info('Generating a dashboard self-signed certificate...')
            cli(['dashboard', 'create-self-signed-cert'])

        logger.info('Creating initial admin user...')
        password = args.initial_dashboard_password or generate_password()
        cmd = ['dashboard', 'ac-user-create', args.initial_dashboard_user, password, 'administrator', '--force-password']
        if not args.dashboard_password_noupdate:
            cmd.append('--pwd-update-required')
        cli(cmd)
        logger.info('Fetching dashboard port number...')
        out = cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
        port = int(out)

        logger.info('Ceph Dashboard is now available at:\n\n'
                    '\t     URL: https://%s:%s/\n'
                    '\t    User: %s\n'
                    '\tPassword: %s\n' % (
                        get_fqdn(), port,
                        args.initial_dashboard_user,
                        password))

    if args.apply_spec:
        logger.info('Applying %s to cluster' % args.apply_spec)

        # naive YAML scan: push our ssh key to every other host named in
        # the spec so the orchestrator can reach it
        with open(args.apply_spec) as f:
            for line in f:
                if 'hostname:' in line:
                    line = line.replace('\n', '')
                    split = line.split(': ')
                    # NOTE(review): 'host' is only bound inside the
                    # `if not args.skip_ssh` block above; --apply-spec
                    # combined with --skip-ssh would hit an unbound local
                    # here -- confirm.
                    if split[1] != host:
                        logger.info('Adding ssh key to %s' % split[1])

                        ssh_key = '/etc/ceph/ceph.pub'
                        if args.ssh_public_key:
                            ssh_key = args.ssh_public_key.name
                        out, err, code = call_throws(['ssh-copy-id', '-f', '-i', ssh_key, 'root@%s' % split[1]])

        mounts = {}
        mounts[pathify(args.apply_spec)] = '/tmp/spec.yml:z'

        out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
        logger.info(out)

    logger.info('You can access the Ceph CLI with:\n\n'
                '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
                    sys.argv[0],
                    fsid,
                    args.output_config,
                    args.output_keyring))
    logger.info('Please consider enabling telemetry to help improve Ceph:\n\n'
                '\tceph telemetry on\n\n'
                'For more information see:\n\n'
                '\thttps://docs.ceph.com/docs/master/mgr/telemetry/\n')
    logger.info('Bootstrap complete.')
    return 0
2726
2727##################################
2728
def extract_uid_gid_monitoring(daemon_type):
    # type: (str) -> Tuple[int, int]
    """Return the (uid, gid) a monitoring-stack daemon should run as.

    node-exporter uses the fixed 'nobody' ids; the others are probed
    from a well-known path inside the container image.
    """
    if daemon_type == 'node-exporter':
        return 65534, 65534
    # path inside the image whose ownership tells us the daemon's ids
    probe_paths = {
        'prometheus': '/etc/prometheus',
        'grafana': '/var/lib/grafana',
        'alertmanager': '/etc/alertmanager',
    }
    if daemon_type not in probe_paths:
        raise Error("{} not implemented yet".format(daemon_type))
    uid, gid = extract_uid_gid(file_path=probe_paths[daemon_type])
    return uid, gid
2743
2744
@default_image
def command_deploy():
    # type: () -> None
    """Deploy (or reconfigure/redeploy) a single daemon on this host.

    args.name is '<type>.<id>'.  Dispatches on daemon type: core ceph
    daemons, monitoring-stack daemons, nfs-ganesha, or iscsi.  Raises
    Error for unknown daemon types or failed preconditions.
    """
    daemon_type, daemon_id = args.name.split('.', 1)

    # serialize all cephadm operations on this cluster
    l = FileLock(args.fsid)
    l.acquire()

    if daemon_type not in get_supported_daemons():
        raise Error('daemon type %s not recognized' % daemon_type)

    # a daemon whose unit is already running is a redeploy, not a deploy
    redeploy = False
    unit_name = get_unit_name(args.fsid, daemon_type, daemon_id)
    (_, state, _) = check_unit(unit_name)
    if state == 'running':
        redeploy = True

    if args.reconfig:
        logger.info('%s daemon %s ...' % ('Reconfig', args.name))
    elif redeploy:
        logger.info('%s daemon %s ...' % ('Redeploy', args.name))
    else:
        logger.info('%s daemon %s ...' % ('Deploy', args.name))

    if daemon_type in Ceph.daemons:
        config, keyring = get_config_and_keyring()
        uid, gid = extract_uid_gid()
        make_var_run(args.fsid, uid, gid)
        c = get_container(args.fsid, daemon_type, daemon_id,
                          ptrace=args.allow_ptrace)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      osd_fsid=args.osd_fsid,
                      reconfig=args.reconfig)

    elif daemon_type in Monitoring.components:
        # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
        # Default Checks
        # only check ports on a fresh deploy; a reconfig/redeploy will
        # find its own ports in use
        if not args.reconfig and not redeploy:
            daemon_ports = Monitoring.port_map[daemon_type]  # type: List[int]
            if any([port_in_use(port) for port in daemon_ports]):
                raise Error("TCP Port(s) '{}' required for {} is already in use".format(",".join(map(str, daemon_ports)), daemon_type))

        # make sure provided config-json is sufficient
        config = get_parm(args.config_json)  # type: ignore
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())
        required_args = Monitoring.components[daemon_type].get('config-json-args', list())
        if required_files:
            if not config or not all(c in config.get('files', {}).keys() for c in required_files):  # type: ignore
                raise Error("{} deployment requires config-json which must "
                            "contain file content for {}".format(daemon_type.capitalize(), ', '.join(required_files)))
        if required_args:
            if not config or not all(c in config.keys() for c in required_args):  # type: ignore
                raise Error("{} deployment requires config-json which must "
                            "contain arg for {}".format(daemon_type.capitalize(), ', '.join(required_args)))

        uid, gid = extract_uid_gid_monitoring(daemon_type)
        c = get_container(args.fsid, daemon_type, daemon_id)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=args.reconfig)

    elif daemon_type == NFSGanesha.daemon_type:
        if not args.reconfig and not redeploy:
            NFSGanesha.port_in_use()
        config, keyring = get_config_and_keyring()
        # TODO: extract ganesha uid/gid (997, 994) ?
        uid, gid = extract_uid_gid()
        c = get_container(args.fsid, daemon_type, daemon_id)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=args.reconfig)

    elif daemon_type == CephIscsi.daemon_type:
        config, keyring = get_config_and_keyring()
        uid, gid = extract_uid_gid()
        c = get_container(args.fsid, daemon_type, daemon_id)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=args.reconfig)
    else:
        raise Error("{} not implemented in command_deploy function".format(daemon_type))
2826
2827##################################
2828
@infer_image
def command_run():
    # type: () -> int
    """Run a daemon's container in the foreground, returning its exit code."""
    daemon_type, daemon_id = args.name.split('.', 1)
    ctr = get_container(args.fsid, daemon_type, daemon_id)
    return call_timeout(ctr.run_cmd(), args.timeout)
2836
2837##################################
2838
@infer_fsid
@infer_config
@infer_image
def command_shell():
    # type: () -> int
    """Spawn an interactive shell (or run args.command) in a ceph container.

    Mounts the cluster's config/keyring and data dirs so the usual ceph
    CLI tools work inside.  Returns the container's exit code.
    """
    if args.fsid:
        make_log_dir(args.fsid)
    # pick which daemon's mount set to emulate; default to osd, which
    # gets the most mounts
    if args.name:
        if '.' in args.name:
            (daemon_type, daemon_id) = args.name.split('.', 1)
        else:
            daemon_type = args.name
            daemon_id = None
    else:
        daemon_type = 'osd'  # get the most mounts
        daemon_id = None

    if daemon_id and not args.fsid:
        raise Error('must pass --fsid to specify cluster')

    # use /etc/ceph files by default, if present.  we do this instead of
    # making these defaults in the arg parser because we don't want an error
    # if they don't exist.
    if not args.keyring and os.path.exists(SHELL_DEFAULT_KEYRING):
        args.keyring = SHELL_DEFAULT_KEYRING

    container_args = []  # type: List[str]
    mounts = get_container_mounts(args.fsid, daemon_type, daemon_id,
                                  no_config=True if args.config else False)
    if args.config:
        mounts[pathify(args.config)] = '/etc/ceph/ceph.conf:z'
    if args.keyring:
        mounts[pathify(args.keyring)] = '/etc/ceph/ceph.keyring:z'
    if args.mount:
        # expose an arbitrary host path under /mnt inside the container
        mount = pathify(args.mount)
        filename = os.path.basename(mount)
        mounts[mount] = '/mnt/{}:z'.format(filename)
    if args.command:
        command = args.command
    else:
        # interactive session: allocate a tty and set a recognizable prompt
        command = ['bash']
        container_args += [
            '-it',
            '-e', 'LANG=C',
            '-e', "PS1=%s" % CUSTOM_PS1,
        ]
    if args.fsid:
        # give root a persistent home dir (with shell dotfiles) per cluster
        home = os.path.join(args.data_dir, args.fsid, 'home')
        if not os.path.exists(home):
            logger.debug('Creating root home at %s' % home)
            makedirs(home, 0, 0, 0o660)
        if os.path.exists('/etc/skel'):
            for f in os.listdir('/etc/skel'):
                if f.startswith('.bash'):
                    shutil.copyfile(os.path.join('/etc/skel', f),
                                    os.path.join(home, f))
        mounts[home] = '/root'

    c = CephContainer(
        image=args.image,
        entrypoint='doesnotmatter',
        args=[],
        container_args=container_args,
        volume_mounts=mounts,
        envs=args.env,
        privileged=True)
    command = c.shell_cmd(command)

    return call_timeout(command, args.timeout)
2908
2909##################################
2910
@infer_fsid
def command_enter():
    # type: () -> int
    """Exec a shell (or args.command) inside a running daemon's container."""
    if not args.fsid:
        raise Error('must pass --fsid to specify cluster')
    daemon_type, daemon_id = args.name.split('.', 1)
    extra_container_args = []  # type: List[str]
    if args.command:
        cmd = args.command
    else:
        # no explicit command: drop into an interactive shell with a
        # recognizable prompt
        cmd = ['sh']
        extra_container_args.extend([
            '-it',
            '-e', 'LANG=C',
            '-e', "PS1=%s" % CUSTOM_PS1,
        ])
    ctr = CephContainer(
        image=args.image,
        entrypoint='doesnotmatter',
        container_args=extra_container_args,
        cname='ceph-%s-%s.%s' % (args.fsid, daemon_type, daemon_id),
    )
    return call_timeout(ctr.exec_cmd(cmd), args.timeout)
2935
2936##################################
2937
@infer_fsid
@infer_image
def command_ceph_volume():
    # type: () -> None
    """Run a ceph-volume subcommand inside a privileged container."""
    if args.fsid:
        make_log_dir(args.fsid)

        lock = FileLock(args.fsid)
        lock.acquire()

    uid, gid = 0, 0  # ceph-volume runs as root
    mounts = get_container_mounts(args.fsid, 'osd', None)

    # keep references so the NamedTemporaryFiles stay alive until the
    # container has run
    tmp_config = None
    tmp_keyring = None

    config, keyring = get_config_and_keyring()

    if config:
        # stage the config in a temp file and bind-mount it in
        tmp_config = write_tmp(config, uid, gid)
        mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'

    if keyring:
        # same for the bootstrap-osd keyring
        tmp_keyring = write_tmp(keyring, uid, gid)
        mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'

    container = CephContainer(
        image=args.image,
        entrypoint='/usr/sbin/ceph-volume',
        envs=args.env,
        args=args.command,
        privileged=True,
        volume_mounts=mounts,
    )
    out, err, code = call_throws(container.run_cmd(), verbose=True)
    if not code:
        print(out)
2977
2978##################################
2979
@infer_fsid
def command_unit():
    # type: () -> None
    """Run a systemctl verb (start/stop/...) against one daemon's unit."""
    if not args.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(args.fsid, args.name)
    call_throws(['systemctl', args.command, unit_name])
2992
2993##################################
2994
@infer_fsid
def command_logs():
    # type: () -> None
    """Show journalctl output for one daemon of this cluster."""
    if not args.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(args.fsid, args.name)

    cmd = [find_program('journalctl'), '-u', unit_name]
    if args.command:
        # pass any extra args (e.g. -f, -n) straight through to journalctl
        cmd.extend(args.command)

    # call this directly, without our wrapper, so that we get an unmolested
    # stdout with logger prefixing.
    logger.debug("Running command: %s" % ' '.join(cmd))
    subprocess.call(cmd)  # type: ignore
3012
3013##################################
3014
def list_networks():
    # type: () -> Dict[str,List[str]]
    """Return a map of locally-attached network -> list of local source IPs."""
    # Sadly, 18.04's iproute2 4.15.0-2ubun doesn't support the -j (JSON)
    # flag, so we parse the plain-text `ip route ls` output instead of
    # doing json.loads on `ip -j route ls`.
    out, _, _ = call_throws([find_executable('ip'), 'route', 'ls'])
    return _parse_ip_route(out)
3026
3027def _parse_ip_route(out):
3028 r = {} # type: Dict[str,List[str]]
3029 p = re.compile(r'^(\S+) (.*)scope link (.*)src (\S+)')
3030 for line in out.splitlines():
3031 m = p.findall(line)
3032 if not m:
3033 continue
3034 net = m[0][0]
3035 ip = m[0][3]
3036 if net not in r:
3037 r[net] = []
3038 r[net].append(ip)
3039 return r
3040
def command_list_networks():
    # type: () -> None
    """Print this host's network -> source-IP map as pretty JSON."""
    print(json.dumps(list_networks(), indent=4))
3045
3046##################################
3047
def command_ls():
    # type: () -> None
    """Print every daemon found on this host as pretty JSON."""
    daemons = list_daemons(detail=not args.no_detail,
                           legacy_dir=args.legacy_dir)
    print(json.dumps(daemons, indent=4))
3053
def list_daemons(detail=True, legacy_dir=None):
    # type: (bool, Optional[str]) -> List[Dict[str, str]]
    """Return a list of dicts describing every ceph daemon found on this host.

    Scans the data dir for both legacy layouts (<type>/<cluster>-<id>) and
    cephadm layouts (<fsid>/<type>.<id>).  With detail=True, each record
    additionally carries systemd enabled/state and (for cephadm daemons)
    container id/image/version/timestamps.
    """
    host_version = None
    ls = []

    data_dir = args.data_dir
    if legacy_dir is not None:
        data_dir = os.path.abspath(legacy_dir + data_dir)

    # keep track of ceph versions we see
    seen_versions = {}  # type: Dict[str, Optional[str]]

    # /var/lib/ceph
    if os.path.exists(data_dir):
        for i in os.listdir(data_dir):
            if i in ['mon', 'osd', 'mds', 'mgr']:
                # legacy layout: /var/lib/ceph/<type>/<cluster>-<id>
                daemon_type = i
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '-' not in j:
                        continue
                    (cluster, daemon_id) = j.split('-', 1)
                    fsid = get_legacy_daemon_fsid(
                        cluster, daemon_type, daemon_id,
                        legacy_dir=legacy_dir)
                    legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
                    # NOTE: `i` is rebound from dir name to the result record
                    i = {
                        'style': 'legacy',
                        'name': '%s.%s' % (daemon_type, daemon_id),
                        'fsid': fsid if fsid is not None else 'unknown',
                        'systemd_unit': legacy_unit_name,
                    }
                    if detail:
                        (i['enabled'], i['state'], _) = check_unit(legacy_unit_name)
                        # legacy daemons all run the host's ceph binary, so
                        # `ceph -v` only needs to be asked once
                        if not host_version:
                            try:
                                out, err, code = call(['ceph', '-v'])
                                if not code and out.startswith('ceph version '):
                                    host_version = out.split(' ')[2]
                            except Exception:
                                pass
                        i['host_version'] = host_version
                    ls.append(i)
            elif is_fsid(i):
                # cephadm layout: /var/lib/ceph/<fsid>/<type>.<id>
                fsid = str(i)  # convince mypy that fsid is a str here
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '.' in j:
                        name = j
                        (daemon_type, daemon_id) = j.split('.', 1)
                        unit_name = get_unit_name(fsid,
                                                  daemon_type,
                                                  daemon_id)
                    else:
                        continue
                    # NOTE: `i` is rebound from dir name to the result record
                    i = {
                        'style': 'cephadm:v1',
                        'name': name,
                        'fsid': fsid,
                        'systemd_unit': unit_name,
                    }
                    if detail:
                        # get container id
                        (i['enabled'], i['state'], _) = check_unit(unit_name)
                        container_id = None
                        image_name = None
                        image_id = None
                        version = None
                        start_stamp = None

                        # old podman (<1.6.2) only exposes the image id via
                        # .ImageID; newer podman and docker use .Image
                        if 'podman' in container_path and get_podman_version() < (1, 6, 2):
                            image_field = '.ImageID'
                        else:
                            image_field = '.Image'

                        out, err, code = call(
                            [
                                container_path, 'inspect',
                                '--format', '{{.Id}},{{.Config.Image}},{{%s}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}' % image_field,
                                'ceph-%s-%s' % (fsid, j)
                            ],
                            verbose_on_failure=False)
                        if not code:
                            # container exists (running or not): parse the
                            # comma-separated inspect output
                            (container_id, image_name, image_id, start,
                             version) = out.strip().split(',')
                            image_id = normalize_container_id(image_id)
                            daemon_type = name.split('.', 1)[0]
                            start_stamp = try_convert_datetime(start)
                            # fall back to a version already seen for this
                            # image when the label is missing/garbage
                            if not version or '.' not in version:
                                version = seen_versions.get(image_id, None)
                            if daemon_type == NFSGanesha.daemon_type:
                                version = NFSGanesha.get_version(container_id)
                            if daemon_type == CephIscsi.daemon_type:
                                version = CephIscsi.get_version(container_id)
                            elif not version:
                                # exec a version probe inside the container,
                                # keyed on daemon type
                                if daemon_type in Ceph.daemons:
                                    out, err, code = call(
                                        [container_path, 'exec', container_id,
                                         'ceph', '-v'])
                                    if not code and \
                                       out.startswith('ceph version '):
                                        version = out.split(' ')[2]
                                        seen_versions[image_id] = version
                                elif daemon_type == 'grafana':
                                    out, err, code = call(
                                        [container_path, 'exec', container_id,
                                         'grafana-server', '-v'])
                                    if not code and \
                                       out.startswith('Version '):
                                        version = out.split(' ')[1]
                                        seen_versions[image_id] = version
                                elif daemon_type in ['prometheus',
                                                     'alertmanager',
                                                     'node-exporter']:
                                    cmd = daemon_type.replace('-', '_')
                                    out, err, code = call(
                                        [container_path, 'exec', container_id,
                                         cmd, '--version'])
                                    # these tools print their version on stderr
                                    if not code and \
                                       err.startswith('%s, version ' % cmd):
                                        version = err.split(' ')[2]
                                        seen_versions[image_id] = version
                                else:
                                    # NOTE(review): uses the root logging
                                    # module, unlike `logger` elsewhere
                                    logging.warning('version for unknown daemon type %s' % daemon_type)
                        else:
                            # no container: recover the image name from the
                            # unit.image file written at deploy time
                            vfile = os.path.join(data_dir, fsid, j, 'unit.image')  # type: ignore
                            try:
                                with open(vfile, 'r') as f:
                                    image_name = f.read().strip() or None
                            except IOError:
                                pass
                        i['container_id'] = container_id
                        i['container_image_name'] = image_name
                        i['container_image_id'] = image_id
                        i['version'] = version
                        i['started'] = start_stamp
                        i['created'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.created')
                        )
                        i['deployed'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.image'))
                        i['configured'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.configured'))

                    ls.append(i)

    return ls
3199
3200
e306af50
TL
def get_daemon_description(fsid, name, detail=False, legacy_dir=None):
    # type: (str, str, bool, Optional[str]) -> Dict[str, str]
    """Return the `cephadm ls` record for one daemon, or raise Error."""
    for entry in list_daemons(detail=detail, legacy_dir=legacy_dir):
        if entry['fsid'] == fsid and entry['name'] == name:
            return entry
    raise Error('Daemon not found: {}. See `cephadm ls`'.format(name))
3211
3212
9f95a23c
TL
3213##################################
3214
@default_image
def command_adopt():
    # type: () -> None
    """Adopt a legacy-deployed daemon into cephadm management.

    Validates the requested style, detects the legacy cluster fsid, then
    dispatches to the per-daemon-type adoption routine while holding the
    cluster lock.  Raises Error for unknown/unsupported daemon types.
    """

    if not args.skip_pull:
        logger.info('Pulling latest %s container...' % args.image)
        call_throws([container_path, 'pull', args.image])

    (daemon_type, daemon_id) = args.name.split('.', 1)

    # legacy check
    if args.style != 'legacy':
        raise Error('adoption of style %s not implemented' % args.style)

    # lock
    fsid = get_legacy_daemon_fsid(args.cluster,
                                  daemon_type,
                                  daemon_id,
                                  legacy_dir=args.legacy_dir)
    if not fsid:
        raise Error('could not detect legacy fsid; set fsid in ceph.conf')
    lock = FileLock(fsid)
    lock.acquire()

    # call correct adoption
    if daemon_type in Ceph.daemons:
        command_adopt_ceph(daemon_type, daemon_id, fsid)
    elif daemon_type == 'prometheus':
        command_adopt_prometheus(daemon_id, fsid)
    elif daemon_type == 'grafana':
        command_adopt_grafana(daemon_id, fsid)
    elif daemon_type == 'node-exporter':
        raise Error('adoption of node-exporter not implemented')
    elif daemon_type == 'alertmanager':
        command_adopt_alertmanager(daemon_id, fsid)
    else:
        raise Error('daemon type %s not recognized' % daemon_type)
3252
3253
1911f103
TL
class AdoptOsd(object):
    """Probe a legacy OSD's data dir to discover its osd fsid and
    objectstore type, trying three sources in turn (see command_adopt_ceph):
    the live data dir, `ceph-volume lvm list`, and a ceph-volume 'simple'
    scan JSON file."""

    def __init__(self, osd_data_dir, osd_id):
        # type: (str, str) -> None
        self.osd_data_dir = osd_data_dir
        self.osd_id = osd_id

    def check_online_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Read fsid/type straight from the (mounted) OSD data dir.

        Returns (osd_fsid, osd_type); either may be None if unreadable.
        """

        osd_fsid, osd_type = None, None

        path = os.path.join(self.osd_data_dir, 'fsid')
        try:
            with open(path, 'r') as f:
                osd_fsid = f.read().strip()
            logger.info("Found online OSD at %s" % path)
        except IOError:
            logger.info('Unable to read OSD fsid from %s' % path)
        if os.path.exists(os.path.join(self.osd_data_dir, 'type')):
            with open(os.path.join(self.osd_data_dir, 'type')) as f:
                osd_type = f.read().strip()
        else:
            logger.info('"type" file missing for OSD data dir')

        return osd_fsid, osd_type

    def check_offline_lvm_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Ask `ceph-volume lvm list` (run in a container) about this osd id.

        Returns (osd_fsid, osd_type); (None, None) when not an LVM OSD.
        """

        osd_fsid, osd_type = None, None

        c = CephContainer(
            image=args.image,
            entrypoint='/usr/sbin/ceph-volume',
            args=['lvm', 'list', '--format=json'],
            privileged=True
        )
        out, err, code = call_throws(c.run_cmd(), verbose=False)
        if not code:
            try:
                js = json.loads(out)
                if self.osd_id in js:
                    logger.info("Found offline LVM OSD {}".format(self.osd_id))
                    osd_fsid = js[self.osd_id][0]['tags']['ceph.osd_fsid']
                    for device in js[self.osd_id]:
                        # a 'block' device implies bluestore; a 'data'
                        # device implies filestore
                        if device['tags']['ceph.type'] == 'block':
                            osd_type = 'bluestore'
                            break
                        if device['tags']['ceph.type'] == 'data':
                            osd_type = 'filestore'
                            break
            except ValueError as e:
                logger.info("Invalid JSON in ceph-volume lvm list: {}".format(e))

        return osd_fsid, osd_type

    def check_offline_simple_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Look for a ceph-volume 'simple' scan JSON under /etc/ceph/osd.

        As a side effect, mounts the OSD's data device for non-filestore
        OSDs so the subsequent adopt can move its files.
        """

        osd_fsid, osd_type = None, None

        osd_file = glob("/etc/ceph/osd/{}-[a-f0-9-]*.json".format(self.osd_id))
        if len(osd_file) == 1:
            with open(osd_file[0], 'r') as f:
                try:
                    js = json.loads(f.read())
                    logger.info("Found offline simple OSD {}".format(self.osd_id))
                    osd_fsid = js["fsid"]
                    osd_type = js["type"]
                    if osd_type != "filestore":
                        # need this to be mounted for the adopt to work, as it
                        # needs to move files from this directory
                        call_throws(['mount', js["data"]["path"], self.osd_data_dir])
                except ValueError as e:
                    logger.info("Invalid JSON in {}: {}".format(osd_file, e))

        return osd_fsid, osd_type
3331
9f95a23c
TL
3332
def command_adopt_ceph(daemon_type, daemon_id, fsid):
    # type: (str, str, str) -> None
    """Adopt one legacy ceph daemon (mon/mgr/mds/osd) into cephadm.

    Stops/disables the legacy unit, moves data and logs into the cephadm
    layout, fixes ownership, and deploys a new cephadm systemd unit
    (started again only if the legacy unit was running or --force-start).
    """

    (uid, gid) = extract_uid_gid()

    data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
                    (daemon_type, args.cluster, daemon_id))
    data_dir_src = os.path.abspath(args.legacy_dir + data_dir_src)

    if not os.path.exists(data_dir_src):
        # fix: typo "alrady" -> "already" in the user-facing error
        raise Error("{}.{} data directory '{}' does not exist.  "
                    "Incorrect ID specified, or daemon already adopted?".format(
                        daemon_type, daemon_id, data_dir_src))

    osd_fsid = None
    if daemon_type == 'osd':
        # probe the OSD three ways: online dir, ceph-volume lvm, c-v simple
        adopt_osd = AdoptOsd(data_dir_src, daemon_id)
        osd_fsid, osd_type = adopt_osd.check_online_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
        if not osd_fsid:
            raise Error('Unable to find OSD {}'.format(daemon_id))
        logger.info('objectstore_type is %s' % osd_type)
        assert osd_type
        if osd_type == 'filestore':
            raise Error('FileStore is not supported by cephadm')

    # NOTE: implicit assumption here that the units correspond to the
    # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
    # CLUSTER field.
    unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
    (enabled, state, _) = check_unit(unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(['systemctl', 'disable', unit_name])

    # data
    logger.info('Moving data...')
    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)
    move_files(glob(os.path.join(data_dir_src, '*')),
               data_dir_dst,
               uid=uid, gid=gid)
    logger.debug('Remove dir \'%s\'' % (data_dir_src))
    if os.path.ismount(data_dir_src):
        call_throws(['umount', data_dir_src])
    os.rmdir(data_dir_src)

    logger.info('Chowning content...')
    call_throws(['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])

    if daemon_type == 'mon':
        # rename *.ldb -> *.sst, in case they are coming from ubuntu
        store = os.path.join(data_dir_dst, 'store.db')
        num_renamed = 0
        if os.path.exists(store):
            for oldf in os.listdir(store):
                if oldf.endswith('.ldb'):
                    newf = oldf.replace('.ldb', '.sst')
                    oldp = os.path.join(store, oldf)
                    newp = os.path.join(store, newf)
                    logger.debug('Renaming %s -> %s' % (oldp, newp))
                    os.rename(oldp, newp)
                    # fix: counter was never incremented, so the summary
                    # log below could never fire
                    num_renamed += 1
        if num_renamed:
            logger.info('Renamed %d leveldb *.ldb files to *.sst',
                        num_renamed)
    if daemon_type == 'osd':
        for n in ['block', 'block.db', 'block.wal']:
            p = os.path.join(data_dir_dst, n)
            if os.path.exists(p):
                logger.info('Chowning %s...' % p)
                os.chown(p, uid, gid)
        # disable the ceph-volume 'simple' mode files on the host
        simple_fn = os.path.join('/etc/ceph/osd',
                                 '%s-%s.json' % (daemon_id, osd_fsid))
        if os.path.exists(simple_fn):
            new_fn = simple_fn + '.adopted-by-cephadm'
            logger.info('Renaming %s -> %s', simple_fn, new_fn)
            os.rename(simple_fn, new_fn)
            logger.info('Disabling host unit ceph-volume@ simple unit...')
            call(['systemctl', 'disable',
                  'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
        else:
            # assume this is an 'lvm' c-v for now, but don't error
            # out if it's not.
            logger.info('Disabling host unit ceph-volume@ lvm unit...')
            call(['systemctl', 'disable',
                  'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])

    # config
    config_src = '/etc/ceph/%s.conf' % (args.cluster)
    config_src = os.path.abspath(args.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'config')
    copy_files([config_src], config_dst, uid=uid, gid=gid)

    # logs
    logger.info('Moving logs...')
    log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
                   (args.cluster, daemon_type, daemon_id))
    log_dir_src = os.path.abspath(args.legacy_dir + log_dir_src)
    log_dir_dst = make_log_dir(fsid, uid=uid, gid=gid)
    move_files(glob(log_dir_src),
               log_dir_dst,
               uid=uid, gid=gid)

    logger.info('Creating new units...')
    make_var_run(fsid, uid, gid)
    c = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True,  # unconditionally enable the new unit
                        start=(state == 'running' or args.force_start),
                        osd_fsid=osd_fsid)
    update_firewalld(daemon_type)
3451
3452
def command_adopt_prometheus(daemon_id, fsid):
    # type: (str, str) -> None
    """Move a legacy prometheus deployment under cephadm management."""
    daemon_type = 'prometheus'
    uid, gid = extract_uid_gid_monitoring(daemon_type)

    _stop_and_disable('prometheus')

    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = os.path.abspath(args.legacy_dir + '/etc/prometheus/prometheus.yml')
    config_dst = os.path.join(data_dir_dst, 'etc/prometheus')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files([config_src], config_dst, uid=uid, gid=gid)

    # data
    data_src = os.path.abspath(args.legacy_dir + '/var/lib/prometheus/metrics/')
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree([data_src], data_dst, uid=uid, gid=gid)

    make_var_run(fsid, uid, gid)
    ctr = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon(fsid, daemon_type, daemon_id, ctr, uid, gid)
    update_firewalld(daemon_type)
3481
def command_adopt_grafana(daemon_id, fsid):
    # type: (str, str) -> None
    """Move a legacy grafana deployment under cephadm management."""
    daemon_type = 'grafana'
    uid, gid = extract_uid_gid_monitoring(daemon_type)

    _stop_and_disable('grafana-server')

    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = os.path.abspath(args.legacy_dir + '/etc/grafana/grafana.ini')
    config_dst = os.path.join(data_dir_dst, 'etc/grafana')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files([config_src], config_dst, uid=uid, gid=gid)

    prov_src = os.path.abspath(args.legacy_dir + '/etc/grafana/provisioning/')
    prov_dst = os.path.join(data_dir_dst, 'etc/grafana')
    copy_tree([prov_src], prov_dst, uid=uid, gid=gid)

    # cert
    cert = '/etc/grafana/grafana.crt'
    key = '/etc/grafana/grafana.key'
    if os.path.exists(cert) and os.path.exists(key):
        # copy the pair into the adopted layout and point grafana.ini at it
        cert_src = os.path.abspath(args.legacy_dir + cert)
        makedirs(os.path.join(data_dir_dst, 'etc/grafana/certs'), uid, gid, 0o755)
        cert_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_file')
        copy_files([cert_src], cert_dst, uid=uid, gid=gid)

        key_src = os.path.abspath(args.legacy_dir + key)
        key_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_key')
        copy_files([key_src], key_dst, uid=uid, gid=gid)

        _adjust_grafana_ini(os.path.join(config_dst, 'grafana.ini'))
    else:
        logger.debug("Skipping ssl, missing cert {} or key {}".format(cert, key))

    # data - possible custom dashboards/plugins
    data_src = os.path.abspath(args.legacy_dir + '/var/lib/grafana/')
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree([data_src], data_dst, uid=uid, gid=gid)

    make_var_run(fsid, uid, gid)
    ctr = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon(fsid, daemon_type, daemon_id, ctr, uid, gid)
    update_firewalld(daemon_type)
3535
801d1391
TL
def command_adopt_alertmanager(daemon_id, fsid):
    # type: (str, str) -> None
    """Move a legacy alertmanager deployment under cephadm management."""
    daemon_type = 'alertmanager'
    uid, gid = extract_uid_gid_monitoring(daemon_type)

    _stop_and_disable('prometheus-alertmanager')

    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = os.path.abspath(args.legacy_dir + '/etc/prometheus/alertmanager.yml')
    config_dst = os.path.join(data_dir_dst, 'etc/alertmanager')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files([config_src], config_dst, uid=uid, gid=gid)

    # data
    data_src = os.path.abspath(args.legacy_dir + '/var/lib/prometheus/alertmanager/')
    data_dst = os.path.join(data_dir_dst, 'etc/alertmanager/data')
    copy_tree([data_src], data_dst, uid=uid, gid=gid)

    make_var_run(fsid, uid, gid)
    ctr = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon(fsid, daemon_type, daemon_id, ctr, uid, gid)
    update_firewalld(daemon_type)
3564
9f95a23c
TL
3565def _adjust_grafana_ini(filename):
3566 # type: (str) -> None
3567
3568 # Update cert_file, cert_key pathnames in server section
3569 # ConfigParser does not preserve comments
3570 try:
3571 with open(filename, "r") as grafana_ini:
3572 lines = grafana_ini.readlines()
3573 with open("{}.new".format(filename), "w") as grafana_ini:
3574 server_section=False
3575 for line in lines:
3576 if line.startswith('['):
3577 server_section=False
3578 if line.startswith('[server]'):
3579 server_section=True
3580 if server_section:
3581 line = re.sub(r'^cert_file.*',
3582 'cert_file = /etc/grafana/certs/cert_file', line)
3583 line = re.sub(r'^cert_key.*',
3584 'cert_key = /etc/grafana/certs/cert_key', line)
3585 grafana_ini.write(line)
3586 os.rename("{}.new".format(filename), filename)
3587 except OSError as err:
3588 raise Error("Cannot update {}: {}".format(filename, err))
3589
3590
def _stop_and_disable(unit_name):
    # type: (str) -> None
    """Stop (if running) and disable (if enabled) a legacy systemd unit."""
    enabled, state, _ = check_unit(unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(['systemctl', 'disable', unit_name])
3601
3602
3603##################################
3604
def command_rm_daemon():
    # type: () -> None
    """Remove one daemon from this host, backing up precious data dirs."""
    lock = FileLock(args.fsid)
    lock.acquire()

    unit_name = get_unit_name_by_daemon_name(args.fsid, args.name)

    daemon_type, daemon_id = args.name.split('.', 1)
    if daemon_type in ['mon', 'osd'] and not args.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    for verb in ['stop', 'reset-failed', 'disable']:
        call(['systemctl', verb, unit_name], verbose_on_failure=False)

    data_dir = get_data_dir(args.fsid, daemon_type, daemon_id)
    if daemon_type in ['mon', 'osd', 'prometheus'] and \
       not args.force_delete_data:
        # rename it out of the way -- do not delete
        backup_dir = os.path.join(args.data_dir, args.fsid, 'removed')
        if not os.path.exists(backup_dir):
            makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
        dirname = '%s.%s_%s' % (daemon_type, daemon_id,
                                datetime.datetime.utcnow().strftime(DATEFMT))
        os.rename(data_dir, os.path.join(backup_dir, dirname))
    else:
        call_throws(['rm', '-rf', data_dir])
3637
3638##################################
3639
def command_rm_cluster():
    # type: () -> None
    """Remove all of one cluster's daemons, units, data, and logs from
    this host.

    Requires --force because this irreversibly destroys cluster data.
    """
    if not args.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    l = FileLock(args.fsid)
    l.acquire()

    # stop + disable individual daemon units
    for d in list_daemons(detail=False):
        if d['fsid'] != args.fsid:
            continue
        if d['style'] != 'cephadm:v1':
            continue
        unit_name = get_unit_name(args.fsid, d['name'])
        call(['systemctl', 'stop', unit_name],
             verbose_on_failure=False)
        call(['systemctl', 'reset-failed', unit_name],
             verbose_on_failure=False)
        call(['systemctl', 'disable', unit_name],
             verbose_on_failure=False)

    # cluster units
    for unit_name in ['ceph-%s.target' % args.fsid]:
        call(['systemctl', 'stop', unit_name],
             verbose_on_failure=False)
        call(['systemctl', 'reset-failed', unit_name],
             verbose_on_failure=False)
        call(['systemctl', 'disable', unit_name],
             verbose_on_failure=False)

    # systemd escapes '-' as \x2d in slice names
    slice_name = 'system-%s.slice' % (('ceph-%s' % args.fsid).replace('-',
                                                                      '\\x2d'))
    call(['systemctl', 'stop', slice_name],
         verbose_on_failure=False)

    # rm units
    call_throws(['rm', '-f', args.unit_dir +
                 '/ceph-%s@.service' % args.fsid])
    call_throws(['rm', '-f', args.unit_dir +
                 '/ceph-%s.target' % args.fsid])
    call_throws(['rm', '-rf',
                 args.unit_dir + '/ceph-%s.target.wants' % args.fsid])
    # rm data
    call_throws(['rm', '-rf', args.data_dir + '/' + args.fsid])
    # rm logs
    call_throws(['rm', '-rf', args.log_dir + '/' + args.fsid])
    call_throws(['rm', '-rf', args.log_dir +
                 '/*.wants/ceph-%s@*' % args.fsid])
    # rm logrotate config
    call_throws(['rm', '-f', args.logrotate_dir + '/ceph-%s' % args.fsid])

    # clean up config, keyring, and pub key files
    files = ['/etc/ceph/ceph.conf', '/etc/ceph/ceph.pub', '/etc/ceph/ceph.client.admin.keyring']

    if os.path.exists(files[0]):
        # only remove these if the conf actually references this fsid
        valid_fsid = False
        with open(files[0]) as f:
            if args.fsid in f.read():
                valid_fsid = True
        if valid_fsid:
            # iterate the list directly instead of indexing by range()
            for fn in files:
                if os.path.exists(fn):
                    os.remove(fn)
3705
9f95a23c
TL
3706
3707##################################
3708
def check_time_sync(enabler=None):
    # type: (Optional[Packager]) -> bool
    """Return True if one of the known time-sync service units is active.

    When `enabler` is given, check_units may use it to enable a unit.
    """
    candidates = [
        'chrony.service',  # 18.04 (at least)
        'chronyd.service',  # el / opensuse
        'systemd-timesyncd.service',
        'ntpd.service',  # el7 (at least)
        'ntp.service',  # 18.04 (at least)
    ]
    if check_units(candidates, enabler):
        return True
    logger.warning('No time sync service is running; checked for %s' % candidates)
    return False
3722
def command_check_host():
    # type: () -> None
    """Verify host prerequisites (container runtime, systemctl, lvcreate,
    time sync, expected hostname); raise Error listing every failure."""
    errors = []

    commands = ['systemctl', 'lvcreate']

    # fix: initialize so the 'not found' branch below cannot hit an
    # UnboundLocalError when no container runtime is located
    container_path = None
    if args.docker:
        container_path = find_program('docker')
    else:
        for i in CONTAINER_PREFERENCE:
            try:
                container_path = find_program(i)
                break
            except Exception as e:
                logger.debug('Could not locate %s: %s' % (i, e))
        if not container_path:
            errors.append('Unable to locate any of %s' % CONTAINER_PREFERENCE)
        else:
            logger.info('podman|docker (%s) is present' % container_path)

    for command in commands:
        try:
            find_program(command)
            logger.info('%s is present' % command)
        except ValueError:
            errors.append('%s binary does not appear to be installed' % command)

    # check for configured+running chronyd or ntp
    if not check_time_sync():
        errors.append('No time synchronization is active')

    if 'expect_hostname' in args and args.expect_hostname:
        if get_hostname().lower() != args.expect_hostname.lower():
            errors.append('hostname "%s" does not match expected hostname "%s"' % (
                get_hostname(), args.expect_hostname))
        else:
            # fix: only claim a match when the hostname actually matched
            logger.info('Hostname "%s" matches what is expected.',
                        args.expect_hostname)

    if errors:
        raise Error('\n'.join(errors))

    logger.info('Host looks OK')
3764
3765##################################
3766
def command_prepare_host():
    # type: () -> None
    """Install any missing host prerequisites, then re-run the host check."""
    # packager is created lazily, at most once, and only if needed
    pkg = None

    logger.info('Verifying podman|docker is present...')
    if not container_path:
        pkg = pkg or create_packager()
        pkg.install_podman()

    logger.info('Verifying lvm2 is present...')
    if not find_executable('lvcreate'):
        pkg = pkg or create_packager()
        pkg.install(['lvm2'])

    logger.info('Verifying time synchronization is in place...')
    if not check_time_sync():
        pkg = pkg or create_packager()
        pkg.install(['chrony'])
        # check again, and this time try to enable
        # the service
        check_time_sync(enabler=pkg)

    if 'expect_hostname' in args and args.expect_hostname and args.expect_hostname != get_hostname():
        logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), args.expect_hostname))
        call_throws(['hostname', args.expect_hostname])
        with open('/etc/hostname', 'w') as f:
            f.write(args.expect_hostname + '\n')

    logger.info('Repeating the final host check...')
    command_check_host()
3799
3800##################################
3801
class CustomValidation(argparse.Action):
    """Argparse action that validates '<type>.<id>' daemon names."""

    def _check_name(self, values):
        # a valid name splits into exactly a type and an id
        parts = values.split('.', 1)
        if len(parts) != 2:
            raise argparse.ArgumentError(self,
                "must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com")

        daemons = get_supported_daemons()
        if parts[0] not in daemons:
            raise argparse.ArgumentError(self,
                "name must declare the type of daemon e.g. "
                "{}".format(', '.join(daemons)))

    def __call__(self, parser, namespace, values, option_string=None):
        # only the --name argument gets the extra validation
        if self.dest == "name":
            self._check_name(values)
        setattr(namespace, self.dest, values)
3821
3822##################################
3823
def get_distro():
    # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
    """Return (id, version_id, codename) parsed from /etc/os-release.

    Each element is lowercased, or None if the corresponding key is absent.
    """
    distro = None
    distro_version = None
    distro_codename = None
    with open('/etc/os-release', 'r') as f:
        for line in f.readlines():
            line = line.strip()
            if '=' not in line or line.startswith('#'):
                continue
            (var, val) = line.split('=', 1)
            # strip surrounding double quotes; the length guard avoids an
            # IndexError on empty values (e.g. a bare `ID=` line)
            if len(val) >= 2 and val[0] == '"' and val[-1] == '"':
                val = val[1:-1]
            if var == 'ID':
                distro = val.lower()
            elif var == 'VERSION_ID':
                distro_version = val.lower()
            elif var == 'VERSION_CODENAME':
                distro_codename = val.lower()
    return distro, distro_version, distro_codename
3844
class Packager(object):
    """Base class for distro-specific package helpers (repo management and
    package installation)."""

    def __init__(self, stable=None, version=None, branch=None, commit=None):
        # exactly one source selector may be active: a stable release name,
        # a specific version, a dev branch (optionally with a commit), or
        # nothing at all
        assert \
            (stable and not version and not branch and not commit) or \
            (not stable and version and not branch and not commit) or \
            (not stable and not version and branch) or \
            (not stable and not version and not branch and not commit)
        self.stable = stable
        self.version = version
        self.branch = branch
        self.commit = commit

    def add_repo(self):
        # subclasses configure the distro's ceph package repo
        raise NotImplementedError

    def rm_repo(self):
        # subclasses remove the configured repo again
        raise NotImplementedError

    def query_shaman(self, distro, distro_version, branch, commit):
        """Resolve a dev branch/commit to a chacra repo file via shaman.

        Returns the repo file contents as a string; raises Error when
        either service has no matching build.
        """
        # query shaman
        logging.info('Fetching repo metadata from shaman and chacra...')
        shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
            distro=distro,
            distro_version=distro_version,
            branch=branch,
            sha1=commit or 'latest',
            arch=get_arch()
        )
        try:
            shaman_response = urlopen(shaman_url)
        except HTTPError as err:
            logging.error('repository not found in shaman (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, shaman_url))
        try:
            # shaman redirects to the chacra URL that serves the repo file
            chacra_url = shaman_response.geturl()
            chacra_response = urlopen(chacra_url)
        except HTTPError as err:
            logging.error('repository not found in chacra (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, chacra_url))
        return chacra_response.read().decode('utf-8')

    def repo_gpgkey(self):
        # NOTE(review): the stable/autobuild branches return a (url, name)
        # tuple, but the --gpg-url branch returns the bare string; a caller
        # that unpacks `url, name = self.repo_gpgkey()` would break when
        # --gpg-url is set -- confirm whether this is intended.
        if args.gpg_url:
            return args.gpg_url
        if self.stable or self.version:
            return 'https://download.ceph.com/keys/release.asc', 'release'
        else:
            return 'https://download.ceph.com/keys/autobuild.asc', 'autobuild'

    def enable_service(self, service):
        """
        Start and enable the service (typically using systemd).
        """
        call_throws(['systemctl', 'enable', '--now', service])
3899
3900
class Apt(Packager):
    """Repo and package management for Debian/Ubuntu via apt."""

    DISTRO_NAMES = {
        'ubuntu': 'ubuntu',
        'debian': 'debian',
    }

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version, distro_codename):
        super(Apt, self).__init__(stable=stable, version=version,
                                  branch=branch, commit=commit)
        self.distro = self.DISTRO_NAMES[distro]
        self.distro_codename = distro_codename

    def repo_path(self):
        return '/etc/apt/sources.list.d/ceph.list'

    def add_repo(self):
        """Install the repo GPG key and write the ceph apt source file.

        Raises Error if the GPG key cannot be fetched.
        """
        url, name = self.repo_gpgkey()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'w') as f:
            f.write(key)

        # Stable/version builds come from download.ceph.com; dev builds are
        # resolved through shaman (the returned text is a complete repo file).
        if self.version:
            content = 'deb %s/debian-%s/ %s main\n' % (
                args.repo_url, self.version, self.distro_codename)
        elif self.stable:
            content = 'deb %s/debian-%s/ %s main\n' % (
                args.repo_url, self.stable, self.distro_codename)
        else:
            content = self.query_shaman(self.distro, self.distro_codename, self.branch,
                                        self.commit)

        logger.info('Installing repo file at %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        """Remove any installed ceph GPG keys and the apt source file."""
        for name in ['autobuild', 'release']:
            p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
            if os.path.exists(p):
                logger.info('Removing repo GPG key %s...' % p)
                os.unlink(p)
        if os.path.exists(self.repo_path()):
            logger.info('Removing repo at %s...' % self.repo_path())
            os.unlink(self.repo_path())

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws(['apt', 'install', '-y'] + ls)

    def install_podman(self):
        """Try to install podman (via the projectatomic PPA on Ubuntu);
        fall back to docker.io if podman installation fails."""
        if self.distro == 'ubuntu':
            # fix typo: message previously read 'pdoman'
            logger.info('Setting up repo for podman...')
            self.install(['software-properties-common'])
            call_throws(['add-apt-repository', '-y', 'ppa:projectatomic/ppa'])
            call_throws(['apt', 'update'])

        logger.info('Attempting podman install...')
        try:
            self.install(['podman'])
        except Error:
            logger.info('Podman did not work. Falling back to docker...')
            self.install(['docker.io'])
3971
class YumDnf(Packager):
    """Repo and package management for RPM distros via yum or dnf."""

    # distro id -> (shaman distro name, repo code prefix)
    DISTRO_NAMES = {
        'centos': ('centos', 'el'),
        'rhel': ('centos', 'el'),
        'scientific': ('centos', 'el'),
        'fedora': ('fedora', 'fc'),
    }

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version):
        super(YumDnf, self).__init__(stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.major = int(distro_version.split('.')[0])
        self.distro_normalized = self.DISTRO_NAMES[distro][0]
        self.distro_code = self.DISTRO_NAMES[distro][1] + str(self.major)
        # dnf replaced yum starting with Fedora 30 / EL 8.
        if (self.distro_code == 'fc' and self.major >= 30) or \
           (self.distro_code == 'el' and self.major >= 8):
            self.tool = 'dnf'
        else:
            self.tool = 'yum'

    def custom_repo(self, **kw):
        """
        Repo files need special care in that a whole line should not be present
        if there is no value for it. Because we were using `format()` we could
        not conditionally add a line for a repo file. So the end result would
        contain a key with a missing value (say if we were passing `None`).

        For example, it could look like::

        [ceph repo]
        name= ceph repo
        proxy=
        gpgcheck=

        Which breaks. This function allows us to conditionally add lines,
        preserving an order and be more careful.

        Previously, and for historical purposes, this is how the template used
        to look::

        custom_repo =
        [{repo_name}]
        name={name}
        baseurl={baseurl}
        enabled={enabled}
        gpgcheck={gpgcheck}
        type={_type}
        gpgkey={gpgkey}
        proxy={proxy}

        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self):
        return '/etc/yum.repos.d/ceph.repo'

    def repo_baseurl(self):
        """Return the download.ceph.com base URL for the selected build."""
        assert self.stable or self.version
        if self.version:
            return '%s/rpm-%s/%s' % (args.repo_url, self.version,
                                     self.distro_code)
        else:
            return '%s/rpm-%s/%s' % (args.repo_url, self.stable,
                                     self.distro_code)

    def add_repo(self):
        """Write the ceph repo file and enable EPEL/copr where needed."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro_normalized, self.major,
                                        self.branch,
                                        self.commit)

        # was logging.info; use the module logger like the rest of this class
        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        if self.distro_code.startswith('el'):
            logger.info('Enabling EPEL...')
            call_throws([self.tool, 'install', '-y', 'epel-release'])
        if self.distro_code == 'el8':
            # we also need Ken's copr repo, at least for now
            logger.info('Enabling supplementary copr repo ktdreyer/ceph-el8...')
            call_throws(['dnf', 'copr', 'enable', '-y', 'ktdreyer/ceph-el8'])

    def rm_repo(self):
        """Remove the ceph repo file and disable the el8 copr repo."""
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())
        if self.distro_code == 'el8':
            logger.info('Disabling supplementary copr repo ktdreyer/ceph-el8...')
            call_throws(['dnf', 'copr', 'disable', '-y', 'ktdreyer/ceph-el8'])

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws([self.tool, 'install', '-y'] + ls)

    def install_podman(self):
        self.install(['podman'])
4107
4108
class Zypper(Packager):
    """Repo and package management for SUSE distros via zypper."""

    DISTRO_NAMES = [
        'sles',
        'opensuse-tumbleweed',
        'opensuse-leap'
    ]

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version):
        super(Zypper, self).__init__(stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.tool = 'zypper'
        self.distro = 'opensuse'
        # default to Leap 15.1; tumbleweed has no stable version number
        self.distro_version = '15.1'
        if 'tumbleweed' not in distro and distro_version is not None:
            self.distro_version = distro_version

    def custom_repo(self, **kw):
        """
        See YumDnf for format explanation.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self):
        return '/etc/zypp/repos.d/ceph.repo'

    def repo_baseurl(self):
        """Return the download.ceph.com base URL for the selected build."""
        assert self.stable or self.version
        if self.version:
            # bug fix: this branch previously formatted self.stable (None
            # when --version is used), yielding an 'rpm-None' URL
            return '%s/rpm-%s/%s' % (args.repo_url, self.version, self.distro)
        else:
            return '%s/rpm-%s/%s' % (args.repo_url, self.stable, self.distro)

    def add_repo(self):
        """Write the ceph repo file for zypper."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro, self.distro_version,
                                        self.branch,
                                        self.commit)

        # was logging.info; use the module logger like the rest of this class
        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws([self.tool, 'in', '-y'] + ls)

    def install_podman(self):
        self.install(['podman'])
4200
4201
def create_packager(stable=None, version=None, branch=None, commit=None):
    """Instantiate the Packager subclass matching the local distro.

    Raises Error when /etc/os-release names an unsupported distro.
    """
    distro, distro_version, distro_codename = get_distro()
    selection = dict(stable=stable, version=version,
                     branch=branch, commit=commit)
    if distro in YumDnf.DISTRO_NAMES:
        return YumDnf(distro=distro, distro_version=distro_version,
                      **selection)
    if distro in Apt.DISTRO_NAMES:
        return Apt(distro=distro, distro_version=distro_version,
                   distro_codename=distro_codename, **selection)
    if distro in Zypper.DISTRO_NAMES:
        return Zypper(distro=distro, distro_version=distro_version,
                      **selection)
    raise Error('Distro %s version %s not supported' % (distro, distro_version))
4218
4219
def command_add_repo():
    """Entry point for 'cephadm add-repo': configure a ceph package repo.

    Raises Error on conflicting or missing build-selection arguments.
    """
    if args.version and args.release:
        raise Error('you can specify either --release or --version but not both')
    if not (args.version or args.release or args.dev or args.dev_commit):
        raise Error('please supply a --release, --version, --dev or --dev-commit argument')
    if args.version:
        # sanity-check the x.y.z form before touching any repo files
        try:
            (x, y, z) = args.version.split('.')
        except Exception:
            raise Error('version must be in the form x.y.z (e.g., 15.2.0)')

    pkg = create_packager(stable=args.release,
                          version=args.version,
                          branch=args.dev,
                          commit=args.dev_commit)
    pkg.add_repo()
4236
def command_rm_repo():
    """Entry point for 'cephadm rm-repo': remove ceph repo configuration."""
    create_packager().rm_repo()
4240
def command_install():
    """Entry point for 'cephadm install': install the requested packages."""
    create_packager().install(args.packages)
4244
4245##################################
4246
def _get_parser():
    # type: () -> argparse.ArgumentParser
    """Build the cephadm argument parser.

    Global options come first; each subcommand registers its own subparser
    and sets 'func' to the command_* handler that __main__ invokes.
    """
    parser = argparse.ArgumentParser(
        description='Bootstrap Ceph daemons with systemd and containers.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--image',
        help='container image. Can also be set via the "CEPHADM_IMAGE" '
        'env var')
    parser.add_argument(
        '--docker',
        action='store_true',
        help='use docker instead of podman')
    parser.add_argument(
        '--data-dir',
        default=DATA_DIR,
        help='base directory for daemon data')
    parser.add_argument(
        '--log-dir',
        default=LOG_DIR,
        help='base directory for daemon logs')
    parser.add_argument(
        '--logrotate-dir',
        default=LOGROTATE_DIR,
        help='location of logrotate configuration files')
    parser.add_argument(
        '--unit-dir',
        default=UNIT_DIR,
        help='base directory for systemd units')
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Show debug-level log messages')
    parser.add_argument(
        '--timeout',
        type=int,
        default=DEFAULT_TIMEOUT,
        help='timeout in seconds')
    parser.add_argument(
        '--retry',
        type=int,
        default=DEFAULT_RETRY,
        help='max number of retries')
    parser.add_argument(
        '--env', '-e',
        action='append',
        default=[],
        help='set environment variable')

    subparsers = parser.add_subparsers(help='sub-command')

    parser_version = subparsers.add_parser(
        'version', help='get ceph version from container')
    parser_version.set_defaults(func=command_version)

    parser_pull = subparsers.add_parser(
        'pull', help='pull latest image version')
    parser_pull.set_defaults(func=command_pull)

    parser_inspect_image = subparsers.add_parser(
        'inspect-image', help='inspect local container image')
    parser_inspect_image.set_defaults(func=command_inspect_image)

    parser_ls = subparsers.add_parser(
        'ls', help='list daemon instances on this host')
    parser_ls.set_defaults(func=command_ls)
    parser_ls.add_argument(
        '--no-detail',
        action='store_true',
        help='Do not include daemon status')
    parser_ls.add_argument(
        '--legacy-dir',
        default='/',
        help='base directory for legacy daemon data')

    parser_list_networks = subparsers.add_parser(
        'list-networks', help='list IP networks')
    parser_list_networks.set_defaults(func=command_list_networks)

    parser_adopt = subparsers.add_parser(
        'adopt', help='adopt daemon deployed with a different tool')
    parser_adopt.set_defaults(func=command_adopt)
    parser_adopt.add_argument(
        '--name', '-n',
        required=True,
        help='daemon name (type.id)')
    parser_adopt.add_argument(
        '--style',
        required=True,
        help='deployment style (legacy, ...)')
    parser_adopt.add_argument(
        '--cluster',
        default='ceph',
        help='cluster name')
    parser_adopt.add_argument(
        '--legacy-dir',
        default='/',
        help='base directory for legacy daemon data')
    parser_adopt.add_argument(
        '--config-json',
        help='Additional configuration information in JSON format')
    parser_adopt.add_argument(
        '--skip-firewalld',
        action='store_true',
        help='Do not configure firewalld')
    parser_adopt.add_argument(
        '--skip-pull',
        action='store_true',
        help='do not pull the latest image before adopting')
    parser_adopt.add_argument(
        '--force-start',
        action='store_true',
        # fix typo: was 'adoped'
        help="start newly adopted daemon, even if it wasn't running previously")

    parser_rm_daemon = subparsers.add_parser(
        'rm-daemon', help='remove daemon instance')
    parser_rm_daemon.set_defaults(func=command_rm_daemon)
    parser_rm_daemon.add_argument(
        '--name', '-n',
        required=True,
        action=CustomValidation,
        help='daemon name (type.id)')
    parser_rm_daemon.add_argument(
        '--fsid',
        required=True,
        help='cluster FSID')
    parser_rm_daemon.add_argument(
        '--force',
        action='store_true',
        help='proceed, even though this may destroy valuable data')
    parser_rm_daemon.add_argument(
        '--force-delete-data',
        action='store_true',
        help='delete valuable daemon data instead of making a backup')

    parser_rm_cluster = subparsers.add_parser(
        'rm-cluster', help='remove all daemons for a cluster')
    parser_rm_cluster.set_defaults(func=command_rm_cluster)
    parser_rm_cluster.add_argument(
        '--fsid',
        required=True,
        help='cluster FSID')
    parser_rm_cluster.add_argument(
        '--force',
        action='store_true',
        help='proceed, even though this may destroy valuable data')

    parser_run = subparsers.add_parser(
        'run', help='run a ceph daemon, in a container, in the foreground')
    parser_run.set_defaults(func=command_run)
    parser_run.add_argument(
        '--name', '-n',
        required=True,
        help='daemon name (type.id)')
    parser_run.add_argument(
        '--fsid',
        required=True,
        help='cluster FSID')

    parser_shell = subparsers.add_parser(
        'shell', help='run an interactive shell inside a daemon container')
    parser_shell.set_defaults(func=command_shell)
    parser_shell.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_shell.add_argument(
        '--name', '-n',
        help='daemon name (type.id)')
    parser_shell.add_argument(
        '--config', '-c',
        help='ceph.conf to pass through to the container')
    parser_shell.add_argument(
        '--keyring', '-k',
        help='ceph.keyring to pass through to the container')
    parser_shell.add_argument(
        '--mount', '-m',
        help='file or directory path that will be mounted in container /mnt')
    parser_shell.add_argument(
        '--env', '-e',
        action='append',
        default=[],
        help='set environment variable')
    parser_shell.add_argument(
        'command', nargs=argparse.REMAINDER,
        help='command (optional)')

    parser_enter = subparsers.add_parser(
        'enter', help='run an interactive shell inside a running daemon container')
    parser_enter.set_defaults(func=command_enter)
    parser_enter.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_enter.add_argument(
        '--name', '-n',
        required=True,
        help='daemon name (type.id)')
    parser_enter.add_argument(
        'command', nargs=argparse.REMAINDER,
        help='command')

    parser_ceph_volume = subparsers.add_parser(
        'ceph-volume', help='run ceph-volume inside a container')
    parser_ceph_volume.set_defaults(func=command_ceph_volume)
    parser_ceph_volume.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_ceph_volume.add_argument(
        '--config-json',
        # fix typo: was 'bootrap-osd'
        help='JSON file with config and (client.bootstrap-osd) key')
    parser_ceph_volume.add_argument(
        '--config', '-c',
        help='ceph conf file')
    parser_ceph_volume.add_argument(
        '--keyring', '-k',
        help='ceph.keyring to pass through to the container')
    parser_ceph_volume.add_argument(
        'command', nargs=argparse.REMAINDER,
        help='command')

    parser_unit = subparsers.add_parser(
        'unit', help='operate on the daemon\'s systemd unit')
    parser_unit.set_defaults(func=command_unit)
    parser_unit.add_argument(
        'command',
        help='systemd command (start, stop, restart, enable, disable, ...)')
    parser_unit.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_unit.add_argument(
        '--name', '-n',
        required=True,
        help='daemon name (type.id)')

    parser_logs = subparsers.add_parser(
        'logs', help='print journald logs for a daemon container')
    parser_logs.set_defaults(func=command_logs)
    parser_logs.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_logs.add_argument(
        '--name', '-n',
        required=True,
        help='daemon name (type.id)')
    parser_logs.add_argument(
        'command', nargs='*',
        help='additional journalctl args')

    parser_bootstrap = subparsers.add_parser(
        'bootstrap', help='bootstrap a cluster (mon + mgr daemons)')
    parser_bootstrap.set_defaults(func=command_bootstrap)
    parser_bootstrap.add_argument(
        '--config', '-c',
        help='ceph conf file to incorporate')
    parser_bootstrap.add_argument(
        '--mon-id',
        required=False,
        help='mon id (default: local hostname)')
    parser_bootstrap.add_argument(
        '--mon-addrv',
        help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])')
    parser_bootstrap.add_argument(
        '--mon-ip',
        help='mon IP')
    parser_bootstrap.add_argument(
        '--mgr-id',
        required=False,
        help='mgr id (default: randomly generated)')
    parser_bootstrap.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_bootstrap.add_argument(
        '--output-dir',
        default='/etc/ceph',
        help='directory to write config, keyring, and pub key files')
    parser_bootstrap.add_argument(
        '--output-keyring',
        help='location to write keyring file with new cluster admin and mon keys')
    parser_bootstrap.add_argument(
        '--output-config',
        help='location to write conf file to connect to new cluster')
    parser_bootstrap.add_argument(
        '--output-pub-ssh-key',
        help='location to write the cluster\'s public SSH key')
    parser_bootstrap.add_argument(
        '--skip-ssh',
        action='store_true',
        help='skip setup of ssh key on local host')
    parser_bootstrap.add_argument(
        '--initial-dashboard-user',
        default='admin',
        help='Initial user for the dashboard')
    parser_bootstrap.add_argument(
        '--initial-dashboard-password',
        help='Initial password for the initial dashboard user')

    parser_bootstrap.add_argument(
        '--dashboard-key',
        type=argparse.FileType('r'),
        help='Dashboard key')
    parser_bootstrap.add_argument(
        '--dashboard-crt',
        type=argparse.FileType('r'),
        help='Dashboard certificate')

    parser_bootstrap.add_argument(
        '--ssh-config',
        type=argparse.FileType('r'),
        help='SSH config')
    parser_bootstrap.add_argument(
        '--ssh-private-key',
        type=argparse.FileType('r'),
        help='SSH private key')
    parser_bootstrap.add_argument(
        '--ssh-public-key',
        type=argparse.FileType('r'),
        help='SSH public key')

    parser_bootstrap.add_argument(
        '--skip-mon-network',
        action='store_true',
        help='set mon public_network based on bootstrap mon ip')
    parser_bootstrap.add_argument(
        '--skip-dashboard',
        action='store_true',
        help='do not enable the Ceph Dashboard')
    parser_bootstrap.add_argument(
        '--dashboard-password-noupdate',
        action='store_true',
        help='stop forced dashboard password change')
    parser_bootstrap.add_argument(
        '--no-minimize-config',
        action='store_true',
        help='do not assimilate and minimize the config file')
    parser_bootstrap.add_argument(
        '--skip-ping-check',
        action='store_true',
        help='do not verify that mon IP is pingable')
    parser_bootstrap.add_argument(
        '--skip-pull',
        action='store_true',
        help='do not pull the latest image before bootstrapping')
    parser_bootstrap.add_argument(
        '--skip-firewalld',
        action='store_true',
        help='Do not configure firewalld')
    parser_bootstrap.add_argument(
        '--allow-overwrite',
        action='store_true',
        help='allow overwrite of existing --output-* config/keyring/ssh files')
    parser_bootstrap.add_argument(
        '--allow-fqdn-hostname',
        action='store_true',
        help='allow hostname that is fully-qualified (contains ".")')
    parser_bootstrap.add_argument(
        '--skip-prepare-host',
        action='store_true',
        help='Do not prepare host')
    parser_bootstrap.add_argument(
        '--orphan-initial-daemons',
        action='store_true',
        help='Do not create initial mon, mgr, and crash service specs')
    parser_bootstrap.add_argument(
        '--skip-monitoring-stack',
        action='store_true',
        help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
    parser_bootstrap.add_argument(
        '--apply-spec',
        help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')

    parser_bootstrap.add_argument(
        '--shared_ceph_folder',
        metavar='CEPH_SOURCE_FOLDER',
        help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')

    parser_deploy = subparsers.add_parser(
        'deploy', help='deploy a daemon')
    parser_deploy.set_defaults(func=command_deploy)
    parser_deploy.add_argument(
        '--name',
        required=True,
        action=CustomValidation,
        help='daemon name (type.id)')
    parser_deploy.add_argument(
        '--fsid',
        required=True,
        help='cluster FSID')
    parser_deploy.add_argument(
        '--config', '-c',
        help='config file for new daemon')
    parser_deploy.add_argument(
        '--config-json',
        help='Additional configuration information in JSON format')
    parser_deploy.add_argument(
        '--keyring',
        help='keyring for new daemon')
    parser_deploy.add_argument(
        '--key',
        help='key for new daemon')
    parser_deploy.add_argument(
        '--osd-fsid',
        help='OSD uuid, if creating an OSD container')
    parser_deploy.add_argument(
        '--skip-firewalld',
        action='store_true',
        help='Do not configure firewalld')
    parser_deploy.add_argument(
        '--reconfig',
        action='store_true',
        help='Reconfigure a previously deployed daemon')
    parser_deploy.add_argument(
        '--allow-ptrace',
        action='store_true',
        help='Allow SYS_PTRACE on daemon container')

    parser_check_host = subparsers.add_parser(
        'check-host', help='check host configuration')
    parser_check_host.set_defaults(func=command_check_host)
    parser_check_host.add_argument(
        '--expect-hostname',
        help='Check that hostname matches an expected value')

    parser_prepare_host = subparsers.add_parser(
        'prepare-host', help='prepare a host for cephadm use')
    parser_prepare_host.set_defaults(func=command_prepare_host)
    parser_prepare_host.add_argument(
        '--expect-hostname',
        help='Set hostname')

    parser_add_repo = subparsers.add_parser(
        'add-repo', help='configure package repository')
    parser_add_repo.set_defaults(func=command_add_repo)
    parser_add_repo.add_argument(
        '--release',
        help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE))
    parser_add_repo.add_argument(
        '--version',
        help='use specific upstream version (x.y.z)')
    parser_add_repo.add_argument(
        '--dev',
        help='use specified bleeding edge build from git branch or tag')
    parser_add_repo.add_argument(
        '--dev-commit',
        help='use specified bleeding edge build from git commit')
    parser_add_repo.add_argument(
        '--gpg-url',
        help='specify alternative GPG key location')
    parser_add_repo.add_argument(
        '--repo-url',
        default='https://download.ceph.com',
        help='specify alternative repo location')
    # TODO: proxy?

    parser_rm_repo = subparsers.add_parser(
        'rm-repo', help='remove package repository configuration')
    parser_rm_repo.set_defaults(func=command_rm_repo)

    parser_install = subparsers.add_parser(
        'install', help='install ceph package(s)')
    parser_install.set_defaults(func=command_install)
    parser_install.add_argument(
        'packages', nargs='*',
        default=['cephadm'],
        help='packages')

    return parser
4713
def _parse_args(av):
    """Parse the given argv list with the cephadm parser.

    A leading '--' left in a REMAINDER-style 'command' list by argparse
    is stripped so subcommands receive the bare command.
    """
    parsed = _get_parser().parse_args(av)
    has_command = 'command' in parsed and parsed.command
    if has_command and parsed.command[0] == "--":
        parsed.command.pop(0)
    return parsed
9f95a23c
TL
4720
if __name__ == "__main__":
    # allow argv to be injected (supports piping this script to python3)
    try:
        av = injected_argv  # type: ignore
    except NameError:
        av = sys.argv[1:]
    args = _parse_args(av)

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('cephadm')

    # root? cephadm manipulates system state (units, /var/lib/ceph, ...)
    if os.geteuid() != 0:
        sys.stderr.write('ERROR: cephadm should be run as root\n')
        sys.exit(1)

    if 'func' not in args:
        sys.stderr.write('No command specified; pass -h or --help for usage\n')
        sys.exit(1)

    # podman or docker?
    # Initialize first: if every find_program() call below fails, the
    # 'not container_path' check would otherwise raise NameError.
    container_path = ''
    if args.func != command_check_host:
        if args.docker:
            container_path = find_program('docker')
        else:
            for i in CONTAINER_PREFERENCE:
                try:
                    container_path = find_program(i)
                    break
                except Exception as e:
                    logger.debug('Could not locate %s: %s' % (i, e))
        if not container_path and args.func != command_prepare_host\
                and args.func != command_add_repo:
            sys.stderr.write('Unable to locate any of %s\n' % CONTAINER_PREFERENCE)
            sys.exit(1)

    try:
        r = args.func()
    except Error as e:
        if args.verbose:
            raise
        sys.stderr.write('ERROR: %s\n' % e)
        sys.exit(1)
    if not r:
        r = 0
    sys.exit(r)