]> git.proxmox.com Git - ceph.git/blob - ceph/src/cephadm/cephadm
8de809d75bf126dd0aef5a65040906b088a0fd6a
[ceph.git] / ceph / src / cephadm / cephadm
#!/usr/bin/python3

# Default container image used for all ceph daemons unless overridden.
DEFAULT_IMAGE='docker.io/ceph/ceph:v15'
DEFAULT_IMAGE_IS_MASTER=False
LATEST_STABLE_RELEASE = 'octopus'
# Well-known host paths used for daemon data, logs, locks and unit files.
DATA_DIR = '/var/lib/ceph'
LOG_DIR = '/var/log/ceph'
LOCK_DIR = '/run/cephadm'
LOGROTATE_DIR = '/etc/logrotate.d'
UNIT_DIR = '/etc/systemd/system'
# Permission modes applied when creating the above directories.
LOG_DIR_MODE = 0o770
DATA_DIR_MODE = 0o700
CONTAINER_PREFERENCE = ['podman', 'docker'] # prefer podman to docker
# Shell prompt shown inside `cephadm shell` containers.
CUSTOM_PS1 = r'[ceph: \u@\h \W]\$ '
DEFAULT_TIMEOUT = None # in seconds
DEFAULT_RETRY = 10
# Default config/keyring paths used by the `shell` subcommand.
SHELL_DEFAULT_CONF = '/etc/ceph/ceph.conf'
SHELL_DEFAULT_KEYRING = '/etc/ceph/ceph.client.admin.keyring'
19
20 """
21 You can invoke cephadm in two ways:
22
23 1. The normal way, at the command line.
24
25 2. By piping the script to the python3 binary. In this latter case, you should
26 prepend one or more lines to the beginning of the script.
27
28 For arguments,
29
30 injected_argv = [...]
31
32 e.g.,
33
34 injected_argv = ['ls']
35
36 For reading stdin from the '--config-json -' argument,
37
38 injected_stdin = '...'
39 """
40 import argparse
41 import datetime
42 import fcntl
43 import ipaddress
44 import json
45 import logging
46 from logging.config import dictConfig
47 import os
48 import platform
49 import pwd
50 import random
51 import select
52 import shutil
53 import socket
54 import string
55 import subprocess
56 import sys
57 import tempfile
58 import time
59 import errno
60 import struct
61 from enum import Enum
62 try:
63 from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, Callable, IO
64 except ImportError:
65 pass
66
67 import re
68 import uuid
69
70 from functools import wraps
71 from glob import glob
72 from threading import Thread
73
74 if sys.version_info >= (3, 0):
75 from io import StringIO
76 else:
77 from StringIO import StringIO
78
79 if sys.version_info >= (3, 2):
80 from configparser import ConfigParser
81 else:
82 from ConfigParser import SafeConfigParser
83
84 if sys.version_info >= (3, 0):
85 from urllib.request import urlopen
86 from urllib.error import HTTPError
87 else:
88 from urllib2 import urlopen, HTTPError
89
90 if sys.version_info > (3, 0):
91 unicode = str
92
# Absolute path of the container engine binary (podman/docker); filled in
# at startup elsewhere in this file.
container_path = ''
# Cache of data read from stdin so it is only consumed once.
cached_stdin = None

# Timestamp format used for container/daemon metadata.
DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ'

# Log and console output config
# Passed to logging.config.dictConfig: INFO to the console, DEBUG to a
# small rotating file under LOG_DIR.
logging_config = {
    'version': 1,
    'disable_existing_loggers': True,
    'formatters': {
        'cephadm': {
            'format': '%(asctime)s %(levelname)s %(message)s'
        },
    },
    'handlers': {
        'console':{
            'level':'INFO',
            'class':'logging.StreamHandler',
        },
        'log_file': {
            'level': 'DEBUG',
            'class': 'logging.handlers.RotatingFileHandler',
            'formatter': 'cephadm',
            'filename': '%s/cephadm.log' % LOG_DIR,
            'maxBytes': 1024000,
            'backupCount': 1,
        }
    },
    'loggers': {
        # root logger: everything goes to both handlers
        '': {
            'level': 'DEBUG',
            'handlers': ['console', 'log_file'],
        }
    }
}
128
class termcolor:
    """ANSI escape sequences for colored terminal output."""
    yellow = '\033[93m'
    red = '\033[31m'
    end = '\033[0m'  # reset attributes
133
134
class Error(Exception):
    """Base class for all cephadm errors."""
    pass
137
138
class TimeoutExpired(Error):
    """Raised when a command run by cephadm exceeds its timeout."""
    pass
141
142 ##################################
143
144
class Ceph(object):
    """Core Ceph daemon types that run from the main ceph image."""
    daemons = ('mon', 'mgr', 'mds', 'osd', 'rgw', 'rbd-mirror',
               'crash')
148
149 ##################################
150
151
class Monitoring(object):
    """Define the configs for the monitoring containers"""

    # TCP ports published by each monitoring daemon type.
    port_map = {
        "prometheus": [9095], # Avoid default 9090, due to conflict with cockpit UI
        "node-exporter": [9100],
        "grafana": [3000],
        "alertmanager": [9093, 9094],
    }

    # Per-component deployment config: container image, resource hints,
    # command-line args, and which files/args come from --config-json.
    components = {
        "prometheus": {
            "image": "docker.io/prom/prometheus:v2.18.1",
            "cpus": '2',
            "memory": '4GB',
            "args": [
                "--config.file=/etc/prometheus/prometheus.yml",
                "--storage.tsdb.path=/prometheus",
                "--web.listen-address=:{}".format(port_map['prometheus'][0]),
            ],
            "config-json-files": [
                "prometheus.yml",
            ],
        },
        "node-exporter": {
            "image": "docker.io/prom/node-exporter:v0.18.1",
            "cpus": "1",
            "memory": "1GB",
            "args": [
                "--no-collector.timex",
            ],
        },
        "grafana": {
            "image": "docker.io/ceph/ceph-grafana:6.6.2",
            "cpus": "2",
            "memory": "4GB",
            "args": [],
            "config-json-files": [
                "grafana.ini",
                "provisioning/datasources/ceph-dashboard.yml",
                "certs/cert_file",
                "certs/cert_key",
            ],
        },
        "alertmanager": {
            "image": "docker.io/prom/alertmanager:v0.20.0",
            "cpus": "2",
            "memory": "2GB",
            "args": [
                "--web.listen-address=:{}".format(port_map['alertmanager'][0]),
                "--cluster.listen-address=:{}".format(port_map['alertmanager'][1]),
            ],
            "config-json-files": [
                "alertmanager.yml",
            ],
            "config-json-args": [
                "peers",
            ],
        },
    } # type: ignore
212
213 ##################################
214
215
class NFSGanesha(object):
    """Defines a NFS-Ganesha container"""

    daemon_type = 'nfs'
    entrypoint = '/usr/bin/ganesha.nfsd'
    # Run in the foreground, log to stderr (captured by the container).
    daemon_args = ['-F', '-L', 'STDERR']

    # Files that must be present in the config-json 'files' map.
    required_files = ['ganesha.conf']

    port_map = {
        "nfs" : 2049,
    }

    def __init__(self,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (str, Union[int, str], Dict, str) -> None
        """Build an NFSGanesha from a parsed config-json dict and validate it."""
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.pool = dict_get(config_json, 'pool', require=True)
        self.namespace = dict_get(config_json, 'namespace')
        self.userid = dict_get(config_json, 'userid')
        self.extra_args = dict_get(config_json, 'extra_args', [])
        self.files = dict_get(config_json, 'files', {})
        self.rgw = dict_get(config_json, 'rgw', {})

        # validate the supplied args
        self.validate()

    @classmethod
    def init(cls, fsid, daemon_id):
        # type: (str, Union[int, str]) -> NFSGanesha
        """Alternate constructor reading config-json/image from the CLI args."""
        return cls(fsid, daemon_id, get_parm(args.config_json), args.image)

    def get_container_mounts(self, data_dir):
        # type: (str) -> Dict[str, str]
        """Map host paths under data_dir to their in-container mount points."""
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z'
        if self.rgw:
            # expose the RGW keyring where radosgw expects it
            cluster = self.rgw.get('cluster', 'ceph')
            rgw_user = self.rgw.get('user', 'admin')
            mounts[os.path.join(data_dir, 'keyring.rgw')] = \
                    '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user)
        return mounts

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        """Environment variables to set inside the container."""
        envs = [
            'CEPH_CONF=%s' % ('/etc/ceph/ceph.conf')
        ]
        return envs

    @staticmethod
    def get_version(container_id):
        # type: (str) -> Optional[str]
        """Return the ganesha version by exec'ing `-v` in a running container,
        or None if it cannot be determined."""
        version = None
        out, err, code = call(
            [container_path, 'exec', container_id,
             NFSGanesha.entrypoint, '-v'])
        if code == 0:
            match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
            if match:
                version = match.group(1)
        return version

    def validate(self):
        # type: () -> None
        """Raise Error if any constructor input is missing or malformed."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

        # check for an RGW config
        if self.rgw:
            if not self.rgw.get('keyring'):
                raise Error('RGW keyring is missing')
            if not self.rgw.get('user'):
                raise Error('RGW user is missing')

    def get_daemon_name(self):
        # type: () -> str
        """Return '<type>.<id>', e.g. 'nfs.foo'."""
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        """Return the container name, optionally suffixed with *desc*."""
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def get_daemon_args(self):
        # type: () -> List[str]
        """Default daemon args plus any extra_args from config-json."""
        return self.daemon_args + self.extra_args

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ganesha config...')

        # create the ganesha conf dir
        config_dir = os.path.join(data_dir, 'etc/ganesha')
        makedirs(config_dir, uid, gid, 0o755)

        # populate files from the config-json
        for fname in self.files:
            config_file = os.path.join(config_dir, fname)
            config_content = dict_get_join(self.files, fname)
            logger.info('Write file: %s' % (config_file))
            with open(config_file, 'w') as f:
                # chown/chmod before writing so the content is never
                # readable with the wrong ownership/permissions
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)
                f.write(config_content)

        # write the RGW keyring
        if self.rgw:
            keyring_path = os.path.join(data_dir, 'keyring.rgw')
            with open(keyring_path, 'w') as f:
                os.fchmod(f.fileno(), 0o600)
                os.fchown(f.fileno(), uid, gid)
                f.write(self.rgw.get('keyring', ''))

    def get_rados_grace_container(self, action):
        # type: (str) -> CephContainer
        """Container for a ganesha action on the grace db"""
        entrypoint = '/usr/bin/ganesha-rados-grace'

        assert self.pool
        # NOTE: this local deliberately shadows the module-level `args`
        args=['--pool', self.pool]
        if self.namespace:
            args += ['--ns', self.namespace]
        if self.userid:
            args += ['--userid', self.userid]
        args += [action, self.get_daemon_name()]

        data_dir = get_data_dir(self.fsid, self.daemon_type, self.daemon_id)
        volume_mounts = self.get_container_mounts(data_dir)
        envs = self.get_container_envs()

        logger.info('Creating RADOS grace for action: %s' % action)
        c = CephContainer(
            image=self.image,
            entrypoint=entrypoint,
            args=args,
            volume_mounts=volume_mounts,
            cname=self.get_container_name(desc='grace-%s' % action),
            envs=envs
        )
        return c
383
384 ##################################
385
386
class CephIscsi(object):
    """Defines a Ceph-Iscsi container"""

    daemon_type = 'iscsi'
    entrypoint = '/usr/bin/rbd-target-api'

    # Files that must be present in the config-json 'files' map.
    required_files = ['iscsi-gateway.cfg']

    def __init__(self,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (str, Union[int, str], Dict, str) -> None
        """Build a CephIscsi from a parsed config-json dict and validate it."""
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        # validate the supplied args
        self.validate()

    @classmethod
    def init(cls, fsid, daemon_id):
        # type: (str, Union[int, str]) -> CephIscsi
        """Alternate constructor reading config-json/image from the CLI args."""
        return cls(fsid, daemon_id, get_parm(args.config_json), args.image)

    @staticmethod
    def get_container_mounts(data_dir, log_dir):
        # type: (str, str) -> Dict[str, str]
        """Map host paths to in-container mount points for the gateway."""
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
        mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
        mounts[log_dir] = '/var/log/rbd-target-api:z'
        mounts['/dev'] = '/dev'
        return mounts

    @staticmethod
    def get_container_binds():
        # type: () -> List[List[str]]
        """Read-only bind of /lib/modules so the gateway can load kernel modules."""
        binds = []
        lib_modules = ['type=bind',
                       'source=/lib/modules',
                       'destination=/lib/modules',
                       'ro=true']
        binds.append(lib_modules)
        return binds

    @staticmethod
    def get_version(container_id):
        # type: (str) -> Optional[str]
        """Return the ceph-iscsi package version from a running container,
        or None if it cannot be determined."""
        version = None
        out, err, code = call(
            [container_path, 'exec', container_id,
             '/usr/bin/python3', '-c', "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"])
        if code == 0:
            version = out.strip()
        return version

    def validate(self):
        # type: () -> None
        """Raise Error if any constructor input is missing or malformed."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        """Return '<type>.<id>', e.g. 'iscsi.foo'."""
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        """Return the container name, optionally suffixed with *desc*."""
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ceph-iscsi config...')
        configfs_dir = os.path.join(data_dir, 'configfs')
        makedirs(configfs_dir, uid, gid, 0o755)

        # populate files from the config-json
        for fname in self.files:
            config_file = os.path.join(data_dir, fname)
            config_content = dict_get_join(self.files, fname)
            logger.info('Write file: %s' % (config_file))
            with open(config_file, 'w') as f:
                # chown/chmod before writing so the content is never
                # readable with the wrong ownership/permissions
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)
                f.write(config_content)

    @staticmethod
    def configfs_mount_umount(data_dir, mount=True):
        # type: (str, bool) -> List[str]
        """Return a shell snippet, split on whitespace, that (u)mounts
        configfs under data_dir if needed.
        NOTE(review): .split() tokenizes the whole shell command; this
        presumably gets re-joined by the caller into a unit-file line —
        verify against the consumer before changing."""
        mount_path = os.path.join(data_dir, 'configfs')
        if mount:
            cmd = "if ! grep -qs {0} /proc/mounts; then " \
                  "mount -t configfs none {0}; fi".format(mount_path)
        else:
            cmd = "if grep -qs {0} /proc/mounts; then " \
                  "umount {0}; fi".format(mount_path)
        return cmd.split()

    def get_tcmu_runner_container(self):
        # type: () -> CephContainer
        """Return a sidecar container that runs tcmu-runner for this daemon."""
        tcmu_container = get_container(self.fsid, self.daemon_type, self.daemon_id)
        tcmu_container.entrypoint = "/usr/bin/tcmu-runner"
        tcmu_container.cname = self.get_container_name(desc='tcmu')
        # remove extra container args for tcmu container.
        # extra args could cause issue with forking service type
        tcmu_container.container_args = []
        return tcmu_container
517
518 ##################################
519
520
class CustomContainer(object):
    """Defines a custom container"""
    daemon_type = 'container'

    def __init__(self, fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        """Build a CustomContainer entirely from config-json options."""
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.entrypoint = dict_get(config_json, 'entrypoint')
        self.uid = dict_get(config_json, 'uid', 65534) # nobody
        self.gid = dict_get(config_json, 'gid', 65534) # nobody
        self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
        self.args = dict_get(config_json, 'args', [])
        self.envs = dict_get(config_json, 'envs', [])
        self.privileged = dict_get(config_json, 'privileged', False)
        self.bind_mounts = dict_get(config_json, 'bind_mounts', [])
        self.ports = dict_get(config_json, 'ports', [])
        self.dirs = dict_get(config_json, 'dirs', [])
        self.files = dict_get(config_json, 'files', {})

    @classmethod
    def init(cls, fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer':
        """Alternate constructor reading config-json/image from the CLI args."""
        return cls(fsid, daemon_id, get_parm(args.config_json), args.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """
        Create dirs/files below the container data directory.
        """
        logger.info('Creating custom container configuration '
                    'dirs/files in {} ...'.format(data_dir))

        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % data_dir)

        for dir_path in self.dirs:
            logger.info('Creating directory: {}'.format(dir_path))
            # strip leading '/' so absolute-looking paths stay under data_dir
            dir_path = os.path.join(data_dir, dir_path.strip('/'))
            makedirs(dir_path, uid, gid, 0o755)

        for file_path in self.files:
            logger.info('Creating file: {}'.format(file_path))
            content = dict_get_join(self.files, file_path)
            file_path = os.path.join(data_dir, file_path.strip('/'))
            with open(file_path, 'w', encoding='utf-8') as f:
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)
                f.write(content)

    def get_daemon_args(self) -> List[str]:
        """Custom containers take no daemon-level arguments."""
        return []

    def get_container_args(self) -> List[str]:
        """Container-engine arguments from config-json."""
        return self.args

    def get_container_envs(self) -> List[str]:
        """Environment variables from config-json."""
        return self.envs

    def get_container_mounts(self, data_dir: str) -> Dict[str, str]:
        """
        Get the volume mounts. Relative source paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

        Example:
        {
            /foo/conf: /conf
            foo/conf: /conf
        }
        becomes
        {
            /foo/conf: /conf
            /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
        }
        """
        mounts = {}
        for source, destination in self.volume_mounts.items():
            # os.path.join leaves absolute sources untouched
            source = os.path.join(data_dir, source)
            mounts[source] = destination
        return mounts

    def get_container_binds(self, data_dir: str) -> List[List[str]]:
        """
        Get the bind mounts. Relative `source=...` paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

        Example:
        [
            'type=bind',
            'source=lib/modules',
            'destination=/lib/modules',
            'ro=true'
        ]
        becomes
        [
            ...
            'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
            ...
        ]
        """
        binds = self.bind_mounts.copy()
        for bind in binds:
            for index, value in enumerate(bind):
                match = re.match(r'^source=(.+)$', value)
                if match:
                    bind[index] = 'source={}'.format(os.path.join(
                        data_dir, match.group(1)))
        return binds
630
631 ##################################
632
633
def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
    """
    Helper function to get a key from a dictionary.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :param default: The default value in case the key does not
        exist. Default is `None`.
    :param require: Set to `True` if the key is required. An
        exception will be raised if the key does not exist in
        the given dictionary.
    :return: Returns the value of the given key.
    :raises: :exc:`Error` if the given key does not exist
        and `require` is set to `True`.
    """
    # membership test directly on the dict (no .keys() view needed)
    if require and key not in d:
        raise Error('{} missing from dict'.format(key))
    return d.get(key, default)
651
652 ##################################
653
654
def dict_get_join(d: Dict, key: str) -> Any:
    """
    Fetch `key` from dictionary `d`, flattening list values.
    A `list` value is converted to a single string by joining its
    (stringified) items with line breaks; anything else is returned
    unchanged (`None` if the key is absent).
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :return: The value for `key`, with lists joined by newlines.
    """
    value = d.get(key)
    if not isinstance(value, list):
        return value
    return '\n'.join(str(item) for item in value)
669
670 ##################################
671
672
def get_supported_daemons():
    # type: () -> List[str]
    """Return every daemon type this script knows how to deploy."""
    daemons = list(Ceph.daemons) + list(Monitoring.components)
    daemons += [NFSGanesha.daemon_type,
                CephIscsi.daemon_type,
                CustomContainer.daemon_type]
    # daemon type names must be unique across all providers
    assert len(daemons) == len(set(daemons))
    return daemons
682
683 ##################################
684
685
def attempt_bind(s, address, port):
    # type: (socket.socket, str, int) -> None
    """Try to bind socket *s* to (address, port); always close the socket.

    Raises OSError only when the port is already in use (EADDRINUSE).
    EADDRNOTAVAIL — and, note, any other errno — is silently ignored;
    callers such as port_in_use rely on OSError meaning "in use".
    """
    try:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((address, port))
    except (socket.error, OSError) as e: # py2 and py3
        msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
        logger.warning(msg)
        if e.errno == errno.EADDRINUSE:
            # note: raising OSError(msg) drops the original errno
            raise OSError(msg)
        elif e.errno == errno.EADDRNOTAVAIL:
            pass
    finally:
        # the socket is single-use: closed whether or not the bind worked
        s.close()
700
701
def port_in_use(port_num):
    # type: (int) -> bool
    """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
    logger.info('Verifying port %d ...' % port_num)
    try:
        # probe the IPv4 wildcard first, then IPv6, matching attempt_bind's
        # contract: OSError means the port is taken
        for family, wildcard in ((socket.AF_INET, '0.0.0.0'),
                                 (socket.AF_INET6, '::')):
            sock = socket.socket(family, socket.SOCK_STREAM)
            attempt_bind(sock, wildcard, port_num)
    except OSError:
        return True
    return False
716
717
def check_ip_port(ip, port):
    # type: (str, int) -> None
    """Verify that (ip, port) is bindable on this host.

    No-op when --skip-ping-check was given. Raises Error (wrapping the
    OSError from attempt_bind) when the address/port is unavailable.
    """
    if not args.skip_ping_check:
        logger.info('Verifying IP %s port %d ...' % (ip, port))
        if is_ipv6(ip):
            s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
            # strip any surrounding brackets before binding
            ip = unwrap_ipv6(ip)
        else:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            attempt_bind(s, ip, port)
        except OSError as e:
            raise Error(e)
731
732 ##################################
733
734 # this is an abbreviated version of
735 # https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py
736 # that drops all of the compatibility (this is Unix/Linux only).
737
# py2 compatibility: TimeoutError was introduced in py3; fall back to
# OSError so the Timeout class below has a sane base on either version.
try:
    TimeoutError
except NameError:
    TimeoutError = OSError
742
743
class Timeout(TimeoutError):
    """
    Raised when the lock could not be acquired in *timeout*
    seconds.
    """

    def __init__(self, lock_file):
        # Path of the lock file that could not be acquired.
        self.lock_file = lock_file

    def __str__(self):
        return "The file lock '{}' could not be acquired.".format(
            self.lock_file)
761
762
763 class _Acquire_ReturnProxy(object):
764 def __init__(self, lock):
765 self.lock = lock
766 return None
767
768 def __enter__(self):
769 return self.lock
770
771 def __exit__(self, exc_type, exc_value, traceback):
772 self.lock.release()
773 return None
774
775
class FileLock(object):
    """Inter-process advisory lock backed by flock(2) on a file in LOCK_DIR.

    Re-entrant per object: acquire() bumps a counter and release() only
    drops the OS-level lock once the counter returns to zero. The lock
    file itself is never deleted (see _release for why).
    """
    def __init__(self, name, timeout=-1):
        if not os.path.exists(LOCK_DIR):
            os.mkdir(LOCK_DIR, 0o700)
        self._lock_file = os.path.join(LOCK_DIR, name + '.lock')

        # The file descriptor for the *_lock_file* as it is returned by the
        # os.open() function.
        # This file lock is only NOT None, if the object currently holds the
        # lock.
        self._lock_file_fd = None
        # Default acquire timeout in seconds; < 0 means wait forever.
        self.timeout = timeout
        # The lock counter is used for implementing the nested locking
        # mechanism. Whenever the lock is acquired, the counter is increased and
        # the lock is only released, when this value is 0 again.
        self._lock_counter = 0
        return None

    @property
    def is_locked(self):
        # True while this object holds the flock (fd is open).
        return self._lock_file_fd is not None

    def acquire(self, timeout=None, poll_intervall=0.05):
        """
        Acquires the file lock or fails with a :exc:`Timeout` error.
        .. code-block:: python
            # You can use this method in the context manager (recommended)
            with lock.acquire():
                pass
            # Or use an equivalent try-finally construct:
            lock.acquire()
            try:
                pass
            finally:
                lock.release()
        :arg float timeout:
            The maximum time waited for the file lock.
            If ``timeout < 0``, there is no timeout and this method will
            block until the lock could be acquired.
            If ``timeout`` is None, the default :attr:`~timeout` is used.
        :arg float poll_intervall:
            We check once in *poll_intervall* seconds if we can acquire the
            file lock.
        :raises Timeout:
            if the lock could not be acquired in *timeout* seconds.
        .. versionchanged:: 2.0.0
            This method returns now a *proxy* object instead of *self*,
            so that it can be used in a with statement without side effects.
        """
        # Use the default timeout, if no timeout is provided.
        if timeout is None:
            timeout = self.timeout

        # Increment the number right at the beginning.
        # We can still undo it, if something fails.
        self._lock_counter += 1

        lock_id = id(self)
        lock_filename = self._lock_file
        start_time = time.time()
        try:
            while True:
                if not self.is_locked:
                    logger.debug('Acquiring lock %s on %s', lock_id,
                                 lock_filename)
                    self._acquire()

                if self.is_locked:
                    logger.debug('Lock %s acquired on %s', lock_id,
                                 lock_filename)
                    break
                elif timeout >= 0 and time.time() - start_time > timeout:
                    logger.warning('Timeout acquiring lock %s on %s', lock_id,
                                   lock_filename)
                    raise Timeout(self._lock_file)
                else:
                    logger.debug(
                        'Lock %s not acquired on %s, waiting %s seconds ...',
                        lock_id, lock_filename, poll_intervall
                    )
                    time.sleep(poll_intervall)
        except: # noqa
            # Something did go wrong, so decrement the counter.
            self._lock_counter = max(0, self._lock_counter - 1)

            raise
        return _Acquire_ReturnProxy(lock = self)

    def release(self, force=False):
        """
        Releases the file lock.
        Please note, that the lock is only completely released, if the lock
        counter is 0.
        Also note, that the lock file itself is not automatically deleted.
        :arg bool force:
            If true, the lock counter is ignored and the lock is released in
            every case.
        """
        if self.is_locked:
            self._lock_counter -= 1

            if self._lock_counter == 0 or force:
                lock_id = id(self)
                lock_filename = self._lock_file

                logger.debug('Releasing lock %s on %s', lock_id, lock_filename)
                self._release()
                self._lock_counter = 0
                logger.debug('Lock %s released on %s', lock_id, lock_filename)

        return None

    def __enter__(self):
        self.acquire()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.release()
        return None

    def __del__(self):
        # Safety net: drop the OS lock even if callers forgot to release.
        self.release(force=True)
        return None

    def _acquire(self):
        open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
        fd = os.open(self._lock_file, open_mode)

        try:
            # non-blocking: failure just means someone else holds the lock
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except (IOError, OSError):
            os.close(fd)
        else:
            self._lock_file_fd = fd
        return None

    def _release(self):
        # Do not remove the lockfile:
        #
        #   https://github.com/benediktschmitt/py-filelock/issues/31
        #   https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
        fd = self._lock_file_fd
        self._lock_file_fd = None
        fcntl.flock(fd, fcntl.LOCK_UN)  # type: ignore
        os.close(fd)  # type: ignore
        return None
922
923
924 ##################################
925 # Popen wrappers, lifted from ceph-volume
926
class CallVerbosity(Enum):
    """How much of a subprocess's output call() forwards to the logger."""
    # no logging of stdout/stderr at all
    SILENT = 0
    # log stdout/stderr to logger.debug
    DEBUG = 1
    # On a non-zero exit status, it will forcefully set
    # logging ON for the terminal
    VERBOSE_ON_FAILURE = 2
    # log at info (instead of debug) level.
    VERBOSE = 3
936
937
def call(command: List[str],
         desc: Optional[str] = None,
         verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
         timeout: Optional[int] = DEFAULT_TIMEOUT,
         **kwargs) -> Tuple[str, str, int]:
    """
    Wrap subprocess.Popen to

    - log stdout/stderr to a logger,
    - decode utf-8
    - cleanly return out, err, returncode

    Output is read incrementally via non-blocking fds + select so both
    streams can be logged line-by-line while the child runs.

    :param timeout: timeout in seconds
    """
    if desc is None:
        desc = command[0]
    if desc:
        desc += ': '
    # fall back to the CLI-level --timeout (args is the parsed namespace)
    timeout = timeout or args.timeout

    logger.debug("Running command: %s" % ' '.join(command))
    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        close_fds=True,
        **kwargs
    )
    # get current p.stdout flags, add O_NONBLOCK
    assert process.stdout is not None
    assert process.stderr is not None
    stdout_flags = fcntl.fcntl(process.stdout, fcntl.F_GETFL)
    stderr_flags = fcntl.fcntl(process.stderr, fcntl.F_GETFL)
    fcntl.fcntl(process.stdout, fcntl.F_SETFL, stdout_flags | os.O_NONBLOCK)
    fcntl.fcntl(process.stderr, fcntl.F_SETFL, stderr_flags | os.O_NONBLOCK)

    out = ''
    err = ''
    reads = None
    stop = False
    out_buffer = '' # partial line (no newline yet)
    err_buffer = '' # partial line (no newline yet)
    start_time = time.time()
    end_time = None
    if timeout:
        end_time = start_time + timeout
    while not stop:
        if end_time and (time.time() >= end_time):
            # deadline reached: kill the child if still running
            stop = True
            if process.poll() is None:
                logger.info(desc + 'timeout after %s seconds' % timeout)
                process.kill()
        if reads and process.poll() is not None:
            # we want to stop, but first read off anything remaining
            # on stdout/stderr
            stop = True
        else:
            reads, _, _ = select.select(
                [process.stdout.fileno(), process.stderr.fileno()],
                [], [], timeout
            )
        for fd in reads:
            try:
                message_b = os.read(fd, 1024)
                if isinstance(message_b, bytes):
                    message = message_b.decode('utf-8')
                if isinstance(message_b, str):
                    message = message_b
                if stop and message:
                    # process has terminated, but have more to read still, so not stopping yet
                    # (os.read returns '' when it encounters EOF)
                    stop = False
                if not message:
                    continue
                if fd == process.stdout.fileno():
                    out += message
                    # log only complete lines; keep the trailing partial line
                    message = out_buffer + message
                    lines = message.split('\n')
                    out_buffer = lines.pop()
                    for line in lines:
                        if verbosity == CallVerbosity.VERBOSE:
                            logger.info(desc + 'stdout ' + line)
                        elif verbosity != CallVerbosity.SILENT:
                            logger.debug(desc + 'stdout ' + line)
                elif fd == process.stderr.fileno():
                    err += message
                    message = err_buffer + message
                    lines = message.split('\n')
                    err_buffer = lines.pop()
                    for line in lines:
                        if verbosity == CallVerbosity.VERBOSE:
                            logger.info(desc + 'stderr ' + line)
                        elif verbosity != CallVerbosity.SILENT:
                            logger.debug(desc + 'stderr ' + line)
                else:
                    assert False
            except (IOError, OSError):
                # non-blocking read may fail spuriously; retry next loop
                pass
        if verbosity == CallVerbosity.VERBOSE:
            logger.debug(desc + 'profile rt=%s, stop=%s, exit=%s, reads=%s'
                         % (time.time()-start_time, stop, process.poll(), reads))

    returncode = process.wait()

    # flush any final partial lines that never got a newline
    if out_buffer != '':
        if verbosity == CallVerbosity.VERBOSE:
            logger.info(desc + 'stdout ' + out_buffer)
        elif verbosity != CallVerbosity.SILENT:
            logger.debug(desc + 'stdout ' + out_buffer)
    if err_buffer != '':
        if verbosity == CallVerbosity.VERBOSE:
            logger.info(desc + 'stderr ' + err_buffer)
        elif verbosity != CallVerbosity.SILENT:
            logger.debug(desc + 'stderr ' + err_buffer)

    if returncode != 0 and verbosity == CallVerbosity.VERBOSE_ON_FAILURE:
        # dump stdout + stderr
        logger.info('Non-zero exit code %d from %s' % (returncode, ' '.join(command)))
        for line in out.splitlines():
            logger.info(desc + 'stdout ' + line)
        for line in err.splitlines():
            logger.info(desc + 'stderr ' + line)

    return out, err, returncode
1062
1063
def call_throws(command: List[str],
                desc: Optional[str] = None,
                verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
                timeout: Optional[int] = DEFAULT_TIMEOUT,
                **kwargs) -> Tuple[str, str, int]:
    """Run *command* via call() and raise on a non-zero exit status."""
    result = call(command, desc, verbosity, timeout, **kwargs)
    if result[2] != 0:
        raise RuntimeError('Failed command: %s' % ' '.join(command))
    return result
1073
1074
def call_timeout(command, timeout):
    # type: (List[str], int) -> int
    """Run *command*, returning its exit code, or raise TimeoutExpired
    if it does not finish within *timeout* seconds.

    Unlike call(), output is not captured; it goes straight through.
    """

    logger.debug('Running command (timeout=%s): %s'
                 % (timeout, ' '.join(command)))

    def raise_timeout(command, timeout):
        # type: (List[str], int) -> NoReturn
        msg = 'Command \'%s\' timed out after %s seconds' % (command, timeout)
        logger.debug(msg)
        raise TimeoutExpired(msg)

    def call_timeout_py2(command, timeout):
        # type: (List[str], int) -> int
        # py2 has no subprocess timeout: emulate it with a watcher thread
        proc = subprocess.Popen(command)
        thread = Thread(target=proc.wait)
        thread.start()
        thread.join(timeout)
        if thread.is_alive():
            proc.kill()
            thread.join()
            raise_timeout(command, timeout)
        return proc.returncode

    def call_timeout_py3(command, timeout):
        # type: (List[str], int) -> int
        try:
            return subprocess.call(command, timeout=timeout)
        except subprocess.TimeoutExpired as e:
            # re-raised as this module's TimeoutExpired (raise_timeout never returns)
            raise_timeout(command, timeout)

    ret = 1
    if sys.version_info >= (3, 3):
        ret = call_timeout_py3(command, timeout)
    else:
        # py2 subprocess has no timeout arg
        ret = call_timeout_py2(command, timeout)
    return ret
1113
1114 ##################################
1115
1116
def is_available(what, func):
    # type: (str, Callable[[], bool]) -> None
    """
    Wait for a service to become available

    :param what: the name of the service
    :param func: the callable object that determines availability
    """
    max_attempts = args.retry
    logger.info('Waiting for %s...' % what)
    attempt = 1
    while not func():
        if attempt > max_attempts:
            raise Error('%s not available after %s tries'
                        % (what, max_attempts))
        logger.info('%s not available, waiting (%s/%s)...'
                    % (what, attempt, max_attempts))
        attempt += 1
        time.sleep(1)
    logger.info('%s is available'
                % what)
1142
1143
def read_config(fn):
    # type: (Optional[str]) -> ConfigParser
    """Parse a ceph.conf-style file into a ConfigParser.

    py2's ConfigParser rejects whitespace before option names, so any
    leading whitespace after a newline is stripped before parsing.
    Returns an empty parser when fn is None/empty.
    """
    cp = ConfigParser() if sys.version_info >= (3, 2) else SafeConfigParser()
    if not fn:
        return cp

    with open(fn, 'r') as f:
        raw_conf = f.read()
    # strip leading whitespace on continuation/option lines
    cleaned = re.sub(r'\n(\s)+', r'\n', raw_conf)
    buf = StringIO(cleaned)
    if sys.version_info >= (3, 2):
        cp.read_file(buf)
    else:
        cp.readfp(buf)
    return cp
1165
1166
def pathify(p):
    # type: (str) -> str
    """Return *p* with a leading '~' expanded and made absolute."""
    return os.path.abspath(os.path.expanduser(p))
1171
1172
def get_file_timestamp(fn):
    # type: (str) -> Optional[str]
    """Return fn's mtime rendered as a UTC DATEFMT string, or None on any error."""
    try:
        mtime = os.path.getmtime(fn)
        stamp = datetime.datetime.fromtimestamp(
            mtime, tz=datetime.timezone.utc
        )
        return stamp.strftime(DATEFMT)
    except Exception:
        # missing file, permission problem, etc. -- caller treats as unknown
        return None
1182
1183
def try_convert_datetime(s):
    # type: (str) -> Optional[str]
    """Normalize a docker/podman timestamp string to UTC in DATEFMT.

    docker and podman emit several mutually-inconsistent formats that
    python's strptime cannot parse directly, e.g.:
      docker 18.09.7: 2020-03-03T09:21:43.636153304Z
      podman 1.7.0:   2020-03-03T15:52:30.136257504-06:00
                      2020-03-03 15:52:30.136257504 -0600 CST
    (podman even differs between 'inspect' and
    'inspect --format {{.Created}}'.)  Returns None if nothing matches.
    """
    # strptime only understands 6-digit (microsecond) precision;
    # truncate the 9-digit nanosecond fields these tools emit
    s = re.sub(r'(\.[\d]{6})[\d]*', r'\1', s)

    # a trailing 'Z' is not parseable (on python 3.6.8); spell it -0000
    if s.endswith('Z'):
        s = s[:-1] + '-0000'

    # drop a redundant trailing zone name (e.g. 'CST') strptime can't handle
    s = ' '.join(s.split(' ')[0:3])

    for fmt in ('%Y-%m-%dT%H:%M:%S.%f%z',
                '%Y-%m-%d %H:%M:%S.%f %z'):
        try:
            # normalize to UTC and render as DATEFMT
            return datetime.datetime.strptime(s, fmt).astimezone(
                tz=datetime.timezone.utc).strftime(DATEFMT)
        except ValueError:
            pass
    return None
1223
1224
def get_podman_version():
    # type: () -> Tuple[int, ...]
    """Return the podman version as a tuple of ints; error if not using podman."""
    if 'podman' not in container_path:
        raise ValueError('not using podman')
    version_out, _, _ = call_throws([container_path, '--version'])
    return _parse_podman_version(version_out)
1231
1232
1233 def _parse_podman_version(out):
1234 # type: (str) -> Tuple[int, ...]
1235 _, _, version_str = out.strip().split()
1236
1237 def to_int(val, org_e=None):
1238 if not val and org_e:
1239 raise org_e
1240 try:
1241 return int(val)
1242 except ValueError as e:
1243 return to_int(val[0:-1], org_e or e)
1244
1245 return tuple(map(to_int, version_str.split('.')))
1246
1247
def get_hostname():
    # type: () -> str
    """Return this host's (short) hostname."""
    return socket.gethostname()
1251
1252
def get_fqdn():
    # type: () -> str
    """Return the fully-qualified domain name, falling back to the hostname."""
    fqdn = socket.getfqdn()
    return fqdn if fqdn else socket.gethostname()
1256
1257
def get_arch():
    # type: () -> str
    """Return the machine architecture string (e.g. 'x86_64')."""
    return platform.uname().machine
1261
1262
def generate_service_id():
    # type: () -> str
    """Return '<hostname>.<6 random lowercase letters>' as a service id."""
    suffix = ''.join(random.choice(string.ascii_lowercase) for _ in range(6))
    return get_hostname() + '.' + suffix
1267
1268
def generate_password():
    # type: () -> str
    """Return a 10-character password of lowercase letters and digits."""
    alphabet = string.ascii_lowercase + string.digits
    return ''.join(random.choice(alphabet) for _ in range(10))
1273
1274
def normalize_container_id(i):
    # type: (str) -> str
    """Strip a leading 'sha256:' from a container/image id.

    docker (18.09.7 in bionic at least) prefixes ids with 'sha256:' while
    podman does not; both always use sha256, so drop the prefix for a
    consistent form.
    """
    prefix = 'sha256:'
    return i[len(prefix):] if i.startswith(prefix) else i
1285
1286
def make_fsid():
    # type: () -> str
    """Generate a fresh cluster fsid (a uuid1, rendered as a string)."""
    return str(uuid.uuid1())
1290
1291
def is_fsid(s):
    # type: (str) -> bool
    """Return True iff *s* parses as a UUID."""
    try:
        uuid.UUID(s)
        return True
    except ValueError:
        return False
1299
1300
def infer_fsid(func):
    """
    Decorator: when args.fsid is unset, infer it from the daemons found
    under /var/lib/ceph/*.  Raises Error when more than one candidate
    exists; leaves args.fsid unset when none is found (some commands do
    not require one).
    """
    @wraps(func)
    def _infer_fsid():
        if args.fsid:
            logger.debug('Using specified fsid: %s' % args.fsid)
            return func()

        candidates = set()
        for daemon in list_daemons(detail=False):
            if not is_fsid(daemon['fsid']):
                # 'unknown' fsid
                continue
            if 'name' in args and args.name:
                # only count daemons matching args.name
                if daemon['name'] == args.name:
                    candidates.add(daemon['fsid'])
            else:
                # args.name not specified; consider every daemon
                candidates.add(daemon['fsid'])
        fsids = sorted(candidates)

        if not fsids:
            # some commands do not always require an fsid
            pass
        elif len(fsids) == 1:
            logger.info('Inferring fsid %s' % fsids[0])
            args.fsid = fsids[0]
        else:
            raise Error('Cannot infer an fsid, one must be specified: %s' % fsids)
        return func()

    return _infer_fsid
1336
1337
def infer_config(func):
    """
    Decorator: when args.config is unset, infer it from a local MON
    daemon's data dir, falling back to SHELL_DEFAULT_CONF if that exists.
    """
    @wraps(func)
    def _infer_config():
        if args.config:
            logger.debug('Using specified config: %s' % args.config)
            return func()
        config = None
        if args.fsid:
            name = args.name
            if not name:
                # pick the first mon daemon we can find
                for daemon in list_daemons(detail=False):
                    if daemon['name'].startswith('mon.'):
                        name = daemon['name']
                        break
            if name:
                config = '/var/lib/ceph/{}/{}/config'.format(args.fsid, name)
        if config:
            logger.info('Inferring config %s' % config)
            args.config = config
        elif os.path.exists(SHELL_DEFAULT_CONF):
            logger.debug('Using default config: %s' % SHELL_DEFAULT_CONF)
            args.config = SHELL_DEFAULT_CONF
        return func()

    return _infer_config
1367
1368
def _get_default_image():
    """Return DEFAULT_IMAGE; warn (in yellow) if this cephadm tracks master."""
    if DEFAULT_IMAGE_IS_MASTER:
        warn = '''This is a development version of cephadm.
For information regarding the latest stable release:
  https://docs.ceph.com/docs/{}/cephadm/install
'''.format(LATEST_STABLE_RELEASE)
        # emit one log line per line of the banner so each gets colored
        for line in warn.splitlines():
            logger.warning('{}{}{}'.format(termcolor.yellow, line, termcolor.end))
    return DEFAULT_IMAGE
1378
1379
def infer_image(func):
    """
    Decorator: fill args.image from, in order: the CEPHADM_IMAGE
    environment variable, the most recent local ceph image, then the
    built-in default.
    """
    @wraps(func)
    def _infer_image():
        if not args.image:
            args.image = (os.environ.get('CEPHADM_IMAGE')
                          or get_last_local_ceph_image()
                          or _get_default_image())
        return func()

    return _infer_image
1395
1396
def default_image(func):
    """
    Decorator: fill args.image, preferring the pinned image of a
    monitoring component when args.name names one, then the
    CEPHADM_IMAGE environment variable, then the built-in default.
    """
    @wraps(func)
    def _default_image():
        if not args.image:
            if 'name' in args and args.name:
                type_ = args.name.split('.', 1)[0]
                if type_ in Monitoring.components:
                    args.image = Monitoring.components[type_]['image']
            args.image = (args.image
                          or os.environ.get('CEPHADM_IMAGE')
                          or _get_default_image())

        return func()

    return _default_image
1413
1414
def get_last_local_ceph_image():
    """
    :return: The most recent local ceph image (already pulled)
    """
    cmd = [container_path, 'images',
           '--filter', 'label=ceph=True',
           '--filter', 'dangling=false',
           '--format', '{{.Repository}}@{{.Digest}}']
    out, _, _ = call_throws(cmd)
    return _filter_last_local_ceph_image(out)
1425
1426
1427 def _filter_last_local_ceph_image(out):
1428 # str -> Optional[str]
1429 for image in out.splitlines():
1430 if image and not image.endswith('@'):
1431 logger.info('Using recent ceph image %s' % image)
1432 return image
1433 return None
1434
1435
def write_tmp(s, uid, gid):
    # type: (str, int, int) -> Any
    """Write *s* to a NamedTemporaryFile owned by uid:gid; return the open handle.

    The file disappears when the returned handle is closed.
    """
    handle = tempfile.NamedTemporaryFile(mode='w',
                                         prefix='ceph-tmp')
    os.fchown(handle.fileno(), uid, gid)
    handle.write(s)
    handle.flush()
    return handle
1445
1446
def makedirs(dir, uid, gid, mode):
    # type: (str, int, int, int) -> None
    """Create *dir* if needed and force its ownership and mode."""
    if os.path.exists(dir):
        os.chmod(dir, mode)
    else:
        os.makedirs(dir, mode=mode)
    os.chown(dir, uid, gid)
    os.chmod(dir, mode)  # the mode given to makedirs is masked by umask
1455
1456
def get_data_dir(fsid, t, n):
    # type: (str, str, Union[int, str]) -> str
    """Return the data dir for daemon type *t*, id *n* in cluster *fsid*."""
    return os.path.join(args.data_dir, fsid, '{}.{}'.format(t, n))
1460
1461
def get_log_dir(fsid):
    # type: (str) -> str
    """Return the log directory for cluster *fsid*."""
    return os.path.join(args.log_dir, fsid)
1465
1466
def make_data_dir_base(fsid, uid, gid):
    # type: (str, int, int) -> str
    """Create the per-cluster base dir plus crash/ and crash/posted/; return it."""
    base = os.path.join(args.data_dir, fsid)
    for parts in ([], ['crash'], ['crash', 'posted']):
        makedirs(os.path.join(base, *parts), uid, gid, DATA_DIR_MODE)
    return base
1475
1476
def make_data_dir(fsid, daemon_type, daemon_id, uid=None, gid=None):
    # type: (str, str, Union[int, str], Optional[int], Optional[int]) -> str
    """Create (if needed) and return one daemon's data directory.

    uid/gid default to the ownership discovered from the container image.
    """
    if uid is None or gid is None:
        uid, gid = extract_uid_gid()
    make_data_dir_base(fsid, uid, gid)
    path = get_data_dir(fsid, daemon_type, daemon_id)
    makedirs(path, uid, gid, DATA_DIR_MODE)
    return path
1485
1486
def make_log_dir(fsid, uid=None, gid=None):
    # type: (str, Optional[int], Optional[int]) -> str
    """Create (if needed) and return the cluster's log directory."""
    if uid is None or gid is None:
        uid, gid = extract_uid_gid()
    path = get_log_dir(fsid)
    makedirs(path, uid, gid, LOG_DIR_MODE)
    return path
1494
1495
def make_var_run(fsid, uid, gid):
    # type: (str, int, int) -> None
    """Create /var/run/ceph/<fsid> (mode 0770) owned by uid:gid."""
    call_throws(['install', '-d', '-m0770',
                 '-o', str(uid), '-g', str(gid),
                 '/var/run/ceph/%s' % fsid])
1500
1501
def copy_tree(src, dst, uid=None, gid=None):
    # type: (List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy a directory tree from src to dst
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid()

    for src_dir in src:
        if os.path.isdir(dst):
            dst_dir = os.path.join(dst, os.path.basename(src_dir))
        else:
            dst_dir = dst

        logger.debug('copy directory \'%s\' -> \'%s\'' % (src_dir, dst_dir))
        # copytree's dirs_exist_ok needs python 3.8; remove and re-copy instead
        shutil.rmtree(dst_dir, ignore_errors=True)
        shutil.copytree(src_dir, dst_dir)

        # fix ownership over the entire copied tree
        for dirpath, dirnames, filenames in os.walk(dst_dir):
            logger.debug('chown %s:%s \'%s\'' % (uid, gid, dirpath))
            os.chown(dirpath, uid, gid)
            for filename in filenames:
                logger.debug('chown %s:%s \'%s\'' % (uid, gid, filename))
                os.chown(os.path.join(dirpath, filename), uid, gid)
1525
1526
def copy_files(src, dst, uid=None, gid=None):
    # type: (List[str], str, Optional[int], Optional[int]) -> None
    """Copy each file in *src* to *dst*, chowning the copies to uid:gid."""
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid()

    for src_file in src:
        # when dst is a directory, copy into it under the source basename
        if os.path.isdir(dst):
            dst_file = os.path.join(dst, os.path.basename(src_file))
        else:
            dst_file = dst

        logger.debug('copy file \'%s\' -> \'%s\'' % (src_file, dst_file))
        shutil.copyfile(src_file, dst_file)

        logger.debug('chown %s:%s \'%s\'' % (uid, gid, dst_file))
        os.chown(dst_file, uid, gid)
1545
1546
def move_files(src, dst, uid=None, gid=None):
    # type: (List[str], str, Optional[int], Optional[int]) -> None
    """Move each file in *src* into *dst*, recreating symlinks by hand."""
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid()

    for src_file in src:
        # when dst is a directory, move into it under the source basename
        if os.path.isdir(dst):
            dst_file = os.path.join(dst, os.path.basename(src_file))
        else:
            dst_file = dst

        if os.path.islink(src_file):
            # shutil.move() in py2 does not handle symlinks correctly;
            # recreate the link at the destination and drop the original
            src_rl = os.readlink(src_file)
            logger.debug("symlink '%s' -> '%s'" % (dst_file, src_rl))
            os.symlink(src_rl, dst_file)
            os.unlink(src_file)
        else:
            logger.debug("move file '%s' -> '%s'" % (src_file, dst_file))
            shutil.move(src_file, dst_file)
            logger.debug('chown %s:%s \'%s\'' % (uid, gid, dst_file))
            os.chown(dst_file, uid, gid)
1571
1572
## copied from distutils ##
def find_executable(executable, path=None):
    """Tries to find 'executable' in the directories listed in 'path'.

    'path' is a string of directories separated by 'os.pathsep' and
    defaults to os.environ['PATH'].  Returns the complete filename or
    None if not found.  Only existence is checked, not executability.
    """
    _, ext = os.path.splitext(executable)
    if sys.platform == 'win32' and ext != '.exe':
        executable = executable + '.exe'

    # an explicit (possibly relative) path wins outright
    if os.path.isfile(executable):
        return executable

    if path is None:
        path = os.environ.get('PATH', None)
        if path is None:
            try:
                path = os.confstr("CS_PATH")
            except (AttributeError, ValueError):
                # os.confstr() or CS_PATH is not available
                path = os.defpath
        # bpo-35755: Don't use os.defpath if the PATH environment variable is
        # set to an empty string

    # PATH='' doesn't match, whereas PATH=':' looks in the current directory
    if not path:
        return None

    for p in path.split(os.pathsep):
        candidate = os.path.join(p, executable)
        if os.path.isfile(candidate):
            # the file exists, we have a shot at spawn working
            return candidate
    return None
1608
1609
def find_program(filename):
    # type: (str) -> str
    """Like find_executable(), but raise ValueError when nothing is found."""
    located = find_executable(filename)
    if located is None:
        raise ValueError('%s not found' % filename)
    return located
1616
1617
def get_unit_name(fsid, daemon_type, daemon_id=None):
    # type: (str, str, Optional[Union[int, str]]) -> str
    """Return the systemd unit name for one daemon (type+id) or a whole type."""
    if daemon_id is None:
        return 'ceph-%s@%s' % (fsid, daemon_type)
    return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
1625
1626
def get_unit_name_by_daemon_name(fsid, name):
    """Return the systemd unit recorded for daemon *name* in cluster *fsid*.

    Raises Error when the daemon description lacks a 'systemd_unit' field.
    """
    daemon = get_daemon_description(fsid, name)
    if 'systemd_unit' not in daemon:
        raise Error('Failed to get unit name for {}'.format(daemon))
    return daemon['systemd_unit']
1633
1634
def check_unit(unit_name):
    # type: (str) -> Tuple[bool, str, bool]
    """Query systemd for one unit's status.

    Returns (enabled, state, installed) where state is one of 'running',
    'stopped', 'error' or 'unknown'.  systemctl exit codes are ignored
    because the string output is more explicit (and sufficient).
    """
    enabled = False
    installed = False
    try:
        out, err, code = call(['systemctl', 'is-enabled', unit_name],
                              verbosity=CallVerbosity.DEBUG)
        if code == 0:
            enabled = True
            installed = True
        elif "disabled" in out:
            installed = True
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        enabled = False
        installed = False

    state = 'unknown'
    try:
        out, err, code = call(['systemctl', 'is-active', unit_name],
                              verbosity=CallVerbosity.DEBUG)
        out = out.strip()
        state = {
            'active': 'running',
            'inactive': 'stopped',
            'failed': 'error',
            'auto-restart': 'error',
        }.get(out, 'unknown')
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        state = 'unknown'
    return (enabled, state, installed)
1672
1673
def check_units(units, enabler=None):
    # type: (List[str], Optional[Packager]) -> bool
    """Return True as soon as any listed unit is enabled and running.

    Otherwise, enable each installed unit via *enabler* (when provided)
    and return False.
    """
    for unit in units:
        enabled, state, installed = check_unit(unit)
        if enabled and state == 'running':
            logger.info('Unit %s is enabled and running' % unit)
            return True
        if enabler is not None and installed:
            logger.info('Enabling unit %s' % unit)
            enabler.enable_service(unit)
    return False
1686
1687
def get_legacy_config_fsid(cluster, legacy_dir=None):
    # type: (str, Optional[str]) -> Optional[str]
    """Read the fsid from /etc/ceph/<cluster>.conf (under legacy_dir, if given)."""
    config_file = '/etc/ceph/%s.conf' % cluster
    if legacy_dir is not None:
        config_file = os.path.abspath(legacy_dir + config_file)

    if not os.path.exists(config_file):
        return None
    config = read_config(config_file)
    if config.has_section('global') and config.has_option('global', 'fsid'):
        return config.get('global', 'fsid')
    return None
1699
1700
def get_legacy_daemon_fsid(cluster, daemon_type, daemon_id, legacy_dir=None):
    # type: (str, str, Union[int, str], Optional[str]) -> Optional[str]
    """Determine the fsid of a pre-cephadm ('legacy') daemon.

    For OSDs, prefer the per-daemon 'ceph_fsid' file; fall back to the
    cluster config file for all daemon types.
    """
    fsid = None
    if daemon_type == 'osd':
        fsid_file = os.path.join(args.data_dir,
                                 daemon_type,
                                 'ceph-%s' % daemon_id,
                                 'ceph_fsid')
        if legacy_dir is not None:
            fsid_file = os.path.abspath(legacy_dir + fsid_file)
        try:
            with open(fsid_file, 'r') as f:
                fsid = f.read().strip()
        except IOError:
            # missing/unreadable file: fall through to the config lookup
            pass
    return fsid or get_legacy_config_fsid(cluster, legacy_dir=legacy_dir)
1719
1720
def get_daemon_args(fsid, daemon_type, daemon_id):
    # type: (str, str, Union[int, str]) -> List[str]
    """Return the extra command-line args for one daemon's container.

    Handles ceph daemons proper, monitoring components (per their
    Monitoring.components metadata), NFS ganesha, and custom containers.
    """
    r = list()  # type: List[str]

    if daemon_type in Ceph.daemons and daemon_type != 'crash':
        # run as the ceph user and send logs to stderr (with a
        # '"debug "' prefix) instead of log files
        r += [
            '--setuser', 'ceph',
            '--setgroup', 'ceph',
            '--default-log-to-file=false',
            '--default-log-to-stderr=true',
            '--default-log-stderr-prefix="debug "',
        ]
        if daemon_type == 'mon':
            # likewise route the mon cluster log to stderr
            r += [
                '--default-mon-cluster-log-to-file=false',
                '--default-mon-cluster-log-to-stderr=true',
            ]
    elif daemon_type in Monitoring.components:
        metadata = Monitoring.components[daemon_type]
        r += metadata.get('args', list())
        if daemon_type == 'alertmanager':
            # cluster peers come from the --config-json payload
            config = get_parm(args.config_json)
            peers = config.get('peers', list())  # type: ignore
            for peer in peers:
                r += ["--cluster.peer={}".format(peer)]
            # some alertmanager, by default, look elsewhere for a config
            r += ["--config.file=/etc/alertmanager/alertmanager.yml"]
    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
        r += nfs_ganesha.get_daemon_args()
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(fsid, daemon_id)
        r.extend(cc.get_daemon_args())

    return r
1756
1757
def create_daemon_dirs(fsid, daemon_type, daemon_id, uid, gid,
                       config=None, keyring=None):
    # type: (str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None
    """Create the data/log dirs and config/keyring files for one daemon.

    Monitoring components additionally get their per-component etc/data
    subdirectories and any files carried in the --config-json payload;
    ganesha/iscsi/custom containers delegate to their own helpers.
    """
    data_dir = make_data_dir(fsid, daemon_type, daemon_id, uid=uid, gid=gid)
    make_log_dir(fsid, uid=uid, gid=gid)

    if config:
        # write the ceph.conf content, readable only by the daemon user
        config_path = os.path.join(data_dir, 'config')
        with open(config_path, 'w') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config)

    if keyring:
        # same treatment for the keyring
        keyring_path = os.path.join(data_dir, 'keyring')
        with open(keyring_path, 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write(keyring)

    if daemon_type in Monitoring.components.keys():
        config_json: Dict[str, Any] = get_parm(args.config_json)
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())

        # Set up directories specific to the monitoring component
        config_dir = ''
        if daemon_type == 'prometheus':
            data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
            config_dir = 'etc/prometheus'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'alerting'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
        elif daemon_type == 'grafana':
            data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
            config_dir = 'etc/grafana'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
        elif daemon_type == 'alertmanager':
            data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
            config_dir = 'etc/alertmanager'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755)

        # populate the config directory for the component from the config-json
        for fname in required_files:
            if 'files' in config_json:  # type: ignore
                content = dict_get_join(config_json['files'], fname)
                with open(os.path.join(data_dir_root, config_dir, fname), 'w') as f:
                    os.fchown(f.fileno(), uid, gid)
                    os.fchmod(f.fileno(), 0o600)
                    f.write(content)

    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
        nfs_ganesha.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CephIscsi.daemon_type:
        ceph_iscsi = CephIscsi.init(fsid, daemon_id)
        ceph_iscsi.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(fsid, daemon_id)
        cc.create_daemon_dirs(data_dir, uid, gid)
1823
1824
def get_parm(option):
    # type: (str) -> Dict[str, str]
    """Resolve a --config-json style option into a dict.

    *option* may be '-' (read stdin once, cached for repeat calls), an
    inline JSON string, or the path of a JSON file.  Empty/None input
    yields an empty dict; a missing file or bad JSON raises Error.
    """
    if not option:
        return dict()

    global cached_stdin
    if option == '-':
        if cached_stdin is not None:
            j = cached_stdin
        else:
            try:
                # allow content injected by a wrapper script
                j = injected_stdin  # type: ignore
            except NameError:
                j = sys.stdin.read()
            cached_stdin = j
    elif option.startswith('{') and option.endswith('}'):
        # inline json string
        j = option
    elif os.path.exists(option):
        # json file
        with open(option, 'r') as f:
            j = f.read()
    else:
        raise Error("Config file {} not found".format(option))

    try:
        return json.loads(j)
    except ValueError as e:
        raise Error("Invalid JSON in {}: {}".format(option, e))
1858
1859
def get_config_and_keyring():
    # type: () -> Tuple[Optional[str], Optional[str]]
    """Gather config and keyring text from args (json payload, files or --key)."""
    config = None
    keyring = None

    # --config-json may carry both pieces
    if 'config_json' in args and args.config_json:
        parsed = get_parm(args.config_json)
        config = parsed.get('config')
        keyring = parsed.get('keyring')

    # an explicit --config file overrides json-provided config
    if 'config' in args and args.config:
        with open(args.config, 'r') as fh:
            config = fh.read()

    # --key synthesizes a minimal keyring; --keyring reads one from disk
    if 'key' in args and args.key:
        keyring = '[%s]\n\tkey = %s\n' % (args.name, args.key)
    elif 'keyring' in args and args.keyring:
        with open(args.keyring, 'r') as fh:
            keyring = fh.read()

    return config, keyring
1881
1882
def get_container_binds(fsid, daemon_type, daemon_id):
    # type: (str, str, Union[int, str, None]) -> List[List[str]]
    """Collect --mount style bind specs for the given daemon's container."""
    bind_list = list()

    if daemon_type == CephIscsi.daemon_type:
        bind_list.extend(CephIscsi.get_container_binds())
    elif daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(fsid, daemon_id)
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        bind_list.extend(cc.get_container_binds(data_dir))

    return bind_list
1896
1897
def get_container_mounts(fsid, daemon_type, daemon_id,
                         no_config=False):
    # type: (str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str]
    """Map host paths -> container paths for one daemon's volume mounts.

    daemon_id may be None when only the per-cluster (fsid) mounts are
    wanted.  ':z'/':Z' suffixes request SELinux relabeling (shared vs
    private); ':ro' mounts read-only.
    """
    mounts = dict()

    if daemon_type in Ceph.daemons:
        if fsid:
            # per-cluster run/log/crash dirs shared by every ceph daemon
            run_path = os.path.join('/var/run/ceph', fsid);
            if os.path.exists(run_path):
                mounts[run_path] = '/var/run/ceph:z'
            log_dir = get_log_dir(fsid)
            mounts[log_dir] = '/var/log/ceph:z'
            crash_dir = '/var/lib/ceph/%s/crash' % fsid
            if os.path.exists(crash_dir):
                mounts[crash_dir] = '/var/lib/ceph/crash:z'

    if daemon_type in Ceph.daemons and daemon_id:
        # per-daemon data dir, mapped to the legacy in-container location
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        if daemon_type == 'rgw':
            cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id)
        else:
            cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id)
        if daemon_type != 'crash':
            mounts[data_dir] = cdata_dir + ':z'
        if not no_config:
            mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
        if daemon_type == 'rbd-mirror' or daemon_type == 'crash':
            # these do not search for their keyrings in a data directory
            mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id)

    if daemon_type in ['mon', 'osd']:
        mounts['/dev'] = '/dev'  # FIXME: narrow this down?
        mounts['/run/udev'] = '/run/udev'
    if daemon_type == 'osd':
        mounts['/sys'] = '/sys'  # for numa.cc, pick_address, cgroups, ...
        mounts['/run/lvm'] = '/run/lvm'
        mounts['/run/lock/lvm'] = '/run/lock/lvm'

    try:
        if args.shared_ceph_folder:  # make easy manager modules/ceph-volume development
            ceph_folder = pathify(args.shared_ceph_folder)
            if os.path.exists(ceph_folder):
                # overlay the host's source tree onto the installed locations
                mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
                mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
                mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
                mounts[ceph_folder + '/monitoring/grafana/dashboards'] = '/etc/grafana/dashboards/ceph-dashboard'
                mounts[ceph_folder + '/monitoring/prometheus/alerts'] = '/etc/prometheus/ceph'
            else:
                logger.error('{}{}{}'.format(termcolor.red,
                                             'Ceph shared source folder does not exist.',
                                             termcolor.end))
    except AttributeError:
        # NOTE(review): args apparently may lack shared_ceph_folder here;
        # treated as "no dev overlay" — confirm which subcommands set it
        pass

    if daemon_type in Monitoring.components and daemon_id:
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        if daemon_type == 'prometheus':
            mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z'
            mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z'
        elif daemon_type == 'node-exporter':
            # node-exporter scrapes the host, so expose host views read-only
            mounts['/proc'] = '/host/proc:ro'
            mounts['/sys'] = '/host/sys:ro'
            mounts['/'] = '/rootfs:ro'
        elif daemon_type == "grafana":
            mounts[os.path.join(data_dir, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
        elif daemon_type == 'alertmanager':
            mounts[os.path.join(data_dir, 'etc/alertmanager')] = '/etc/alertmanager:Z'

    if daemon_type == NFSGanesha.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
        mounts.update(nfs_ganesha.get_container_mounts(data_dir))

    if daemon_type == CephIscsi.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        log_dir = get_log_dir(fsid)
        mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir))

    if daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(fsid, daemon_id)
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        mounts.update(cc.get_container_mounts(data_dir))

    return mounts
1987
1988
def get_container(fsid: str, daemon_type: str, daemon_id: Union[int, str],
                  privileged: bool = False,
                  ptrace: bool = False,
                  container_args: Optional[List[str]] = None) -> 'CephContainer':
    """Build the CephContainer object used to run one daemon.

    Selects entrypoint, ceph '-n' entity name, env vars, privilege level
    and runtime-specific flags based on the daemon type.
    """
    entrypoint: str = ''
    name: str = ''
    ceph_args: List[str] = []
    envs: List[str] = []
    host_network: bool = True

    if container_args is None:
        container_args = []
    if daemon_type in ['mon', 'osd']:
        # mon and osd need privileged in order for libudev to query devices
        privileged = True
    if daemon_type == 'rgw':
        entrypoint = '/usr/bin/radosgw'
        name = 'client.rgw.%s' % daemon_id
    elif daemon_type == 'rbd-mirror':
        entrypoint = '/usr/bin/rbd-mirror'
        name = 'client.rbd-mirror.%s' % daemon_id
    elif daemon_type == 'crash':
        entrypoint = '/usr/bin/ceph-crash'
        name = 'client.crash.%s' % daemon_id
    elif daemon_type in ['mon', 'mgr', 'mds', 'osd']:
        entrypoint = '/usr/bin/ceph-' + daemon_type
        name = '%s.%s' % (daemon_type, daemon_id)
    elif daemon_type in Monitoring.components:
        # monitoring images define their own entrypoint
        entrypoint = ''
    elif daemon_type == NFSGanesha.daemon_type:
        entrypoint = NFSGanesha.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        envs.extend(NFSGanesha.get_container_envs())
    elif daemon_type == CephIscsi.daemon_type:
        entrypoint = CephIscsi.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        # So the container can modprobe iscsi_target_mod and have write perms
        # to configfs we need to make this a privileged container.
        privileged = True
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(fsid, daemon_id)
        entrypoint = cc.entrypoint
        host_network = False
        envs.extend(cc.get_container_envs())
        container_args.extend(cc.get_container_args())

    if daemon_type in Monitoring.components:
        # run the monitoring component as its image's expected uid
        uid, gid = extract_uid_gid_monitoring(daemon_type)
        monitoring_args = [
            '--user',
            str(uid),
            # FIXME: disable cpu/memory limits for the time being (not supported
            # by ubuntu 18.04 kernel!)
        ]
        container_args.extend(monitoring_args)
    elif daemon_type == 'crash':
        ceph_args = ['-n', name]
    elif daemon_type in Ceph.daemons:
        # -f keeps the daemon in the foreground for the container runtime
        ceph_args = ['-n', name, '-f']

    # if using podman, set -d, --conmon-pidfile & --cidfile flags
    # so service can have Type=Forking
    if 'podman' in container_path:
        runtime_dir = '/run'
        container_args.extend(['-d',
                               '--conmon-pidfile',
                               runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, daemon_id),
                               '--cidfile',
                               runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, daemon_id)])

    return CephContainer(
        image=args.image,
        entrypoint=entrypoint,
        args=ceph_args + get_daemon_args(fsid, daemon_type, daemon_id),
        container_args=container_args,
        volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
        bind_mounts=get_container_binds(fsid, daemon_type, daemon_id),
        cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id),
        envs=envs,
        privileged=privileged,
        ptrace=ptrace,
        init=args.container_init,
        host_network=host_network,
    )
2073
2074
def extract_uid_gid(img='', file_path='/var/lib/ceph'):
    # type: (str, Union[str, List[str]]) -> Tuple[int, int]
    """Determine which uid/gid owns *file_path* inside a container image.

    Tries each candidate path in turn by running `stat -c '%u %g'` in a
    throwaway container; the first path that stats successfully wins.

    :param img: container image to inspect (defaults to args.image)
    :param file_path: one path, or a list of candidate paths, to stat
    :raises RuntimeError: if none of the paths could be stat'ed
    """
    if not img:
        img = args.image

    paths = [file_path] if isinstance(file_path, str) else file_path

    for candidate in paths:
        try:
            stat_out = CephContainer(
                image=img,
                entrypoint='stat',
                args=['-c', '%u %g', candidate],
            ).run()
            owner, group = stat_out.split(' ')
            return int(owner), int(group)
        except RuntimeError:
            # path missing in this image; try the next candidate
            continue
    raise RuntimeError('uid/gid not found')
2098
2099
def deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid,
                  config=None, keyring=None,
                  osd_fsid=None,
                  reconfig=False,
                  ports=None):
    # type: (str, str, Union[int, str], CephContainer, int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None
    """Deploy (or reconfigure) one daemon on this host.

    Creates the daemon's data dirs plus config/keyring, runs `ceph-mon --mkfs`
    for a brand-new mon, writes the systemd unit files (unless reconfiguring),
    opens any required firewall ports, and restarts non-ceph daemons on
    reconfig so they pick up the new config.

    :param c: container spec used to generate the daemon's unit.run
    :param osd_fsid: OSD uuid; needed by OSD activate/deactivate scripts
    :param reconfig: only rewrite config; do not redeploy units
    :param ports: TCP ports the daemon requires; must be free
    :raises Error: if a required port is in use, or reconfig targets a
        nonexistent data dir
    """
    ports = ports or []
    # generator (not a list) so we stop probing at the first busy port
    if any(port_in_use(port) for port in ports):
        raise Error("TCP Port(s) '{}' required for {} already in use".format(",".join(map(str, ports)), daemon_type))

    data_dir = get_data_dir(fsid, daemon_type, daemon_id)
    if reconfig and not os.path.exists(data_dir):
        raise Error('cannot reconfig, data path %s does not exist' % data_dir)
    if daemon_type == 'mon' and not os.path.exists(data_dir):
        # brand-new mon: must run ceph-mon --mkfs before it can start
        assert config
        assert keyring
        # tmp keyring file
        tmp_keyring = write_tmp(keyring, uid, gid)

        # tmp config file
        tmp_config = write_tmp(config, uid, gid)

        # --mkfs
        create_daemon_dirs(fsid, daemon_type, daemon_id, uid, gid)
        mon_dir = get_data_dir(fsid, 'mon', daemon_id)
        log_dir = get_log_dir(fsid)
        out = CephContainer(
            image=args.image,
            entrypoint='/usr/bin/ceph-mon',
            args=['--mkfs',
                  '-i', str(daemon_id),
                  '--fsid', fsid,
                  '-c', '/tmp/config',
                  '--keyring', '/tmp/keyring',
                  ] + get_daemon_args(fsid, 'mon', daemon_id),
            volume_mounts={
                log_dir: '/var/log/ceph:z',
                mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id),
                tmp_keyring.name: '/tmp/keyring:z',
                tmp_config.name: '/tmp/config:z',
            },
        ).run()

        # write conf
        with open(mon_dir + '/config', 'w') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config)
    else:
        # dirs, conf, keyring
        create_daemon_dirs(
            fsid, daemon_type, daemon_id,
            uid, gid,
            config, keyring)

    if not reconfig:
        deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
                            osd_fsid=osd_fsid)

    # unit.created records when the daemon was first deployed (write once)
    if not os.path.exists(data_dir + '/unit.created'):
        with open(data_dir + '/unit.created', 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write('mtime is time the daemon deployment was created\n')

    # unit.configured is refreshed on every (re)configuration
    with open(data_dir + '/unit.configured', 'w') as f:
        f.write('mtime is time we were last configured\n')
        os.fchmod(f.fileno(), 0o600)
        os.fchown(f.fileno(), uid, gid)

    update_firewalld(daemon_type)

    # Open ports explicitly required for the daemon
    if ports:
        fw = Firewalld()
        fw.open_ports(ports)
        fw.apply_rules()

    if reconfig and daemon_type not in Ceph.daemons:
        # ceph daemons do not need a restart; others (presumably) do to pick
        # up the new config
        call_throws(['systemctl', 'reset-failed',
                     get_unit_name(fsid, daemon_type, daemon_id)])
        call_throws(['systemctl', 'restart',
                     get_unit_name(fsid, daemon_type, daemon_id)])
2186
def _write_container_cmd_to_bash(file_obj, container, comment=None, background=False):
    # type: (IO[str], CephContainer, Optional[str], Optional[bool]) -> None
    """Write the shell lines that clean up and then run *container*.

    :param file_obj: open unit.run/unit.poststop file being generated
    :param comment: optional shell comment emitted above the commands
    :param background: append ' &' so the container runs in the background
    """
    if comment:
        # A comment helps when several containers share one unit file.
        file_obj.write('# ' + comment + '\n')
    # `--rm` on the run command is not always honored, so force-remove any
    # stale container of the same name first (ignore failures via '!').
    file_obj.write('! ' + ' '.join(container.rm_cmd()) + ' 2> /dev/null\n')
    if 'podman' in container_path:
        # podman can leave storage behind that a plain `rm` misses.
        file_obj.write('! ' + ' '.join(container.rm_cmd(storage=True)) + ' 2> /dev/null\n')

    # container run command
    suffix = ' &' if background else ''
    file_obj.write(' '.join(container.run_cmd()) + suffix + '\n')
2201
2202
def deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True, start=True,
                        osd_fsid=None):
    # type: (str, int, int, str, Union[int, str], CephContainer, bool, bool, Optional[str]) -> None
    """Write unit.run/unit.poststop/unit.image and the systemd unit files.

    unit.run holds the pre-start steps plus the container run command;
    unit.poststop holds the teardown steps. Both are written to a ``.new``
    file and renamed into place so a crash never leaves a partial file.
    Finally the systemd units are installed and the daemon is enabled/started.

    :param c: container spec for the daemon itself
    :param osd_fsid: OSD uuid; required when daemon_type == 'osd'
    """
    # cmd
    data_dir = get_data_dir(fsid, daemon_type, daemon_id)
    with open(data_dir + '/unit.run.new', 'w') as f:
        f.write('set -e\n')

        if daemon_type in Ceph.daemons:
            # ensure the per-cluster /var/run/ceph/<fsid> dir exists
            install_path = find_program('install')
            f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))

        # pre-start cmd(s)
        if daemon_type == 'osd':
            # osds have a pre-start step
            assert osd_fsid
            simple_fn = os.path.join('/etc/ceph/osd',
                                     '%s-%s.json.adopted-by-cephadm' % (daemon_id, osd_fsid))
            if os.path.exists(simple_fn):
                # adopted ceph-volume "simple" OSD: just fix device ownership
                f.write('# Simple OSDs need chown on startup:\n')
                for n in ['block', 'block.db', 'block.wal']:
                    p = os.path.join(data_dir, n)
                    f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
            else:
                # LVM OSD: activate via ceph-volume in a privileged container
                prestart = CephContainer(
                    image=args.image,
                    entrypoint='/usr/sbin/ceph-volume',
                    args=[
                        'lvm', 'activate',
                        str(daemon_id), osd_fsid,
                        '--no-systemd'
                    ],
                    privileged=True,
                    volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
                    bind_mounts=get_container_binds(fsid, daemon_type, daemon_id),
                    cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
                )
                _write_container_cmd_to_bash(f, prestart, 'LVM OSDs use ceph-volume lvm activate')
        elif daemon_type == NFSGanesha.daemon_type:
            # add nfs to the rados grace db
            nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
            prestart = nfs_ganesha.get_rados_grace_container('add')
            _write_container_cmd_to_bash(f, prestart, 'add daemon to rados grace')
        elif daemon_type == CephIscsi.daemon_type:
            # mount configfs, then run tcmu-runner alongside the gateway
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
            ceph_iscsi = CephIscsi.init(fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            # fixed typo in generated comment: 'tcmu-runnter' -> 'tcmu-runner'
            _write_container_cmd_to_bash(f, tcmu_container, 'iscsi tcmu-runner container', background=True)

        _write_container_cmd_to_bash(f, c, '%s.%s' % (daemon_type, str(daemon_id)))
        os.fchmod(f.fileno(), 0o600)
        os.rename(data_dir + '/unit.run.new',
                  data_dir + '/unit.run')

    # post-stop command(s)
    with open(data_dir + '/unit.poststop.new', 'w') as f:
        if daemon_type == 'osd':
            assert osd_fsid
            poststop = CephContainer(
                image=args.image,
                entrypoint='/usr/sbin/ceph-volume',
                args=[
                    'lvm', 'deactivate',
                    str(daemon_id), osd_fsid,
                ],
                privileged=True,
                volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
                bind_mounts=get_container_binds(fsid, daemon_type, daemon_id),
                cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type,
                                                    daemon_id),
            )
            _write_container_cmd_to_bash(f, poststop, 'deactivate osd')
        elif daemon_type == NFSGanesha.daemon_type:
            # remove nfs from the rados grace db
            nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
            poststop = nfs_ganesha.get_rados_grace_container('remove')
            _write_container_cmd_to_bash(f, poststop, 'remove daemon from rados grace')
        elif daemon_type == CephIscsi.daemon_type:
            # make sure we also stop the tcmu container
            ceph_iscsi = CephIscsi.init(fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            f.write('! ' + ' '.join(tcmu_container.stop_cmd()) + '\n')
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
        os.fchmod(f.fileno(), 0o600)
        os.rename(data_dir + '/unit.poststop.new',
                  data_dir + '/unit.poststop')

    # record the image the daemon was deployed with
    with open(data_dir + '/unit.image.new', 'w') as f:
        f.write(c.image + '\n')
        os.fchmod(f.fileno(), 0o600)
        os.rename(data_dir + '/unit.image.new',
                  data_dir + '/unit.image')

    # systemd
    install_base_units(fsid)
    unit = get_unit_file(fsid)
    unit_file = 'ceph-%s@.service' % (fsid)
    with open(args.unit_dir + '/' + unit_file + '.new', 'w') as f:
        f.write(unit)
        os.rename(args.unit_dir + '/' + unit_file + '.new',
                  args.unit_dir + '/' + unit_file)
    call_throws(['systemctl', 'daemon-reload'])

    # stop/reset any stale instance of this unit before (re)starting it
    unit_name = get_unit_name(fsid, daemon_type, daemon_id)
    call(['systemctl', 'stop', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(['systemctl', 'reset-failed', unit_name],
         verbosity=CallVerbosity.DEBUG)
    if enable:
        call_throws(['systemctl', 'enable', unit_name])
    if start:
        call_throws(['systemctl', 'start', unit_name])
2316
2317
2318
class Firewalld(object):
    """Best-effort wrapper around firewall-cmd.

    Every method is a no-op (with a debug log) when firewalld is not
    installed, not enabled, or not running, so callers never need to guard.
    """

    def __init__(self):
        # type: () -> None
        self.available = self.check()

    def check(self):
        # type: () -> bool
        """Return True iff firewall-cmd exists and firewalld.service is
        enabled and running; also caches the firewall-cmd path in self.cmd."""
        self.cmd = find_executable('firewall-cmd')
        if not self.cmd:
            logger.debug('firewalld does not appear to be present')
            return False
        (enabled, state, _) = check_unit('firewalld.service')
        if not enabled:
            logger.debug('firewalld.service is not enabled')
            return False
        if state != "running":
            logger.debug('firewalld.service is not running')
            return False

        logger.info("firewalld ready")
        return True

    def enable_service_for(self, daemon_type):
        # type: (str) -> None
        """Permanently enable the firewalld service matching daemon_type
        (ceph-mon / ceph / nfs); silently ignores unknown daemon types.

        :raises RuntimeError: if firewall-cmd fails to add the service
        """
        if not self.available:
            logger.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type)
            return

        if daemon_type == 'mon':
            svc = 'ceph-mon'
        elif daemon_type in ['mgr', 'mds', 'osd']:
            svc = 'ceph'
        elif daemon_type == NFSGanesha.daemon_type:
            svc = 'nfs'
        else:
            return

        out, err, ret = call([self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG)
        if ret:
            logger.info('Enabling firewalld service %s in current zone...' % svc)
            out, err, ret = call([self.cmd, '--permanent', '--add-service', svc])
            if ret:
                raise RuntimeError(
                    'unable to add service %s to current zone: %s' % (svc, err))
        else:
            logger.debug('firewalld service %s is enabled in current zone' % svc)

    def open_ports(self, fw_ports):
        # type: (List[int]) -> None
        """Permanently open each TCP port in fw_ports (if not already open).

        :raises RuntimeError: if firewall-cmd fails to add a port
        """
        if not self.available:
            logger.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports)
            return

        for port in fw_ports:
            tcp_port = str(port) + '/tcp'
            out, err, ret = call([self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
            if ret:
                logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
                out, err, ret = call([self.cmd, '--permanent', '--add-port', tcp_port])
                if ret:
                    raise RuntimeError('unable to add port %s to current zone: %s' %
                                       (tcp_port, err))
            else:
                logger.debug('firewalld port %s is enabled in current zone' % tcp_port)
        # NOTE: a stray leftover `--query-port` call (using the obsolete
        # `verbose_on_failure` kwarg) used to run here after the loop with
        # its result discarded; it has been removed.

    def apply_rules(self):
        # type: () -> None
        """Reload firewalld so that --permanent changes take effect now."""
        if not self.available:
            return

        call_throws([self.cmd, '--reload'])
2391
2392
def update_firewalld(daemon_type):
    # type: (str) -> None
    """Enable the firewalld service and monitoring ports for daemon_type,
    then reload the firewall rules."""
    fw = Firewalld()

    fw.enable_service_for(daemon_type)

    # prometheus / alertmanager / grafana etc. listen on well-known ports
    ports = list(Monitoring.port_map.get(daemon_type, []))

    fw.open_ports(ports)
    fw.apply_rules()
2406
def install_base_units(fsid):
    # type: (str) -> None
    """
    Set up ceph.target and ceph-$fsid.target units.
    """
    # global unit
    existed = os.path.exists(args.unit_dir + '/ceph.target')
    with open(args.unit_dir + '/ceph.target.new', 'w') as f:
        f.write('[Unit]\n'
                'Description=All Ceph clusters and services\n'
                '\n'
                '[Install]\n'
                'WantedBy=multi-user.target\n')
        os.rename(args.unit_dir + '/ceph.target.new',
                  args.unit_dir + '/ceph.target')
    if not existed:
        # we disable before enable in case a different ceph.target
        # (from the traditional package) is present; while newer
        # systemd is smart enough to disable the old
        # (/lib/systemd/...) and enable the new (/etc/systemd/...),
        # some older versions of systemd error out with EEXIST.
        call_throws(['systemctl', 'disable', 'ceph.target'])
        call_throws(['systemctl', 'enable', 'ceph.target'])
        call_throws(['systemctl', 'start', 'ceph.target'])

    # cluster unit
    existed = os.path.exists(args.unit_dir + '/ceph-%s.target' % fsid)
    with open(args.unit_dir + '/ceph-%s.target.new' % fsid, 'w') as f:
        f.write('[Unit]\n'
                'Description=Ceph cluster {fsid}\n'
                'PartOf=ceph.target\n'
                'Before=ceph.target\n'
                '\n'
                '[Install]\n'
                'WantedBy=multi-user.target ceph.target\n'.format(
                    fsid=fsid)
                )
        os.rename(args.unit_dir + '/ceph-%s.target.new' % fsid,
                  args.unit_dir + '/ceph-%s.target' % fsid)
    if not existed:
        call_throws(['systemctl', 'enable', 'ceph-%s.target' % fsid])
        call_throws(['systemctl', 'start', 'ceph-%s.target' % fsid])

    # logrotate for the cluster
    with open(args.logrotate_dir + '/ceph-%s' % fsid, 'w') as f:
        """
        This is a bit sloppy in that the killall/pkill will touch all ceph daemons
        in all containers, but I don't see an elegant way to send SIGHUP *just* to
        the daemons for this cluster.  (1) systemd kill -s will get the signal to
        podman, but podman will exit.  (2) podman kill will get the signal to the
        first child (bash), but that isn't the ceph daemon.  This is simpler and
        should be harmless.
        """
        f.write("""# created by cephadm
/var/log/ceph/%s/*.log {
    rotate 7
    daily
    compress
    sharedscripts
    postrotate
        killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror || pkill -1 -x "ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror" || true
    endscript
    missingok
    notifempty
    su root root
}
""" % fsid)
2474
2475
def get_unit_file(fsid):
    # type: (str) -> str
    """Return the templated ceph-$fsid@.service systemd unit file content.

    The '%i' systemd specifier expands to '<daemon_type>.<daemon_id>'.
    """
    extra_args = ''
    if 'podman' in container_path:
        # podman forks and writes a pid/cid file, so the unit must be
        # Type=forking; clean up stale pid/cid files around start/stop.
        extra_args = ('ExecStartPre=-/bin/rm -f /%t/%n-pid /%t/%n-cid\n'
                      'ExecStopPost=-/bin/rm -f /%t/%n-pid /%t/%n-cid\n'
                      'Type=forking\n'
                      'PIDFile=/%t/%n-pid\n')

    u = """# generated by cephadm
[Unit]
Description=Ceph %i for {fsid}

# According to:
#   http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
# these can be removed once ceph-mon will dynamically change network
# configuration.
After=network-online.target local-fs.target time-sync.target
Wants=network-online.target local-fs.target time-sync.target

PartOf=ceph-{fsid}.target
Before=ceph-{fsid}.target

[Service]
LimitNOFILE=1048576
LimitNPROC=1048576
EnvironmentFile=-/etc/environment
ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
ExecStop=-{container_path} stop ceph-{fsid}-%i
ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
KillMode=none
Restart=on-failure
RestartSec=10s
TimeoutStartSec=120
TimeoutStopSec=120
StartLimitInterval=30min
StartLimitBurst=5
{extra_args}
[Install]
WantedBy=ceph-{fsid}.target
""".format(
        container_path=container_path,
        fsid=fsid,
        data_dir=args.data_dir,
        extra_args=extra_args)

    return u
2523
2524 ##################################
2525
2526
class CephContainer:
    """Describes one container invocation and builds the matching
    podman/docker command lines (run, shell, exec, rm, stop)."""

    def __init__(self,
                 image: str,
                 entrypoint: str,
                 args: Optional[List[str]] = None,
                 volume_mounts: Optional[Dict[str, str]] = None,
                 cname: str = '',
                 container_args: Optional[List[str]] = None,
                 envs: Optional[List[str]] = None,
                 privileged: bool = False,
                 ptrace: bool = False,
                 bind_mounts: Optional[List[List[str]]] = None,
                 init: bool = False,
                 host_network: bool = True,
                 ) -> None:
        """
        :param image: container image reference
        :param entrypoint: --entrypoint to use ('' for the image default)
        :param args: arguments passed to the entrypoint
        :param volume_mounts: host_dir -> container_dir '-v' mounts
        :param cname: container name (--name)
        :param container_args: extra args for the container engine itself
        :param envs: extra KEY=VALUE environment entries
        :param privileged: run with --privileged (+ disk group)
        :param ptrace: add SYS_PTRACE capability (for debugging)
        :param bind_mounts: raw --mount option lists
        :param init: pass --init to the engine
        :param host_network: use --net=host
        """
        self.image = image
        self.entrypoint = entrypoint
        # Fixed: [] / {} defaults were shared mutable objects across every
        # instance constructed without these kwargs; use None sentinels.
        self.args = args if args is not None else []
        self.volume_mounts = volume_mounts if volume_mounts is not None else {}
        self.cname = cname
        self.container_args = container_args if container_args is not None else []
        self.envs = envs
        self.privileged = privileged
        self.ptrace = ptrace
        self.bind_mounts = bind_mounts if bind_mounts else []
        self.init = init
        self.host_network = host_network

    def _mount_args(self) -> List[str]:
        """'-v' and '--mount' arguments for volume_mounts and bind_mounts."""
        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        binds = sum([['--mount', '{}'.format(','.join(bind))]
                     for bind in self.bind_mounts], [])
        return vols + binds

    def run_cmd(self) -> List[str]:
        """Full `<engine> run ...` command line for this container."""
        cmd_args: List[str] = [
            str(container_path),
            'run',
            '--rm',
            '--ipc=host',
        ]
        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]

        if self.host_network:
            cmd_args.append('--net=host')
        if self.entrypoint:
            cmd_args.extend(['--entrypoint', self.entrypoint])
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk'])
        if self.ptrace and not self.privileged:
            # if privileged, the SYS_PTRACE cap is already added
            # in addition, --cap-add and --privileged are mutually
            # exclusive since podman >= 2.0
            cmd_args.append('--cap-add=SYS_PTRACE')
        if self.init:
            cmd_args.append('--init')
        if self.cname:
            cmd_args.extend(['--name', self.cname])
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        return cmd_args + self.container_args + envs + self._mount_args() + [
            self.image,
        ] + self.args  # type: ignore

    def shell_cmd(self, cmd: List[str]) -> List[str]:
        """Like run_cmd(), but runs *cmd* instead of the stored entrypoint
        (and skips name/init/ptrace options)."""
        cmd_args: List[str] = [
            str(container_path),
            'run',
            '--rm',
            '--ipc=host',
        ]
        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]

        if self.host_network:
            cmd_args.append('--net=host')
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk',
            ])
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        return cmd_args + self.container_args + envs + self._mount_args() + [
            '--entrypoint', cmd[0],
            self.image,
        ] + cmd[1:]

    def exec_cmd(self, cmd):
        # type: (List[str]) -> List[str]
        """`<engine> exec` command to run *cmd* inside the (named) container."""
        return [
            str(container_path),
            'exec',
        ] + self.container_args + [
            self.cname,
        ] + cmd

    def rm_cmd(self, storage=False):
        # type: (bool) -> List[str]
        """`<engine> rm -f` command; with storage=True also pass --storage
        so podman removes leftover container storage."""
        ret = [
            str(container_path),
            'rm', '-f',
        ]
        if storage:
            ret.append('--storage')
        ret.append(self.cname)
        return ret

    def stop_cmd(self):
        # type: () -> List[str]
        # fixed malformed type comment ('# type ()' was missing the colon)
        """`<engine> stop` command for the named container."""
        ret = [
            str(container_path),
            'stop', self.cname,
        ]
        return ret

    def run(self, timeout=None):
        # type: (Optional[int]) -> str
        """Run the container, raising on failure; returns captured stdout.

        timeout defaults to DEFAULT_TIMEOUT (resolved at call time so the
        module-level setting can be changed after import).
        """
        if timeout is None:
            timeout = DEFAULT_TIMEOUT
        out, _, _ = call_throws(
            self.run_cmd(), desc=self.entrypoint, timeout=timeout)
        return out
2671
2672 ##################################
2673
2674
@infer_image
def command_version():
    # type: () -> int
    """Print the ceph version reported by the configured container image."""
    version_text = CephContainer(args.image, 'ceph', ['--version']).run()
    print(version_text.strip())
    return 0
2681
2682 ##################################
2683
2684
@infer_image
def command_pull():
    # type: () -> int
    """Pull the configured container image, then report what was pulled."""
    _pull_image(args.image)
    return command_inspect_image()
2691
2692
def _pull_image(image):
    # type: (str) -> None
    """Pull *image* with the container engine, retrying known-transient
    failures up to three times with increasing backoff.

    :raises RuntimeError: on a non-transient failure, or when retries are
        exhausted
    """
    logger.info('Pulling container image %s...' % image)

    # error substrings that indicate a transient failure worth retrying
    ignorelist = [
        "error creating read-write layer with ID",
        "net/http: TLS handshake timeout",
        "Digest did not match, expected",
    ]

    cmd = [container_path, 'pull', image]
    cmd_str = ' '.join(cmd)

    for sleep_secs in [1, 4, 25]:
        out, err, ret = call(cmd)
        if not ret:
            return

        if not any(pattern in err for pattern in ignorelist):
            raise RuntimeError('Failed command: %s' % cmd_str)

        # fixed: the message had a stray leading '"' character
        logger.info('%s failed transiently. Retrying. waiting %s seconds...' % (cmd_str, sleep_secs))
        time.sleep(sleep_secs)

    raise RuntimeError('Failed command: %s: maximum retries reached' % cmd_str)
2718 ##################################
2719
2720
@infer_image
def command_inspect_image():
    # type: () -> int
    """Inspect the configured image and print its id, repo digest, and
    ceph version as pretty-printed JSON. Returns ENOENT if inspect fails."""
    out, err, ret = call_throws([
        container_path, 'inspect',
        '--format', '{{.ID}},{{json .RepoDigests}}',
        args.image])
    if ret:
        return errno.ENOENT

    info = get_image_info_from_inspect(out.strip(), args.image)
    version_text = CephContainer(args.image, 'ceph', ['--version']).run()
    info['ceph_version'] = version_text.strip()

    print(json.dumps(info, indent=4, sort_keys=True))
    return 0
2737
2738
def get_image_info_from_inspect(out, image):
    # type: (str, str) -> Dict[str, str]
    """Parse `<engine> inspect --format '{{.ID}},{{json .RepoDigests}}'` output
    into {'image_id': ..., 'repo_digest': ...}.

    :param out: raw inspect output ('<id>,<json digest list>')
    :param image: image name, used only for the error message
    :raises Error: if *out* is empty
    """
    # Fixed: the emptiness check must come before the split -- ''.split(',', 1)
    # yields a single element, so the two-name unpack raised ValueError before
    # the intended Error could ever be raised.
    if not out:
        raise Error('inspect {}: empty result'.format(image))
    image_id, digests = out.split(',', 1)
    r = {
        'image_id': normalize_container_id(image_id)
    }
    if digests:
        json_digests = json.loads(digests)
        if json_digests:
            # first digest is the canonical repo digest
            r['repo_digest'] = json_digests[0]
    return r
2752
2753
2754 ##################################
2755
2756
def unwrap_ipv6(address):
    # type: (str) -> str
    """Strip surrounding square brackets from a bracketed IPv6 address;
    other strings are returned unchanged."""
    is_bracketed = address.startswith('[') and address.endswith(']')
    return address[1:-1] if is_bracketed else address
2762
2763
def wrap_ipv6(address):
    # type: (str) -> str
    """Wrap *address* in square brackets if it is a bare IPv6 address.

    Already-bracketed addresses, IPv4 addresses, and hostnames fail the
    ip_address() parse (ValueError) and are returned unchanged.
    """
    try:
        # Fixed: ipaddress.ip_address() takes a str directly on Python 3;
        # the old code went through the Python-2-only unicode() builtin.
        if ipaddress.ip_address(address).version == 6:
            return f"[{address}]"
    except ValueError:
        pass

    return address
2777
2778
def is_ipv6(address):
    # type: (str) -> bool
    """Return True if *address* (optionally bracketed) is a valid IPv6
    address; logs a warning and returns False for non-IP strings."""
    address = unwrap_ipv6(address)
    try:
        # Fixed: dropped the Python-2-only unicode() wrapper; on Python 3
        # ipaddress.ip_address() accepts str directly.
        return ipaddress.ip_address(address).version == 6
    except ValueError:
        logger.warning("Address: {} isn't a valid IP address".format(address))
        return False
2787
2788
2789 @default_image
2790 def command_bootstrap():
2791 # type: () -> int
2792
2793 if not args.output_config:
2794 args.output_config = os.path.join(args.output_dir, 'ceph.conf')
2795 if not args.output_keyring:
2796 args.output_keyring = os.path.join(args.output_dir,
2797 'ceph.client.admin.keyring')
2798 if not args.output_pub_ssh_key:
2799 args.output_pub_ssh_key = os.path.join(args.output_dir, 'ceph.pub')
2800
2801 # verify output files
2802 for f in [args.output_config, args.output_keyring, args.output_pub_ssh_key]:
2803 if not args.allow_overwrite:
2804 if os.path.exists(f):
2805 raise Error('%s already exists; delete or pass '
2806 '--allow-overwrite to overwrite' % f)
2807 dirname = os.path.dirname(f)
2808 if dirname and not os.path.exists(dirname):
2809 fname = os.path.basename(f)
2810 logger.info(f"Creating directory {dirname} for {fname}")
2811 try:
2812 # use makedirs to create intermediate missing dirs
2813 os.makedirs(dirname, 0o755)
2814 except PermissionError:
2815 raise Error(f"Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.")
2816
2817
2818 if not args.skip_prepare_host:
2819 command_prepare_host()
2820 else:
2821 logger.info('Skip prepare_host')
2822
2823 # initial vars
2824 fsid = args.fsid or make_fsid()
2825 hostname = get_hostname()
2826 if '.' in hostname and not args.allow_fqdn_hostname:
2827 raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0]))
2828 mon_id = args.mon_id or hostname
2829 mgr_id = args.mgr_id or generate_service_id()
2830 logger.info('Cluster fsid: %s' % fsid)
2831 ipv6 = False
2832
2833 l = FileLock(fsid)
2834 l.acquire()
2835
2836 # ip
2837 r = re.compile(r':(\d+)$')
2838 base_ip = ''
2839 if args.mon_ip:
2840 ipv6 = is_ipv6(args.mon_ip)
2841 if ipv6:
2842 args.mon_ip = wrap_ipv6(args.mon_ip)
2843 hasport = r.findall(args.mon_ip)
2844 if hasport:
2845 port = int(hasport[0])
2846 if port == 6789:
2847 addr_arg = '[v1:%s]' % args.mon_ip
2848 elif port == 3300:
2849 addr_arg = '[v2:%s]' % args.mon_ip
2850 else:
2851 logger.warning('Using msgr2 protocol for unrecognized port %d' %
2852 port)
2853 addr_arg = '[v2:%s]' % args.mon_ip
2854 base_ip = args.mon_ip[0:-(len(str(port)))-1]
2855 check_ip_port(base_ip, port)
2856 else:
2857 base_ip = args.mon_ip
2858 addr_arg = '[v2:%s:3300,v1:%s:6789]' % (args.mon_ip, args.mon_ip)
2859 check_ip_port(args.mon_ip, 3300)
2860 check_ip_port(args.mon_ip, 6789)
2861 elif args.mon_addrv:
2862 addr_arg = args.mon_addrv
2863 if addr_arg[0] != '[' or addr_arg[-1] != ']':
2864 raise Error('--mon-addrv value %s must use square backets' %
2865 addr_arg)
2866 ipv6 = addr_arg.count('[') > 1
2867 for addr in addr_arg[1:-1].split(','):
2868 hasport = r.findall(addr)
2869 if not hasport:
2870 raise Error('--mon-addrv value %s must include port number' %
2871 addr_arg)
2872 port = int(hasport[0])
2873 # strip off v1: or v2: prefix
2874 addr = re.sub(r'^\w+:', '', addr)
2875 base_ip = addr[0:-(len(str(port)))-1]
2876 check_ip_port(base_ip, port)
2877 else:
2878 raise Error('must specify --mon-ip or --mon-addrv')
2879 logger.debug('Base mon IP is %s, final addrv is %s' % (base_ip, addr_arg))
2880
2881 mon_network = None
2882 if not args.skip_mon_network:
2883 # make sure IP is configured locally, and then figure out the
2884 # CIDR network
2885 for net, ips in list_networks().items():
2886 if ipaddress.ip_address(unicode(unwrap_ipv6(base_ip))) in \
2887 [ipaddress.ip_address(unicode(ip)) for ip in ips]:
2888 mon_network = net
2889 logger.info('Mon IP %s is in CIDR network %s' % (base_ip,
2890 mon_network))
2891 break
2892 if not mon_network:
2893 raise Error('Failed to infer CIDR network for mon ip %s; pass '
2894 '--skip-mon-network to configure it later' % base_ip)
2895
2896 # config
2897 cp = read_config(args.config)
2898 if not cp.has_section('global'):
2899 cp.add_section('global')
2900 cp.set('global', 'fsid', fsid);
2901 cp.set('global', 'mon host', addr_arg)
2902 cp.set('global', 'container_image', args.image)
2903 cpf = StringIO()
2904 cp.write(cpf)
2905 config = cpf.getvalue()
2906
2907 if args.registry_json or args.registry_url:
2908 command_registry_login()
2909
2910 if not args.skip_pull:
2911 _pull_image(args.image)
2912
2913 logger.info('Extracting ceph user uid/gid from container image...')
2914 (uid, gid) = extract_uid_gid()
2915
2916 # create some initial keys
2917 logger.info('Creating initial keys...')
2918 mon_key = CephContainer(
2919 image=args.image,
2920 entrypoint='/usr/bin/ceph-authtool',
2921 args=['--gen-print-key'],
2922 ).run().strip()
2923 admin_key = CephContainer(
2924 image=args.image,
2925 entrypoint='/usr/bin/ceph-authtool',
2926 args=['--gen-print-key'],
2927 ).run().strip()
2928 mgr_key = CephContainer(
2929 image=args.image,
2930 entrypoint='/usr/bin/ceph-authtool',
2931 args=['--gen-print-key'],
2932 ).run().strip()
2933
2934 keyring = ('[mon.]\n'
2935 '\tkey = %s\n'
2936 '\tcaps mon = allow *\n'
2937 '[client.admin]\n'
2938 '\tkey = %s\n'
2939 '\tcaps mon = allow *\n'
2940 '\tcaps mds = allow *\n'
2941 '\tcaps mgr = allow *\n'
2942 '\tcaps osd = allow *\n'
2943 '[mgr.%s]\n'
2944 '\tkey = %s\n'
2945 '\tcaps mon = profile mgr\n'
2946 '\tcaps mds = allow *\n'
2947 '\tcaps osd = allow *\n'
2948 % (mon_key, admin_key, mgr_id, mgr_key))
2949
2950 # tmp keyring file
2951 tmp_bootstrap_keyring = write_tmp(keyring, uid, gid)
2952
2953 # create initial monmap, tmp monmap file
2954 logger.info('Creating initial monmap...')
2955 tmp_monmap = write_tmp('', 0, 0)
2956 out = CephContainer(
2957 image=args.image,
2958 entrypoint='/usr/bin/monmaptool',
2959 args=['--create',
2960 '--clobber',
2961 '--fsid', fsid,
2962 '--addv', mon_id, addr_arg,
2963 '/tmp/monmap'
2964 ],
2965 volume_mounts={
2966 tmp_monmap.name: '/tmp/monmap:z',
2967 },
2968 ).run()
2969
2970 # pass monmap file to ceph user for use by ceph-mon --mkfs below
2971 os.fchown(tmp_monmap.fileno(), uid, gid)
2972
2973 # create mon
2974 logger.info('Creating mon...')
2975 create_daemon_dirs(fsid, 'mon', mon_id, uid, gid)
2976 mon_dir = get_data_dir(fsid, 'mon', mon_id)
2977 log_dir = get_log_dir(fsid)
2978 out = CephContainer(
2979 image=args.image,
2980 entrypoint='/usr/bin/ceph-mon',
2981 args=['--mkfs',
2982 '-i', mon_id,
2983 '--fsid', fsid,
2984 '-c', '/dev/null',
2985 '--monmap', '/tmp/monmap',
2986 '--keyring', '/tmp/keyring',
2987 ] + get_daemon_args(fsid, 'mon', mon_id),
2988 volume_mounts={
2989 log_dir: '/var/log/ceph:z',
2990 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
2991 tmp_bootstrap_keyring.name: '/tmp/keyring:z',
2992 tmp_monmap.name: '/tmp/monmap:z',
2993 },
2994 ).run()
2995
2996 with open(mon_dir + '/config', 'w') as f:
2997 os.fchown(f.fileno(), uid, gid)
2998 os.fchmod(f.fileno(), 0o600)
2999 f.write(config)
3000
3001 make_var_run(fsid, uid, gid)
3002 mon_c = get_container(fsid, 'mon', mon_id)
3003 deploy_daemon(fsid, 'mon', mon_id, mon_c, uid, gid,
3004 config=None, keyring=None)
3005
3006 # client.admin key + config to issue various CLI commands
3007 tmp_admin_keyring = write_tmp('[client.admin]\n'
3008 '\tkey = ' + admin_key + '\n',
3009 uid, gid)
3010 tmp_config = write_tmp(config, uid, gid)
3011
3012 # a CLI helper to reduce our typing
3013 def cli(cmd, extra_mounts={}, timeout=DEFAULT_TIMEOUT):
3014 # type: (List[str], Dict[str, str], Optional[int]) -> str
3015 mounts = {
3016 log_dir: '/var/log/ceph:z',
3017 tmp_admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
3018 tmp_config.name: '/etc/ceph/ceph.conf:z',
3019 }
3020 for k, v in extra_mounts.items():
3021 mounts[k] = v
3022 timeout = timeout or args.timeout
3023 return CephContainer(
3024 image=args.image,
3025 entrypoint='/usr/bin/ceph',
3026 args=cmd,
3027 volume_mounts=mounts,
3028 ).run(timeout=timeout)
3029
3030 logger.info('Waiting for mon to start...')
3031 c = CephContainer(
3032 image=args.image,
3033 entrypoint='/usr/bin/ceph',
3034 args=[
3035 'status'],
3036 volume_mounts={
3037 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
3038 tmp_admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
3039 tmp_config.name: '/etc/ceph/ceph.conf:z',
3040 },
3041 )
3042
3043 # wait for the service to become available
3044 def is_mon_available():
3045 # type: () -> bool
3046 timeout=args.timeout if args.timeout else 60 # seconds
3047 out, err, ret = call(c.run_cmd(),
3048 desc=c.entrypoint,
3049 timeout=timeout)
3050 return ret == 0
3051 is_available('mon', is_mon_available)
3052
3053 # assimilate and minimize config
3054 if not args.no_minimize_config:
3055 logger.info('Assimilating anything we can from ceph.conf...')
3056 cli([
3057 'config', 'assimilate-conf',
3058 '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
3059 ], {
3060 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
3061 })
3062 logger.info('Generating new minimal ceph.conf...')
3063 cli([
3064 'config', 'generate-minimal-conf',
3065 '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
3066 ], {
3067 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
3068 })
3069 # re-read our minimized config
3070 with open(mon_dir + '/config', 'r') as f:
3071 config = f.read()
3072 logger.info('Restarting the monitor...')
3073 call_throws([
3074 'systemctl',
3075 'restart',
3076 get_unit_name(fsid, 'mon', mon_id)
3077 ])
3078
3079 if mon_network:
3080 logger.info('Setting mon public_network...')
3081 cli(['config', 'set', 'mon', 'public_network', mon_network])
3082
3083 if ipv6:
3084 logger.info('Enabling IPv6 (ms_bind_ipv6)')
3085 cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])
3086
3087 # create mgr
3088 logger.info('Creating mgr...')
3089 mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key)
3090 mgr_c = get_container(fsid, 'mgr', mgr_id)
3091 # Note:the default port used by the Prometheus node exporter is opened in fw
3092 deploy_daemon(fsid, 'mgr', mgr_id, mgr_c, uid, gid,
3093 config=config, keyring=mgr_keyring, ports=[9283])
3094
3095 # output files
3096 with open(args.output_keyring, 'w') as f:
3097 os.fchmod(f.fileno(), 0o600)
3098 f.write('[client.admin]\n'
3099 '\tkey = ' + admin_key + '\n')
3100 logger.info('Wrote keyring to %s' % args.output_keyring)
3101
3102 with open(args.output_config, 'w') as f:
3103 f.write(config)
3104 logger.info('Wrote config to %s' % args.output_config)
3105
3106 # wait for the service to become available
3107 logger.info('Waiting for mgr to start...')
3108 def is_mgr_available():
3109 # type: () -> bool
3110 timeout=args.timeout if args.timeout else 60 # seconds
3111 try:
3112 out = cli(['status', '-f', 'json-pretty'], timeout=timeout)
3113 j = json.loads(out)
3114 return j.get('mgrmap', {}).get('available', False)
3115 except Exception as e:
3116 logger.debug('status failed: %s' % e)
3117 return False
3118 is_available('mgr', is_mgr_available)
3119
3120 # wait for mgr to restart (after enabling a module)
3121 def wait_for_mgr_restart():
3122 # first get latest mgrmap epoch from the mon
3123 out = cli(['mgr', 'dump'])
3124 j = json.loads(out)
3125 epoch = j['epoch']
3126 # wait for mgr to have it
3127 logger.info('Waiting for the mgr to restart...')
3128 def mgr_has_latest_epoch():
3129 # type: () -> bool
3130 try:
3131 out = cli(['tell', 'mgr', 'mgr_status'])
3132 j = json.loads(out)
3133 return j['mgrmap_epoch'] >= epoch
3134 except Exception as e:
3135 logger.debug('tell mgr mgr_status failed: %s' % e)
3136 return False
3137 is_available('Mgr epoch %d' % epoch, mgr_has_latest_epoch)
3138
3139 # ssh
3140 if not args.skip_ssh:
3141 cli(['config-key', 'set', 'mgr/cephadm/ssh_user', args.ssh_user])
3142
3143 logger.info('Enabling cephadm module...')
3144 cli(['mgr', 'module', 'enable', 'cephadm'])
3145 wait_for_mgr_restart()
3146
3147 logger.info('Setting orchestrator backend to cephadm...')
3148 cli(['orch', 'set', 'backend', 'cephadm'])
3149
3150 if args.ssh_config:
3151 logger.info('Using provided ssh config...')
3152 mounts = {
3153 pathify(args.ssh_config.name): '/tmp/cephadm-ssh-config:z',
3154 }
3155 cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts=mounts)
3156
3157 if args.ssh_private_key and args.ssh_public_key:
3158 logger.info('Using provided ssh keys...')
3159 mounts = {
3160 pathify(args.ssh_private_key.name): '/tmp/cephadm-ssh-key:z',
3161 pathify(args.ssh_public_key.name): '/tmp/cephadm-ssh-key.pub:z'
3162 }
3163 cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts)
3164 cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts=mounts)
3165 else:
3166 logger.info('Generating ssh key...')
3167 cli(['cephadm', 'generate-key'])
3168 ssh_pub = cli(['cephadm', 'get-pub-key'])
3169
3170 with open(args.output_pub_ssh_key, 'w') as f:
3171 f.write(ssh_pub)
3172 logger.info('Wrote public SSH key to to %s' % args.output_pub_ssh_key)
3173
3174 logger.info('Adding key to %s@localhost\'s authorized_keys...' % args.ssh_user)
3175 try:
3176 s_pwd = pwd.getpwnam(args.ssh_user)
3177 except KeyError as e:
3178 raise Error('Cannot find uid/gid for ssh-user: %s' % (args.ssh_user))
3179 ssh_uid = s_pwd.pw_uid
3180 ssh_gid = s_pwd.pw_gid
3181 ssh_dir = os.path.join(s_pwd.pw_dir, '.ssh')
3182
3183 if not os.path.exists(ssh_dir):
3184 makedirs(ssh_dir, ssh_uid, ssh_gid, 0o700)
3185
3186 auth_keys_file = '%s/authorized_keys' % ssh_dir
3187 add_newline = False
3188
3189 if os.path.exists(auth_keys_file):
3190 with open(auth_keys_file, 'r') as f:
3191 f.seek(0, os.SEEK_END)
3192 if f.tell() > 0:
3193 f.seek(f.tell()-1, os.SEEK_SET) # go to last char
3194 if f.read() != '\n':
3195 add_newline = True
3196
3197 with open(auth_keys_file, 'a') as f:
3198 os.fchown(f.fileno(), ssh_uid, ssh_gid) # just in case we created it
3199 os.fchmod(f.fileno(), 0o600) # just in case we created it
3200 if add_newline:
3201 f.write('\n')
3202 f.write(ssh_pub.strip() + '\n')
3203
3204 host = get_hostname()
3205 logger.info('Adding host %s...' % host)
3206 try:
3207 cli(['orch', 'host', 'add', host])
3208 except RuntimeError as e:
3209 raise Error('Failed to add host <%s>: %s' % (host, e))
3210
3211 if not args.orphan_initial_daemons:
3212 for t in ['mon', 'mgr', 'crash']:
3213 logger.info('Deploying %s service with default placement...' % t)
3214 cli(['orch', 'apply', t])
3215
3216 if not args.skip_monitoring_stack:
3217 logger.info('Enabling mgr prometheus module...')
3218 cli(['mgr', 'module', 'enable', 'prometheus'])
3219 for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
3220 logger.info('Deploying %s service with default placement...' % t)
3221 cli(['orch', 'apply', t])
3222
3223 if args.registry_url and args.registry_username and args.registry_password:
3224 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_url', args.registry_url, '--force'])
3225 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_username', args.registry_username, '--force'])
3226 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_password', args.registry_password, '--force'])
3227
3228 if args.container_init:
3229 cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(args.container_init), '--force'])
3230
3231 if not args.skip_dashboard:
3232 # Configure SSL port (cephadm only allows to configure dashboard SSL port)
3233 # if the user does not want to use SSL he can change this setting once the cluster is up
3234 cli(["config", "set", "mgr", "mgr/dashboard/ssl_server_port" , str(args.ssl_dashboard_port)])
3235
3236 # configuring dashboard parameters
3237 logger.info('Enabling the dashboard module...')
3238 cli(['mgr', 'module', 'enable', 'dashboard'])
3239 wait_for_mgr_restart()
3240
3241 # dashboard crt and key
3242 if args.dashboard_key and args.dashboard_crt:
3243 logger.info('Using provided dashboard certificate...')
3244 mounts = {
3245 pathify(args.dashboard_crt.name): '/tmp/dashboard.crt:z',
3246 pathify(args.dashboard_key.name): '/tmp/dashboard.key:z'
3247 }
3248 cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts)
3249 cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts)
3250 else:
3251 logger.info('Generating a dashboard self-signed certificate...')
3252 cli(['dashboard', 'create-self-signed-cert'])
3253
3254 logger.info('Creating initial admin user...')
3255 password = args.initial_dashboard_password or generate_password()
3256 cmd = ['dashboard', 'ac-user-create', args.initial_dashboard_user, password, 'administrator', '--force-password']
3257 if not args.dashboard_password_noupdate:
3258 cmd.append('--pwd-update-required')
3259 cli(cmd)
3260 logger.info('Fetching dashboard port number...')
3261 out = cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
3262 port = int(out)
3263
3264 # Open dashboard port
3265 fw = Firewalld()
3266 fw.open_ports([port])
3267 fw.apply_rules()
3268
3269 logger.info('Ceph Dashboard is now available at:\n\n'
3270 '\t URL: https://%s:%s/\n'
3271 '\t User: %s\n'
3272 '\tPassword: %s\n' % (
3273 get_fqdn(), port,
3274 args.initial_dashboard_user,
3275 password))
3276
3277 if args.apply_spec:
3278 logger.info('Applying %s to cluster' % args.apply_spec)
3279
3280 with open(args.apply_spec) as f:
3281 for line in f:
3282 if 'hostname:' in line:
3283 line = line.replace('\n', '')
3284 split = line.split(': ')
3285 if split[1] != host:
3286 logger.info('Adding ssh key to %s' % split[1])
3287
3288 ssh_key = '/etc/ceph/ceph.pub'
3289 if args.ssh_public_key:
3290 ssh_key = args.ssh_public_key.name
3291 out, err, code = call_throws(['ssh-copy-id', '-f', '-i', ssh_key, '%s@%s' % (args.ssh_user, split[1])])
3292
3293 mounts = {}
3294 mounts[pathify(args.apply_spec)] = '/tmp/spec.yml:z'
3295
3296 out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
3297 logger.info(out)
3298
3299 logger.info('You can access the Ceph CLI with:\n\n'
3300 '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
3301 sys.argv[0],
3302 fsid,
3303 args.output_config,
3304 args.output_keyring))
3305 logger.info('Please consider enabling telemetry to help improve Ceph:\n\n'
3306 '\tceph telemetry on\n\n'
3307 'For more information see:\n\n'
3308 '\thttps://docs.ceph.com/docs/master/mgr/telemetry/\n')
3309 logger.info('Bootstrap complete.')
3310 return 0
3311
3312 ##################################
3313
def command_registry_login():
    """Log in to a custom container registry, from CLI flags or a JSON file.

    The JSON file (``--registry-json``) must provide ``url``, ``username``
    and ``password``; on success those values are copied onto ``args`` so
    later bootstrap steps can store them in the cluster config.
    """
    if args.registry_json:
        logger.info("Pulling custom registry login info from %s." % args.registry_json)
        d = get_parm(args.registry_json)
        url = d.get('url')
        username = d.get('username')
        password = d.get('password')
        if not (url and username and password):
            raise Error("json provided for custom registry login did not include all necessary fields. "
                        "Please setup json file as\n"
                        "{\n"
                        " \"url\": \"REGISTRY_URL\",\n"
                        " \"username\": \"REGISTRY_USERNAME\",\n"
                        " \"password\": \"REGISTRY_PASSWORD\"\n"
                        "}\n")
        args.registry_url = url
        args.registry_username = username
        args.registry_password = password
        registry_login(args.registry_url, args.registry_username, args.registry_password)
        return 0

    if args.registry_url and args.registry_username and args.registry_password:
        registry_login(args.registry_url, args.registry_username, args.registry_password)
        return 0

    raise Error("Invalid custom registry arguments received. To login to a custom registry include "
                "--registry-url, --registry-username and --registry-password "
                "options or --registry-json option")
3338
def registry_login(url, username, password):
    # type: (str, str, str) -> None
    """Log in to the container registry at `url` with podman/docker.

    :param url: registry URL
    :param username: registry account name
    :param password: registry account password
    :raises Error: if the container runtime's login command fails
    """
    logger.info("Logging into custom registry.")
    try:
        out, _, _ = call_throws([container_path, 'login',
                                 '-u', username,
                                 '-p', password,
                                 url])
    # Fix: was a bare `except:`, which also swallows SystemExit and
    # KeyboardInterrupt; catch Exception instead.
    except Exception:
        # Fix: report the parameters actually used for this attempt rather
        # than the (possibly unset/stale) args.registry_* globals.
        raise Error("Failed to login to custom registry @ %s as %s with given password" % (url, username))
3348
3349 ##################################
3350
3351
def extract_uid_gid_monitoring(daemon_type):
    # type: (str) -> Tuple[int, int]
    """Return the (uid, gid) a monitoring-stack daemon should run as.

    For container-image-based daemons the ids are read from the image;
    node-exporter always runs as nobody/nobody.
    """
    if daemon_type == 'prometheus':
        return extract_uid_gid(file_path='/etc/prometheus')
    if daemon_type == 'node-exporter':
        # fixed nobody/nobody ids; no image lookup needed
        return 65534, 65534
    if daemon_type == 'grafana':
        return extract_uid_gid(file_path='/var/lib/grafana')
    if daemon_type == 'alertmanager':
        return extract_uid_gid(file_path=['/etc/alertmanager', '/etc/prometheus'])
    raise Error("{} not implemented yet".format(daemon_type))
3366
3367
@default_image
def command_deploy():
    # type: () -> None
    """Deploy, redeploy or reconfigure a single daemon on this host.

    Dispatches on daemon type (ceph daemons, monitoring stack, NFS
    ganesha, iscsi, custom container), gathers the per-type config,
    keyring, uid/gid and ports, and funnels into deploy_daemon().
    """
    daemon_type, daemon_id = args.name.split('.', 1)

    # serialize against other cephadm operations on this cluster
    l = FileLock(args.fsid)
    l.acquire()

    if daemon_type not in get_supported_daemons():
        raise Error('daemon type %s not recognized' % daemon_type)

    # if the systemd unit is already running, this is a redeploy
    redeploy = False
    unit_name = get_unit_name(args.fsid, daemon_type, daemon_id)
    (_, state, _) = check_unit(unit_name)
    if state == 'running':
        redeploy = True

    if args.reconfig:
        logger.info('%s daemon %s ...' % ('Reconfig', args.name))
    elif redeploy:
        logger.info('%s daemon %s ...' % ('Redeploy', args.name))
    else:
        logger.info('%s daemon %s ...' % ('Deploy', args.name))

    # Get and check ports explicitly required to be opened
    daemon_ports = []  # type: List[int]
    if args.tcp_ports:
        daemon_ports = list(map(int, args.tcp_ports.split()))

    if daemon_type in Ceph.daemons:
        config, keyring = get_config_and_keyring()
        uid, gid = extract_uid_gid()
        make_var_run(args.fsid, uid, gid)

        c = get_container(args.fsid, daemon_type, daemon_id,
                          ptrace=args.allow_ptrace)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      osd_fsid=args.osd_fsid,
                      reconfig=args.reconfig,
                      ports=daemon_ports)

    elif daemon_type in Monitoring.components:
        # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
        # Default Checks
        # ports are only opened on a fresh deploy, not reconfig/redeploy
        if not args.reconfig and not redeploy:
            daemon_ports.extend(Monitoring.port_map[daemon_type])

        # make sure provided config-json is sufficient
        config = get_parm(args.config_json)  # type: ignore
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())
        required_args = Monitoring.components[daemon_type].get('config-json-args', list())
        if required_files:
            if not config or not all(c in config.get('files', {}).keys() for c in required_files):  # type: ignore
                raise Error("{} deployment requires config-json which must "
                            "contain file content for {}".format(daemon_type.capitalize(), ', '.join(required_files)))
        if required_args:
            if not config or not all(c in config.keys() for c in required_args):  # type: ignore
                raise Error("{} deployment requires config-json which must "
                            "contain arg for {}".format(daemon_type.capitalize(), ', '.join(required_args)))

        uid, gid = extract_uid_gid_monitoring(daemon_type)
        c = get_container(args.fsid, daemon_type, daemon_id)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=args.reconfig,
                      ports=daemon_ports)

    elif daemon_type == NFSGanesha.daemon_type:
        if not args.reconfig and not redeploy:
            daemon_ports.extend(NFSGanesha.port_map.values())

        config, keyring = get_config_and_keyring()
        # TODO: extract ganesha uid/gid (997, 994) ?
        uid, gid = extract_uid_gid()
        c = get_container(args.fsid, daemon_type, daemon_id)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=args.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CephIscsi.daemon_type:
        config, keyring = get_config_and_keyring()
        uid, gid = extract_uid_gid()
        c = get_container(args.fsid, daemon_type, daemon_id)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=args.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CustomContainer.daemon_type:
        # custom containers carry their own uid/gid/privilege/ports spec
        cc = CustomContainer.init(args.fsid, daemon_id)
        if not args.reconfig and not redeploy:
            daemon_ports.extend(cc.ports)
        c = get_container(args.fsid, daemon_type, daemon_id,
                          privileged=cc.privileged,
                          ptrace=args.allow_ptrace)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c,
                      uid=cc.uid, gid=cc.gid, config=None,
                      keyring=None, reconfig=args.reconfig,
                      ports=daemon_ports)

    else:
        raise Error('daemon type {} not implemented in command_deploy function'
                    .format(daemon_type))
3472
3473 ##################################
3474
3475
@infer_image
def command_run():
    # type: () -> int
    """Run a daemon's container in the foreground; return its exit code."""
    daemon_type, daemon_id = args.name.split('.', 1)
    ctr = get_container(args.fsid, daemon_type, daemon_id)
    return call_timeout(ctr.run_cmd(), args.timeout)
3483
3484 ##################################
3485
3486
@infer_fsid
@infer_config
@infer_image
def command_shell():
    # type: () -> int
    """Spawn a shell (or run args.command) in a ceph container.

    Mounts are assembled from the cluster's daemon dirs plus any
    explicit --config/--keyring/--mount arguments; returns the
    container's exit code.
    """
    if args.fsid:
        make_log_dir(args.fsid)
    # pick which daemon's mount set to borrow for the shell
    if args.name:
        if '.' in args.name:
            (daemon_type, daemon_id) = args.name.split('.', 1)
        else:
            daemon_type = args.name
            daemon_id = None
    else:
        daemon_type = 'osd'  # get the most mounts
        daemon_id = None

    if daemon_id and not args.fsid:
        raise Error('must pass --fsid to specify cluster')

    # use /etc/ceph files by default, if present. we do this instead of
    # making these defaults in the arg parser because we don't want an error
    # if they don't exist.
    if not args.keyring and os.path.exists(SHELL_DEFAULT_KEYRING):
        args.keyring = SHELL_DEFAULT_KEYRING

    container_args = []  # type: List[str]
    mounts = get_container_mounts(args.fsid, daemon_type, daemon_id,
                                  no_config=True if args.config else False)
    binds = get_container_binds(args.fsid, daemon_type, daemon_id)
    if args.config:
        mounts[pathify(args.config)] = '/etc/ceph/ceph.conf:z'
    if args.keyring:
        mounts[pathify(args.keyring)] = '/etc/ceph/ceph.keyring:z'
    if args.mount:
        # each --mount is SRC[:DST[:z]]; default DST is /mnt/<basename>
        for _mount in args.mount:
            split_src_dst = _mount.split(':')
            mount = pathify(split_src_dst[0])
            filename = os.path.basename(split_src_dst[0])
            if len(split_src_dst) > 1:
                dst = split_src_dst[1] + ':z' if len(split_src_dst) == 3 else split_src_dst[1]
                mounts[mount] = dst
            else:
                mounts[mount] = '/mnt/{}:z'.format(filename)
    if args.command:
        command = args.command
    else:
        # interactive shell: allocate a tty and set a recognizable prompt
        command = ['bash']
        container_args += [
            '-it',
            '-e', 'LANG=C',
            '-e', "PS1=%s" % CUSTOM_PS1,
        ]
        if args.fsid:
            # set up a persistent /root home (with skeleton dotfiles) so
            # shell history etc. survive across invocations
            home = os.path.join(args.data_dir, args.fsid, 'home')
            if not os.path.exists(home):
                logger.debug('Creating root home at %s' % home)
                makedirs(home, 0, 0, 0o660)
                if os.path.exists('/etc/skel'):
                    for f in os.listdir('/etc/skel'):
                        if f.startswith('.bash'):
                            shutil.copyfile(os.path.join('/etc/skel', f),
                                            os.path.join(home, f))
            mounts[home] = '/root'

    c = CephContainer(
        image=args.image,
        entrypoint='doesnotmatter',
        args=[],
        container_args=container_args,
        volume_mounts=mounts,
        bind_mounts=binds,
        envs=args.env,
        privileged=True)
    command = c.shell_cmd(command)

    return call_timeout(command, args.timeout)
3564
3565 ##################################
3566
3567
@infer_fsid
def command_enter():
    # type: () -> int
    """Exec a command (default: sh) inside a running daemon's container."""
    if not args.fsid:
        raise Error('must pass --fsid to specify cluster')
    daemon_type, daemon_id = args.name.split('.', 1)
    container_args = []  # type: List[str]
    command = args.command
    if not command:
        # no explicit command: start an interactive shell with a tty
        # and a recognizable prompt
        command = ['sh']
        container_args += [
            '-it',
            '-e', 'LANG=C',
            '-e', "PS1=%s" % CUSTOM_PS1,
        ]
    c = CephContainer(
        image=args.image,
        entrypoint='doesnotmatter',
        container_args=container_args,
        cname='ceph-%s-%s.%s' % (args.fsid, daemon_type, daemon_id),
    )
    return call_timeout(c.exec_cmd(command), args.timeout)
3592
3593 ##################################
3594
3595
@infer_fsid
@infer_image
def command_ceph_volume():
    # type: () -> None
    """Run ceph-volume (with args.command) in a privileged container."""
    if args.fsid:
        make_log_dir(args.fsid)

        lock = FileLock(args.fsid)
        lock.acquire()

    uid = gid = 0  # ceph-volume runs as root
    mounts = get_container_mounts(args.fsid, 'osd', None)

    (config, keyring) = get_config_and_keyring()

    # NOTE: the NamedTemporaryFile objects must stay referenced until the
    # container has run, or the backing files would be removed too early.
    tmp_config = write_tmp(config, uid, gid) if config else None
    if tmp_config is not None:
        mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'

    tmp_keyring = write_tmp(keyring, uid, gid) if keyring else None
    if tmp_keyring is not None:
        mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'

    c = CephContainer(
        image=args.image,
        entrypoint='/usr/sbin/ceph-volume',
        envs=args.env,
        args=args.command,
        privileged=True,
        volume_mounts=mounts,
    )
    out, err, code = call_throws(c.run_cmd(), verbosity=CallVerbosity.VERBOSE)
    if not code:
        print(out)
3635
3636 ##################################
3637
3638
@infer_fsid
def command_unit():
    # type: () -> None
    """Run a systemctl subcommand (start/stop/...) on a daemon's unit."""
    if not args.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(args.fsid, args.name)

    systemctl_cmd = ['systemctl', args.command, unit_name]
    call_throws(systemctl_cmd,
                verbosity=CallVerbosity.VERBOSE,
                desc='')
3654
3655 ##################################
3656
3657
@infer_fsid
def command_logs():
    # type: () -> None
    """Show a daemon's journald logs, passing extra args to journalctl."""
    if not args.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(args.fsid, args.name)

    cmd = [find_program('journalctl'), '-u', unit_name]
    if args.command:
        cmd += list(args.command)

    # call this directly, without our wrapper, so that we get an unmolested
    # stdout with logger prefixing.
    logger.debug("Running command: %s" % ' '.join(cmd))
    subprocess.call(cmd)  # type: ignore
3675
3676 ##################################
3677
3678
def list_networks():
    # type: () -> Dict[str,List[str]]
    """Return a map of CIDR network -> local IPs on that network (v4 + v6).

    NOTE: 18.04's iproute2 (4.15.0-2ubun) does not support `ip -j`, so
    the plain-text output of the `ip` command is scraped with regexes
    instead of parsing JSON.
    """
    networks = _list_ipv4_networks()
    networks.update(_list_ipv6_networks())
    return networks
3691
3692
def _list_ipv4_networks():
    """Scrape `ip route ls` into a network -> local-IPs mapping."""
    stdout, _, _ = call_throws([find_executable('ip'), 'route', 'ls'])
    return _parse_ipv4_route(stdout)
3696
3697
3698 def _parse_ipv4_route(out):
3699 r = {} # type: Dict[str,List[str]]
3700 p = re.compile(r'^(\S+) (.*)scope link (.*)src (\S+)')
3701 for line in out.splitlines():
3702 m = p.findall(line)
3703 if not m:
3704 continue
3705 net = m[0][0]
3706 ip = m[0][3]
3707 if net not in r:
3708 r[net] = []
3709 r[net].append(ip)
3710 return r
3711
3712
def _list_ipv6_networks():
    """Scrape `ip -6 route ls` and `ip -6 addr ls` into a network -> IPs map."""
    route_out, _, _ = call_throws([find_executable('ip'), '-6', 'route', 'ls'])
    addr_out, _, _ = call_throws([find_executable('ip'), '-6', 'addr', 'ls'])
    return _parse_ipv6_route(route_out, addr_out)
3717
3718
3719 def _parse_ipv6_route(routes, ips):
3720 r = {} # type: Dict[str,List[str]]
3721 route_p = re.compile(r'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$')
3722 ip_p = re.compile(r'^\s+inet6 (\S+)/(.*)scope (.*)$')
3723 for line in routes.splitlines():
3724 m = route_p.findall(line)
3725 if not m or m[0][0].lower() == 'default':
3726 continue
3727 net = m[0][0]
3728 if net not in r:
3729 r[net] = []
3730
3731 for line in ips.splitlines():
3732 m = ip_p.findall(line)
3733 if not m:
3734 continue
3735 ip = m[0][0]
3736 # find the network it belongs to
3737 net = [n for n in r.keys()
3738 if ipaddress.ip_address(unicode(ip)) in ipaddress.ip_network(unicode(n))]
3739 if net:
3740 r[net[0]].append(ip)
3741
3742 return r
3743
3744
def command_list_networks():
    # type: () -> None
    """Print this host's networks and their local IPs as indented JSON."""
    print(json.dumps(list_networks(), indent=4))
3749
3750 ##################################
3751
3752
def command_ls():
    # type: () -> None
    """Print the JSON inventory of daemons configured on this host."""
    daemons = list_daemons(detail=not args.no_detail,
                           legacy_dir=args.legacy_dir)
    print(json.dumps(daemons, indent=4))
3759
3760
def list_daemons(detail=True, legacy_dir=None):
    # type: (bool, Optional[str]) -> List[Dict[str, str]]
    """Scan the data dir for daemons, both legacy and cephadm-managed.

    Returns one dict per daemon with style/name/fsid/systemd_unit keys;
    with detail=True, also unit state, container id/image info, version
    and created/deployed/configured timestamps.

    :param detail: gather systemd state, container info and versions
    :param legacy_dir: optional alternate root prepended to the data dir
        (used when adopting from a mounted legacy filesystem)
    """
    host_version = None
    ls = []

    data_dir = args.data_dir
    if legacy_dir is not None:
        data_dir = os.path.abspath(legacy_dir + data_dir)

    # keep track of ceph versions we see
    seen_versions = {}  # type: Dict[str, Optional[str]]

    # /var/lib/ceph
    if os.path.exists(data_dir):
        for i in os.listdir(data_dir):
            if i in ['mon', 'osd', 'mds', 'mgr']:
                # legacy layout: /var/lib/ceph/<type>/<cluster>-<id>
                daemon_type = i
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '-' not in j:
                        continue
                    (cluster, daemon_id) = j.split('-', 1)
                    fsid = get_legacy_daemon_fsid(
                        cluster, daemon_type, daemon_id,
                        legacy_dir=legacy_dir)
                    legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
                    # NOTE: 'i' is deliberately rebound to the result record
                    i = {
                        'style': 'legacy',
                        'name': '%s.%s' % (daemon_type, daemon_id),
                        'fsid': fsid if fsid is not None else 'unknown',
                        'systemd_unit': legacy_unit_name,
                    }
                    if detail:
                        (i['enabled'], i['state'], _) = check_unit(legacy_unit_name)
                        # legacy daemons run the host's ceph binary; probe
                        # its version once and reuse it
                        if not host_version:
                            try:
                                out, err, code = call(['ceph', '-v'])
                                if not code and out.startswith('ceph version '):
                                    host_version = out.split(' ')[2]
                            except Exception:
                                pass
                        i['host_version'] = host_version
                    ls.append(i)
            elif is_fsid(i):
                # cephadm layout: /var/lib/ceph/<fsid>/<type>.<id>
                fsid = str(i)  # convince mypy that fsid is a str here
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '.' in j:
                        name = j
                        (daemon_type, daemon_id) = j.split('.', 1)
                        unit_name = get_unit_name(fsid,
                                                  daemon_type,
                                                  daemon_id)
                    else:
                        continue
                    i = {
                        'style': 'cephadm:v1',
                        'name': name,
                        'fsid': fsid,
                        'systemd_unit': unit_name,
                    }
                    if detail:
                        # get container id
                        (i['enabled'], i['state'], _) = check_unit(unit_name)
                        container_id = None
                        image_name = None
                        image_id = None
                        version = None
                        start_stamp = None

                        # podman < 1.6.2 does not support the .Image format field
                        if 'podman' in container_path and get_podman_version() < (1, 6, 2):
                            image_field = '.ImageID'
                        else:
                            image_field = '.Image'

                        out, err, code = call(
                            [
                                container_path, 'inspect',
                                '--format', '{{.Id}},{{.Config.Image}},{{%s}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}' % image_field,
                                'ceph-%s-%s' % (fsid, j)
                            ],
                            verbosity=CallVerbosity.DEBUG)
                        if not code:
                            # container exists (running or stopped)
                            (container_id, image_name, image_id, start,
                             version) = out.strip().split(',')
                            image_id = normalize_container_id(image_id)
                            daemon_type = name.split('.', 1)[0]
                            start_stamp = try_convert_datetime(start)
                            # label version may be missing/garbage; fall back
                            # to what we already learned for this image
                            if not version or '.' not in version:
                                version = seen_versions.get(image_id, None)
                            if daemon_type == NFSGanesha.daemon_type:
                                version = NFSGanesha.get_version(container_id)
                            if daemon_type == CephIscsi.daemon_type:
                                version = CephIscsi.get_version(container_id)
                            elif not version:
                                # exec a type-specific version command inside
                                # the container and cache the result per image
                                if daemon_type in Ceph.daemons:
                                    out, err, code = call(
                                        [container_path, 'exec', container_id,
                                         'ceph', '-v'])
                                    if not code and \
                                            out.startswith('ceph version '):
                                        version = out.split(' ')[2]
                                        seen_versions[image_id] = version
                                elif daemon_type == 'grafana':
                                    out, err, code = call(
                                        [container_path, 'exec', container_id,
                                         'grafana-server', '-v'])
                                    if not code and \
                                            out.startswith('Version '):
                                        version = out.split(' ')[1]
                                        seen_versions[image_id] = version
                                elif daemon_type in ['prometheus',
                                                     'alertmanager',
                                                     'node-exporter']:
                                    # these tools print their version on stderr
                                    cmd = daemon_type.replace('-', '_')
                                    out, err, code = call(
                                        [container_path, 'exec', container_id,
                                         cmd, '--version'])
                                    if not code and \
                                            err.startswith('%s, version ' % cmd):
                                        version = err.split(' ')[2]
                                        seen_versions[image_id] = version
                                elif daemon_type == CustomContainer.daemon_type:
                                    # Because a custom container can contain
                                    # everything, we do not know which command
                                    # to execute to get the version.
                                    pass
                                else:
                                    logger.warning('version for unknown daemon type %s' % daemon_type)
                        else:
                            # no container; fall back to the recorded image name
                            vfile = os.path.join(data_dir, fsid, j, 'unit.image')  # type: ignore
                            try:
                                with open(vfile, 'r') as f:
                                    image_name = f.read().strip() or None
                            except IOError:
                                pass
                        i['container_id'] = container_id
                        i['container_image_name'] = image_name
                        i['container_image_id'] = image_id
                        i['version'] = version
                        i['started'] = start_stamp
                        i['created'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.created')
                        )
                        i['deployed'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.image'))
                        i['configured'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.configured'))

                    ls.append(i)

    return ls
3911
3912
def get_daemon_description(fsid, name, detail=False, legacy_dir=None):
    # type: (str, str, bool, Optional[str]) -> Dict[str, str]
    """Return the `cephadm ls` record for daemon `name` in cluster `fsid`.

    :raises Error: if no matching daemon exists on this host
    """
    for entry in list_daemons(detail=detail, legacy_dir=legacy_dir):
        if entry['fsid'] == fsid and entry['name'] == name:
            return entry
    raise Error('Daemon not found: {}. See `cephadm ls`'.format(name))
3923
3924
3925 ##################################
3926
@default_image
def command_adopt():
    # type: () -> None
    """Adopt a legacy-deployed daemon into cephadm management."""
    if not args.skip_pull:
        _pull_image(args.image)

    daemon_type, daemon_id = args.name.split('.', 1)

    # only legacy-style deployments can be adopted
    if args.style != 'legacy':
        raise Error('adoption of style %s not implemented' % args.style)

    # resolve the cluster fsid, then serialize against other cephadm ops
    fsid = get_legacy_daemon_fsid(args.cluster,
                                  daemon_type,
                                  daemon_id,
                                  legacy_dir=args.legacy_dir)
    if not fsid:
        raise Error('could not detect legacy fsid; set fsid in ceph.conf')
    lock = FileLock(fsid)
    lock.acquire()

    # dispatch to the daemon-type specific adoption routine
    if daemon_type in Ceph.daemons:
        command_adopt_ceph(daemon_type, daemon_id, fsid)
    elif daemon_type == 'prometheus':
        command_adopt_prometheus(daemon_id, fsid)
    elif daemon_type == 'grafana':
        command_adopt_grafana(daemon_id, fsid)
    elif daemon_type == 'node-exporter':
        raise Error('adoption of node-exporter not implemented')
    elif daemon_type == 'alertmanager':
        command_adopt_alertmanager(daemon_id, fsid)
    else:
        raise Error('daemon type %s not recognized' % daemon_type)
3963
3964
class AdoptOsd(object):
    """Discover the fsid and objectstore type of a legacy OSD being adopted.

    Tries, in order of the callers' preference: a running (online) OSD's
    data dir, ceph-volume lvm metadata, and ceph-volume 'simple' JSON
    files under /etc/ceph/osd.
    """

    def __init__(self, osd_data_dir, osd_id):
        # type: (str, str) -> None
        self.osd_data_dir = osd_data_dir  # e.g. /var/lib/ceph/osd/<cluster>-<id>
        self.osd_id = osd_id

    def check_online_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Read fsid and objectstore type from a mounted OSD data dir.

        Returns (osd_fsid, osd_type); either may be None if not found.
        """
        osd_fsid, osd_type = None, None

        path = os.path.join(self.osd_data_dir, 'fsid')
        try:
            with open(path, 'r') as f:
                osd_fsid = f.read().strip()
            logger.info("Found online OSD at %s" % path)
        except IOError:
            logger.info('Unable to read OSD fsid from %s' % path)
        if os.path.exists(os.path.join(self.osd_data_dir, 'type')):
            with open(os.path.join(self.osd_data_dir, 'type')) as f:
                osd_type = f.read().strip()
        else:
            logger.info('"type" file missing for OSD data dir')

        return osd_fsid, osd_type

    def check_offline_lvm_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Look the OSD up via `ceph-volume lvm list --format=json`.

        Runs ceph-volume in a privileged container; returns
        (osd_fsid, osd_type), either possibly None.
        """
        osd_fsid, osd_type = None, None

        c = CephContainer(
            image=args.image,
            entrypoint='/usr/sbin/ceph-volume',
            args=['lvm', 'list', '--format=json'],
            privileged=True
        )
        out, err, code = call_throws(c.run_cmd())
        if not code:
            try:
                js = json.loads(out)
                if self.osd_id in js:
                    logger.info("Found offline LVM OSD {}".format(self.osd_id))
                    osd_fsid = js[self.osd_id][0]['tags']['ceph.osd_fsid']
                    # infer the objectstore from the device roles:
                    # a 'block' device means bluestore, 'data' means filestore
                    for device in js[self.osd_id]:
                        if device['tags']['ceph.type'] == 'block':
                            osd_type = 'bluestore'
                            break
                        if device['tags']['ceph.type'] == 'data':
                            osd_type = 'filestore'
                            break
            except ValueError as e:
                logger.info("Invalid JSON in ceph-volume lvm list: {}".format(e))

        return osd_fsid, osd_type

    def check_offline_simple_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Look the OSD up via ceph-volume 'simple' JSON in /etc/ceph/osd.

        May mount the OSD's data device as a side effect (non-filestore),
        since adoption needs the data dir populated.  Returns
        (osd_fsid, osd_type), either possibly None.
        """
        osd_fsid, osd_type = None, None

        osd_file = glob("/etc/ceph/osd/{}-[a-f0-9-]*.json".format(self.osd_id))
        if len(osd_file) == 1:
            with open(osd_file[0], 'r') as f:
                try:
                    js = json.loads(f.read())
                    logger.info("Found offline simple OSD {}".format(self.osd_id))
                    osd_fsid = js["fsid"]
                    osd_type = js["type"]
                    if osd_type != "filestore":
                        # need this to be mounted for the adopt to work, as it
                        # needs to move files from this directory
                        call_throws(['mount', js["data"]["path"], self.osd_data_dir])
                except ValueError as e:
                    logger.info("Invalid JSON in {}: {}".format(osd_file, e))

        return osd_fsid, osd_type
4042
4043
def command_adopt_ceph(daemon_type, daemon_id, fsid):
    # type: (str, str, str) -> None
    """Adopt a legacy (non-cephadm) ceph daemon.

    Stops and disables the old systemd unit, moves the daemon's data and
    logs into the cephadm directory layout, fixes ownership, and deploys a
    new containerized unit in its place.
    """
    (uid, gid) = extract_uid_gid()

    data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
                    (daemon_type, args.cluster, daemon_id))
    data_dir_src = os.path.abspath(args.legacy_dir + data_dir_src)

    if not os.path.exists(data_dir_src):
        raise Error("{}.{} data directory '{}' does not exist.  "
                    "Incorrect ID specified, or daemon already adopted?".format(
                        daemon_type, daemon_id, data_dir_src))

    osd_fsid = None
    if daemon_type == 'osd':
        adopt_osd = AdoptOsd(data_dir_src, daemon_id)
        osd_fsid, osd_type = adopt_osd.check_online_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
        if not osd_fsid:
            raise Error('Unable to find OSD {}'.format(daemon_id))
        logger.info('objectstore_type is %s' % osd_type)
        assert osd_type
        if osd_type == 'filestore':
            raise Error('FileStore is not supported by cephadm')

    # NOTE: implicit assumption here that the units correspond to the
    # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
    # CLUSTER field.
    unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
    (enabled, state, _) = check_unit(unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(['systemctl', 'disable', unit_name])

    # data
    logger.info('Moving data...')
    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)
    move_files(glob(os.path.join(data_dir_src, '*')),
               data_dir_dst,
               uid=uid, gid=gid)
    logger.debug('Remove dir \'%s\'' % (data_dir_src))
    if os.path.ismount(data_dir_src):
        call_throws(['umount', data_dir_src])
    os.rmdir(data_dir_src)

    logger.info('Chowning content...')
    call_throws(['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])

    if daemon_type == 'mon':
        # rename *.ldb -> *.sst, in case they are coming from ubuntu
        store = os.path.join(data_dir_dst, 'store.db')
        num_renamed = 0
        if os.path.exists(store):
            for oldf in os.listdir(store):
                if oldf.endswith('.ldb'):
                    newf = oldf.replace('.ldb', '.sst')
                    oldp = os.path.join(store, oldf)
                    newp = os.path.join(store, newf)
                    logger.debug('Renaming %s -> %s' % (oldp, newp))
                    os.rename(oldp, newp)
                    # count renames so the summary below can be reported
                    # (previously the counter was never incremented)
                    num_renamed += 1
        if num_renamed:
            logger.info('Renamed %d leveldb *.ldb files to *.sst',
                        num_renamed)
    if daemon_type == 'osd':
        for n in ['block', 'block.db', 'block.wal']:
            p = os.path.join(data_dir_dst, n)
            if os.path.exists(p):
                logger.info('Chowning %s...' % p)
                os.chown(p, uid, gid)
        # disable the ceph-volume 'simple' mode files on the host
        simple_fn = os.path.join('/etc/ceph/osd',
                                 '%s-%s.json' % (daemon_id, osd_fsid))
        if os.path.exists(simple_fn):
            new_fn = simple_fn + '.adopted-by-cephadm'
            logger.info('Renaming %s -> %s', simple_fn, new_fn)
            os.rename(simple_fn, new_fn)
            logger.info('Disabling host unit ceph-volume@ simple unit...')
            call(['systemctl', 'disable',
                  'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
        else:
            # assume this is an 'lvm' c-v for now, but don't error
            # out if it's not.
            logger.info('Disabling host unit ceph-volume@ lvm unit...')
            call(['systemctl', 'disable',
                  'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])

    # config
    config_src = '/etc/ceph/%s.conf' % (args.cluster)
    config_src = os.path.abspath(args.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'config')
    copy_files([config_src], config_dst, uid=uid, gid=gid)

    # logs
    logger.info('Moving logs...')
    log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
                   (args.cluster, daemon_type, daemon_id))
    log_dir_src = os.path.abspath(args.legacy_dir + log_dir_src)
    log_dir_dst = make_log_dir(fsid, uid=uid, gid=gid)
    move_files(glob(log_dir_src),
               log_dir_dst,
               uid=uid, gid=gid)

    logger.info('Creating new units...')
    make_var_run(fsid, uid, gid)
    c = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True,  # unconditionally enable the new unit
                        start=(state == 'running' or args.force_start),
                        osd_fsid=osd_fsid)
    update_firewalld(daemon_type)
4162
4163
def command_adopt_prometheus(daemon_id, fsid):
    # type: (str, str) -> None
    """Take over a legacy prometheus daemon and redeploy it under cephadm."""
    daemon_type = 'prometheus'
    (uid, gid) = extract_uid_gid_monitoring(daemon_type)

    _stop_and_disable('prometheus')

    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # migrate the scrape configuration
    config_src = os.path.abspath(args.legacy_dir + '/etc/prometheus/prometheus.yml')
    config_dst = os.path.join(data_dir_dst, 'etc/prometheus')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files([config_src], config_dst, uid=uid, gid=gid)

    # migrate the time-series database
    data_src = os.path.abspath(args.legacy_dir + '/var/lib/prometheus/metrics/')
    copy_tree([data_src], os.path.join(data_dir_dst, 'data'), uid=uid, gid=gid)

    make_var_run(fsid, uid, gid)
    c = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(daemon_type)
4192
4193
def command_adopt_grafana(daemon_id, fsid):
    # type: (str, str) -> None
    """Take over a legacy grafana daemon and redeploy it under cephadm."""
    daemon_type = 'grafana'
    (uid, gid) = extract_uid_gid_monitoring(daemon_type)

    _stop_and_disable('grafana-server')

    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # main config file
    config_dst = os.path.join(data_dir_dst, 'etc/grafana')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files([os.path.abspath(args.legacy_dir + '/etc/grafana/grafana.ini')],
               config_dst, uid=uid, gid=gid)

    # provisioning tree (datasources, dashboards)
    copy_tree([os.path.abspath(args.legacy_dir + '/etc/grafana/provisioning/')],
              os.path.join(data_dir_dst, 'etc/grafana'),
              uid=uid, gid=gid)

    # TLS cert/key, only when both exist on the host
    cert = '/etc/grafana/grafana.crt'
    key = '/etc/grafana/grafana.key'
    if os.path.exists(cert) and os.path.exists(key):
        makedirs(os.path.join(data_dir_dst, 'etc/grafana/certs'), uid, gid, 0o755)
        copy_files([os.path.abspath(args.legacy_dir + cert)],
                   os.path.join(data_dir_dst, 'etc/grafana/certs/cert_file'),
                   uid=uid, gid=gid)
        copy_files([os.path.abspath(args.legacy_dir + key)],
                   os.path.join(data_dir_dst, 'etc/grafana/certs/cert_key'),
                   uid=uid, gid=gid)
        # rewrite cert paths inside the adopted grafana.ini
        _adjust_grafana_ini(os.path.join(config_dst, 'grafana.ini'))
    else:
        logger.debug("Skipping ssl, missing cert {} or key {}".format(cert, key))

    # data - possible custom dashboards/plugins
    copy_tree([os.path.abspath(args.legacy_dir + '/var/lib/grafana/')],
              os.path.join(data_dir_dst, 'data'),
              uid=uid, gid=gid)

    make_var_run(fsid, uid, gid)
    c = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(daemon_type)
4246
4247
def command_adopt_alertmanager(daemon_id, fsid):
    # type: (str, str) -> None
    """Take over a legacy alertmanager daemon and redeploy it under cephadm."""
    daemon_type = 'alertmanager'
    (uid, gid) = extract_uid_gid_monitoring(daemon_type)

    _stop_and_disable('prometheus-alertmanager')

    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_dst = os.path.join(data_dir_dst, 'etc/alertmanager')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files([os.path.abspath(args.legacy_dir + '/etc/prometheus/alertmanager.yml')],
               config_dst, uid=uid, gid=gid)

    # data (silences, notification log)
    copy_tree([os.path.abspath(args.legacy_dir + '/var/lib/prometheus/alertmanager/')],
              os.path.join(data_dir_dst, 'etc/alertmanager/data'),
              uid=uid, gid=gid)

    make_var_run(fsid, uid, gid)
    c = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(daemon_type)
4276
4277
4278 def _adjust_grafana_ini(filename):
4279 # type: (str) -> None
4280
4281 # Update cert_file, cert_key pathnames in server section
4282 # ConfigParser does not preserve comments
4283 try:
4284 with open(filename, "r") as grafana_ini:
4285 lines = grafana_ini.readlines()
4286 with open("{}.new".format(filename), "w") as grafana_ini:
4287 server_section=False
4288 for line in lines:
4289 if line.startswith('['):
4290 server_section=False
4291 if line.startswith('[server]'):
4292 server_section=True
4293 if server_section:
4294 line = re.sub(r'^cert_file.*',
4295 'cert_file = /etc/grafana/certs/cert_file', line)
4296 line = re.sub(r'^cert_key.*',
4297 'cert_key = /etc/grafana/certs/cert_key', line)
4298 grafana_ini.write(line)
4299 os.rename("{}.new".format(filename), filename)
4300 except OSError as err:
4301 raise Error("Cannot update {}: {}".format(filename, err))
4302
4303
def _stop_and_disable(unit_name):
    # type: (str) -> None
    """Stop the legacy systemd unit if running, then disable it if enabled."""
    enabled, state, _ = check_unit(unit_name)
    for needed, verb, action in (
            (state == 'running', 'Stopping', 'stop'),
            (enabled, 'Disabling', 'disable')):
        if needed:
            logger.info('%s old systemd unit %s...' % (verb, unit_name))
            call_throws(['systemctl', action, unit_name])
4314
4315
4316 ##################################
4317
def command_rm_daemon():
    # type: () -> None
    """Remove a single daemon instance (unit + data dir) from this host."""
    lock = FileLock(args.fsid)
    lock.acquire()

    unit_name = get_unit_name_by_daemon_name(args.fsid, args.name)

    (daemon_type, daemon_id) = args.name.split('.', 1)
    if daemon_type in ['mon', 'osd'] and not args.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    # tear down the systemd unit
    for action in ['stop', 'reset-failed', 'disable']:
        call(['systemctl', action, unit_name],
             verbosity=CallVerbosity.DEBUG)
    data_dir = get_data_dir(args.fsid, daemon_type, daemon_id)
    if daemon_type in ['mon', 'osd', 'prometheus'] and \
       not args.force_delete_data:
        # rename it out of the way -- do not delete
        backup_dir = os.path.join(args.data_dir, args.fsid, 'removed')
        if not os.path.exists(backup_dir):
            makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
        dirname = '%s.%s_%s' % (daemon_type, daemon_id,
                                datetime.datetime.utcnow().strftime(DATEFMT))
        os.rename(data_dir, os.path.join(backup_dir, dirname))
    else:
        call_throws(['rm', '-rf', data_dir])
4350
4351 ##################################
4352
4353
def command_rm_cluster():
    # type: () -> None
    """Remove all daemons, units, data, and logs for a cluster on this host."""
    if not args.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    l = FileLock(args.fsid)
    l.acquire()

    # stop + disable individual daemon units
    for d in list_daemons(detail=False):
        if d['fsid'] != args.fsid:
            continue
        if d['style'] != 'cephadm:v1':
            continue
        unit_name = get_unit_name(args.fsid, d['name'])
        for action in ['stop', 'reset-failed', 'disable']:
            call(['systemctl', action, unit_name],
                 verbosity=CallVerbosity.DEBUG)

    # cluster units
    for unit_name in ['ceph-%s.target' % args.fsid]:
        for action in ['stop', 'reset-failed', 'disable']:
            call(['systemctl', action, unit_name],
                 verbosity=CallVerbosity.DEBUG)

    # systemd escapes '-' as '\x2d' in slice names
    slice_name = 'system-%s.slice' % (('ceph-%s' % args.fsid).replace('-',
                                                                      '\\x2d'))
    call(['systemctl', 'stop', slice_name],
         verbosity=CallVerbosity.DEBUG)

    # rm units
    call_throws(['rm', '-f', args.unit_dir +
                 '/ceph-%s@.service' % args.fsid])
    call_throws(['rm', '-f', args.unit_dir +
                 '/ceph-%s.target' % args.fsid])
    call_throws(['rm', '-rf',
                 args.unit_dir + '/ceph-%s.target.wants' % args.fsid])
    # rm data
    call_throws(['rm', '-rf', args.data_dir + '/' + args.fsid])
    # rm logs
    call_throws(['rm', '-rf', args.log_dir + '/' + args.fsid])
    # NOTE(review): the '*' patterns below are NOT glob-expanded because rm
    # runs without a shell; this call is likely a no-op — confirm intent.
    call_throws(['rm', '-rf', args.log_dir +
                 '/*.wants/ceph-%s@*' % args.fsid])
    # rm logrotate config
    call_throws(['rm', '-f', args.logrotate_dir + '/ceph-%s' % args.fsid])

    # clean up config, keyring, and pub key files
    files = ['/etc/ceph/ceph.conf', '/etc/ceph/ceph.pub', '/etc/ceph/ceph.client.admin.keyring']

    if os.path.exists(files[0]):
        valid_fsid = False
        with open(files[0]) as f:
            if args.fsid in f.read():
                valid_fsid = True
        if valid_fsid:
            # only remove these files when ceph.conf mentions our fsid
            for fn in files:
                if os.path.exists(fn):
                    os.remove(fn)
4419
4420
4421 ##################################
4422
def check_time_sync(enabler=None):
    # type: (Optional[Packager]) -> bool
    """Return True if one of the known time-sync service units is active."""
    units = [
        'chrony.service',  # 18.04 (at least)
        'chronyd.service',  # el / opensuse
        'systemd-timesyncd.service',
        'ntpd.service',  # el7 (at least)
        'ntp.service',  # 18.04 (at least)
        'ntpsec.service',  # 20.04 (at least) / buster
    ]
    if check_units(units, enabler):
        return True
    logger.warning('No time sync service is running; checked for %s' % units)
    return False
4437
4438
def command_check_host():
    # type: () -> None
    """Verify host prerequisites (container runtime, systemd, lvm, time sync).

    Collects all problems and raises a single Error listing them.
    """
    global container_path

    errors = []
    commands = ['systemctl', 'lvcreate']

    if args.docker:
        container_path = find_program('docker')
    else:
        # probe runtimes in preference order; first hit wins
        for i in CONTAINER_PREFERENCE:
            try:
                container_path = find_program(i)
                break
            except Exception as e:
                logger.debug('Could not locate %s: %s' % (i, e))
    if not container_path:
        errors.append('Unable to locate any of %s' % CONTAINER_PREFERENCE)
    else:
        logger.info('podman|docker (%s) is present' % container_path)

    for command in commands:
        try:
            find_program(command)
            logger.info('%s is present' % command)
        except ValueError:
            errors.append('%s binary does not appear to be installed' % command)

    # check for configured+running chronyd or ntp
    if not check_time_sync():
        errors.append('No time synchronization is active')

    if 'expect_hostname' in args and args.expect_hostname:
        if get_hostname().lower() != args.expect_hostname.lower():
            errors.append('hostname "%s" does not match expected hostname "%s"' % (
                get_hostname(), args.expect_hostname))
        else:
            # FIX: previously this success message was logged even when the
            # hostname did NOT match
            logger.info('Hostname "%s" matches what is expected.',
                        args.expect_hostname)

    if errors:
        raise Error('\n'.join(errors))

    logger.info('Host looks OK')
4482
4483 ##################################
4484
4485
def command_prepare_host():
    # type: () -> None
    """Install missing prerequisites (container runtime, lvm2, time sync),
    optionally fix the hostname, then re-run the host check."""
    logger.info('Verifying podman|docker is present...')
    pkg = None
    if not container_path:
        # pkg is always None here; the redundant guard was removed
        pkg = create_packager()
        pkg.install_podman()

    logger.info('Verifying lvm2 is present...')
    if not find_executable('lvcreate'):
        if not pkg:
            pkg = create_packager()
        pkg.install(['lvm2'])

    logger.info('Verifying time synchronization is in place...')
    if not check_time_sync():
        if not pkg:
            pkg = create_packager()
        pkg.install(['chrony'])
        # check again, and this time try to enable
        # the service
        check_time_sync(enabler=pkg)

    if 'expect_hostname' in args and args.expect_hostname and args.expect_hostname != get_hostname():
        logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), args.expect_hostname))
        call_throws(['hostname', args.expect_hostname])
        with open('/etc/hostname', 'w') as f:
            f.write(args.expect_hostname + '\n')

    logger.info('Repeating the final host check...')
    command_check_host()
4518
4519 ##################################
4520
4521
class CustomValidation(argparse.Action):
    """argparse action validating daemon names of the form '<type>.<id>'."""

    def _check_name(self, values):
        # a valid name splits into exactly (type, id) on the first dot
        parts = values.split('.', 1)
        if len(parts) != 2:
            raise argparse.ArgumentError(
                self,
                "must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com")
        daemon_type = parts[0]
        daemons = get_supported_daemons()
        if daemon_type not in daemons:
            raise argparse.ArgumentError(
                self,
                "name must declare the type of daemon e.g. "
                "{}".format(', '.join(daemons)))

    def __call__(self, parser, namespace, values, option_string=None):
        if self.dest == "name":
            self._check_name(values)
        setattr(namespace, self.dest, values)
4541
4542 ##################################
4543
4544
def get_distro():
    # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
    """Parse /etc/os-release into (id, version_id, version_codename).

    Values are lower-cased; surrounding double quotes are stripped.
    Missing fields are returned as None.
    """
    distro = None
    distro_version = None
    distro_codename = None
    with open('/etc/os-release', 'r') as f:
        for line in f.readlines():
            line = line.strip()
            if '=' not in line or line.startswith('#'):
                continue
            (var, val) = line.split('=', 1)
            # guard against empty values ('VAR=') before indexing val
            if len(val) >= 2 and val[0] == '"' and val[-1] == '"':
                val = val[1:-1]
            if var == 'ID':
                distro = val.lower()
            elif var == 'VERSION_ID':
                distro_version = val.lower()
            elif var == 'VERSION_CODENAME':
                distro_codename = val.lower()
    return distro, distro_version, distro_codename
4565
4566
class Packager(object):
    """Base class for distro package managers used to set up ceph repos."""

    def __init__(self, stable=None, version=None, branch=None, commit=None):
        # exactly one of stable / version / branch(+commit) / nothing
        assert \
            (stable and not version and not branch and not commit) or \
            (not stable and version and not branch and not commit) or \
            (not stable and not version and branch) or \
            (not stable and not version and not branch and not commit)
        self.stable = stable
        self.version = version
        self.branch = branch
        self.commit = commit

    def add_repo(self):
        raise NotImplementedError

    def rm_repo(self):
        raise NotImplementedError

    def query_shaman(self, distro, distro_version, branch, commit):
        """Resolve a dev branch/sha1 to a chacra repo file via shaman."""
        logger.info('Fetching repo metadata from shaman and chacra...')
        shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
            distro=distro,
            distro_version=distro_version,
            branch=branch,
            sha1=commit or 'latest',
            arch=get_arch()
        )
        try:
            resp = urlopen(shaman_url)
        except HTTPError as err:
            logger.error('repository not found in shaman (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, shaman_url))
        try:
            chacra_url = resp.geturl()
            chacra_resp = urlopen(chacra_url)
        except HTTPError as err:
            logger.error('repository not found in chacra (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, chacra_url))
        return chacra_resp.read().decode('utf-8')

    def repo_gpgkey(self):
        # NOTE(review): returns a bare string for --gpg-url but a
        # (url, name) tuple otherwise; callers that unpack two values may
        # misbehave with --gpg-url — confirm upstream intent.
        if args.gpg_url:
            return args.gpg_url
        if self.stable or self.version:
            return 'https://download.ceph.com/keys/release.asc', 'release'
        return 'https://download.ceph.com/keys/autobuild.asc', 'autobuild'

    def enable_service(self, service):
        """
        Start and enable the service (typically using systemd).
        """
        call_throws(['systemctl', 'enable', '--now', service])
4621
4622
class Apt(Packager):
    """Packager for Debian-family distros (apt-get).

    Handles the ceph repo itself plus, on Ubuntu, the OBS 'kubic' repo
    that provides podman packages.
    """

    # distro id -> name used in repo paths / shaman queries
    DISTRO_NAMES = {
        'ubuntu': 'ubuntu',
        'debian': 'debian',
    }

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version, distro_codename):
        # type: (str, str, str, str, str, str, str) -> None
        super(Apt, self).__init__(stable=stable, version=version,
                                  branch=branch, commit=commit)
        self.distro = self.DISTRO_NAMES[distro]
        self.distro_codename = distro_codename
        self.distro_version = distro_version

    def repo_path(self):
        """Path of the apt sources file this packager manages."""
        return '/etc/apt/sources.list.d/ceph.list'

    def add_repo(self):
        """Install the ceph GPG key and write the apt sources entry."""
        url, name = self.repo_gpgkey()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'w') as f:
            f.write(key)

        # stable/version repos come from download.ceph.com (args.repo_url);
        # dev branches are resolved through shaman instead
        if self.version:
            content = 'deb %s/debian-%s/ %s main\n' % (
                args.repo_url, self.version, self.distro_codename)
        elif self.stable:
            content = 'deb %s/debian-%s/ %s main\n' % (
                args.repo_url, self.stable, self.distro_codename)
        else:
            content = self.query_shaman(self.distro, self.distro_codename, self.branch,
                                        self.commit)

        logger.info('Installing repo file at %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        """Remove the ceph GPG keys and sources entry (and kubic on Ubuntu)."""
        for name in ['autobuild', 'release']:
            p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
            if os.path.exists(p):
                logger.info('Removing repo GPG key %s...' % p)
                os.unlink(p)
        if os.path.exists(self.repo_path()):
            logger.info('Removing repo at %s...' % self.repo_path())
            os.unlink(self.repo_path())

        if self.distro == 'ubuntu':
            self.rm_kubic_repo()

    def install(self, ls):
        # type: (List[str]) -> None
        """Install the given list of packages with apt-get."""
        logger.info('Installing packages %s...' % ls)
        call_throws(['apt-get', 'install', '-y'] + ls)

    def install_podman(self):
        """Install podman (via kubic on Ubuntu), falling back to docker."""
        if self.distro == 'ubuntu':
            logger.info('Setting up repo for podman...')
            self.add_kubic_repo()
            call_throws(['apt-get', 'update'])

        logger.info('Attempting podman install...')
        try:
            self.install(['podman'])
        except Error as e:
            logger.info('Podman did not work. Falling back to docker...')
            self.install(['docker.io'])

    def kubic_repo_url(self):
        """OBS kubic repo URL for this Ubuntu release."""
        return 'https://download.opensuse.org/repositories/devel:/kubic:/' \
               'libcontainers:/stable/xUbuntu_%s/' % self.distro_version

    def kubic_repo_path(self):
        """Path of the kubic apt sources file."""
        return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list'

    # NOTE(review): 'kubric' below is presumably a typo for 'kubic';
    # names kept as-is since they are part of the class interface.
    def kubric_repo_gpgkey_url(self):
        """URL of the kubic repo signing key."""
        return '%s/Release.key' % self.kubic_repo_url()

    def kubric_repo_gpgkey_path(self):
        """Keyring path where the kubic signing key is stored."""
        return '/etc/apt/trusted.gpg.d/kubic.release.gpg'

    def add_kubic_repo(self):
        """Fetch + register the kubic signing key and write its sources file."""
        url = self.kubric_repo_gpgkey_url()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        tmp_key = write_tmp(key, 0, 0)
        keyring = self.kubric_repo_gpgkey_path()
        call_throws(['apt-key', '--keyring', keyring, 'add', tmp_key.name])

        logger.info('Installing repo file at %s...' % self.kubic_repo_path())
        content = 'deb %s /\n' % self.kubic_repo_url()
        with open(self.kubic_repo_path(), 'w') as f:
            f.write(content)

    def rm_kubic_repo(self):
        """Remove the kubic keyring and sources file if present."""
        keyring = self.kubric_repo_gpgkey_path()
        if os.path.exists(keyring):
            logger.info('Removing repo GPG key %s...' % keyring)
            os.unlink(keyring)

        p = self.kubic_repo_path()
        if os.path.exists(p):
            logger.info('Removing repo at %s...' % p)
            os.unlink(p)
4739
4740
class YumDnf(Packager):
    """Packager for RPM-based distros, driven by yum or dnf as appropriate."""

    # distro id -> (shaman distro name, repo distro-code prefix)
    DISTRO_NAMES = {
        'centos': ('centos', 'el'),
        'rhel': ('centos', 'el'),
        'scientific': ('centos', 'el'),
        'fedora': ('fedora', 'fc'),
    }

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version):
        # type: (str, str, str, str, str, str) -> None
        super(YumDnf, self).__init__(stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.major = int(distro_version.split('.')[0])
        self.distro_normalized = self.DISTRO_NAMES[distro][0]
        self.distro_code = self.DISTRO_NAMES[distro][1] + str(self.major)
        # fedora >= 30 and el >= 8 ship dnf; older releases still use yum
        if (self.distro_code == 'fc' and self.major >= 30) or \
           (self.distro_code == 'el' and self.major >= 8):
            self.tool = 'dnf'
        else:
            self.tool = 'yum'

    def custom_repo(self, **kw):
        """
        Repo files need special care in that a whole line should not be present
        if there is no value for it. Because we were using `format()` we could
        not conditionally add a line for a repo file. So the end result would
        contain a key with a missing value (say if we were passing `None`).

        For example, it could look like::

        [ceph repo]
        name= ceph repo
        proxy=
        gpgcheck=

        Which breaks. This function allows us to conditionally add lines,
        preserving an order and be more careful.

        Previously, and for historical purposes, this is how the template used
        to look::

        custom_repo =
        [{repo_name}]
        name={name}
        baseurl={baseurl}
        enabled={enabled}
        gpgcheck={gpgcheck}
        type={_type}
        gpgkey={gpgkey}
        proxy={proxy}

        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self):
        """Path of the yum/dnf repo file this packager manages."""
        return '/etc/yum.repos.d/ceph.repo'

    def repo_baseurl(self):
        """Base URL of the rpm repo for the selected stable release/version."""
        assert self.stable or self.version
        if self.version:
            return '%s/rpm-%s/%s' % (args.repo_url, self.version,
                                     self.distro_code)
        else:
            return '%s/rpm-%s/%s' % (args.repo_url, self.stable,
                                     self.distro_code)

    def add_repo(self):
        """Write the ceph repo file (stable/version) or a shaman dev repo."""
        if self.stable or self.version:
            content = ''
            # one section each for arch, noarch, and source packages
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro_normalized, self.major,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        # ceph packages need deps from EPEL on el distros
        if self.distro_code.startswith('el'):
            logger.info('Enabling EPEL...')
            call_throws([self.tool, 'install', '-y', 'epel-release'])

    def rm_repo(self):
        """Remove the ceph repo file if present."""
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls):
        # type: (List[str]) -> None
        """Install the given list of packages with yum/dnf."""
        logger.info('Installing packages %s...' % ls)
        call_throws([self.tool, 'install', '-y'] + ls)

    def install_podman(self):
        """Install podman from the distro repos."""
        self.install(['podman'])
4869
4870
4871 class Zypper(Packager):
4872 DISTRO_NAMES = [
4873 'sles',
4874 'opensuse-tumbleweed',
4875 'opensuse-leap'
4876 ]
4877
4878 def __init__(self, stable, version, branch, commit,
4879 distro, distro_version):
4880 super(Zypper, self).__init__(stable=stable, version=version,
4881 branch=branch, commit=commit)
4882 self.tool = 'zypper'
4883 self.distro = 'opensuse'
4884 self.distro_version = '15.1'
4885 if 'tumbleweed' not in distro and distro_version is not None:
4886 self.distro_version = distro_version
4887
4888 def custom_repo(self, **kw):
4889 """
4890 See YumDnf for format explanation.
4891 """
4892 lines = []
4893
4894 # by using tuples (vs a dict) we preserve the order of what we want to
4895 # return, like starting with a [repo name]
4896 tmpl = (
4897 ('reponame', '[%s]'),
4898 ('name', 'name=%s'),
4899 ('baseurl', 'baseurl=%s'),
4900 ('enabled', 'enabled=%s'),
4901 ('gpgcheck', 'gpgcheck=%s'),
4902 ('_type', 'type=%s'),
4903 ('gpgkey', 'gpgkey=%s'),
4904 ('proxy', 'proxy=%s'),
4905 ('priority', 'priority=%s'),
4906 )
4907
4908 for line in tmpl:
4909 tmpl_key, tmpl_value = line # key values from tmpl
4910
4911 # ensure that there is an actual value (not None nor empty string)
4912 if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
4913 lines.append(tmpl_value % kw.get(tmpl_key))
4914
4915 return '\n'.join(lines)
4916
4917 def repo_path(self):
4918 return '/etc/zypp/repos.d/ceph.repo'
4919
4920 def repo_baseurl(self):
4921 assert self.stable or self.version
4922 if self.version:
4923 return '%s/rpm-%s/%s' % (args.repo_url, self.stable, self.distro)
4924 else:
4925 return '%s/rpm-%s/%s' % (args.repo_url, self.stable, self.distro)
4926
4927 def add_repo(self):
4928 if self.stable or self.version:
4929 content = ''
4930 for n, t in {
4931 'Ceph': '$basearch',
4932 'Ceph-noarch': 'noarch',
4933 'Ceph-source': 'SRPMS'}.items():
4934 content += '[%s]\n' % (n)
4935 content += self.custom_repo(
4936 name='Ceph %s' % t,
4937 baseurl=self.repo_baseurl() + '/' + t,
4938 enabled=1,
4939 gpgcheck=1,
4940 gpgkey=self.repo_gpgkey()[0],
4941 )
4942 content += '\n\n'
4943 else:
4944 content = self.query_shaman(self.distro, self.distro_version,
4945 self.branch,
4946 self.commit)
4947
4948 logger.info('Writing repo to %s...' % self.repo_path())
4949 with open(self.repo_path(), 'w') as f:
4950 f.write(content)
4951
4952 def rm_repo(self):
4953 if os.path.exists(self.repo_path()):
4954 os.unlink(self.repo_path())
4955
4956 def install(self, ls):
4957 logger.info('Installing packages %s...' % ls)
4958 call_throws([self.tool, 'in', '-y'] + ls)
4959
    def install_podman(self):
        # type: () -> None
        """Install the podman container runtime via zypper."""
        self.install(['podman'])
4962
4963
def create_packager(stable=None, version=None, branch=None, commit=None):
    """Instantiate the Packager subclass that matches the host's distro.

    Raises Error when the detected distro is not supported by any
    packager implementation.
    """
    distro, distro_version, distro_codename = get_distro()
    common_kwargs = dict(stable=stable, version=version,
                         branch=branch, commit=commit,
                         distro=distro, distro_version=distro_version)
    if distro in YumDnf.DISTRO_NAMES:
        return YumDnf(**common_kwargs)
    if distro in Apt.DISTRO_NAMES:
        # Apt additionally needs the codename (e.g. 'focal') for its repo URLs
        return Apt(distro_codename=distro_codename, **common_kwargs)
    if distro in Zypper.DISTRO_NAMES:
        return Zypper(**common_kwargs)
    raise Error('Distro %s version %s not supported' % (distro, distro_version))
4980
4981
def command_add_repo():
    # type: () -> None
    """Validate the release/version/dev arguments and write the package repo.

    :raises Error: when both --release and --version are given, when none
        of the selectors are given, or when --version is malformed.
    """
    if args.version and args.release:
        raise Error('you can specify either --release or --version but not both')
    if not args.version and not args.release and not args.dev and not args.dev_commit:
        raise Error('please supply a --release, --version, --dev or --dev-commit argument')
    if args.version:
        # Explicit length check replaces a try/except around a 3-tuple
        # unpack whose bound names were never used; this also avoids the
        # confusing chained traceback from raising inside an except block.
        if len(args.version.split('.')) != 3:
            raise Error('version must be in the form x.y.z (e.g., 15.2.0)')

    pkg = create_packager(stable=args.release,
                          version=args.version,
                          branch=args.dev,
                          commit=args.dev_commit)
    pkg.add_repo()
4998
4999
def command_rm_repo():
    # type: () -> None
    """Remove the ceph package repo definition for this host's distro."""
    pkg = create_packager()
    pkg.rm_repo()
5003
5004
def command_install():
    # type: () -> None
    """Install the requested packages using the distro's packager."""
    pkg = create_packager()
    pkg.install(args.packages)
5008
5009 ##################################
5010
def get_ipv4_address(ifname):
    # type: (str) -> str
    """Return the IPv4 address of an interface in CIDR form.

    :param ifname: interface name (truncated to 15 chars, the kernel IFNAMSIZ
        limit minus the NUL byte)
    :returns: e.g. '192.168.1.10/24', or '' when the interface has no IPv4
        address (or does not exist)
    """
    def _extract(sock, offset):
        # The SIOCGIF* ioctls fill a struct ifreq; bytes 20:24 hold the
        # in_addr of the requested field.
        return socket.inet_ntop(
            socket.AF_INET,
            fcntl.ioctl(
                sock.fileno(),
                offset,
                struct.pack('256s', bytes(ifname[:15], 'utf-8'))
            )[20:24])

    # Bug fix: the socket was previously never closed, leaking one fd per
    # call; the context manager guarantees closure on every path.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        try:
            addr = _extract(s, 0x8915)     # SIOCGIFADDR
            dq_mask = _extract(s, 0x891b)  # SIOCGIFNETMASK
        except OSError:
            # interface does not have an ipv4 address
            return ''

    # Convert the dotted-quad netmask to a prefix length by counting the
    # set bits of each octet.
    dec_mask = sum(bin(int(i)).count('1') for i in dq_mask.split('.'))
    return '{}/{}'.format(addr, dec_mask)
5033
5034
def get_ipv6_address(ifname):
    # type: (str) -> str
    """Return the IPv6 address of an interface in CIDR form, or ''."""
    if not os.path.exists('/proc/net/if_inet6'):
        return ''

    # /proc/net/if_inet6 layout (per the Linux IPv6 HOWTO, ch. 11.4):
    # address, ifindex, prefix length (hex), scope, flags, device name
    for entry in read_file(['/proc/net/if_inet6']).splitlines():
        fields = entry.split()
        if fields[-1] != ifname:
            continue
        raw_addr = fields[0]
        # re-insert the ':' group separators stripped by the kernel
        grouped = ":".join(raw_addr[i:i + 4] for i in range(0, len(raw_addr), 4))
        # normalize via the ipaddress module (e.g. collapse zero runs)
        addr = ipaddress.ip_address(grouped)
        return "{}/{}".format(str(addr), int('0x{}'.format(fields[2]), 16))
    return ''
5053
5054
def bytes_to_human(num, mode='decimal'):
    # type: (float, str) -> str
    """Convert a byte count into its human-readable form.

    :param num: number, in bytes, to convert
    :param mode: Either decimal (default) or binary to determine divisor
    :returns: string representing the bytes value in a more readable format
    """
    if mode == 'binary':
        suffixes = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
        step = 1024.0
        top_suffix = "YiB"
    else:
        suffixes = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
        step = 1000.0
        top_suffix = "YB"

    value = num
    for suffix in suffixes:
        if abs(value) < step:
            return "%3.1f%s" % (value, suffix)
        value /= step
    # larger than the biggest listed unit: report in yottabytes
    return "%.1f%s" % (value, top_suffix)
5077
5078
def read_file(path_list, file_name=''):
    # type: (List[str], str) -> str
    """Return the content of the first readable file found in `path_list`.

    :param path_list: list of file paths to search
    :param file_name: optional file_name to be applied to a file path
    :returns: stripped content of the file, or 'Unknown'
    """
    for base in path_list:
        file_path = os.path.join(base, file_name) if file_name else base
        if not os.path.exists(file_path):
            continue
        with open(file_path, 'r') as fh:
            try:
                return fh.read().strip()
            except OSError:
                # sysfs may expose the file, yet reads on some devices
                # (e.g. virtio) can still fail
                return "Unknown"
    return "Unknown"
5103
5104
5105 ##################################
class HostFacts():
    """Gather and expose hardware/OS metadata for the local host.

    All values are read from /proc, /sys and a few config files, so this
    class is Linux-specific.  Public attributes and properties are
    serialized to JSON by dump().
    """
    _dmi_path_list = ['/sys/class/dmi/id']
    _nic_path_list = ['/sys/class/net']
    _selinux_path_list = ['/etc/selinux/config']
    _apparmor_path_list = ['/etc/apparmor']
    # map raw sysfs vendor IDs to friendly names where sysfs is unhelpful
    _disk_vendor_workarounds = {
        "0x1af4": "Virtio Block Device"
    }

    def __init__(self):
        # defaults are overwritten by _get_cpuinfo()/_process_nics() below
        self.cpu_model = 'Unknown'
        self.cpu_count = 0
        self.cpu_cores = 0
        self.cpu_threads = 0
        self.interfaces = {}

        self._meminfo = read_file(['/proc/meminfo']).splitlines()
        self._get_cpuinfo()
        self._process_nics()
        self.arch = platform.processor()
        self.kernel = platform.release()

    def _get_cpuinfo(self):
        # type: () -> None
        """Determine cpu information via /proc/cpuinfo"""
        raw = read_file(['/proc/cpuinfo'])
        output = raw.splitlines()
        cpu_set = set()

        for line in output:
            field = [l.strip() for l in line.split(':')]
            if "model name" in line:
                self.cpu_model = field[1]
            if "physical id" in line:
                # each distinct physical id is one socket/package
                cpu_set.add(field[1])
            if "siblings" in line:
                self.cpu_threads = int(field[1].strip())
            if "cpu cores" in line:
                self.cpu_cores = int(field[1].strip())
            # NOTE(review): this 'pass' is a no-op leftover
            pass
        self.cpu_count = len(cpu_set)

    def _get_block_devs(self):
        # type: () -> List[str]
        """Determine the list of block devices by looking at /sys/block"""
        # device-mapper entries (dm*) are excluded
        return [dev for dev in os.listdir('/sys/block')
                if not dev.startswith('dm')]

    def _get_devs_by_type(self, rota='0'):
        # type: (str) -> List[str]
        """Filter block devices by a given rotational attribute (0=flash, 1=spinner)"""
        devs = list()
        for blk_dev in self._get_block_devs():
            rot_path = '/sys/block/{}/queue/rotational'.format(blk_dev)
            rot_value = read_file([rot_path])
            if rot_value == rota:
                devs.append(blk_dev)
        return devs

    @property
    def operating_system(self):
        # type: () -> str
        """Determine OS version"""
        raw_info = read_file(['/etc/os-release'])
        os_release = raw_info.splitlines()
        rel_str = 'Unknown'
        rel_dict = dict()

        for line in os_release:
            if "=" in line:
                var_name, var_value = line.split('=')
                rel_dict[var_name] = var_value.strip('"')

        # Would normally use PRETTY_NAME, but NAME and VERSION are more
        # consistent
        if all(_v in rel_dict for _v in ["NAME", "VERSION"]):
            rel_str = "{} {}".format(rel_dict['NAME'], rel_dict['VERSION'])
        return rel_str

    @property
    def hostname(self):
        # type: () -> str
        """Return the hostname"""
        return platform.node()

    @property
    def subscribed(self):
        # type: () -> str
        """Highlevel check to see if the host is subscribed to receive updates/support"""
        def _red_hat():
            # type: () -> str
            # RHEL 7 and RHEL 8
            entitlements_dir = '/etc/pki/entitlement'
            if os.path.exists(entitlements_dir):
                # a subscribed host carries both a cert and a key pem file
                pems = glob('{}/*.pem'.format(entitlements_dir))
                if len(pems) >= 2:
                    return "Yes"

            return "No"

        os_name = self.operating_system
        if os_name.upper().startswith("RED HAT"):
            return _red_hat()

        return "Unknown"

    @property
    def hdd_count(self):
        # type: () -> int
        """Return a count of HDDs (spinners)"""
        return len(self._get_devs_by_type(rota='1'))

    def _get_capacity(self, dev):
        # type: (str) -> int
        """Determine the size of a given device"""
        # sysfs reports the size in 'size' units of the logical block size
        size_path = os.path.join('/sys/block', dev, 'size')
        size_blocks = int(read_file([size_path]))
        blk_path = os.path.join('/sys/block', dev, 'queue', 'logical_block_size')
        blk_count = int(read_file([blk_path]))
        return size_blocks * blk_count

    def _get_capacity_by_type(self, rota='0'):
        # type: (str) -> int
        """Return the total capacity of a category of device (flash or hdd)"""
        devs = self._get_devs_by_type(rota=rota)
        capacity = 0
        for dev in devs:
            capacity += self._get_capacity(dev)
        return capacity

    def _dev_list(self, dev_list):
        # type: (List[str]) -> List[Dict[str, object]]
        """Return a 'pretty' name list for each device in the `dev_list`"""
        disk_list = list()

        for dev in dev_list:
            disk_model = read_file(['/sys/block/{}/device/model'.format(dev)]).strip()
            disk_rev = read_file(['/sys/block/{}/device/rev'.format(dev)]).strip()
            disk_wwid = read_file(['/sys/block/{}/device/wwid'.format(dev)]).strip()
            vendor = read_file(['/sys/block/{}/device/vendor'.format(dev)]).strip()
            disk_vendor = HostFacts._disk_vendor_workarounds.get(vendor, vendor)
            disk_size_bytes = self._get_capacity(dev)
            disk_list.append({
                "description": "{} {} ({})".format(disk_vendor, disk_model, bytes_to_human(disk_size_bytes)),
                "vendor": disk_vendor,
                "model": disk_model,
                "rev": disk_rev,
                "wwid": disk_wwid,
                "dev_name": dev,
                "disk_size_bytes": disk_size_bytes,
                }
            )
        return disk_list

    @property
    def hdd_list(self):
        # type: () -> List[Dict[str, object]]
        """Return a list of devices that are HDDs (spinners)"""
        devs = self._get_devs_by_type(rota='1')
        return self._dev_list(devs)

    @property
    def flash_list(self):
        # type: () -> List[Dict[str, object]]
        """Return a list of devices that are flash based (SSD, NVMe)"""
        devs = self._get_devs_by_type(rota='0')
        return self._dev_list(devs)

    @property
    def hdd_capacity_bytes(self):
        # type: () -> int
        """Return the total capacity for all HDD devices (bytes)"""
        return self._get_capacity_by_type(rota='1')

    @property
    def hdd_capacity(self):
        # type: () -> str
        """Return the total capacity for all HDD devices (human readable format)"""
        return bytes_to_human(self.hdd_capacity_bytes)

    @property
    def cpu_load(self):
        # type: () -> Dict[str, float]
        """Return the cpu load average data for the host"""
        raw = read_file(['/proc/loadavg']).strip()
        data = raw.split()
        return {
            "1min": float(data[0]),
            "5min": float(data[1]),
            "15min": float(data[2]),
        }

    @property
    def flash_count(self):
        # type: () -> int
        """Return the number of flash devices in the system (SSD, NVMe)"""
        return len(self._get_devs_by_type(rota='0'))

    @property
    def flash_capacity_bytes(self):
        # type: () -> int
        """Return the total capacity for all flash devices (bytes)"""
        return self._get_capacity_by_type(rota='0')

    @property
    def flash_capacity(self):
        # type: () -> str
        """Return the total capacity for all Flash devices (human readable format)"""
        return bytes_to_human(self.flash_capacity_bytes)

    def _process_nics(self):
        # type: () -> None
        """Look at the NIC devices and extract network related metadata"""
        # from https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
        hw_lookup = {
            "1": "ethernet",
            "32": "infiniband",
            "772": "loopback",
        }

        for nic_path in HostFacts._nic_path_list:
            if not os.path.exists(nic_path):
                continue
            for iface in os.listdir(nic_path):

                # lower_*/upper_* symlinks describe stacking (bond/bridge members)
                lower_devs_list = [os.path.basename(link.replace("lower_", "")) for link in glob(os.path.join(nic_path, iface, "lower_*"))]
                upper_devs_list = [os.path.basename(link.replace("upper_", "")) for link in glob(os.path.join(nic_path, iface, "upper_*"))]

                try:
                    mtu = int(read_file([os.path.join(nic_path, iface, 'mtu')]))
                except ValueError:
                    mtu = 0

                operstate = read_file([os.path.join(nic_path, iface, 'operstate')])
                try:
                    speed = int(read_file([os.path.join(nic_path, iface, 'speed')]))
                except (OSError, ValueError):
                    # OSError : device doesn't support the ethtool get_link_ksettings
                    # ValueError : raised when the read fails, and returns Unknown
                    #
                    # Either way, we show a -1 when speed isn't available
                    speed = -1

                if os.path.exists(os.path.join(nic_path, iface, 'bridge')):
                    nic_type = "bridge"
                elif os.path.exists(os.path.join(nic_path, iface, 'bonding')):
                    nic_type = "bonding"
                else:
                    nic_type = hw_lookup.get(read_file([os.path.join(nic_path, iface, 'type')]), "Unknown")

                # a 'device' symlink only exists for real hardware NICs
                dev_link = os.path.join(nic_path, iface, 'device')
                if os.path.exists(dev_link):
                    iftype = 'physical'
                    driver_path = os.path.join(dev_link, 'driver')
                    if os.path.exists(driver_path):
                        driver = os.path.basename(
                            os.path.realpath(driver_path))
                    else:
                        driver = 'Unknown'

                else:
                    iftype = 'logical'
                    driver = ''

                self.interfaces[iface] = {
                    "mtu": mtu,
                    "upper_devs_list": upper_devs_list,
                    "lower_devs_list": lower_devs_list,
                    "operstate": operstate,
                    "iftype": iftype,
                    "nic_type": nic_type,
                    "driver": driver,
                    "speed": speed,
                    "ipv4_address": get_ipv4_address(iface),
                    "ipv6_address": get_ipv6_address(iface),
                }

    @property
    def nic_count(self):
        # type: () -> int
        """Return a total count of all physical NICs detected in the host"""
        phys_devs = []
        for iface in self.interfaces:
            if self.interfaces[iface]["iftype"] == 'physical':
                phys_devs.append(iface)
        return len(phys_devs)


    def _get_mem_data(self, field_name):
        # type: (str) -> int
        # returns the numeric value (kB) for the given /proc/meminfo field,
        # or 0 when the field is absent
        for line in self._meminfo:
            if line.startswith(field_name):
                _d = line.split()
                return int(_d[1])
        return 0

    @property
    def memory_total_kb(self):
        # type: () -> int
        """Determine the memory installed (kb)"""
        return self._get_mem_data('MemTotal')

    @property
    def memory_free_kb(self):
        # type: () -> int
        """Determine the memory free (not cache, immediately usable)"""
        return self._get_mem_data('MemFree')

    @property
    def memory_available_kb(self):
        # type: () -> int
        """Determine the memory available to new applications without swapping"""
        return self._get_mem_data('MemAvailable')

    @property
    def vendor(self):
        # type: () -> str
        """Determine server vendor from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, "sys_vendor")

    @property
    def model(self):
        # type: () -> str
        """Determine server model information from DMI data in sysfs"""
        family = read_file(HostFacts._dmi_path_list, "product_family")
        product = read_file(HostFacts._dmi_path_list, "product_name")
        # NOTE(review): read_file returns 'Unknown' (truthy) when the file is
        # missing, so `and product` never short-circuits here — confirm intent
        if family == 'Unknown' and product:
            return "{}".format(product)

        return "{} ({})".format(family, product)

    @property
    def bios_version(self):
        # type: () -> str
        """Determine server BIOS version from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, "bios_version")

    @property
    def bios_date(self):
        # type: () -> str
        """Determine server BIOS date from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, "bios_date")

    @property
    def timestamp(self):
        # type: () -> float
        """Return the current time as Epoch seconds"""
        return time.time()

    @property
    def system_uptime(self):
        # type: () -> float
        """Return the system uptime (in secs)"""
        raw_time = read_file(['/proc/uptime'])
        up_secs, _ = raw_time.split()
        return float(up_secs)

    def kernel_security(self):
        # type: () -> Dict[str, str]
        """Determine the security features enabled in the kernel - SELinux, AppArmor"""
        def _fetch_selinux():
            """Read the selinux config file to determine state"""
            security = {}
            for selinux_path in HostFacts._selinux_path_list:
                if os.path.exists(selinux_path):
                    selinux_config = read_file([selinux_path]).splitlines()
                    security['type'] = 'SELinux'
                    for line in selinux_config:
                        if line.strip().startswith('#'):
                            continue
                        # config lines are KEY=value pairs
                        k, v = line.split('=')
                        security[k] = v
                    if security['SELINUX'].lower() == "disabled":
                        security['description'] = "SELinux: Disabled"
                    else:
                        security['description'] = "SELinux: Enabled({}, {})".format(security['SELINUX'], security['SELINUXTYPE'])
            return security

        def _fetch_apparmor():
            """Read the apparmor profiles directly, returning an overview of AppArmor status"""
            security = {}
            for apparmor_path in HostFacts._apparmor_path_list:
                if os.path.exists(apparmor_path):
                    security['type'] = "AppArmor"
                    security['description'] = "AppArmor: Enabled"
                    try:
                        profiles = read_file(['/sys/kernel/security/apparmor/profiles'])
                    except OSError:
                        pass
                    else:
                        summary = {} # type: Dict[str, int]
                        # NOTE(review): each profile line looks like
                        # 'name (mode)'; a blank/odd line would make this
                        # 2-way unpack raise ValueError — confirm inputs
                        for line in profiles.split('\n'):
                            item, mode = line.split(' ')
                            mode= mode.strip('()')
                            if mode in summary:
                                summary[mode] += 1
                            else:
                                # NOTE(review): first sighting of a mode starts
                                # its count at 0, so each total reads one low —
                                # confirm whether this is intended
                                summary[mode] = 0
                        summary_str = ",".join(["{} {}".format(v, k) for k, v in summary.items()])
                        security = {**security, **summary} # type: ignore
                        security['description'] += "({})".format(summary_str)

            return security

        if os.path.exists('/sys/kernel/security/lsm'):
            lsm = read_file(['/sys/kernel/security/lsm']).strip()
            if 'selinux' in lsm:
                return _fetch_selinux()
            elif 'apparmor' in lsm:
                return _fetch_apparmor()
            else:
                return {
                    "type": "Unknown",
                    "description": "Linux Security Module framework is active, but is not using SELinux or AppArmor"
                }

        return {
            "type": "None",
            "description": "Linux Security Module framework is not available"
        }

    @property
    def kernel_parameters(self):
        # type: () -> Dict[str, str]
        """Get kernel parameters required/used in Ceph clusters"""

        k_param = {}
        out, _, _ = call_throws(['sysctl', '-a'], verbosity=CallVerbosity.SILENT)
        if out:
            param_list = out.split('\n')
            param_dict = { param.split(" = ")[0]:param.split(" = ")[-1] for param in param_list}

            # return only desired parameters
            if 'net.ipv4.ip_nonlocal_bind' in param_dict:
                k_param['net.ipv4.ip_nonlocal_bind'] = param_dict['net.ipv4.ip_nonlocal_bind']

        return k_param

    def dump(self):
        # type: () -> str
        """Return the attributes of this HostFacts object as json"""
        # only JSON-serializable public members are included
        data = {k: getattr(self, k) for k in dir(self)
                if not k.startswith('_') and
                isinstance(getattr(self, k),
                           (float, int, str, list, dict, tuple))
                }
        return json.dumps(data, indent=2, sort_keys=True)
5553
5554 ##################################
5555
def command_gather_facts():
    # type: () -> None
    """gather_facts is intended to provide host related metadata to the caller"""
    host = HostFacts()
    print(host.dump())
5560
5561
5562 ##################################
5563
5564
5565 def _get_parser():
5566 # type: () -> argparse.ArgumentParser
5567 parser = argparse.ArgumentParser(
5568 description='Bootstrap Ceph daemons with systemd and containers.',
5569 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
5570 parser.add_argument(
5571 '--image',
5572 help='container image. Can also be set via the "CEPHADM_IMAGE" '
5573 'env var')
5574 parser.add_argument(
5575 '--docker',
5576 action='store_true',
5577 help='use docker instead of podman')
5578 parser.add_argument(
5579 '--data-dir',
5580 default=DATA_DIR,
5581 help='base directory for daemon data')
5582 parser.add_argument(
5583 '--log-dir',
5584 default=LOG_DIR,
5585 help='base directory for daemon logs')
5586 parser.add_argument(
5587 '--logrotate-dir',
5588 default=LOGROTATE_DIR,
5589 help='location of logrotate configuration files')
5590 parser.add_argument(
5591 '--unit-dir',
5592 default=UNIT_DIR,
5593 help='base directory for systemd units')
5594 parser.add_argument(
5595 '--verbose', '-v',
5596 action='store_true',
5597 help='Show debug-level log messages')
5598 parser.add_argument(
5599 '--timeout',
5600 type=int,
5601 default=DEFAULT_TIMEOUT,
5602 help='timeout in seconds')
5603 parser.add_argument(
5604 '--retry',
5605 type=int,
5606 default=DEFAULT_RETRY,
5607 help='max number of retries')
5608 parser.add_argument(
5609 '--env', '-e',
5610 action='append',
5611 default=[],
5612 help='set environment variable')
5613
5614 subparsers = parser.add_subparsers(help='sub-command')
5615
5616 parser_version = subparsers.add_parser(
5617 'version', help='get ceph version from container')
5618 parser_version.set_defaults(func=command_version)
5619
5620 parser_pull = subparsers.add_parser(
5621 'pull', help='pull latest image version')
5622 parser_pull.set_defaults(func=command_pull)
5623
5624 parser_inspect_image = subparsers.add_parser(
5625 'inspect-image', help='inspect local container image')
5626 parser_inspect_image.set_defaults(func=command_inspect_image)
5627
5628 parser_ls = subparsers.add_parser(
5629 'ls', help='list daemon instances on this host')
5630 parser_ls.set_defaults(func=command_ls)
5631 parser_ls.add_argument(
5632 '--no-detail',
5633 action='store_true',
5634 help='Do not include daemon status')
5635 parser_ls.add_argument(
5636 '--legacy-dir',
5637 default='/',
5638 help='base directory for legacy daemon data')
5639
5640 parser_list_networks = subparsers.add_parser(
5641 'list-networks', help='list IP networks')
5642 parser_list_networks.set_defaults(func=command_list_networks)
5643
5644 parser_adopt = subparsers.add_parser(
5645 'adopt', help='adopt daemon deployed with a different tool')
5646 parser_adopt.set_defaults(func=command_adopt)
5647 parser_adopt.add_argument(
5648 '--name', '-n',
5649 required=True,
5650 help='daemon name (type.id)')
5651 parser_adopt.add_argument(
5652 '--style',
5653 required=True,
5654 help='deployment style (legacy, ...)')
5655 parser_adopt.add_argument(
5656 '--cluster',
5657 default='ceph',
5658 help='cluster name')
5659 parser_adopt.add_argument(
5660 '--legacy-dir',
5661 default='/',
5662 help='base directory for legacy daemon data')
5663 parser_adopt.add_argument(
5664 '--config-json',
5665 help='Additional configuration information in JSON format')
5666 parser_adopt.add_argument(
5667 '--skip-firewalld',
5668 action='store_true',
5669 help='Do not configure firewalld')
5670 parser_adopt.add_argument(
5671 '--skip-pull',
5672 action='store_true',
5673 help='do not pull the latest image before adopting')
5674 parser_adopt.add_argument(
5675 '--force-start',
5676 action='store_true',
5677 help="start newly adoped daemon, even if it wasn't running previously")
5678 parser_adopt.add_argument(
5679 '--container-init',
5680 action='store_true',
5681 help='Run podman/docker with `--init`')
5682
5683 parser_rm_daemon = subparsers.add_parser(
5684 'rm-daemon', help='remove daemon instance')
5685 parser_rm_daemon.set_defaults(func=command_rm_daemon)
5686 parser_rm_daemon.add_argument(
5687 '--name', '-n',
5688 required=True,
5689 action=CustomValidation,
5690 help='daemon name (type.id)')
5691 parser_rm_daemon.add_argument(
5692 '--fsid',
5693 required=True,
5694 help='cluster FSID')
5695 parser_rm_daemon.add_argument(
5696 '--force',
5697 action='store_true',
5698 help='proceed, even though this may destroy valuable data')
5699 parser_rm_daemon.add_argument(
5700 '--force-delete-data',
5701 action='store_true',
5702 help='delete valuable daemon data instead of making a backup')
5703
5704 parser_rm_cluster = subparsers.add_parser(
5705 'rm-cluster', help='remove all daemons for a cluster')
5706 parser_rm_cluster.set_defaults(func=command_rm_cluster)
5707 parser_rm_cluster.add_argument(
5708 '--fsid',
5709 required=True,
5710 help='cluster FSID')
5711 parser_rm_cluster.add_argument(
5712 '--force',
5713 action='store_true',
5714 help='proceed, even though this may destroy valuable data')
5715
5716 parser_run = subparsers.add_parser(
5717 'run', help='run a ceph daemon, in a container, in the foreground')
5718 parser_run.set_defaults(func=command_run)
5719 parser_run.add_argument(
5720 '--name', '-n',
5721 required=True,
5722 help='daemon name (type.id)')
5723 parser_run.add_argument(
5724 '--fsid',
5725 required=True,
5726 help='cluster FSID')
5727
5728 parser_shell = subparsers.add_parser(
5729 'shell', help='run an interactive shell inside a daemon container')
5730 parser_shell.set_defaults(func=command_shell)
5731 parser_shell.add_argument(
5732 '--fsid',
5733 help='cluster FSID')
5734 parser_shell.add_argument(
5735 '--name', '-n',
5736 help='daemon name (type.id)')
5737 parser_shell.add_argument(
5738 '--config', '-c',
5739 help='ceph.conf to pass through to the container')
5740 parser_shell.add_argument(
5741 '--keyring', '-k',
5742 help='ceph.keyring to pass through to the container')
5743 parser_shell.add_argument(
5744 '--mount', '-m',
5745 help=("mount a file or directory in the container. "
5746 "Support multiple mounts. "
5747 "ie: `--mount /foo /bar:/bar`. "
5748 "When no destination is passed, default is /mnt"),
5749 nargs='+')
5750 parser_shell.add_argument(
5751 '--env', '-e',
5752 action='append',
5753 default=[],
5754 help='set environment variable')
5755 parser_shell.add_argument(
5756 'command', nargs=argparse.REMAINDER,
5757 help='command (optional)')
5758
5759 parser_enter = subparsers.add_parser(
5760 'enter', help='run an interactive shell inside a running daemon container')
5761 parser_enter.set_defaults(func=command_enter)
5762 parser_enter.add_argument(
5763 '--fsid',
5764 help='cluster FSID')
5765 parser_enter.add_argument(
5766 '--name', '-n',
5767 required=True,
5768 help='daemon name (type.id)')
5769 parser_enter.add_argument(
5770 'command', nargs=argparse.REMAINDER,
5771 help='command')
5772
5773 parser_ceph_volume = subparsers.add_parser(
5774 'ceph-volume', help='run ceph-volume inside a container')
5775 parser_ceph_volume.set_defaults(func=command_ceph_volume)
5776 parser_ceph_volume.add_argument(
5777 '--fsid',
5778 help='cluster FSID')
5779 parser_ceph_volume.add_argument(
5780 '--config-json',
5781 help='JSON file with config and (client.bootrap-osd) key')
5782 parser_ceph_volume.add_argument(
5783 '--config', '-c',
5784 help='ceph conf file')
5785 parser_ceph_volume.add_argument(
5786 '--keyring', '-k',
5787 help='ceph.keyring to pass through to the container')
5788 parser_ceph_volume.add_argument(
5789 'command', nargs=argparse.REMAINDER,
5790 help='command')
5791
5792 parser_unit = subparsers.add_parser(
5793 'unit', help='operate on the daemon\'s systemd unit')
5794 parser_unit.set_defaults(func=command_unit)
5795 parser_unit.add_argument(
5796 'command',
5797 help='systemd command (start, stop, restart, enable, disable, ...)')
5798 parser_unit.add_argument(
5799 '--fsid',
5800 help='cluster FSID')
5801 parser_unit.add_argument(
5802 '--name', '-n',
5803 required=True,
5804 help='daemon name (type.id)')
5805
5806 parser_logs = subparsers.add_parser(
5807 'logs', help='print journald logs for a daemon container')
5808 parser_logs.set_defaults(func=command_logs)
5809 parser_logs.add_argument(
5810 '--fsid',
5811 help='cluster FSID')
5812 parser_logs.add_argument(
5813 '--name', '-n',
5814 required=True,
5815 help='daemon name (type.id)')
5816 parser_logs.add_argument(
5817 'command', nargs='*',
5818 help='additional journalctl args')
5819
5820 parser_bootstrap = subparsers.add_parser(
5821 'bootstrap', help='bootstrap a cluster (mon + mgr daemons)')
5822 parser_bootstrap.set_defaults(func=command_bootstrap)
5823 parser_bootstrap.add_argument(
5824 '--config', '-c',
5825 help='ceph conf file to incorporate')
5826 parser_bootstrap.add_argument(
5827 '--mon-id',
5828 required=False,
5829 help='mon id (default: local hostname)')
5830 parser_bootstrap.add_argument(
5831 '--mon-addrv',
5832 help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])')
5833 parser_bootstrap.add_argument(
5834 '--mon-ip',
5835 help='mon IP')
5836 parser_bootstrap.add_argument(
5837 '--mgr-id',
5838 required=False,
5839 help='mgr id (default: randomly generated)')
5840 parser_bootstrap.add_argument(
5841 '--fsid',
5842 help='cluster FSID')
5843 parser_bootstrap.add_argument(
5844 '--output-dir',
5845 default='/etc/ceph',
5846 help='directory to write config, keyring, and pub key files')
5847 parser_bootstrap.add_argument(
5848 '--output-keyring',
5849 help='location to write keyring file with new cluster admin and mon keys')
5850 parser_bootstrap.add_argument(
5851 '--output-config',
5852 help='location to write conf file to connect to new cluster')
5853 parser_bootstrap.add_argument(
5854 '--output-pub-ssh-key',
5855 help='location to write the cluster\'s public SSH key')
5856 parser_bootstrap.add_argument(
5857 '--skip-ssh',
5858 action='store_true',
5859 help='skip setup of ssh key on local host')
5860 parser_bootstrap.add_argument(
5861 '--initial-dashboard-user',
5862 default='admin',
5863 help='Initial user for the dashboard')
5864 parser_bootstrap.add_argument(
5865 '--initial-dashboard-password',
5866 help='Initial password for the initial dashboard user')
5867 parser_bootstrap.add_argument(
5868 '--ssl-dashboard-port',
5869 type=int,
5870 default = 8443,
5871 help='Port number used to connect with dashboard using SSL')
5872 parser_bootstrap.add_argument(
5873 '--dashboard-key',
5874 type=argparse.FileType('r'),
5875 help='Dashboard key')
5876 parser_bootstrap.add_argument(
5877 '--dashboard-crt',
5878 type=argparse.FileType('r'),
5879 help='Dashboard certificate')
5880
5881 parser_bootstrap.add_argument(
5882 '--ssh-config',
5883 type=argparse.FileType('r'),
5884 help='SSH config')
5885 parser_bootstrap.add_argument(
5886 '--ssh-private-key',
5887 type=argparse.FileType('r'),
5888 help='SSH private key')
5889 parser_bootstrap.add_argument(
5890 '--ssh-public-key',
5891 type=argparse.FileType('r'),
5892 help='SSH public key')
5893 parser_bootstrap.add_argument(
5894 '--ssh-user',
5895 default='root',
5896 help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users')
5897
5898 parser_bootstrap.add_argument(
5899 '--skip-mon-network',
5900 action='store_true',
5901 help='set mon public_network based on bootstrap mon ip')
5902 parser_bootstrap.add_argument(
5903 '--skip-dashboard',
5904 action='store_true',
5905 help='do not enable the Ceph Dashboard')
5906 parser_bootstrap.add_argument(
5907 '--dashboard-password-noupdate',
5908 action='store_true',
5909 help='stop forced dashboard password change')
5910 parser_bootstrap.add_argument(
5911 '--no-minimize-config',
5912 action='store_true',
5913 help='do not assimilate and minimize the config file')
5914 parser_bootstrap.add_argument(
5915 '--skip-ping-check',
5916 action='store_true',
5917 help='do not verify that mon IP is pingable')
5918 parser_bootstrap.add_argument(
5919 '--skip-pull',
5920 action='store_true',
5921 help='do not pull the latest image before bootstrapping')
5922 parser_bootstrap.add_argument(
5923 '--skip-firewalld',
5924 action='store_true',
5925 help='Do not configure firewalld')
5926 parser_bootstrap.add_argument(
5927 '--allow-overwrite',
5928 action='store_true',
5929 help='allow overwrite of existing --output-* config/keyring/ssh files')
5930 parser_bootstrap.add_argument(
5931 '--allow-fqdn-hostname',
5932 action='store_true',
5933 help='allow hostname that is fully-qualified (contains ".")')
5934 parser_bootstrap.add_argument(
5935 '--skip-prepare-host',
5936 action='store_true',
5937 help='Do not prepare host')
5938 parser_bootstrap.add_argument(
5939 '--orphan-initial-daemons',
5940 action='store_true',
5941 help='Do not create initial mon, mgr, and crash service specs')
5942 parser_bootstrap.add_argument(
5943 '--skip-monitoring-stack',
5944 action='store_true',
5945 help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
5946 parser_bootstrap.add_argument(
5947 '--apply-spec',
5948 help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
5949
5950 parser_bootstrap.add_argument(
5951 '--shared_ceph_folder',
5952 metavar='CEPH_SOURCE_FOLDER',
5953 help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
5954
5955 parser_bootstrap.add_argument(
5956 '--registry-url',
5957 help='url for custom registry')
5958 parser_bootstrap.add_argument(
5959 '--registry-username',
5960 help='username for custom registry')
5961 parser_bootstrap.add_argument(
5962 '--registry-password',
5963 help='password for custom registry')
5964 parser_bootstrap.add_argument(
5965 '--registry-json',
5966 help='json file with custom registry login info (URL, Username, Password)')
5967 parser_bootstrap.add_argument(
5968 '--container-init',
5969 action='store_true',
5970 help='Run podman/docker with `--init`')
5971
5972 parser_deploy = subparsers.add_parser(
5973 'deploy', help='deploy a daemon')
5974 parser_deploy.set_defaults(func=command_deploy)
5975 parser_deploy.add_argument(
5976 '--name',
5977 required=True,
5978 action=CustomValidation,
5979 help='daemon name (type.id)')
5980 parser_deploy.add_argument(
5981 '--fsid',
5982 required=True,
5983 help='cluster FSID')
5984 parser_deploy.add_argument(
5985 '--config', '-c',
5986 help='config file for new daemon')
5987 parser_deploy.add_argument(
5988 '--config-json',
5989 help='Additional configuration information in JSON format')
5990 parser_deploy.add_argument(
5991 '--keyring',
5992 help='keyring for new daemon')
5993 parser_deploy.add_argument(
5994 '--key',
5995 help='key for new daemon')
5996 parser_deploy.add_argument(
5997 '--osd-fsid',
5998 help='OSD uuid, if creating an OSD container')
5999 parser_deploy.add_argument(
6000 '--skip-firewalld',
6001 action='store_true',
6002 help='Do not configure firewalld')
6003 parser_deploy.add_argument(
6004 '--tcp-ports',
6005 help='List of tcp ports to open in the host firewall')
6006 parser_deploy.add_argument(
6007 '--reconfig',
6008 action='store_true',
6009 help='Reconfigure a previously deployed daemon')
6010 parser_deploy.add_argument(
6011 '--allow-ptrace',
6012 action='store_true',
6013 help='Allow SYS_PTRACE on daemon container')
6014 parser_deploy.add_argument(
6015 '--container-init',
6016 action='store_true',
6017 help='Run podman/docker with `--init`')
6018
6019 parser_check_host = subparsers.add_parser(
6020 'check-host', help='check host configuration')
6021 parser_check_host.set_defaults(func=command_check_host)
6022 parser_check_host.add_argument(
6023 '--expect-hostname',
6024 help='Check that hostname matches an expected value')
6025
6026 parser_prepare_host = subparsers.add_parser(
6027 'prepare-host', help='prepare a host for cephadm use')
6028 parser_prepare_host.set_defaults(func=command_prepare_host)
6029 parser_prepare_host.add_argument(
6030 '--expect-hostname',
6031 help='Set hostname')
6032
6033 parser_add_repo = subparsers.add_parser(
6034 'add-repo', help='configure package repository')
6035 parser_add_repo.set_defaults(func=command_add_repo)
6036 parser_add_repo.add_argument(
6037 '--release',
6038 help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE))
6039 parser_add_repo.add_argument(
6040 '--version',
6041 help='use specific upstream version (x.y.z)')
6042 parser_add_repo.add_argument(
6043 '--dev',
6044 help='use specified bleeding edge build from git branch or tag')
6045 parser_add_repo.add_argument(
6046 '--dev-commit',
6047 help='use specified bleeding edge build from git commit')
6048 parser_add_repo.add_argument(
6049 '--gpg-url',
6050 help='specify alternative GPG key location')
6051 parser_add_repo.add_argument(
6052 '--repo-url',
6053 default='https://download.ceph.com',
6054 help='specify alternative repo location')
6055 # TODO: proxy?
6056
6057 parser_rm_repo = subparsers.add_parser(
6058 'rm-repo', help='remove package repository configuration')
6059 parser_rm_repo.set_defaults(func=command_rm_repo)
6060
6061 parser_install = subparsers.add_parser(
6062 'install', help='install ceph package(s)')
6063 parser_install.set_defaults(func=command_install)
6064 parser_install.add_argument(
6065 'packages', nargs='*',
6066 default=['cephadm'],
6067 help='packages')
6068
6069 parser_registry_login = subparsers.add_parser(
6070 'registry-login', help='log host into authenticated registry')
6071 parser_registry_login.set_defaults(func=command_registry_login)
6072 parser_registry_login.add_argument(
6073 '--registry-url',
6074 help='url for custom registry')
6075 parser_registry_login.add_argument(
6076 '--registry-username',
6077 help='username for custom registry')
6078 parser_registry_login.add_argument(
6079 '--registry-password',
6080 help='password for custom registry')
6081 parser_registry_login.add_argument(
6082 '--registry-json',
6083 help='json file with custom registry login info (URL, Username, Password)')
6084 parser_registry_login.add_argument(
6085 '--fsid',
6086 help='cluster FSID')
6087
6088 parser_gather_facts = subparsers.add_parser(
6089 'gather-facts', help='gather and return host related information (JSON format)')
6090 parser_gather_facts.set_defaults(func=command_gather_facts)
6091
6092 return parser
6093
6094
def _parse_args(av):
    """Parse the cephadm command line.

    :param av: list of argument strings (typically sys.argv[1:] or an
               injected argv when the script is piped to python3)
    :return: the parsed argparse.Namespace
    """
    parsed = _get_parser().parse_args(av)
    # Users may separate a remote command from cephadm's own options with
    # a bare '--' (e.g. `cephadm shell -- ls`); strip that separator so
    # subcommands receive only the real command words.
    command = getattr(parsed, 'command', None)
    if command and command[0] == "--":
        command.pop(0)
    return parsed
6101
6102
if __name__ == "__main__":

    # cephadm manages system-level state (containers, systemd units, files
    # under /var/lib/ceph), so refuse to run without root privileges.
    if os.geteuid() != 0:
        sys.stderr.write('ERROR: cephadm should be run as root\n')
        sys.exit(1)

    # Logger configuration: ensure the log directory exists before
    # dictConfig() installs the handlers defined in logging_config
    # (a file handler would fail to open its log file otherwise).
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    dictConfig(logging_config)
    logger = logging.getLogger()

    # allow argv to be injected (used when the script is piped into
    # python3; see the module docstring), otherwise use the real argv
    try:
        av = injected_argv # type: ignore
    except NameError:
        av = sys.argv[1:]
    logger.debug("%s\ncephadm %s" % ("-" * 80, av))
    args = _parse_args(av)

    # More verbose console output: with --verbose, lower only the console
    # handler's threshold to DEBUG (other handlers are left unchanged)
    if args.verbose:
        for handler in logger.handlers:
            if handler.name == "console":
                handler.setLevel(logging.DEBUG)

    # argparse only sets 'func' (via set_defaults) when a subcommand was
    # given, so its absence means no command on the command line
    if 'func' not in args:
        sys.stderr.write('No command specified; pass -h or --help for usage\n')
        sys.exit(1)

    # podman or docker?  Locate a container engine for every command
    # except check-host, which must work on hosts without one installed.
    if args.func != command_check_host:
        if args.docker:
            container_path = find_program('docker')
        else:
            # try engines in CONTAINER_PREFERENCE order (podman first);
            # a failed lookup is logged and the next engine is tried
            for i in CONTAINER_PREFERENCE:
                try:
                    container_path = find_program(i)
                    break
                except Exception as e:
                    logger.debug('Could not locate %s: %s' % (i, e))
        # prepare-host and add-repo may run before any engine is
        # installed; every other command requires one
        if not container_path and args.func != command_prepare_host\
                and args.func != command_add_repo:
            sys.stderr.write('Unable to locate any of %s\n' % CONTAINER_PREFERENCE)
            sys.exit(1)

    try:
        r = args.func()
    except Error as e:
        # with --verbose re-raise for a full traceback; otherwise print a
        # one-line error and exit non-zero
        if args.verbose:
            raise
        sys.stderr.write('ERROR: %s\n' % e)
        sys.exit(1)
    # commands returning None (or 0/falsy) are treated as success
    if not r:
        r = 0
    sys.exit(r)