1 #!/usr/bin/python3
2
3 DEFAULT_IMAGE = 'docker.io/ceph/ceph:v15'
4 DEFAULT_IMAGE_IS_MASTER = False
5 LATEST_STABLE_RELEASE = 'octopus'
6 DATA_DIR = '/var/lib/ceph'
7 LOG_DIR = '/var/log/ceph'
8 LOCK_DIR = '/run/cephadm'
9 LOGROTATE_DIR = '/etc/logrotate.d'
10 UNIT_DIR = '/etc/systemd/system'
11 LOG_DIR_MODE = 0o770
12 DATA_DIR_MODE = 0o700
13 CONTAINER_INIT = False
14 CONTAINER_PREFERENCE = ['podman', 'docker'] # prefer podman to docker
15 CUSTOM_PS1 = r'[ceph: \u@\h \W]\$ '
16 DEFAULT_TIMEOUT = None # in seconds
17 DEFAULT_RETRY = 10
18 SHELL_DEFAULT_CONF = '/etc/ceph/ceph.conf'
19 SHELL_DEFAULT_KEYRING = '/etc/ceph/ceph.client.admin.keyring'
20
21 """
22 You can invoke cephadm in two ways:
23
24 1. The normal way, at the command line.
25
26 2. By piping the script to the python3 binary. In this latter case, you should
27 prepend one or more lines to the beginning of the script.
28
29 For arguments,
30
31 injected_argv = [...]
32
33 e.g.,
34
35 injected_argv = ['ls']
36
37 For reading stdin from the '--config-json -' argument,
38
39 injected_stdin = '...'
40 """
41 import argparse
42 import datetime
43 import fcntl
44 import ipaddress
45 import json
46 import logging
47 from logging.config import dictConfig
48 import os
49 import platform
50 import pwd
51 import random
52 import select
53 import shutil
54 import socket
55 import string
56 import subprocess
57 import sys
58 import tempfile
59 import time
60 import errno
61 import struct
62 from enum import Enum
63 try:
64 from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, Callable, IO
65 except ImportError:
66 pass
67
68 import re
69 import uuid
70
71 from functools import wraps
72 from glob import glob
73 from threading import Thread
74
75 if sys.version_info >= (3, 0):
76 from io import StringIO
77 else:
78 from StringIO import StringIO
79
80 if sys.version_info >= (3, 2):
81 from configparser import ConfigParser
82 else:
83 from ConfigParser import SafeConfigParser
84
85 if sys.version_info >= (3, 0):
86 from urllib.request import urlopen
87 from urllib.error import HTTPError
88 else:
89 from urllib2 import urlopen, HTTPError
90
91 if sys.version_info > (3, 0):
92 unicode = str
93
94 container_path = ''
95 cached_stdin = None
96
97 DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ'
98
99 # Log and console output config
100 logging_config = {
101 'version': 1,
102 'disable_existing_loggers': True,
103 'formatters': {
104 'cephadm': {
105 'format': '%(asctime)s %(levelname)s %(message)s'
106 },
107 },
108 'handlers': {
109 'console':{
110 'level':'INFO',
111 'class':'logging.StreamHandler',
112 },
113 'log_file': {
114 'level': 'DEBUG',
115 'class': 'logging.handlers.RotatingFileHandler',
116 'formatter': 'cephadm',
117 'filename': '%s/cephadm.log' % LOG_DIR,
118 'maxBytes': 1024000,
119 'backupCount': 1,
120 }
121 },
122 'loggers': {
123 '': {
124 'level': 'DEBUG',
125 'handlers': ['console', 'log_file'],
126 }
127 }
128 }
129
130 class termcolor:
131 yellow = '\033[93m'
132 red = '\033[31m'
133 end = '\033[0m'
134
135
136 class Error(Exception):
137 pass
138
139
140 class TimeoutExpired(Error):
141 pass
142
143 ##################################
144
145
146 class Ceph(object):
147 daemons = ('mon', 'mgr', 'mds', 'osd', 'rgw', 'rbd-mirror',
148 'crash')
149
150 ##################################
151
152
153 class Monitoring(object):
154 """Define the configs for the monitoring containers"""
155
156 port_map = {
157 "prometheus": [9095], # Avoid default 9090, due to conflict with cockpit UI
158 "node-exporter": [9100],
159 "grafana": [3000],
160 "alertmanager": [9093, 9094],
161 }
162
163 components = {
164 "prometheus": {
165 "image": "docker.io/prom/prometheus:v2.18.1",
166 "cpus": '2',
167 "memory": '4GB',
168 "args": [
169 "--config.file=/etc/prometheus/prometheus.yml",
170 "--storage.tsdb.path=/prometheus",
171 "--web.listen-address=:{}".format(port_map['prometheus'][0]),
172 ],
173 "config-json-files": [
174 "prometheus.yml",
175 ],
176 },
177 "node-exporter": {
178 "image": "docker.io/prom/node-exporter:v0.18.1",
179 "cpus": "1",
180 "memory": "1GB",
181 "args": [
182 "--no-collector.timex",
183 ],
184 },
185 "grafana": {
186 "image": "docker.io/ceph/ceph-grafana:6.7.4",
187 "cpus": "2",
188 "memory": "4GB",
189 "args": [],
190 "config-json-files": [
191 "grafana.ini",
192 "provisioning/datasources/ceph-dashboard.yml",
193 "certs/cert_file",
194 "certs/cert_key",
195 ],
196 },
197 "alertmanager": {
198 "image": "docker.io/prom/alertmanager:v0.20.0",
199 "cpus": "2",
200 "memory": "2GB",
201 "args": [
202 "--web.listen-address=:{}".format(port_map['alertmanager'][0]),
203 "--cluster.listen-address=:{}".format(port_map['alertmanager'][1]),
204 ],
205 "config-json-files": [
206 "alertmanager.yml",
207 ],
208 "config-json-args": [
209 "peers",
210 ],
211 },
212 } # type: ignore
213
214 @staticmethod
215 def get_version(container_path, container_id, daemon_type):
216 # type: (str, str, str) -> str
217 """
218 :param daemon_type: either "prometheus", "alertmanager" or "node-exporter"
219 """
220 assert daemon_type in ('prometheus', 'alertmanager', 'node-exporter')
221 cmd = daemon_type.replace('-', '_')
222 code = -1
223 err = ''
224 version = ''
225 if daemon_type == 'alertmanager':
226 for cmd in ['alertmanager', 'prometheus-alertmanager']:
227 _, err, code = call([
228 container_path, 'exec', container_id, cmd,
229 '--version'
230 ], verbosity=CallVerbosity.SILENT)
231 if code == 0:
232 break
233 cmd = 'alertmanager' # reset cmd for version extraction
234 else:
235 _, err, code = call([
236 container_path, 'exec', container_id, cmd, '--version'
237 ])
238 if code == 0 and \
239 err.startswith('%s, version ' % cmd):
240 version = err.split(' ')[2]
241 return version
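# Illustrative sketch of the output being parsed: 'prometheus --version'
# writes something like 'prometheus, version 2.18.1 (branch: HEAD, ...)' to
# stderr, so err.split(' ')[2] above yields '2.18.1'. Exact output varies by
# build, hence the startswith() guard.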
242
243 ##################################
244
245
246 class NFSGanesha(object):
247 """Defines a NFS-Ganesha container"""
248
249 daemon_type = 'nfs'
250 entrypoint = '/usr/bin/ganesha.nfsd'
251 daemon_args = ['-F', '-L', 'STDERR']
252
253 required_files = ['ganesha.conf']
254
255 port_map = {
256 "nfs" : 2049,
257 }
258
259 def __init__(self,
260 fsid,
261 daemon_id,
262 config_json,
263 image=DEFAULT_IMAGE):
264 # type: (str, Union[int, str], Dict, str) -> None
265 self.fsid = fsid
266 self.daemon_id = daemon_id
267 self.image = image
268
269 # config-json options
270 self.pool = dict_get(config_json, 'pool', require=True)
271 self.namespace = dict_get(config_json, 'namespace')
272 self.userid = dict_get(config_json, 'userid')
273 self.extra_args = dict_get(config_json, 'extra_args', [])
274 self.files = dict_get(config_json, 'files', {})
275 self.rgw = dict_get(config_json, 'rgw', {})
276
277 # validate the supplied args
278 self.validate()
279
280 @classmethod
281 def init(cls, fsid, daemon_id):
282 # type: (str, Union[int, str]) -> NFSGanesha
283 return cls(fsid, daemon_id, get_parm(args.config_json), args.image)
284
285 def get_container_mounts(self, data_dir):
286 # type: (str) -> Dict[str, str]
287 mounts = dict()
288 mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
289 mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
290 mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z'
291 if self.rgw:
292 cluster = self.rgw.get('cluster', 'ceph')
293 rgw_user = self.rgw.get('user', 'admin')
294 mounts[os.path.join(data_dir, 'keyring.rgw')] = \
295 '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user)
296 return mounts
297
298 @staticmethod
299 def get_container_envs():
300 # type: () -> List[str]
301 envs = [
302 'CEPH_CONF=%s' % ('/etc/ceph/ceph.conf')
303 ]
304 return envs
305
306 @staticmethod
307 def get_version(container_id):
308 # type: (str) -> Optional[str]
309 version = None
310 out, err, code = call(
311 [container_path, 'exec', container_id,
312 NFSGanesha.entrypoint, '-v'])
313 if code == 0:
314 match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
315 if match:
316 version = match.group(1)
317 return version
318
319 def validate(self):
320 # type: () -> None
321 if not is_fsid(self.fsid):
322 raise Error('not an fsid: %s' % self.fsid)
323 if not self.daemon_id:
324 raise Error('invalid daemon_id: %s' % self.daemon_id)
325 if not self.image:
326 raise Error('invalid image: %s' % self.image)
327
328 # check for the required files
329 if self.required_files:
330 for fname in self.required_files:
331 if fname not in self.files:
332 raise Error('required file missing from config-json: %s' % fname)
333
334 # check for an RGW config
335 if self.rgw:
336 if not self.rgw.get('keyring'):
337 raise Error('RGW keyring is missing')
338 if not self.rgw.get('user'):
339 raise Error('RGW user is missing')
340
341 def get_daemon_name(self):
342 # type: () -> str
343 return '%s.%s' % (self.daemon_type, self.daemon_id)
344
345 def get_container_name(self, desc=None):
346 # type: (Optional[str]) -> str
347 cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
348 if desc:
349 cname = '%s-%s' % (cname, desc)
350 return cname
351
352 def get_daemon_args(self):
353 # type: () -> List[str]
354 return self.daemon_args + self.extra_args
355
356 def create_daemon_dirs(self, data_dir, uid, gid):
357 # type: (str, int, int) -> None
358 """Create files under the container data dir"""
359 if not os.path.isdir(data_dir):
360 raise OSError('data_dir is not a directory: %s' % (data_dir))
361
362 logger.info('Creating ganesha config...')
363
364 # create the ganesha conf dir
365 config_dir = os.path.join(data_dir, 'etc/ganesha')
366 makedirs(config_dir, uid, gid, 0o755)
367
368 # populate files from the config-json
369 for fname in self.files:
370 config_file = os.path.join(config_dir, fname)
371 config_content = dict_get_join(self.files, fname)
372 logger.info('Write file: %s' % (config_file))
373 with open(config_file, 'w') as f:
374 os.fchown(f.fileno(), uid, gid)
375 os.fchmod(f.fileno(), 0o600)
376 f.write(config_content)
377
378 # write the RGW keyring
379 if self.rgw:
380 keyring_path = os.path.join(data_dir, 'keyring.rgw')
381 with open(keyring_path, 'w') as f:
382 os.fchmod(f.fileno(), 0o600)
383 os.fchown(f.fileno(), uid, gid)
384 f.write(self.rgw.get('keyring', ''))
385
386 def get_rados_grace_container(self, action):
387 # type: (str) -> CephContainer
388 """Container for a ganesha action on the grace db"""
389 entrypoint = '/usr/bin/ganesha-rados-grace'
390
391 assert self.pool
392 args=['--pool', self.pool]
393 if self.namespace:
394 args += ['--ns', self.namespace]
395 if self.userid:
396 args += ['--userid', self.userid]
397 args += [action, self.get_daemon_name()]
398
399 data_dir = get_data_dir(self.fsid, self.daemon_type, self.daemon_id)
400 volume_mounts = self.get_container_mounts(data_dir)
401 envs = self.get_container_envs()
402
403 logger.info('Creating RADOS grace for action: %s' % action)
404 c = CephContainer(
405 image=self.image,
406 entrypoint=entrypoint,
407 args=args,
408 volume_mounts=volume_mounts,
409 cname=self.get_container_name(desc='grace-%s' % action),
410 envs=envs
411 )
412 return c
413
414 ##################################
415
416
417 class CephIscsi(object):
418 """Defines a Ceph-Iscsi container"""
419
420 daemon_type = 'iscsi'
421 entrypoint = '/usr/bin/rbd-target-api'
422
423 required_files = ['iscsi-gateway.cfg']
424
425 def __init__(self,
426 fsid,
427 daemon_id,
428 config_json,
429 image=DEFAULT_IMAGE):
430 # type: (str, Union[int, str], Dict, str) -> None
431 self.fsid = fsid
432 self.daemon_id = daemon_id
433 self.image = image
434
435 # config-json options
436 self.files = dict_get(config_json, 'files', {})
437
438 # validate the supplied args
439 self.validate()
440
441 @classmethod
442 def init(cls, fsid, daemon_id):
443 # type: (str, Union[int, str]) -> CephIscsi
444 return cls(fsid, daemon_id, get_parm(args.config_json), args.image)
445
446 @staticmethod
447 def get_container_mounts(data_dir, log_dir):
448 # type: (str, str) -> Dict[str, str]
449 mounts = dict()
450 mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
451 mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
452 mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
453 mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
454 mounts[log_dir] = '/var/log/rbd-target-api:z'
455 mounts['/dev'] = '/dev'
456 return mounts
457
458 @staticmethod
459 def get_container_binds():
460 # type: () -> List[List[str]]
461 binds = []
462 lib_modules = ['type=bind',
463 'source=/lib/modules',
464 'destination=/lib/modules',
465 'ro=true']
466 binds.append(lib_modules)
467 return binds
468
469 @staticmethod
470 def get_version(container_id):
471 # type: (str) -> Optional[str]
472 version = None
473 out, err, code = call(
474 [container_path, 'exec', container_id,
475 '/usr/bin/python3', '-c', "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"])
476 if code == 0:
477 version = out.strip()
478 return version
479
480 def validate(self):
481 # type: () -> None
482 if not is_fsid(self.fsid):
483 raise Error('not an fsid: %s' % self.fsid)
484 if not self.daemon_id:
485 raise Error('invalid daemon_id: %s' % self.daemon_id)
486 if not self.image:
487 raise Error('invalid image: %s' % self.image)
488
489 # check for the required files
490 if self.required_files:
491 for fname in self.required_files:
492 if fname not in self.files:
493 raise Error('required file missing from config-json: %s' % fname)
494
495 def get_daemon_name(self):
496 # type: () -> str
497 return '%s.%s' % (self.daemon_type, self.daemon_id)
498
499 def get_container_name(self, desc=None):
500 # type: (Optional[str]) -> str
501 cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
502 if desc:
503 cname = '%s-%s' % (cname, desc)
504 return cname
505
506 def create_daemon_dirs(self, data_dir, uid, gid):
507 # type: (str, int, int) -> None
508 """Create files under the container data dir"""
509 if not os.path.isdir(data_dir):
510 raise OSError('data_dir is not a directory: %s' % (data_dir))
511
512 logger.info('Creating ceph-iscsi config...')
513 configfs_dir = os.path.join(data_dir, 'configfs')
514 makedirs(configfs_dir, uid, gid, 0o755)
515
516 # populate files from the config-json
517 for fname in self.files:
518 config_file = os.path.join(data_dir, fname)
519 config_content = dict_get_join(self.files, fname)
520 logger.info('Write file: %s' % (config_file))
521 with open(config_file, 'w') as f:
522 os.fchown(f.fileno(), uid, gid)
523 os.fchmod(f.fileno(), 0o600)
524 f.write(config_content)
525
526 @staticmethod
527 def configfs_mount_umount(data_dir, mount=True):
528 # type: (str, bool) -> List[str]
529 mount_path = os.path.join(data_dir, 'configfs')
530 if mount:
531 cmd = "if ! grep -qs {0} /proc/mounts; then " \
532 "mount -t configfs none {0}; fi".format(mount_path)
533 else:
534 cmd = "if grep -qs {0} /proc/mounts; then " \
535 "umount {0}; fi".format(mount_path)
536 return cmd.split()
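# Illustrative example (assuming data_dir='/var/lib/ceph/<fsid>/iscsi.a'):
# with mount=True the generated shell fragment is
#   if ! grep -qs /var/lib/ceph/<fsid>/iscsi.a/configfs /proc/mounts; then
#   mount -t configfs none /var/lib/ceph/<fsid>/iscsi.a/configfs; fi
# which callers are expected to re-join with spaces before handing to a shell.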
537
538 def get_tcmu_runner_container(self):
539 # type: () -> CephContainer
540 tcmu_container = get_container(self.fsid, self.daemon_type, self.daemon_id)
541 tcmu_container.entrypoint = "/usr/bin/tcmu-runner"
542 tcmu_container.cname = self.get_container_name(desc='tcmu')
543 # remove extra container args for tcmu container.
544 # extra args could cause issue with forking service type
545 tcmu_container.container_args = []
546 return tcmu_container
547
548 ##################################
549
550
551 class CustomContainer(object):
552 """Defines a custom container"""
553 daemon_type = 'container'
554
555 def __init__(self, fsid: str, daemon_id: Union[int, str],
556 config_json: Dict, image: str) -> None:
557 self.fsid = fsid
558 self.daemon_id = daemon_id
559 self.image = image
560
561 # config-json options
562 self.entrypoint = dict_get(config_json, 'entrypoint')
563 self.uid = dict_get(config_json, 'uid', 65534) # nobody
564 self.gid = dict_get(config_json, 'gid', 65534) # nobody
565 self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
566 self.args = dict_get(config_json, 'args', [])
567 self.envs = dict_get(config_json, 'envs', [])
568 self.privileged = dict_get(config_json, 'privileged', False)
569 self.bind_mounts = dict_get(config_json, 'bind_mounts', [])
570 self.ports = dict_get(config_json, 'ports', [])
571 self.dirs = dict_get(config_json, 'dirs', [])
572 self.files = dict_get(config_json, 'files', {})
573
574 @classmethod
575 def init(cls, fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer':
576 return cls(fsid, daemon_id, get_parm(args.config_json), args.image)
577
578 def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
579 """
580 Create dirs/files below the container data directory.
581 """
582 logger.info('Creating custom container configuration '
583 'dirs/files in {} ...'.format(data_dir))
584
585 if not os.path.isdir(data_dir):
586 raise OSError('data_dir is not a directory: %s' % data_dir)
587
588 for dir_path in self.dirs:
589 logger.info('Creating directory: {}'.format(dir_path))
590 dir_path = os.path.join(data_dir, dir_path.strip('/'))
591 makedirs(dir_path, uid, gid, 0o755)
592
593 for file_path in self.files:
594 logger.info('Creating file: {}'.format(file_path))
595 content = dict_get_join(self.files, file_path)
596 file_path = os.path.join(data_dir, file_path.strip('/'))
597 with open(file_path, 'w', encoding='utf-8') as f:
598 os.fchown(f.fileno(), uid, gid)
599 os.fchmod(f.fileno(), 0o600)
600 f.write(content)
601
602 def get_daemon_args(self) -> List[str]:
603 return []
604
605 def get_container_args(self) -> List[str]:
606 return self.args
607
608 def get_container_envs(self) -> List[str]:
609 return self.envs
610
611 def get_container_mounts(self, data_dir: str) -> Dict[str, str]:
612 """
613 Get the volume mounts. Relative source paths will be located below
614 `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
615
616 Example:
617 {
618 /foo/conf: /conf
619 foo/conf: /conf
620 }
621 becomes
622 {
623 /foo/conf: /conf
624 /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
625 }
626 """
627 mounts = {}
628 for source, destination in self.volume_mounts.items():
629 source = os.path.join(data_dir, source)
630 mounts[source] = destination
631 return mounts
632
633 def get_container_binds(self, data_dir: str) -> List[List[str]]:
634 """
635 Get the bind mounts. Relative `source=...` paths will be located below
636 `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
637
638 Example:
639 [
640 'type=bind',
641 'source=lib/modules',
642 'destination=/lib/modules',
643 'ro=true'
644 ]
645 becomes
646 [
647 ...
648 'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
649 ...
650 ]
651 """
652 binds = self.bind_mounts.copy()
653 for bind in binds:
654 for index, value in enumerate(bind):
655 match = re.match(r'^source=(.+)$', value)
656 if match:
657 bind[index] = 'source={}'.format(os.path.join(
658 data_dir, match.group(1)))
659 return binds
660
661 ##################################
662
663
664 def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
665 """
666 Helper function to get a key from a dictionary.
667 :param d: The dictionary to process.
668 :param key: The name of the key to get.
669 :param default: The default value in case the key does not
670 exist. Default is `None`.
671 :param require: Set to `True` if the key is required. An
672 exception will be raised if the key does not exist in
673 the given dictionary.
674 :return: Returns the value of the given key.
675 :raises: :exc:`self.Error` if the given key does not exist
676 and `require` is set to `True`.
677 """
678 if require and key not in d.keys():
679 raise Error('{} missing from dict'.format(key))
680 return d.get(key, default)
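# Illustrative usage:
#   dict_get({'a': 1}, 'a')                # -> 1
#   dict_get({'a': 1}, 'b', default=2)     # -> 2
#   dict_get({'a': 1}, 'b', require=True)  # raises Error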
681
682 ##################################
683
684
685 def dict_get_join(d: Dict, key: str) -> Any:
686 """
687 Helper function to get the value of a given key from a dictionary.
688 `List` values will be converted to a string by joining them with a
689 line break.
690 :param d: The dictionary to process.
691 :param key: The name of the key to get.
692 :return: Returns the value of the given key. If it was a `list`, it
693 will be joined with a line break.
694 """
695 value = d.get(key)
696 if isinstance(value, list):
697 value = '\n'.join(map(str, value))
698 return value
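# Illustrative usage:
#   dict_get_join({'key': ['a', 'b']}, 'key')  # -> 'a\nb'
#   dict_get_join({'key': 'abc'}, 'key')       # -> 'abc'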
699
700 ##################################
701
702
703 def get_supported_daemons():
704 # type: () -> List[str]
705 supported_daemons = list(Ceph.daemons)
706 supported_daemons.extend(Monitoring.components)
707 supported_daemons.append(NFSGanesha.daemon_type)
708 supported_daemons.append(CephIscsi.daemon_type)
709 supported_daemons.append(CustomContainer.daemon_type)
710 assert len(supported_daemons) == len(set(supported_daemons))
711 return supported_daemons
712
713 ##################################
714
715
716 def attempt_bind(s, address, port):
717 # type: (socket.socket, str, int) -> None
718 try:
719 s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
720 s.bind((address, port))
721 except (socket.error, OSError) as e: # py2 and py3
722 msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
723 logger.warning(msg)
724 if e.errno == errno.EADDRINUSE:
725 raise OSError(msg)
726 elif e.errno == errno.EADDRNOTAVAIL:
727 pass
728 finally:
729 s.close()
730
731
732 def port_in_use(port_num):
733 # type: (int) -> bool
734 """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
735 logger.info('Verifying port %d ...' % port_num)
736 try:
737 s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
738 attempt_bind(s, '0.0.0.0', port_num)
739
740 s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
741 attempt_bind(s, '::', port_num)
742 except OSError:
743 return True
744 else:
745 return False
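# Illustrative behaviour: port_in_use(9100) attempts to bind 0.0.0.0:9100 and
# [::]:9100; an EADDRINUSE from either bind reports the port as taken, while
# EADDRNOTAVAIL (e.g. a host without IPv6) is deliberately ignored.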
746
747
748 def check_ip_port(ip, port):
749 # type: (str, int) -> None
750 if not args.skip_ping_check:
751 logger.info('Verifying IP %s port %d ...' % (ip, port))
752 if is_ipv6(ip):
753 s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
754 ip = unwrap_ipv6(ip)
755 else:
756 s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
757 try:
758 attempt_bind(s, ip, port)
759 except OSError as e:
760 raise Error(e)
761
762 ##################################
763
764 # this is an abbreviated version of
765 # https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py
766 # that drops all of the cross-platform compatibility code (Unix/Linux only).
767
768 try:
769 TimeoutError
770 except NameError:
771 TimeoutError = OSError
772
773
774 class Timeout(TimeoutError):
775 """
776 Raised when the lock could not be acquired in *timeout*
777 seconds.
778 """
779
780 def __init__(self, lock_file):
781 """
782 """
783 #: The path of the file lock.
784 self.lock_file = lock_file
785 return None
786
787 def __str__(self):
788 temp = "The file lock '{}' could not be acquired."\
789 .format(self.lock_file)
790 return temp
791
792
793 class _Acquire_ReturnProxy(object):
794 def __init__(self, lock):
795 self.lock = lock
796 return None
797
798 def __enter__(self):
799 return self.lock
800
801 def __exit__(self, exc_type, exc_value, traceback):
802 self.lock.release()
803 return None
804
805
806 class FileLock(object):
807 def __init__(self, name, timeout=-1):
808 if not os.path.exists(LOCK_DIR):
809 os.mkdir(LOCK_DIR, 0o700)
810 self._lock_file = os.path.join(LOCK_DIR, name + '.lock')
811
812 # The file descriptor for the *_lock_file* as it is returned by the
813 # os.open() function.
814 # This file lock is only NOT None, if the object currently holds the
815 # lock.
816 self._lock_file_fd = None
817 self.timeout = timeout
818 # The lock counter is used for implementing the nested locking
819 # mechanism. Whenever the lock is acquired, the counter is increased and
820 # the lock is only released, when this value is 0 again.
821 self._lock_counter = 0
822 return None
823
824 @property
825 def is_locked(self):
826 return self._lock_file_fd is not None
827
828 def acquire(self, timeout=None, poll_intervall=0.05):
829 """
830 Acquires the file lock or fails with a :exc:`Timeout` error.
831 .. code-block:: python
832 # You can use this method in the context manager (recommended)
833 with lock.acquire():
834 pass
835 # Or use an equivalent try-finally construct:
836 lock.acquire()
837 try:
838 pass
839 finally:
840 lock.release()
841 :arg float timeout:
842 The maximum time waited for the file lock.
843 If ``timeout < 0``, there is no timeout and this method will
844 block until the lock could be acquired.
845 If ``timeout`` is None, the default :attr:`~timeout` is used.
846 :arg float poll_intervall:
847 We check once in *poll_intervall* seconds if we can acquire the
848 file lock.
849 :raises Timeout:
850 if the lock could not be acquired in *timeout* seconds.
851 .. versionchanged:: 2.0.0
852 This method returns now a *proxy* object instead of *self*,
853 so that it can be used in a with statement without side effects.
854 """
855 # Use the default timeout, if no timeout is provided.
856 if timeout is None:
857 timeout = self.timeout
858
859 # Increment the number right at the beginning.
860 # We can still undo it, if something fails.
861 self._lock_counter += 1
862
863 lock_id = id(self)
864 lock_filename = self._lock_file
865 start_time = time.time()
866 try:
867 while True:
868 if not self.is_locked:
869 logger.debug('Acquiring lock %s on %s', lock_id,
870 lock_filename)
871 self._acquire()
872
873 if self.is_locked:
874 logger.debug('Lock %s acquired on %s', lock_id,
875 lock_filename)
876 break
877 elif timeout >= 0 and time.time() - start_time > timeout:
878 logger.warning('Timeout acquiring lock %s on %s', lock_id,
879 lock_filename)
880 raise Timeout(self._lock_file)
881 else:
882 logger.debug(
883 'Lock %s not acquired on %s, waiting %s seconds ...',
884 lock_id, lock_filename, poll_intervall
885 )
886 time.sleep(poll_intervall)
887 except: # noqa
888 # Something did go wrong, so decrement the counter.
889 self._lock_counter = max(0, self._lock_counter - 1)
890
891 raise
892 return _Acquire_ReturnProxy(lock = self)
893
894 def release(self, force=False):
895 """
896 Releases the file lock.
897 Please note that the lock is only completely released if the lock
898 counter is 0.
899 Also note that the lock file itself is not automatically deleted.
900 :arg bool force:
901 If true, the lock counter is ignored and the lock is released in
902 every case.
903 """
904 if self.is_locked:
905 self._lock_counter -= 1
906
907 if self._lock_counter == 0 or force:
908 lock_id = id(self)
909 lock_filename = self._lock_file
910
911 logger.debug('Releasing lock %s on %s', lock_id, lock_filename)
912 self._release()
913 self._lock_counter = 0
914 logger.debug('Lock %s released on %s', lock_id, lock_filename)
915
916 return None
917
918 def __enter__(self):
919 self.acquire()
920 return self
921
922 def __exit__(self, exc_type, exc_value, traceback):
923 self.release()
924 return None
925
926 def __del__(self):
927 self.release(force=True)
928 return None
929
930 def _acquire(self):
931 open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
932 fd = os.open(self._lock_file, open_mode)
933
934 try:
935 fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
936 except (IOError, OSError):
937 os.close(fd)
938 else:
939 self._lock_file_fd = fd
940 return None
941
942 def _release(self):
943 # Do not remove the lockfile:
944 #
945 # https://github.com/benediktschmitt/py-filelock/issues/31
946 # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
947 fd = self._lock_file_fd
948 self._lock_file_fd = None
949 fcntl.flock(fd, fcntl.LOCK_UN) # type: ignore
950 os.close(fd) # type: ignore
951 return None
952
953
954 ##################################
955 # Popen wrappers, lifted from ceph-volume
956
957 class CallVerbosity(Enum):
958 SILENT = 0
959 # log stdout/stderr to logger.debug
960 DEBUG = 1
961 # on a non-zero exit status, dump the command's stdout and
962 # stderr to the log at info level
963 VERBOSE_ON_FAILURE = 2
964 # log at info (instead of debug) level.
965 VERBOSE = 3
966
967
968 def call(command: List[str],
969 desc: Optional[str] = None,
970 verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
971 timeout: Optional[int] = DEFAULT_TIMEOUT,
972 **kwargs) -> Tuple[str, str, int]:
973 """
974 Wrap subprocess.Popen to
975
976 - log stdout/stderr to a logger,
977 - decode utf-8
978 - cleanly return out, err, returncode
979
980 :param timeout: timeout in seconds
981 """
982 if desc is None:
983 desc = command[0]
984 if desc:
985 desc += ': '
986 timeout = timeout or args.timeout
987
988 logger.debug("Running command: %s" % ' '.join(command))
989 process = subprocess.Popen(
990 command,
991 stdout=subprocess.PIPE,
992 stderr=subprocess.PIPE,
993 close_fds=True,
994 **kwargs
995 )
996 # get current p.stdout flags, add O_NONBLOCK
997 assert process.stdout is not None
998 assert process.stderr is not None
999 stdout_flags = fcntl.fcntl(process.stdout, fcntl.F_GETFL)
1000 stderr_flags = fcntl.fcntl(process.stderr, fcntl.F_GETFL)
1001 fcntl.fcntl(process.stdout, fcntl.F_SETFL, stdout_flags | os.O_NONBLOCK)
1002 fcntl.fcntl(process.stderr, fcntl.F_SETFL, stderr_flags | os.O_NONBLOCK)
1003
1004 out = ''
1005 err = ''
1006 reads = None
1007 stop = False
1008 out_buffer = '' # partial line (no newline yet)
1009 err_buffer = '' # partial line (no newline yet)
1010 start_time = time.time()
1011 end_time = None
1012 if timeout:
1013 end_time = start_time + timeout
1014 while not stop:
1015 if end_time and (time.time() >= end_time):
1016 stop = True
1017 if process.poll() is None:
1018 logger.info(desc + 'timeout after %s seconds' % timeout)
1019 process.kill()
1020 if reads and process.poll() is not None:
1021 # we want to stop, but first read off anything remaining
1022 # on stdout/stderr
1023 stop = True
1024 else:
1025 reads, _, _ = select.select(
1026 [process.stdout.fileno(), process.stderr.fileno()],
1027 [], [], timeout
1028 )
1029 for fd in reads:
1030 try:
1031 message_b = os.read(fd, 1024)
1032 if isinstance(message_b, bytes):
1033 message = message_b.decode('utf-8')
1034 elif isinstance(message_b, str):
1035 message = message_b
1036 if stop and message:
1037 # process has terminated, but have more to read still, so not stopping yet
1038 # (os.read returns '' when it encounters EOF)
1039 stop = False
1040 if not message:
1041 continue
1042 if fd == process.stdout.fileno():
1043 out += message
1044 message = out_buffer + message
1045 lines = message.split('\n')
1046 out_buffer = lines.pop()
1047 for line in lines:
1048 if verbosity == CallVerbosity.VERBOSE:
1049 logger.info(desc + 'stdout ' + line)
1050 elif verbosity != CallVerbosity.SILENT:
1051 logger.debug(desc + 'stdout ' + line)
1052 elif fd == process.stderr.fileno():
1053 err += message
1054 message = err_buffer + message
1055 lines = message.split('\n')
1056 err_buffer = lines.pop()
1057 for line in lines:
1058 if verbosity == CallVerbosity.VERBOSE:
1059 logger.info(desc + 'stderr ' + line)
1060 elif verbosity != CallVerbosity.SILENT:
1061 logger.debug(desc + 'stderr ' + line)
1062 else:
1063 assert False
1064 except (IOError, OSError):
1065 pass
1066 if verbosity == CallVerbosity.VERBOSE:
1067 logger.debug(desc + 'profile rt=%s, stop=%s, exit=%s, reads=%s'
1068 % (time.time()-start_time, stop, process.poll(), reads))
1069
1070 returncode = process.wait()
1071
1072 if out_buffer != '':
1073 if verbosity == CallVerbosity.VERBOSE:
1074 logger.info(desc + 'stdout ' + out_buffer)
1075 elif verbosity != CallVerbosity.SILENT:
1076 logger.debug(desc + 'stdout ' + out_buffer)
1077 if err_buffer != '':
1078 if verbosity == CallVerbosity.VERBOSE:
1079 logger.info(desc + 'stderr ' + err_buffer)
1080 elif verbosity != CallVerbosity.SILENT:
1081 logger.debug(desc + 'stderr ' + err_buffer)
1082
1083 if returncode != 0 and verbosity == CallVerbosity.VERBOSE_ON_FAILURE:
1084 # dump stdout + stderr
1085 logger.info('Non-zero exit code %d from %s' % (returncode, ' '.join(command)))
1086 for line in out.splitlines():
1087 logger.info(desc + 'stdout ' + line)
1088 for line in err.splitlines():
1089 logger.info(desc + 'stderr ' + line)
1090
1091 return out, err, returncode
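# Illustrative usage (the command shown is hypothetical):
#   out, err, code = call(['systemctl', 'is-active', 'chronyd'],
#                         verbosity=CallVerbosity.DEBUG, timeout=10)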
1092
1093
1094 def call_throws(command: List[str],
1095 desc: Optional[str] = None,
1096 verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
1097 timeout: Optional[int] = DEFAULT_TIMEOUT,
1098 **kwargs) -> Tuple[str, str, int]:
1099 out, err, ret = call(command, desc, verbosity, timeout, **kwargs)
1100 if ret:
1101 raise RuntimeError('Failed command: %s' % ' '.join(command))
1102 return out, err, ret
1103
1104
1105 def call_timeout(command, timeout):
1106 # type: (List[str], int) -> int
1107
1108 logger.debug('Running command (timeout=%s): %s'
1109 % (timeout, ' '.join(command)))
1110
1111 def raise_timeout(command, timeout):
1112 # type: (List[str], int) -> NoReturn
1113 msg = 'Command \'%s\' timed out after %s seconds' % (command, timeout)
1114 logger.debug(msg)
1115 raise TimeoutExpired(msg)
1116
1117 def call_timeout_py2(command, timeout):
1118 # type: (List[str], int) -> int
1119 proc = subprocess.Popen(command)
1120 thread = Thread(target=proc.wait)
1121 thread.start()
1122 thread.join(timeout)
1123 if thread.is_alive():
1124 proc.kill()
1125 thread.join()
1126 raise_timeout(command, timeout)
1127 return proc.returncode
1128
1129 def call_timeout_py3(command, timeout):
1130 # type: (List[str], int) -> int
1131 try:
1132 return subprocess.call(command, timeout=timeout)
1133 except subprocess.TimeoutExpired:
1134 raise_timeout(command, timeout)
1135
1136 ret = 1
1137 if sys.version_info >= (3, 3):
1138 ret = call_timeout_py3(command, timeout)
1139 else:
1140 # py2 subprocess has no timeout arg
1141 ret = call_timeout_py2(command, timeout)
1142 return ret
1143
1144 ##################################
1145
1146
1147 def is_available(what, func):
1148 # type: (str, Callable[[], bool]) -> None
1149 """
1150 Wait for a service to become available
1151
1152 :param what: the name of the service
1153 :param func: the callable object that determines availability
1154 """
1155 retry = args.retry
1156 logger.info('Waiting for %s...' % what)
1157 num = 1
1158 while True:
1159 if func():
1160 logger.info('%s is available'
1161 % what)
1162 break
1163 elif num > retry:
1164 raise Error('%s not available after %s tries'
1165 % (what, retry))
1166
1167 logger.info('%s not available, waiting (%s/%s)...'
1168 % (what, num, retry))
1169
1170 num += 1
1171 time.sleep(1)
1172
1173
1174 def read_config(fn):
1175 # type: (Optional[str]) -> ConfigParser
1176 # bend over backwards here because py2's ConfigParser doesn't like
1177 # whitespace before config option names (e.g., '\n foo = bar\n').
1178 # Yeesh!
1179 if sys.version_info >= (3, 2):
1180 cp = ConfigParser()
1181 else:
1182 cp = SafeConfigParser()
1183
1184 if fn:
1185 with open(fn, 'r') as f:
1186 raw_conf = f.read()
1187 nice_conf = re.sub(r'\n(\s)+', r'\n', raw_conf)
1188 s_io = StringIO(nice_conf)
1189 if sys.version_info >= (3, 2):
1190 cp.read_file(s_io)
1191 else:
1192 cp.readfp(s_io)
1193
1194 return cp
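# Illustrative example: a fragment like '[global]\n\tfsid = abc\n' is
# rewritten to '[global]\nfsid = abc\n' before parsing, so indented option
# names survive py2's SafeConfigParser.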
1195
1196
1197 def pathify(p):
1198 # type: (str) -> str
1199 p = os.path.expanduser(p)
1200 return os.path.abspath(p)
1201
1202
1203 def get_file_timestamp(fn):
1204 # type: (str) -> Optional[str]
1205 try:
1206 mt = os.path.getmtime(fn)
1207 return datetime.datetime.fromtimestamp(
1208 mt, tz=datetime.timezone.utc
1209 ).strftime(DATEFMT)
1210 except Exception:
1211 return None
1212
1213
1214 def try_convert_datetime(s):
1215 # type: (str) -> Optional[str]
1216 # This is super irritating because
1217 # 1) podman and docker use different formats
1218 # 2) python's strptime can't parse either one
1219 #
1220 # I've seen:
1221 # docker 18.09.7: 2020-03-03T09:21:43.636153304Z
1222 # podman 1.7.0: 2020-03-03T15:52:30.136257504-06:00
1223 # 2020-03-03 15:52:30.136257504 -0600 CST
1224 # (In the podman case, there is a different string format for
1225 # 'inspect' and 'inspect --format {{.Created}}'!!)
1226
1227 # In *all* cases, the 9 digit second precision is too much for
1228 # python's strptime. Shorten it to 6 digits.
1229 p = re.compile(r'(\.[\d]{6})[\d]*')
1230 s = p.sub(r'\1', s)
1231
1232 # replace trailing Z with -0000, since (on python 3.6.8) it won't parse
1233 if s and s[-1] == 'Z':
1234 s = s[:-1] + '-0000'
1235
1236 # cut off the redundant 'CST' part that strptime can't parse, if
1237 # present.
1238 v = s.split(' ')
1239 s = ' '.join(v[0:3])
1240
1241 # try parsing with several format strings
1242 fmts = [
1243 '%Y-%m-%dT%H:%M:%S.%f%z',
1244 '%Y-%m-%d %H:%M:%S.%f %z',
1245 ]
1246 for f in fmts:
1247 try:
1248 # return timestamp normalized to UTC, rendered as DATEFMT.
1249 return datetime.datetime.strptime(s, f).astimezone(tz=datetime.timezone.utc).strftime(DATEFMT)
1250 except ValueError:
1251 pass
1252 return None
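# Illustrative examples:
#   try_convert_datetime('2020-03-03T09:21:43.636153304Z')
#     -> '2020-03-03T09:21:43.636153Z'
#   try_convert_datetime('2020-03-03 15:52:30.136257504 -0600 CST')
#     -> '2020-03-03T21:52:30.136257Z'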
1253
1254
1255 def get_podman_version():
1256 # type: () -> Tuple[int, ...]
1257 if 'podman' not in container_path:
1258 raise ValueError('not using podman')
1259 out, _, _ = call_throws([container_path, '--version'])
1260 return _parse_podman_version(out)
1261
1262
1263 def _parse_podman_version(out):
1264 # type: (str) -> Tuple[int, ...]
1265 _, _, version_str = out.strip().split()
1266
1267 def to_int(val, org_e=None):
1268 if not val and org_e:
1269 raise org_e
1270 try:
1271 return int(val)
1272 except ValueError as e:
1273 return to_int(val[0:-1], org_e or e)
1274
1275 return tuple(map(to_int, version_str.split('.')))
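# Illustrative example: 'podman version 1.6.2-dev' parses to (1, 6, 2);
# to_int() strips trailing non-digit characters one at a time until the
# version component parses as an int.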
1276
1277
1278 def get_hostname():
1279 # type: () -> str
1280 return socket.gethostname()
1281
1282
1283 def get_fqdn():
1284 # type: () -> str
1285 return socket.getfqdn() or socket.gethostname()
1286
1287
1288 def get_arch():
1289 # type: () -> str
1290 return platform.uname().machine
1291
1292
1293 def generate_service_id():
1294 # type: () -> str
1295 return get_hostname() + '.' + ''.join(random.choice(string.ascii_lowercase)
1296 for _ in range(6))
1297
1298
1299 def generate_password():
1300 # type: () -> str
1301 return ''.join(random.choice(string.ascii_lowercase + string.digits)
1302 for i in range(10))
1303
1304
1305 def normalize_container_id(i):
1306 # type: (str) -> str
1307 # docker adds the sha256: prefix, but AFAICS both
1308 # docker (18.09.7 in bionic at least) and podman
1309 # always use sha256, so leave off the prefix
1310 # for consistency.
1311 prefix = 'sha256:'
1312 if i.startswith(prefix):
1313 i = i[len(prefix):]
1314 return i
1315
1316
1317 def make_fsid():
1318 # type: () -> str
1319 return str(uuid.uuid1())
1320
1321
1322 def is_fsid(s):
1323 # type: (str) -> bool
1324 try:
1325 uuid.UUID(s)
1326 except ValueError:
1327 return False
1328 return True
1329
1330
1331 def infer_fsid(func):
1332 """
1333 If we only find a single fsid in /var/lib/ceph/*, use that
1334 """
1335 @wraps(func)
1336 def _infer_fsid():
1337 if args.fsid:
1338 logger.debug('Using specified fsid: %s' % args.fsid)
1339 return func()
1340
1341 fsids_set = set()
1342 daemon_list = list_daemons(detail=False)
1343 for daemon in daemon_list:
1344 if not is_fsid(daemon['fsid']):
1345 # 'unknown' fsid
1346 continue
1347 elif 'name' not in args or not args.name:
1348 # args.name not specified
1349 fsids_set.add(daemon['fsid'])
1350 elif daemon['name'] == args.name:
1351 # args.name is a match
1352 fsids_set.add(daemon['fsid'])
1353 fsids = sorted(fsids_set)
1354
1355 if not fsids:
1356 # some commands do not always require an fsid
1357 pass
1358 elif len(fsids) == 1:
1359 logger.info('Inferring fsid %s' % fsids[0])
1360 args.fsid = fsids[0]
1361 else:
1362 raise Error('Cannot infer an fsid, one must be specified: %s' % fsids)
1363 return func()
1364
1365 return _infer_fsid
1366
1367
1368 def infer_config(func):
1369 """
1370 If we find a MON daemon, use the config from that container
1371 """
1372 @wraps(func)
1373 def _infer_config():
1374 if args.config:
1375 logger.debug('Using specified config: %s' % args.config)
1376 return func()
1377 config = None
1378 if args.fsid:
1379 name = args.name
1380 if not name:
1381 daemon_list = list_daemons(detail=False)
1382 for daemon in daemon_list:
1383 if daemon['name'].startswith('mon.'):
1384 name = daemon['name']
1385 break
1386 if name:
1387 config = '/var/lib/ceph/{}/{}/config'.format(args.fsid, name)
1388 if config:
1389 logger.info('Inferring config %s' % config)
1390 args.config = config
1391 elif os.path.exists(SHELL_DEFAULT_CONF):
1392 logger.debug('Using default config: %s' % SHELL_DEFAULT_CONF)
1393 args.config = SHELL_DEFAULT_CONF
1394 return func()
1395
1396 return _infer_config
1397
1398
1399 def _get_default_image():
1400 if DEFAULT_IMAGE_IS_MASTER:
1401 warn = '''This is a development version of cephadm.
1402 For information regarding the latest stable release:
1403 https://docs.ceph.com/docs/{}/cephadm/install
1404 '''.format(LATEST_STABLE_RELEASE)
1405 for line in warn.splitlines():
1406 logger.warning('{}{}{}'.format(termcolor.yellow, line, termcolor.end))
1407 return DEFAULT_IMAGE
1408
1409
1410 def infer_image(func):
1411 """
1412 Use the most recent ceph image
1413 """
1414 @wraps(func)
1415 def _infer_image():
1416 if not args.image:
1417 args.image = os.environ.get('CEPHADM_IMAGE')
1418 if not args.image:
1419 args.image = get_last_local_ceph_image()
1420 if not args.image:
1421 args.image = _get_default_image()
1422 return func()
1423
1424 return _infer_image
1425
1426
1427 def default_image(func):
1428 @wraps(func)
1429 def _default_image():
1430 if not args.image:
1431 if 'name' in args and args.name:
1432 type_ = args.name.split('.', 1)[0]
1433 if type_ in Monitoring.components:
1434 args.image = Monitoring.components[type_]['image']
1435 if not args.image:
1436 args.image = os.environ.get('CEPHADM_IMAGE')
1437 if not args.image:
1438 args.image = _get_default_image()
1439
1440 return func()
1441
1442 return _default_image
1443
1444
1445 def get_last_local_ceph_image():
1446 """
1447 :return: The most recent local ceph image (already pulled)
1448 """
1449 out, _, _ = call_throws(
1450 [container_path, 'images',
1451 '--filter', 'label=ceph=True',
1452 '--filter', 'dangling=false',
1453 '--format', '{{.Repository}}@{{.Digest}}'])
1454 return _filter_last_local_ceph_image(out)
1455
1456
1457 def _filter_last_local_ceph_image(out):
1458 # type: (str) -> Optional[str]
1459 for image in out.splitlines():
1460 if image and not image.endswith('@'):
1461 logger.info('Using recent ceph image %s' % image)
1462 return image
1463 return None
1464
1465
1466 def write_tmp(s, uid, gid):
1467 # type: (str, int, int) -> Any
1468 tmp_f = tempfile.NamedTemporaryFile(mode='w',
1469 prefix='ceph-tmp')
1470 os.fchown(tmp_f.fileno(), uid, gid)
1471 tmp_f.write(s)
1472 tmp_f.flush()
1473
1474 return tmp_f
1475
1476
1477 def makedirs(dir, uid, gid, mode):
1478 # type: (str, int, int, int) -> None
1479 if not os.path.exists(dir):
1480 os.makedirs(dir, mode=mode)
1481 else:
1482 os.chmod(dir, mode)
1483 os.chown(dir, uid, gid)
1484 os.chmod(dir, mode) # the above is masked by umask...
1485
1486
1487 def get_data_dir(fsid, t, n):
1488 # type: (str, str, Union[int, str]) -> str
1489 return os.path.join(args.data_dir, fsid, '%s.%s' % (t, n))
1490
1491
1492 def get_log_dir(fsid):
1493 # type: (str) -> str
1494 return os.path.join(args.log_dir, fsid)
1495
1496
1497 def make_data_dir_base(fsid, uid, gid):
1498 # type: (str, int, int) -> str
1499 data_dir_base = os.path.join(args.data_dir, fsid)
1500 makedirs(data_dir_base, uid, gid, DATA_DIR_MODE)
1501 makedirs(os.path.join(data_dir_base, 'crash'), uid, gid, DATA_DIR_MODE)
1502 makedirs(os.path.join(data_dir_base, 'crash', 'posted'), uid, gid,
1503 DATA_DIR_MODE)
1504 return data_dir_base
1505
1506
1507 def make_data_dir(fsid, daemon_type, daemon_id, uid=None, gid=None):
1508 # type: (str, str, Union[int, str], Optional[int], Optional[int]) -> str
1509 if uid is None or gid is None:
1510 uid, gid = extract_uid_gid()
1511 make_data_dir_base(fsid, uid, gid)
1512 data_dir = get_data_dir(fsid, daemon_type, daemon_id)
1513 makedirs(data_dir, uid, gid, DATA_DIR_MODE)
1514 return data_dir
1515
1516
1517 def make_log_dir(fsid, uid=None, gid=None):
1518 # type: (str, Optional[int], Optional[int]) -> str
1519 if uid is None or gid is None:
1520 uid, gid = extract_uid_gid()
1521 log_dir = get_log_dir(fsid)
1522 makedirs(log_dir, uid, gid, LOG_DIR_MODE)
1523 return log_dir
1524
1525
1526 def make_var_run(fsid, uid, gid):
1527 # type: (str, int, int) -> None
1528 call_throws(['install', '-d', '-m0770', '-o', str(uid), '-g', str(gid),
1529 '/var/run/ceph/%s' % fsid])
1530
1531
1532 def copy_tree(src, dst, uid=None, gid=None):
1533 # type: (List[str], str, Optional[int], Optional[int]) -> None
1534 """
1535 Copy a directory tree from src to dst
1536 """
1537 if uid is None or gid is None:
1538 (uid, gid) = extract_uid_gid()
1539
1540 for src_dir in src:
1541 dst_dir = dst
1542 if os.path.isdir(dst):
1543 dst_dir = os.path.join(dst, os.path.basename(src_dir))
1544
1545 logger.debug('copy directory \'%s\' -> \'%s\'' % (src_dir, dst_dir))
1546 shutil.rmtree(dst_dir, ignore_errors=True)
1547 shutil.copytree(src_dir, dst_dir) # dirs_exist_ok needs python 3.8
1548
1549 for dirpath, dirnames, filenames in os.walk(dst_dir):
1550 logger.debug('chown %s:%s \'%s\'' % (uid, gid, dirpath))
1551 os.chown(dirpath, uid, gid)
1552 for filename in filenames:
1553 logger.debug('chown %s:%s \'%s\'' % (uid, gid, filename))
1554 os.chown(os.path.join(dirpath, filename), uid, gid)
1555
1556
1557 def copy_files(src, dst, uid=None, gid=None):
1558 # type: (List[str], str, Optional[int], Optional[int]) -> None
1559 """
1560 Copy files from src to dst
1561 """
1562 if uid is None or gid is None:
1563 (uid, gid) = extract_uid_gid()
1564
1565 for src_file in src:
1566 dst_file = dst
1567 if os.path.isdir(dst):
1568 dst_file = os.path.join(dst, os.path.basename(src_file))
1569
1570 logger.debug('copy file \'%s\' -> \'%s\'' % (src_file, dst_file))
1571 shutil.copyfile(src_file, dst_file)
1572
1573 logger.debug('chown %s:%s \'%s\'' % (uid, gid, dst_file))
1574 os.chown(dst_file, uid, gid)
1575
1576
1577 def move_files(src, dst, uid=None, gid=None):
1578 # type: (List[str], str, Optional[int], Optional[int]) -> None
1579 """
1580 Move files from src to dst
1581 """
1582 if uid is None or gid is None:
1583 (uid, gid) = extract_uid_gid()
1584
1585 for src_file in src:
1586 dst_file = dst
1587 if os.path.isdir(dst):
1588 dst_file = os.path.join(dst, os.path.basename(src_file))
1589
1590 if os.path.islink(src_file):
1591 # shutil.move() in py2 does not handle symlinks correctly
1592 src_rl = os.readlink(src_file)
1593 logger.debug("symlink '%s' -> '%s'" % (dst_file, src_rl))
1594 os.symlink(src_rl, dst_file)
1595 os.unlink(src_file)
1596 else:
1597 logger.debug("move file '%s' -> '%s'" % (src_file, dst_file))
1598 shutil.move(src_file, dst_file)
1599 logger.debug('chown %s:%s \'%s\'' % (uid, gid, dst_file))
1600 os.chown(dst_file, uid, gid)
1601
1602
1603 ## copied from distutils ##
1604 def find_executable(executable, path=None):
1605 """Tries to find 'executable' in the directories listed in 'path'.
1606 'path' is a string of directories separated by 'os.pathsep'; it defaults
1607 to os.environ['PATH']. Returns the complete filename or None if not found.
1608 """
1609 _, ext = os.path.splitext(executable)
1610 if (sys.platform == 'win32') and (ext != '.exe'):
1611 executable = executable + '.exe'
1612
1613 if os.path.isfile(executable):
1614 return executable
1615
1616 if path is None:
1617 path = os.environ.get('PATH', None)
1618 if path is None:
1619 try:
1620 path = os.confstr("CS_PATH")
1621 except (AttributeError, ValueError):
1622 # os.confstr() or CS_PATH is not available
1623 path = os.defpath
1624 # bpo-35755: Don't use os.defpath if the PATH environment variable is
1625 # set to an empty string
1626
1627 # PATH='' doesn't match, whereas PATH=':' looks in the current directory
1628 if not path:
1629 return None
1630
1631 paths = path.split(os.pathsep)
1632 for p in paths:
1633 f = os.path.join(p, executable)
1634 if os.path.isfile(f):
1635 # the file exists, we have a shot at spawn working
1636 return f
1637 return None
1638
1639
1640 def find_program(filename):
1641 # type: (str) -> str
1642 name = find_executable(filename)
1643 if name is None:
1644 raise ValueError('%s not found' % filename)
1645 return name
1646
1647
1648 def get_unit_name(fsid, daemon_type, daemon_id=None):
1649 # type: (str, str, Optional[Union[int, str]]) -> str
1650 # accept either name or type + id
1651 if daemon_id is not None:
1652 return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
1653 else:
1654 return 'ceph-%s@%s' % (fsid, daemon_type)
1655
1656
1657 def get_unit_name_by_daemon_name(fsid, name):
1658 daemon = get_daemon_description(fsid, name)
1659 try:
1660 return daemon['systemd_unit']
1661 except KeyError:
1662 raise Error('Failed to get unit name for {}'.format(daemon))
1663
1664
1665 def check_unit(unit_name):
1666 # type: (str) -> Tuple[bool, str, bool]
1667 # NOTE: we ignore the exit code here because systemctl outputs
1668 # various exit codes based on the state of the service, but the
1669 # string result is more explicit (and sufficient).
1670 enabled = False
1671 installed = False
1672 try:
1673 out, err, code = call(['systemctl', 'is-enabled', unit_name],
1674 verbosity=CallVerbosity.DEBUG)
1675 if code == 0:
1676 enabled = True
1677 installed = True
1678 elif "disabled" in out:
1679 installed = True
1680 except Exception as e:
1681 logger.warning('unable to run systemctl: %s' % e)
1682 enabled = False
1683 installed = False
1684
1685 state = 'unknown'
1686 try:
1687 out, err, code = call(['systemctl', 'is-active', unit_name],
1688 verbosity=CallVerbosity.DEBUG)
1689 out = out.strip()
1690 if out in ['active']:
1691 state = 'running'
1692 elif out in ['inactive']:
1693 state = 'stopped'
1694 elif out in ['failed', 'auto-restart']:
1695 state = 'error'
1696 else:
1697 state = 'unknown'
1698 except Exception as e:
1699 logger.warning('unable to run systemctl: %s' % e)
1700 state = 'unknown'
1701 return (enabled, state, installed)
1702
1703
1704 def check_units(units, enabler=None):
1705 # type: (List[str], Optional[Packager]) -> bool
1706 for u in units:
1707 (enabled, state, installed) = check_unit(u)
1708 if enabled and state == 'running':
1709 logger.info('Unit %s is enabled and running' % u)
1710 return True
1711 if enabler is not None:
1712 if installed:
1713 logger.info('Enabling unit %s' % u)
1714 enabler.enable_service(u)
1715 return False
1716
1717
1718 def is_container_running(name: str) -> bool:
1719 out, err, ret = call_throws([
1720 container_path, 'ps',
1721 '--format', '{{.Names}}'])
1722 return name in out.splitlines()
1723
1724
1725 def get_legacy_config_fsid(cluster, legacy_dir=None):
1726 # type: (str, Optional[str]) -> Optional[str]
1727 config_file = '/etc/ceph/%s.conf' % cluster
1728 if legacy_dir is not None:
1729 config_file = os.path.abspath(legacy_dir + config_file)
1730
1731 if os.path.exists(config_file):
1732 config = read_config(config_file)
1733 if config.has_section('global') and config.has_option('global', 'fsid'):
1734 return config.get('global', 'fsid')
1735 return None
1736
1737
1738 def get_legacy_daemon_fsid(cluster, daemon_type, daemon_id, legacy_dir=None):
1739 # type: (str, str, Union[int, str], Optional[str]) -> Optional[str]
1740 fsid = None
1741 if daemon_type == 'osd':
1742 try:
1743 fsid_file = os.path.join(args.data_dir,
1744 daemon_type,
1745 'ceph-%s' % daemon_id,
1746 'ceph_fsid')
1747 if legacy_dir is not None:
1748 fsid_file = os.path.abspath(legacy_dir + fsid_file)
1749 with open(fsid_file, 'r') as f:
1750 fsid = f.read().strip()
1751 except IOError:
1752 pass
1753 if not fsid:
1754 fsid = get_legacy_config_fsid(cluster, legacy_dir=legacy_dir)
1755 return fsid
1756
1757
1758 def get_daemon_args(fsid, daemon_type, daemon_id):
1759 # type: (str, str, Union[int, str]) -> List[str]
1760 r = list() # type: List[str]
1761
1762 if daemon_type in Ceph.daemons and daemon_type != 'crash':
1763 r += [
1764 '--setuser', 'ceph',
1765 '--setgroup', 'ceph',
1766 '--default-log-to-file=false',
1767 '--default-log-to-stderr=true',
1768 '--default-log-stderr-prefix="debug "',
1769 ]
1770 if daemon_type == 'mon':
1771 r += [
1772 '--default-mon-cluster-log-to-file=false',
1773 '--default-mon-cluster-log-to-stderr=true',
1774 ]
1775 elif daemon_type in Monitoring.components:
1776 metadata = Monitoring.components[daemon_type]
1777 r += metadata.get('args', list())
1778 if daemon_type == 'alertmanager':
1779 config = get_parm(args.config_json)
1780 peers = config.get('peers', list()) # type: ignore
1781 for peer in peers:
1782 r += ["--cluster.peer={}".format(peer)]
1783 # some alertmanager builds look elsewhere for their config by default
1784 r += ["--config.file=/etc/alertmanager/alertmanager.yml"]
1785 elif daemon_type == NFSGanesha.daemon_type:
1786 nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
1787 r += nfs_ganesha.get_daemon_args()
1788 elif daemon_type == CustomContainer.daemon_type:
1789 cc = CustomContainer.init(fsid, daemon_id)
1790 r.extend(cc.get_daemon_args())
1791
1792 return r
1793
1794
1795 def create_daemon_dirs(fsid, daemon_type, daemon_id, uid, gid,
1796 config=None, keyring=None):
1797 # type: (str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None
1798 data_dir = make_data_dir(fsid, daemon_type, daemon_id, uid=uid, gid=gid)
1799 make_log_dir(fsid, uid=uid, gid=gid)
1800
1801 if config:
1802 config_path = os.path.join(data_dir, 'config')
1803 with open(config_path, 'w') as f:
1804 os.fchown(f.fileno(), uid, gid)
1805 os.fchmod(f.fileno(), 0o600)
1806 f.write(config)
1807
1808 if keyring:
1809 keyring_path = os.path.join(data_dir, 'keyring')
1810 with open(keyring_path, 'w') as f:
1811 os.fchmod(f.fileno(), 0o600)
1812 os.fchown(f.fileno(), uid, gid)
1813 f.write(keyring)
1814
1815 if daemon_type in Monitoring.components.keys():
1816 config_json: Dict[str, Any] = get_parm(args.config_json)
1817 required_files = Monitoring.components[daemon_type].get('config-json-files', list())
1818
1819 # Set up directories specific to the monitoring component
1820 config_dir = ''
1821 if daemon_type == 'prometheus':
1822 data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
1823 config_dir = 'etc/prometheus'
1824 makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
1825 makedirs(os.path.join(data_dir_root, config_dir, 'alerting'), uid, gid, 0o755)
1826 makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
1827 elif daemon_type == 'grafana':
1828 data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
1829 config_dir = 'etc/grafana'
1830 makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
1831 makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755)
1832 makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755)
1833 makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
1834 elif daemon_type == 'alertmanager':
1835 data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
1836 config_dir = 'etc/alertmanager'
1837 makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
1838 makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755)
1839
1840 # populate the config directory for the component from the config-json
1841 for fname in required_files:
1842 if 'files' in config_json: # type: ignore
1843 content = dict_get_join(config_json['files'], fname)
1844 with open(os.path.join(data_dir_root, config_dir, fname), 'w') as f:
1845 os.fchown(f.fileno(), uid, gid)
1846 os.fchmod(f.fileno(), 0o600)
1847 f.write(content)
1848
1849 elif daemon_type == NFSGanesha.daemon_type:
1850 nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
1851 nfs_ganesha.create_daemon_dirs(data_dir, uid, gid)
1852
1853 elif daemon_type == CephIscsi.daemon_type:
1854 ceph_iscsi = CephIscsi.init(fsid, daemon_id)
1855 ceph_iscsi.create_daemon_dirs(data_dir, uid, gid)
1856
1857 elif daemon_type == CustomContainer.daemon_type:
1858 cc = CustomContainer.init(fsid, daemon_id)
1859 cc.create_daemon_dirs(data_dir, uid, gid)
1860
1861
1862 def get_parm(option):
1863 # type: (str) -> Dict[str, str]
1864
1865 if not option:
1866 return dict()
1867
1868 global cached_stdin
1869 if option == '-':
1870 if cached_stdin is not None:
1871 j = cached_stdin
1872 else:
1873 try:
1874 j = injected_stdin # type: ignore
1875 except NameError:
1876 j = sys.stdin.read()
1877 cached_stdin = j
1878 else:
1879 # inline json string
1880 if option[0] == '{' and option[-1] == '}':
1881 j = option
1882 # json file
1883 elif os.path.exists(option):
1884 with open(option, 'r') as f:
1885 j = f.read()
1886 else:
1887 raise Error("Config file {} not found".format(option))
1888
1889 try:
1890 js = json.loads(j)
1891 except ValueError as e:
1892 raise Error("Invalid JSON in {}: {}".format(option, e))
1893 else:
1894 return js
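# Illustrative summary of the three input forms get_parm() accepts (the file
# path below is a hypothetical example):
#
#   get_parm('{"files": {"foo.yml": "..."}}')  # inline JSON string
#   get_parm('/tmp/config.json')               # path to an existing JSON file
#   get_parm('-')                              # read stdin once, then reuse the cached value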
1895
1896
1897 def get_config_and_keyring():
1898 # type: () -> Tuple[Optional[str], Optional[str]]
1899 config = None
1900 keyring = None
1901
1902 if 'config_json' in args and args.config_json:
1903 d = get_parm(args.config_json)
1904 config = d.get('config')
1905 keyring = d.get('keyring')
1906
1907 if 'config' in args and args.config:
1908 with open(args.config, 'r') as f:
1909 config = f.read()
1910
1911 if 'key' in args and args.key:
1912 keyring = '[%s]\n\tkey = %s\n' % (args.name, args.key)
1913 elif 'keyring' in args and args.keyring:
1914 with open(args.keyring, 'r') as f:
1915 keyring = f.read()
1916
1917 return config, keyring
1918
1919
1920 def get_container_binds(fsid, daemon_type, daemon_id):
1921 # type: (str, str, Union[int, str, None]) -> List[List[str]]
1922 binds = list()
1923
1924 if daemon_type == CephIscsi.daemon_type:
1925 binds.extend(CephIscsi.get_container_binds())
1926 elif daemon_type == CustomContainer.daemon_type:
1927 assert daemon_id
1928 cc = CustomContainer.init(fsid, daemon_id)
1929 data_dir = get_data_dir(fsid, daemon_type, daemon_id)
1930 binds.extend(cc.get_container_binds(data_dir))
1931
1932 return binds
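# Each bind is a list of key=value tokens that CephContainer later joins into
# a single --mount flag. For example (an illustrative CephIscsi-style bind):
#
#   ['type=bind', 'source=/lib/modules', 'destination=/lib/modules', 'ro=true']
#   -> --mount type=bind,source=/lib/modules,destination=/lib/modules,ro=true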
1933
1934
1935 def get_container_mounts(fsid, daemon_type, daemon_id,
1936 no_config=False):
1937 # type: (str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str]
1938 mounts = dict()
1939
1940 if daemon_type in Ceph.daemons:
1941 if fsid:
1942         run_path = os.path.join('/var/run/ceph', fsid)
1943 if os.path.exists(run_path):
1944 mounts[run_path] = '/var/run/ceph:z'
1945 log_dir = get_log_dir(fsid)
1946 mounts[log_dir] = '/var/log/ceph:z'
1947 crash_dir = '/var/lib/ceph/%s/crash' % fsid
1948 if os.path.exists(crash_dir):
1949 mounts[crash_dir] = '/var/lib/ceph/crash:z'
1950
1951 if daemon_type in Ceph.daemons and daemon_id:
1952 data_dir = get_data_dir(fsid, daemon_type, daemon_id)
1953 if daemon_type == 'rgw':
1954 cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id)
1955 else:
1956 cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id)
1957 if daemon_type != 'crash':
1958 mounts[data_dir] = cdata_dir + ':z'
1959 if not no_config:
1960 mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
1961 if daemon_type == 'rbd-mirror' or daemon_type == 'crash':
1962 # these do not search for their keyrings in a data directory
1963 mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id)
1964
1965 if daemon_type in ['mon', 'osd']:
1966 mounts['/dev'] = '/dev' # FIXME: narrow this down?
1967 mounts['/run/udev'] = '/run/udev'
1968 if daemon_type == 'osd':
1969 mounts['/sys'] = '/sys' # for numa.cc, pick_address, cgroups, ...
1970 mounts['/run/lvm'] = '/run/lvm'
1971 mounts['/run/lock/lvm'] = '/run/lock/lvm'
1972
1973 try:
1974         if args.shared_ceph_folder:  # simplify manager module/ceph-volume development
1975 ceph_folder = pathify(args.shared_ceph_folder)
1976 if os.path.exists(ceph_folder):
1977 mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
1978 mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
1979 mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
1980 mounts[ceph_folder + '/monitoring/grafana/dashboards'] = '/etc/grafana/dashboards/ceph-dashboard'
1981 mounts[ceph_folder + '/monitoring/prometheus/alerts'] = '/etc/prometheus/ceph'
1982 else:
1983 logger.error('{}{}{}'.format(termcolor.red,
1984 'Ceph shared source folder does not exist.',
1985 termcolor.end))
1986 except AttributeError:
1987 pass
1988
1989 if daemon_type in Monitoring.components and daemon_id:
1990 data_dir = get_data_dir(fsid, daemon_type, daemon_id)
1991 if daemon_type == 'prometheus':
1992 mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z'
1993 mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z'
1994 elif daemon_type == 'node-exporter':
1995 mounts['/proc'] = '/host/proc:ro'
1996 mounts['/sys'] = '/host/sys:ro'
1997 mounts['/'] = '/rootfs:ro'
1998 elif daemon_type == "grafana":
1999 mounts[os.path.join(data_dir, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
2000 mounts[os.path.join(data_dir, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
2001 mounts[os.path.join(data_dir, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
2002 elif daemon_type == 'alertmanager':
2003 mounts[os.path.join(data_dir, 'etc/alertmanager')] = '/etc/alertmanager:Z'
2004
2005 if daemon_type == NFSGanesha.daemon_type:
2006 assert daemon_id
2007 data_dir = get_data_dir(fsid, daemon_type, daemon_id)
2008 nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
2009 mounts.update(nfs_ganesha.get_container_mounts(data_dir))
2010
2011 if daemon_type == CephIscsi.daemon_type:
2012 assert daemon_id
2013 data_dir = get_data_dir(fsid, daemon_type, daemon_id)
2014 log_dir = get_log_dir(fsid)
2015 mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir))
2016
2017 if daemon_type == CustomContainer.daemon_type:
2018 assert daemon_id
2019 cc = CustomContainer.init(fsid, daemon_id)
2020 data_dir = get_data_dir(fsid, daemon_type, daemon_id)
2021 mounts.update(cc.get_container_mounts(data_dir))
2022
2023 return mounts
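# The returned dict maps host path -> container path (plus an optional SELinux
# relabel suffix); CephContainer renders each entry as a -v flag, e.g.:
#
#   {'/var/lib/ceph/<fsid>/mon.a': '/var/lib/ceph/mon/ceph-a:z'}
#   -> -v /var/lib/ceph/<fsid>/mon.a:/var/lib/ceph/mon/ceph-a:z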
2024
2025
2026 def get_container(fsid: str, daemon_type: str, daemon_id: Union[int, str],
2027 privileged: bool = False,
2028 ptrace: bool = False,
2029 container_args: Optional[List[str]] = None) -> 'CephContainer':
2030 entrypoint: str = ''
2031 name: str = ''
2032 ceph_args: List[str] = []
2033 envs: List[str] = []
2034 host_network: bool = True
2035
2036 if container_args is None:
2037 container_args = []
2038 if daemon_type in ['mon', 'osd']:
2039 # mon and osd need privileged in order for libudev to query devices
2040 privileged = True
2041 if daemon_type == 'rgw':
2042 entrypoint = '/usr/bin/radosgw'
2043 name = 'client.rgw.%s' % daemon_id
2044 elif daemon_type == 'rbd-mirror':
2045 entrypoint = '/usr/bin/rbd-mirror'
2046 name = 'client.rbd-mirror.%s' % daemon_id
2047 elif daemon_type == 'crash':
2048 entrypoint = '/usr/bin/ceph-crash'
2049 name = 'client.crash.%s' % daemon_id
2050 elif daemon_type in ['mon', 'mgr', 'mds', 'osd']:
2051 entrypoint = '/usr/bin/ceph-' + daemon_type
2052 name = '%s.%s' % (daemon_type, daemon_id)
2053 elif daemon_type in Monitoring.components:
2054 entrypoint = ''
2055 elif daemon_type == NFSGanesha.daemon_type:
2056 entrypoint = NFSGanesha.entrypoint
2057 name = '%s.%s' % (daemon_type, daemon_id)
2058 envs.extend(NFSGanesha.get_container_envs())
2059 elif daemon_type == CephIscsi.daemon_type:
2060 entrypoint = CephIscsi.entrypoint
2061 name = '%s.%s' % (daemon_type, daemon_id)
2062 # So the container can modprobe iscsi_target_mod and have write perms
2063 # to configfs we need to make this a privileged container.
2064 privileged = True
2065 elif daemon_type == CustomContainer.daemon_type:
2066 cc = CustomContainer.init(fsid, daemon_id)
2067 entrypoint = cc.entrypoint
2068 host_network = False
2069 envs.extend(cc.get_container_envs())
2070 container_args.extend(cc.get_container_args())
2071
2072 if daemon_type in Monitoring.components:
2073 uid, gid = extract_uid_gid_monitoring(daemon_type)
2074 monitoring_args = [
2075 '--user',
2076 str(uid),
2077 # FIXME: disable cpu/memory limits for the time being (not supported
2078 # by ubuntu 18.04 kernel!)
2079 ]
2080 container_args.extend(monitoring_args)
2081 elif daemon_type == 'crash':
2082 ceph_args = ['-n', name]
2083 elif daemon_type in Ceph.daemons:
2084 ceph_args = ['-n', name, '-f']
2085
2086 # if using podman, set -d, --conmon-pidfile & --cidfile flags
2087 # so service can have Type=Forking
2088 if 'podman' in container_path:
2089 runtime_dir = '/run'
2090 container_args.extend(['-d',
2091 '--conmon-pidfile',
2092 runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, daemon_id),
2093 '--cidfile',
2094 runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, daemon_id)])
2095
2096 return CephContainer(
2097 image=args.image,
2098 entrypoint=entrypoint,
2099 args=ceph_args + get_daemon_args(fsid, daemon_type, daemon_id),
2100 container_args=container_args,
2101 volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
2102 bind_mounts=get_container_binds(fsid, daemon_type, daemon_id),
2103 cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id),
2104 envs=envs,
2105 privileged=privileged,
2106 ptrace=ptrace,
2107 host_network=host_network,
2108 )
2109
2110
2111 def extract_uid_gid(img='', file_path='/var/lib/ceph'):
2112 # type: (str, Union[str, List[str]]) -> Tuple[int, int]
2113
2114 if not img:
2115 img = args.image
2116
2117 if isinstance(file_path, str):
2118 paths = [file_path]
2119 else:
2120 paths = file_path
2121
2122 for fp in paths:
2123 try:
2124 out = CephContainer(
2125 image=img,
2126 entrypoint='stat',
2127 args=['-c', '%u %g', fp]
2128 ).run()
2129 uid, gid = out.split(' ')
2130 return int(uid), int(gid)
2131 except RuntimeError:
2132 pass
2133 raise RuntimeError('uid/gid not found')
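# Note: stat deliberately runs *inside* the image so we pick up the uid/gid
# that the packaged ceph user has there (e.g. 167 on Red Hat based images),
# which need not exist on the host at all.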
2134
2135
2136 def deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid,
2137 config=None, keyring=None,
2138 osd_fsid=None,
2139 reconfig=False,
2140 ports=None):
2141 # type: (str, str, Union[int, str], CephContainer, int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None
2142
2143 ports = ports or []
2144 if any([port_in_use(port) for port in ports]):
2145 raise Error("TCP Port(s) '{}' required for {} already in use".format(",".join(map(str, ports)), daemon_type))
2146
2147 data_dir = get_data_dir(fsid, daemon_type, daemon_id)
2148 if reconfig and not os.path.exists(data_dir):
2149 raise Error('cannot reconfig, data path %s does not exist' % data_dir)
2150 if daemon_type == 'mon' and not os.path.exists(data_dir):
2151 assert config
2152 assert keyring
2153 # tmp keyring file
2154 tmp_keyring = write_tmp(keyring, uid, gid)
2155
2156 # tmp config file
2157 tmp_config = write_tmp(config, uid, gid)
2158
2159 # --mkfs
2160 create_daemon_dirs(fsid, daemon_type, daemon_id, uid, gid)
2161 mon_dir = get_data_dir(fsid, 'mon', daemon_id)
2162 log_dir = get_log_dir(fsid)
2163 out = CephContainer(
2164 image=args.image,
2165 entrypoint='/usr/bin/ceph-mon',
2166 args=['--mkfs',
2167 '-i', str(daemon_id),
2168 '--fsid', fsid,
2169 '-c', '/tmp/config',
2170 '--keyring', '/tmp/keyring',
2171 ] + get_daemon_args(fsid, 'mon', daemon_id),
2172 volume_mounts={
2173 log_dir: '/var/log/ceph:z',
2174 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id),
2175 tmp_keyring.name: '/tmp/keyring:z',
2176 tmp_config.name: '/tmp/config:z',
2177 },
2178 ).run()
2179
2180 # write conf
2181 with open(mon_dir + '/config', 'w') as f:
2182 os.fchown(f.fileno(), uid, gid)
2183 os.fchmod(f.fileno(), 0o600)
2184 f.write(config)
2185 else:
2186 # dirs, conf, keyring
2187 create_daemon_dirs(
2188 fsid, daemon_type, daemon_id,
2189 uid, gid,
2190 config, keyring)
2191
2192 if not reconfig:
2193 deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
2194 osd_fsid=osd_fsid)
2195
2196 if not os.path.exists(data_dir + '/unit.created'):
2197 with open(data_dir + '/unit.created', 'w') as f:
2198 os.fchmod(f.fileno(), 0o600)
2199 os.fchown(f.fileno(), uid, gid)
2200 f.write('mtime is time the daemon deployment was created\n')
2201
2202 with open(data_dir + '/unit.configured', 'w') as f:
2203 f.write('mtime is time we were last configured\n')
2204 os.fchmod(f.fileno(), 0o600)
2205 os.fchown(f.fileno(), uid, gid)
2206
2207 update_firewalld(daemon_type)
2208
2209 # Open ports explicitly required for the daemon
2210 if ports:
2211 fw = Firewalld()
2212 fw.open_ports(ports)
2213 fw.apply_rules()
2214
2215 if reconfig and daemon_type not in Ceph.daemons:
2216 # ceph daemons do not need a restart; others (presumably) do to pick
2217 # up the new config
2218 call_throws(['systemctl', 'reset-failed',
2219 get_unit_name(fsid, daemon_type, daemon_id)])
2220 call_throws(['systemctl', 'restart',
2221 get_unit_name(fsid, daemon_type, daemon_id)])
2222
2223 def _write_container_cmd_to_bash(file_obj, container, comment=None, background=False):
2224 # type: (IO[str], CephContainer, Optional[str], Optional[bool]) -> None
2225 if comment:
2226 # Sometimes adding a comment, especially if there are multiple containers in one
2227 # unit file, makes it easier to read and grok.
2228 file_obj.write('# ' + comment + '\n')
2229 # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
2230     file_obj.write('! ' + ' '.join(container.rm_cmd()) + ' 2> /dev/null\n')
2231 # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
2232 if 'podman' in container_path:
2233         file_obj.write('! ' + ' '.join(container.rm_cmd(storage=True)) + ' 2> /dev/null\n')
2234
2235 # container run command
2236 file_obj.write(' '.join(container.run_cmd()) + (' &' if background else '') + '\n')
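# For reference, the emitted snippet looks roughly like this under podman
# (fsid, daemon name and image are illustrative):
#
#   # mon.a
#   ! /usr/bin/podman rm -f ceph-<fsid>-mon.a 2> /dev/null
#   ! /usr/bin/podman rm -f --storage ceph-<fsid>-mon.a 2> /dev/null
#   /usr/bin/podman run --rm --ipc=host ... --name ceph-<fsid>-mon.a <image> ...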
2237
2238
2239 def deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
2240 enable=True, start=True,
2241 osd_fsid=None):
2242 # type: (str, int, int, str, Union[int, str], CephContainer, bool, bool, Optional[str]) -> None
2243 # cmd
2244 data_dir = get_data_dir(fsid, daemon_type, daemon_id)
2245 with open(data_dir + '/unit.run.new', 'w') as f:
2246 f.write('set -e\n')
2247
2248 if daemon_type in Ceph.daemons:
2249 install_path = find_program('install')
2250 f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))
2251
2252 # pre-start cmd(s)
2253 if daemon_type == 'osd':
2254 # osds have a pre-start step
2255 assert osd_fsid
2256 simple_fn = os.path.join('/etc/ceph/osd',
2257 '%s-%s.json.adopted-by-cephadm' % (daemon_id, osd_fsid))
2258 if os.path.exists(simple_fn):
2259 f.write('# Simple OSDs need chown on startup:\n')
2260 for n in ['block', 'block.db', 'block.wal']:
2261 p = os.path.join(data_dir, n)
2262 f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
2263 else:
2264 prestart = CephContainer(
2265 image=args.image,
2266 entrypoint='/usr/sbin/ceph-volume',
2267 args=[
2268 'lvm', 'activate',
2269 str(daemon_id), osd_fsid,
2270 '--no-systemd'
2271 ],
2272 privileged=True,
2273 volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
2274 bind_mounts=get_container_binds(fsid, daemon_type, daemon_id),
2275 cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
2276 )
2277 _write_container_cmd_to_bash(f, prestart, 'LVM OSDs use ceph-volume lvm activate')
2278 elif daemon_type == NFSGanesha.daemon_type:
2279 # add nfs to the rados grace db
2280 nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
2281 prestart = nfs_ganesha.get_rados_grace_container('add')
2282 _write_container_cmd_to_bash(f, prestart, 'add daemon to rados grace')
2283 elif daemon_type == CephIscsi.daemon_type:
2284 f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
2285 ceph_iscsi = CephIscsi.init(fsid, daemon_id)
2286 tcmu_container = ceph_iscsi.get_tcmu_runner_container()
2287             _write_container_cmd_to_bash(f, tcmu_container, 'iscsi tcmu-runner container', background=True)
2288
2289 _write_container_cmd_to_bash(f, c, '%s.%s' % (daemon_type, str(daemon_id)))
2290 os.fchmod(f.fileno(), 0o600)
2291 os.rename(data_dir + '/unit.run.new',
2292 data_dir + '/unit.run')
2293
2294 # post-stop command(s)
2295 with open(data_dir + '/unit.poststop.new', 'w') as f:
2296 if daemon_type == 'osd':
2297 assert osd_fsid
2298 poststop = CephContainer(
2299 image=args.image,
2300 entrypoint='/usr/sbin/ceph-volume',
2301 args=[
2302 'lvm', 'deactivate',
2303 str(daemon_id), osd_fsid,
2304 ],
2305 privileged=True,
2306 volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
2307 bind_mounts=get_container_binds(fsid, daemon_type, daemon_id),
2308 cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type,
2309 daemon_id),
2310 )
2311 _write_container_cmd_to_bash(f, poststop, 'deactivate osd')
2312 elif daemon_type == NFSGanesha.daemon_type:
2313 # remove nfs from the rados grace db
2314 nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
2315 poststop = nfs_ganesha.get_rados_grace_container('remove')
2316 _write_container_cmd_to_bash(f, poststop, 'remove daemon from rados grace')
2317 elif daemon_type == CephIscsi.daemon_type:
2318 # make sure we also stop the tcmu container
2319 ceph_iscsi = CephIscsi.init(fsid, daemon_id)
2320 tcmu_container = ceph_iscsi.get_tcmu_runner_container()
2321             f.write('! ' + ' '.join(tcmu_container.stop_cmd()) + '\n')
2322 f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
2323 os.fchmod(f.fileno(), 0o600)
2324 os.rename(data_dir + '/unit.poststop.new',
2325 data_dir + '/unit.poststop')
2326
2327 with open(data_dir + '/unit.image.new', 'w') as f:
2328 f.write(c.image + '\n')
2329 os.fchmod(f.fileno(), 0o600)
2330 os.rename(data_dir + '/unit.image.new',
2331 data_dir + '/unit.image')
2332
2333 # systemd
2334 install_base_units(fsid)
2335 unit = get_unit_file(fsid)
2336 unit_file = 'ceph-%s@.service' % (fsid)
2337 with open(args.unit_dir + '/' + unit_file + '.new', 'w') as f:
2338 f.write(unit)
2339 os.rename(args.unit_dir + '/' + unit_file + '.new',
2340 args.unit_dir + '/' + unit_file)
2341 call_throws(['systemctl', 'daemon-reload'])
2342
2343 unit_name = get_unit_name(fsid, daemon_type, daemon_id)
2344 call(['systemctl', 'stop', unit_name],
2345 verbosity=CallVerbosity.DEBUG)
2346 call(['systemctl', 'reset-failed', unit_name],
2347 verbosity=CallVerbosity.DEBUG)
2348 if enable:
2349 call_throws(['systemctl', 'enable', unit_name])
2350 if start:
2351 call_throws(['systemctl', 'start', unit_name])
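# After this function runs, the daemon's data dir contains (paths assume the
# default /var/lib/ceph data dir):
#
#   /var/lib/ceph/<fsid>/<type>.<id>/unit.run       # script that starts the container
#   /var/lib/ceph/<fsid>/<type>.<id>/unit.poststop  # cleanup after systemd stops it
#   /var/lib/ceph/<fsid>/<type>.<id>/unit.image     # image the unit was generated from
#
# and the daemon is managed through the templated ceph-<fsid>@<type>.<id> unit.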
2352
2353
2354
2355 class Firewalld(object):
2356 def __init__(self):
2357 # type: () -> None
2358 self.available = self.check()
2359
2360 def check(self):
2361 # type: () -> bool
2362 self.cmd = find_executable('firewall-cmd')
2363 if not self.cmd:
2364 logger.debug('firewalld does not appear to be present')
2365 return False
2366 (enabled, state, _) = check_unit('firewalld.service')
2367 if not enabled:
2368 logger.debug('firewalld.service is not enabled')
2369 return False
2370 if state != "running":
2371 logger.debug('firewalld.service is not running')
2372 return False
2373
2374 logger.info("firewalld ready")
2375 return True
2376
2377 def enable_service_for(self, daemon_type):
2378 # type: (str) -> None
2379 if not self.available:
2380 logger.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type)
2381 return
2382
2383 if daemon_type == 'mon':
2384 svc = 'ceph-mon'
2385 elif daemon_type in ['mgr', 'mds', 'osd']:
2386 svc = 'ceph'
2387 elif daemon_type == NFSGanesha.daemon_type:
2388 svc = 'nfs'
2389 else:
2390 return
2391
2392 out, err, ret = call([self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG)
2393 if ret:
2394 logger.info('Enabling firewalld service %s in current zone...' % svc)
2395 out, err, ret = call([self.cmd, '--permanent', '--add-service', svc])
2396 if ret:
2397 raise RuntimeError(
2398 'unable to add service %s to current zone: %s' % (svc, err))
2399 else:
2400 logger.debug('firewalld service %s is enabled in current zone' % svc)
2401
2402 def open_ports(self, fw_ports):
2403 # type: (List[int]) -> None
2404 if not self.available:
2405 logger.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports)
2406 return
2407
2408 for port in fw_ports:
2409 tcp_port = str(port) + '/tcp'
2410 out, err, ret = call([self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
2411 if ret:
2412 logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
2413 out, err, ret = call([self.cmd, '--permanent', '--add-port', tcp_port])
2414 if ret:
2415 raise RuntimeError('unable to add port %s to current zone: %s' %
2416 (tcp_port, err))
2417 else:
2418 logger.debug('firewalld port %s is enabled in current zone' % tcp_port)
2419
2420 def apply_rules(self):
2421 # type: () -> None
2422 if not self.available:
2423 return
2424
2425 call_throws([self.cmd, '--reload'])
2426
2427
2428 def update_firewalld(daemon_type):
2429 # type: (str) -> None
2430 firewall = Firewalld()
2431
2432 firewall.enable_service_for(daemon_type)
2433
2434 fw_ports = []
2435
2436 if daemon_type in Monitoring.port_map.keys():
2437 fw_ports.extend(Monitoring.port_map[daemon_type]) # prometheus etc
2438
2439 firewall.open_ports(fw_ports)
2440 firewall.apply_rules()
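# The calls above boil down to the equivalent firewall-cmd invocations, e.g.:
#
#   firewall-cmd --permanent --query-service ceph-mon
#   firewall-cmd --permanent --add-service ceph-mon
#   firewall-cmd --permanent --add-port <port>/tcp
#   firewall-cmd --reload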
2441
2442 def install_base_units(fsid):
2443 # type: (str) -> None
2444 """
2445 Set up ceph.target and ceph-$fsid.target units.
2446 """
2447 # global unit
2448 existed = os.path.exists(args.unit_dir + '/ceph.target')
2449 with open(args.unit_dir + '/ceph.target.new', 'w') as f:
2450 f.write('[Unit]\n'
2451 'Description=All Ceph clusters and services\n'
2452 '\n'
2453 '[Install]\n'
2454 'WantedBy=multi-user.target\n')
2455 os.rename(args.unit_dir + '/ceph.target.new',
2456 args.unit_dir + '/ceph.target')
2457 if not existed:
2458 # we disable before enable in case a different ceph.target
2459 # (from the traditional package) is present; while newer
2460 # systemd is smart enough to disable the old
2461 # (/lib/systemd/...) and enable the new (/etc/systemd/...),
2462 # some older versions of systemd error out with EEXIST.
2463 call_throws(['systemctl', 'disable', 'ceph.target'])
2464 call_throws(['systemctl', 'enable', 'ceph.target'])
2465 call_throws(['systemctl', 'start', 'ceph.target'])
2466
2467 # cluster unit
2468 existed = os.path.exists(args.unit_dir + '/ceph-%s.target' % fsid)
2469 with open(args.unit_dir + '/ceph-%s.target.new' % fsid, 'w') as f:
2470 f.write('[Unit]\n'
2471 'Description=Ceph cluster {fsid}\n'
2472 'PartOf=ceph.target\n'
2473 'Before=ceph.target\n'
2474 '\n'
2475 '[Install]\n'
2476 'WantedBy=multi-user.target ceph.target\n'.format(
2477 fsid=fsid)
2478 )
2479 os.rename(args.unit_dir + '/ceph-%s.target.new' % fsid,
2480 args.unit_dir + '/ceph-%s.target' % fsid)
2481 if not existed:
2482 call_throws(['systemctl', 'enable', 'ceph-%s.target' % fsid])
2483 call_throws(['systemctl', 'start', 'ceph-%s.target' % fsid])
2484
2485 # logrotate for the cluster
2486 with open(args.logrotate_dir + '/ceph-%s' % fsid, 'w') as f:
2487 """
2488 This is a bit sloppy in that the killall/pkill will touch all ceph daemons
2489 in all containers, but I don't see an elegant way to send SIGHUP *just* to
2490 the daemons for this cluster. (1) systemd kill -s will get the signal to
2491 podman, but podman will exit. (2) podman kill will get the signal to the
2492 first child (bash), but that isn't the ceph daemon. This is simpler and
2493 should be harmless.
2494 """
2495 f.write("""# created by cephadm
2496 /var/log/ceph/%s/*.log {
2497 rotate 7
2498 daily
2499 compress
2500 sharedscripts
2501 postrotate
2502 killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror || pkill -1 -x "ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror" || true
2503 endscript
2504 missingok
2505 notifempty
2506 su root root
2507 }
2508 """ % fsid)
2509
2510
2511 def get_unit_file(fsid):
2512 # type: (str) -> str
2513 extra_args = ''
2514 if 'podman' in container_path:
2515 extra_args = ('ExecStartPre=-/bin/rm -f /%t/%n-pid /%t/%n-cid\n'
2516 'ExecStopPost=-/bin/rm -f /%t/%n-pid /%t/%n-cid\n'
2517 'Type=forking\n'
2518 'PIDFile=/%t/%n-pid\n')
2519
2520 docker = 'docker' in container_path
2521 u = """# generated by cephadm
2522 [Unit]
2523 Description=Ceph %i for {fsid}
2524
2525 # According to:
2526 # http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
2527 # these can be removed once ceph-mon will dynamically change network
2528 # configuration.
2529 After=network-online.target local-fs.target time-sync.target{docker_after}
2530 Wants=network-online.target local-fs.target time-sync.target
2531 {docker_requires}
2532
2533 PartOf=ceph-{fsid}.target
2534 Before=ceph-{fsid}.target
2535
2536 [Service]
2537 LimitNOFILE=1048576
2538 LimitNPROC=1048576
2539 EnvironmentFile=-/etc/environment
2540 ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
2541 ExecStop=-{container_path} stop ceph-{fsid}-%i
2542 ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
2543 KillMode=none
2544 Restart=on-failure
2545 RestartSec=10s
2546 TimeoutStartSec=120
2547 TimeoutStopSec=120
2548 StartLimitInterval=30min
2549 StartLimitBurst=5
2550 {extra_args}
2551 [Install]
2552 WantedBy=ceph-{fsid}.target
2553 """.format(
2554 container_path=container_path,
2555 fsid=fsid,
2556 data_dir=args.data_dir,
2557 extra_args=extra_args,
2558 # if docker, we depend on docker.service
2559 docker_after=' docker.service' if docker else '',
2560 docker_requires='Requires=docker.service\n' if docker else '',
2561 )
2562
2563 return u
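# The template is installed as ceph-<fsid>@.service; systemd's %i carries the
# '<type>.<id>' daemon name, so a single daemon is managed as, e.g.:
#
#   systemctl start ceph-<fsid>@mon.myhost.service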
2564
2565 ##################################
2566
2567
2568 class CephContainer:
2569 def __init__(self,
2570 image: str,
2571 entrypoint: str,
2572 args: List[str] = [],
2573 volume_mounts: Dict[str, str] = {},
2574 cname: str = '',
2575 container_args: List[str] = [],
2576 envs: Optional[List[str]] = None,
2577 privileged: bool = False,
2578 ptrace: bool = False,
2579 bind_mounts: Optional[List[List[str]]] = None,
2580 init: Optional[bool] = None,
2581 host_network: bool = True,
2582 ) -> None:
2583 self.image = image
2584 self.entrypoint = entrypoint
2585 self.args = args
2586 self.volume_mounts = volume_mounts
2587 self.cname = cname
2588 self.container_args = container_args
2589 self.envs = envs
2590 self.privileged = privileged
2591 self.ptrace = ptrace
2592 self.bind_mounts = bind_mounts if bind_mounts else []
2593 self.init = init if init else container_init
2594 self.host_network = host_network
2595
2596 def run_cmd(self) -> List[str]:
2597 cmd_args: List[str] = [
2598 str(container_path),
2599 'run',
2600 '--rm',
2601 '--ipc=host',
2602 ]
2603 envs: List[str] = [
2604 '-e', 'CONTAINER_IMAGE=%s' % self.image,
2605 '-e', 'NODE_NAME=%s' % get_hostname(),
2606 ]
2607 vols: List[str] = []
2608 binds: List[str] = []
2609
2610 if self.host_network:
2611 cmd_args.append('--net=host')
2612 if self.entrypoint:
2613 cmd_args.extend(['--entrypoint', self.entrypoint])
2614 if self.privileged:
2615 cmd_args.extend([
2616 '--privileged',
2617 # let OSD etc read block devs that haven't been chowned
2618 '--group-add=disk'])
2619 if self.ptrace and not self.privileged:
2620 # if privileged, the SYS_PTRACE cap is already added
2621 # in addition, --cap-add and --privileged are mutually
2622 # exclusive since podman >= 2.0
2623 cmd_args.append('--cap-add=SYS_PTRACE')
2624 if self.init:
2625 cmd_args.append('--init')
2626 envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
2627 if self.cname:
2628 cmd_args.extend(['--name', self.cname])
2629 if self.envs:
2630 for env in self.envs:
2631 envs.extend(['-e', env])
2632
2633 vols = sum(
2634 [['-v', '%s:%s' % (host_dir, container_dir)]
2635 for host_dir, container_dir in self.volume_mounts.items()], [])
2636 binds = sum([['--mount', '{}'.format(','.join(bind))]
2637 for bind in self.bind_mounts], [])
2638
2639 return cmd_args + self.container_args + envs + vols + binds + [
2640 self.image,
2641 ] + self.args # type: ignore
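    # For a minimal container the list above resolves to roughly (engine,
    # image and arguments are illustrative):
    #
    #   podman run --rm --ipc=host --net=host --entrypoint /usr/bin/ceph \
    #     -e CONTAINER_IMAGE=<image> -e NODE_NAME=<host> <image> --version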
2642
2643 def shell_cmd(self, cmd: List[str]) -> List[str]:
2644 cmd_args: List[str] = [
2645 str(container_path),
2646 'run',
2647 '--rm',
2648 '--ipc=host',
2649 ]
2650 envs: List[str] = [
2651 '-e', 'CONTAINER_IMAGE=%s' % self.image,
2652 '-e', 'NODE_NAME=%s' % get_hostname(),
2653 ]
2654 vols: List[str] = []
2655 binds: List[str] = []
2656
2657 if self.host_network:
2658 cmd_args.append('--net=host')
2659 if self.privileged:
2660 cmd_args.extend([
2661 '--privileged',
2662 # let OSD etc read block devs that haven't been chowned
2663 '--group-add=disk',
2664 ])
2665 if self.init:
2666 cmd_args.append('--init')
2667 envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
2668 if self.envs:
2669 for env in self.envs:
2670 envs.extend(['-e', env])
2671
2672 vols = sum(
2673 [['-v', '%s:%s' % (host_dir, container_dir)]
2674 for host_dir, container_dir in self.volume_mounts.items()], [])
2675 binds = sum([['--mount', '{}'.format(','.join(bind))]
2676 for bind in self.bind_mounts], [])
2677
2678 return cmd_args + self.container_args + envs + vols + binds + [
2679 '--entrypoint', cmd[0],
2680 self.image,
2681 ] + cmd[1:]
2682
2683 def exec_cmd(self, cmd):
2684 # type: (List[str]) -> List[str]
2685 return [
2686 str(container_path),
2687 'exec',
2688 ] + self.container_args + [
2689 self.cname,
2690 ] + cmd
2691
2692 def rm_cmd(self, storage=False):
2693 # type: (bool) -> List[str]
2694 ret = [
2695 str(container_path),
2696 'rm', '-f',
2697 ]
2698 if storage:
2699 ret.append('--storage')
2700 ret.append(self.cname)
2701 return ret
2702
2703 def stop_cmd(self):
2704         # type: () -> List[str]
2705 ret = [
2706 str(container_path),
2707 'stop', self.cname,
2708 ]
2709 return ret
2710
2711 def run(self, timeout=DEFAULT_TIMEOUT):
2712 # type: (Optional[int]) -> str
2713 out, _, _ = call_throws(
2714 self.run_cmd(), desc=self.entrypoint, timeout=timeout)
2715 return out
2716
2717 ##################################
2718
2719
2720 @infer_image
2721 def command_version():
2722 # type: () -> int
2723 out = CephContainer(args.image, 'ceph', ['--version']).run()
2724 print(out.strip())
2725 return 0
2726
2727 ##################################
2728
2729
2730 @infer_image
2731 def command_pull():
2732 # type: () -> int
2733
2734 _pull_image(args.image)
2735 return command_inspect_image()
2736
2737
2738 def _pull_image(image):
2739 # type: (str) -> None
2740 logger.info('Pulling container image %s...' % image)
2741
2742 ignorelist = [
2743 "error creating read-write layer with ID",
2744 "net/http: TLS handshake timeout",
2745 "Digest did not match, expected",
2746 ]
2747
2748 cmd = [container_path, 'pull', image]
2749 cmd_str = ' '.join(cmd)
2750
2751 for sleep_secs in [1, 4, 25]:
2752 out, err, ret = call(cmd)
2753 if not ret:
2754 return
2755
2756 if not any(pattern in err for pattern in ignorelist):
2757 raise RuntimeError('Failed command: %s' % cmd_str)
2758
2759         logger.info('%s failed transiently. Retrying in %s seconds...' % (cmd_str, sleep_secs))
2760 time.sleep(sleep_secs)
2761
2762 raise RuntimeError('Failed command: %s: maximum retries reached' % cmd_str)
2763 ##################################
2764
2765
2766 @infer_image
2767 def command_inspect_image():
2768 # type: () -> int
2769 out, err, ret = call_throws([
2770 container_path, 'inspect',
2771 '--format', '{{.ID}},{{.RepoDigests}}',
2772 args.image])
2773 if ret:
2774 return errno.ENOENT
2775 info_from = get_image_info_from_inspect(out.strip(), args.image)
2776
2777 ver = CephContainer(args.image, 'ceph', ['--version']).run().strip()
2778 info_from['ceph_version'] = ver
2779
2780 print(json.dumps(info_from, indent=4, sort_keys=True))
2781 return 0
2782
2783
2784 def get_image_info_from_inspect(out, image):
2785 # type: (str, str) -> Dict[str, str]
2786     if not out:
2787         raise Error('inspect {}: empty result'.format(image))
2788     image_id, digests = out.split(',', 1)
2789 r = {
2790 'image_id': normalize_container_id(image_id)
2791 }
2792 if digests:
2793 json_digests = digests[1:-1].split(' ')
2794 if json_digests:
2795 r['repo_digest'] = json_digests[0]
2796 return r
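# 'inspect --format {{.ID}},{{.RepoDigests}}' prints the digests as a
# bracketed, space-separated list (hence the [1:-1] strip and split above),
# so a typical out value looks like:
#
#   sha256:<image_id>,[docker.io/ceph/ceph@sha256:<digest>]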
2797
2798
2799 ##################################
2800
2801
2802 def unwrap_ipv6(address):
2803 # type: (str) -> str
2804 if address.startswith('[') and address.endswith(']'):
2805 return address[1:-1]
2806 return address
2807
2808
2809 def wrap_ipv6(address):
2810 # type: (str) -> str
2811
2812     # We cannot assume the address is an unwrapped IPv6 address: if it is
2813     # already wrapped, or is not an IP at all (e.g. a hostname), ip_address()
2814     # raises ValueError and we return it unchanged.
2815 try:
2816 if ipaddress.ip_address(unicode(address)).version == 6:
2817 return f"[{address}]"
2818 except ValueError:
2819 pass
2820
2821 return address
2822
2823
2824 def is_ipv6(address):
2825 # type: (str) -> bool
2826 address = unwrap_ipv6(address)
2827 try:
2828 return ipaddress.ip_address(unicode(address)).version == 6
2829 except ValueError:
2830 logger.warning("Address: {} isn't a valid IP address".format(address))
2831 return False
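# Illustrative behaviour of the three helpers above:
#
#   unwrap_ipv6('[2001:db8::1]')  -> '2001:db8::1'
#   wrap_ipv6('2001:db8::1')      -> '[2001:db8::1]'
#   wrap_ipv6('myhost')           -> 'myhost'  (not an IP, returned unchanged)
#   is_ipv6('10.0.0.1')           -> False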
2832
2833
2834 @default_image
2835 def command_bootstrap():
2836 # type: () -> int
2837
2838 if not args.output_config:
2839 args.output_config = os.path.join(args.output_dir, 'ceph.conf')
2840 if not args.output_keyring:
2841 args.output_keyring = os.path.join(args.output_dir,
2842 'ceph.client.admin.keyring')
2843 if not args.output_pub_ssh_key:
2844 args.output_pub_ssh_key = os.path.join(args.output_dir, 'ceph.pub')
2845
2846 # verify output files
2847 for f in [args.output_config, args.output_keyring, args.output_pub_ssh_key]:
2848 if not args.allow_overwrite:
2849 if os.path.exists(f):
2850 raise Error('%s already exists; delete or pass '
2851 '--allow-overwrite to overwrite' % f)
2852 dirname = os.path.dirname(f)
2853 if dirname and not os.path.exists(dirname):
2854 fname = os.path.basename(f)
2855 logger.info(f"Creating directory {dirname} for {fname}")
2856 try:
2857 # use makedirs to create intermediate missing dirs
2858 os.makedirs(dirname, 0o755)
2859 except PermissionError:
2860                 raise Error(f"Unable to create {dirname} due to a permissions failure. Retry as root or with sudo, or create the directory beforehand.")
2861
2862
2863 if not args.skip_prepare_host:
2864 command_prepare_host()
2865 else:
2866 logger.info('Skip prepare_host')
2867
2868 # initial vars
2869 fsid = args.fsid or make_fsid()
2870 hostname = get_hostname()
2871 if '.' in hostname and not args.allow_fqdn_hostname:
2872 raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0]))
2873 mon_id = args.mon_id or hostname
2874 mgr_id = args.mgr_id or generate_service_id()
2875 logger.info('Cluster fsid: %s' % fsid)
2876 ipv6 = False
2877
2878 l = FileLock(fsid)
2879 l.acquire()
2880
2881 # ip
2882 r = re.compile(r':(\d+)$')
2883 base_ip = ''
2884 if args.mon_ip:
2885 ipv6 = is_ipv6(args.mon_ip)
2886 if ipv6:
2887 args.mon_ip = wrap_ipv6(args.mon_ip)
2888 hasport = r.findall(args.mon_ip)
2889 if hasport:
2890 port = int(hasport[0])
2891 if port == 6789:
2892 addr_arg = '[v1:%s]' % args.mon_ip
2893 elif port == 3300:
2894 addr_arg = '[v2:%s]' % args.mon_ip
2895 else:
2896 logger.warning('Using msgr2 protocol for unrecognized port %d' %
2897 port)
2898 addr_arg = '[v2:%s]' % args.mon_ip
2899 base_ip = args.mon_ip[0:-(len(str(port)))-1]
2900 check_ip_port(base_ip, port)
2901 else:
2902 base_ip = args.mon_ip
2903 addr_arg = '[v2:%s:3300,v1:%s:6789]' % (args.mon_ip, args.mon_ip)
2904 check_ip_port(args.mon_ip, 3300)
2905 check_ip_port(args.mon_ip, 6789)
2906 elif args.mon_addrv:
2907 addr_arg = args.mon_addrv
2908 if addr_arg[0] != '[' or addr_arg[-1] != ']':
2909             raise Error('--mon-addrv value %s must use square brackets' %
2910 addr_arg)
2911 ipv6 = addr_arg.count('[') > 1
2912 for addr in addr_arg[1:-1].split(','):
2913 hasport = r.findall(addr)
2914 if not hasport:
2915 raise Error('--mon-addrv value %s must include port number' %
2916 addr_arg)
2917 port = int(hasport[0])
2918 # strip off v1: or v2: prefix
2919 addr = re.sub(r'^\w+:', '', addr)
2920 base_ip = addr[0:-(len(str(port)))-1]
2921 check_ip_port(base_ip, port)
2922 else:
2923 raise Error('must specify --mon-ip or --mon-addrv')
2924 logger.debug('Base mon IP is %s, final addrv is %s' % (base_ip, addr_arg))
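    # To summarize the cases above (addresses are illustrative):
    #   --mon-ip 10.1.2.3       -> addr_arg '[v2:10.1.2.3:3300,v1:10.1.2.3:6789]'
    #   --mon-ip 10.1.2.3:6789  -> addr_arg '[v1:10.1.2.3:6789]'
    #   --mon-ip 10.1.2.3:3300  -> addr_arg '[v2:10.1.2.3:3300]'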
2925
2926 mon_network = None
2927 if not args.skip_mon_network:
2928 # make sure IP is configured locally, and then figure out the
2929 # CIDR network
2930 for net, ips in list_networks().items():
2931 if ipaddress.ip_address(unicode(unwrap_ipv6(base_ip))) in \
2932 [ipaddress.ip_address(unicode(ip)) for ip in ips]:
2933 mon_network = net
2934 logger.info('Mon IP %s is in CIDR network %s' % (base_ip,
2935 mon_network))
2936 break
2937 if not mon_network:
2938 raise Error('Failed to infer CIDR network for mon ip %s; pass '
2939 '--skip-mon-network to configure it later' % base_ip)
2940
2941 # config
2942 cp = read_config(args.config)
2943 if not cp.has_section('global'):
2944 cp.add_section('global')
2945     cp.set('global', 'fsid', fsid)
2946 cp.set('global', 'mon_host', addr_arg)
2947 cp.set('global', 'container_image', args.image)
2948 if not cp.has_section('mon'):
2949 cp.add_section('mon')
2950 if (
2951 not cp.has_option('mon', 'auth_allow_insecure_global_id_reclaim')
2952 and not cp.has_option('mon', 'auth allow insecure global id reclaim')
2953 ):
2954 cp.set('mon', 'auth_allow_insecure_global_id_reclaim', 'false')
2955 cpf = StringIO()
2956 cp.write(cpf)
2957 config = cpf.getvalue()
2958
2959 if args.registry_json or args.registry_url:
2960 command_registry_login()
2961
2962 if not args.skip_pull:
2963 _pull_image(args.image)
2964
2965 logger.info('Extracting ceph user uid/gid from container image...')
2966 (uid, gid) = extract_uid_gid()
2967
2968 # create some initial keys
2969 logger.info('Creating initial keys...')
2970 mon_key = CephContainer(
2971 image=args.image,
2972 entrypoint='/usr/bin/ceph-authtool',
2973 args=['--gen-print-key'],
2974 ).run().strip()
2975 admin_key = CephContainer(
2976 image=args.image,
2977 entrypoint='/usr/bin/ceph-authtool',
2978 args=['--gen-print-key'],
2979 ).run().strip()
2980 mgr_key = CephContainer(
2981 image=args.image,
2982 entrypoint='/usr/bin/ceph-authtool',
2983 args=['--gen-print-key'],
2984 ).run().strip()
2985
2986 keyring = ('[mon.]\n'
2987 '\tkey = %s\n'
2988 '\tcaps mon = allow *\n'
2989 '[client.admin]\n'
2990 '\tkey = %s\n'
2991 '\tcaps mon = allow *\n'
2992 '\tcaps mds = allow *\n'
2993 '\tcaps mgr = allow *\n'
2994 '\tcaps osd = allow *\n'
2995 '[mgr.%s]\n'
2996 '\tkey = %s\n'
2997 '\tcaps mon = profile mgr\n'
2998 '\tcaps mds = allow *\n'
2999 '\tcaps osd = allow *\n'
3000 % (mon_key, admin_key, mgr_id, mgr_key))
3001
3002 # tmp keyring file
3003 tmp_bootstrap_keyring = write_tmp(keyring, uid, gid)
3004
3005 # create initial monmap, tmp monmap file
3006 logger.info('Creating initial monmap...')
3007 tmp_monmap = write_tmp('', 0, 0)
3008 out = CephContainer(
3009 image=args.image,
3010 entrypoint='/usr/bin/monmaptool',
3011 args=['--create',
3012 '--clobber',
3013 '--fsid', fsid,
3014 '--addv', mon_id, addr_arg,
3015 '/tmp/monmap'
3016 ],
3017 volume_mounts={
3018 tmp_monmap.name: '/tmp/monmap:z',
3019 },
3020 ).run()
3021
3022 # pass monmap file to ceph user for use by ceph-mon --mkfs below
3023 os.fchown(tmp_monmap.fileno(), uid, gid)
3024
3025 # create mon
3026 logger.info('Creating mon...')
3027 create_daemon_dirs(fsid, 'mon', mon_id, uid, gid)
3028 mon_dir = get_data_dir(fsid, 'mon', mon_id)
3029 log_dir = get_log_dir(fsid)
3030 out = CephContainer(
3031 image=args.image,
3032 entrypoint='/usr/bin/ceph-mon',
3033 args=['--mkfs',
3034 '-i', mon_id,
3035 '--fsid', fsid,
3036 '-c', '/dev/null',
3037 '--monmap', '/tmp/monmap',
3038 '--keyring', '/tmp/keyring',
3039 ] + get_daemon_args(fsid, 'mon', mon_id),
3040 volume_mounts={
3041 log_dir: '/var/log/ceph:z',
3042 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
3043 tmp_bootstrap_keyring.name: '/tmp/keyring:z',
3044 tmp_monmap.name: '/tmp/monmap:z',
3045 },
3046 ).run()
3047
3048 with open(mon_dir + '/config', 'w') as f:
3049 os.fchown(f.fileno(), uid, gid)
3050 os.fchmod(f.fileno(), 0o600)
3051 f.write(config)
3052
3053 make_var_run(fsid, uid, gid)
3054 mon_c = get_container(fsid, 'mon', mon_id)
3055 deploy_daemon(fsid, 'mon', mon_id, mon_c, uid, gid,
3056 config=None, keyring=None)
3057
3058 # client.admin key + config to issue various CLI commands
3059 tmp_admin_keyring = write_tmp('[client.admin]\n'
3060 '\tkey = ' + admin_key + '\n',
3061 uid, gid)
3062 tmp_config = write_tmp(config, uid, gid)
3063
3064 # a CLI helper to reduce our typing
3065 def cli(cmd, extra_mounts={}, timeout=DEFAULT_TIMEOUT):
3066 # type: (List[str], Dict[str, str], Optional[int]) -> str
3067 mounts = {
3068 log_dir: '/var/log/ceph:z',
3069 tmp_admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
3070 tmp_config.name: '/etc/ceph/ceph.conf:z',
3071 }
3072 for k, v in extra_mounts.items():
3073 mounts[k] = v
3074 timeout = timeout or args.timeout
3075 return CephContainer(
3076 image=args.image,
3077 entrypoint='/usr/bin/ceph',
3078 args=cmd,
3079 volume_mounts=mounts,
3080 ).run(timeout=timeout)
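    # e.g. cli(['status']) runs 'ceph status' in a one-shot container with the
    # temporary admin keyring and config mounted at the standard /etc/ceph paths.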
3081
3082 logger.info('Waiting for mon to start...')
3083 c = CephContainer(
3084 image=args.image,
3085 entrypoint='/usr/bin/ceph',
3086 args=[
3087 'status'],
3088 volume_mounts={
3089 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
3090 tmp_admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
3091 tmp_config.name: '/etc/ceph/ceph.conf:z',
3092 },
3093 )
3094
3095 # wait for the service to become available
3096 def is_mon_available():
3097 # type: () -> bool
3098         timeout = args.timeout if args.timeout else 60  # seconds
3099 out, err, ret = call(c.run_cmd(),
3100 desc=c.entrypoint,
3101 timeout=timeout)
3102 return ret == 0
3103 is_available('mon', is_mon_available)
3104
3105 # assimilate and minimize config
3106 if not args.no_minimize_config:
3107 logger.info('Assimilating anything we can from ceph.conf...')
3108 cli([
3109 'config', 'assimilate-conf',
3110 '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
3111 ], {
3112 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
3113 })
3114 logger.info('Generating new minimal ceph.conf...')
3115 cli([
3116 'config', 'generate-minimal-conf',
3117 '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
3118 ], {
3119 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
3120 })
3121 # re-read our minimized config
3122 with open(mon_dir + '/config', 'r') as f:
3123 config = f.read()
3124 logger.info('Restarting the monitor...')
3125 call_throws([
3126 'systemctl',
3127 'restart',
3128 get_unit_name(fsid, 'mon', mon_id)
3129 ])
3130
3131 if mon_network:
3132 logger.info('Setting mon public_network...')
3133 cli(['config', 'set', 'mon', 'public_network', mon_network])
3134
3135 if ipv6:
3136 logger.info('Enabling IPv6 (ms_bind_ipv6)')
3137 cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])
3138
3139 # create mgr
3140 logger.info('Creating mgr...')
3141 mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key)
3142 mgr_c = get_container(fsid, 'mgr', mgr_id)
3143     # Note: the default port of the mgr prometheus module (9283) is opened in the firewall
3144 deploy_daemon(fsid, 'mgr', mgr_id, mgr_c, uid, gid,
3145 config=config, keyring=mgr_keyring, ports=[9283])
3146
3147 # output files
3148 with open(args.output_keyring, 'w') as f:
3149 os.fchmod(f.fileno(), 0o600)
3150 f.write('[client.admin]\n'
3151 '\tkey = ' + admin_key + '\n')
3152 logger.info('Wrote keyring to %s' % args.output_keyring)
3153
3154 with open(args.output_config, 'w') as f:
3155 f.write(config)
3156 logger.info('Wrote config to %s' % args.output_config)
3157
3158 # wait for the service to become available
3159 logger.info('Waiting for mgr to start...')
3160 def is_mgr_available():
3161 # type: () -> bool
3162         timeout = args.timeout if args.timeout else 60  # seconds
3163 try:
3164 out = cli(['status', '-f', 'json-pretty'], timeout=timeout)
3165 j = json.loads(out)
3166 return j.get('mgrmap', {}).get('available', False)
3167 except Exception as e:
3168 logger.debug('status failed: %s' % e)
3169 return False
3170 is_available('mgr', is_mgr_available)
3171
3172 # wait for mgr to restart (after enabling a module)
3173 def wait_for_mgr_restart():
3174 # first get latest mgrmap epoch from the mon
3175 out = cli(['mgr', 'dump'])
3176 j = json.loads(out)
3177 epoch = j['epoch']
3178 # wait for mgr to have it
3179 logger.info('Waiting for the mgr to restart...')
3180 def mgr_has_latest_epoch():
3181 # type: () -> bool
3182 try:
3183 out = cli(['tell', 'mgr', 'mgr_status'])
3184 j = json.loads(out)
3185 return j['mgrmap_epoch'] >= epoch
3186 except Exception as e:
3187 logger.debug('tell mgr mgr_status failed: %s' % e)
3188 return False
3189 is_available('Mgr epoch %d' % epoch, mgr_has_latest_epoch)
3190
3191 # ssh
3192 if not args.skip_ssh:
3193 cli(['config-key', 'set', 'mgr/cephadm/ssh_user', args.ssh_user])
3194
3195 logger.info('Enabling cephadm module...')
3196 cli(['mgr', 'module', 'enable', 'cephadm'])
3197 wait_for_mgr_restart()
3198
3199 logger.info('Setting orchestrator backend to cephadm...')
3200 cli(['orch', 'set', 'backend', 'cephadm'])
3201
3202 if args.ssh_config:
3203 logger.info('Using provided ssh config...')
3204 mounts = {
3205 pathify(args.ssh_config.name): '/tmp/cephadm-ssh-config:z',
3206 }
3207 cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts=mounts)
3208
3209 if args.ssh_private_key and args.ssh_public_key:
3210 logger.info('Using provided ssh keys...')
3211 mounts = {
3212 pathify(args.ssh_private_key.name): '/tmp/cephadm-ssh-key:z',
3213 pathify(args.ssh_public_key.name): '/tmp/cephadm-ssh-key.pub:z'
3214 }
3215 cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts)
3216 cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts=mounts)
3217 else:
3218 logger.info('Generating ssh key...')
3219 cli(['cephadm', 'generate-key'])
3220 ssh_pub = cli(['cephadm', 'get-pub-key'])
3221
3222 with open(args.output_pub_ssh_key, 'w') as f:
3223 f.write(ssh_pub)
3224         logger.info('Wrote public SSH key to %s' % args.output_pub_ssh_key)
3225
3226 logger.info('Adding key to %s@localhost\'s authorized_keys...' % args.ssh_user)
3227 try:
3228 s_pwd = pwd.getpwnam(args.ssh_user)
3229         except KeyError:
3230 raise Error('Cannot find uid/gid for ssh-user: %s' % (args.ssh_user))
3231 ssh_uid = s_pwd.pw_uid
3232 ssh_gid = s_pwd.pw_gid
3233 ssh_dir = os.path.join(s_pwd.pw_dir, '.ssh')
3234
3235 if not os.path.exists(ssh_dir):
3236 makedirs(ssh_dir, ssh_uid, ssh_gid, 0o700)
3237
3238 auth_keys_file = '%s/authorized_keys' % ssh_dir
3239 add_newline = False
3240
3241 if os.path.exists(auth_keys_file):
3242 with open(auth_keys_file, 'r') as f:
3243 f.seek(0, os.SEEK_END)
3244 if f.tell() > 0:
3245 f.seek(f.tell()-1, os.SEEK_SET) # go to last char
3246 if f.read() != '\n':
3247 add_newline = True
3248
3249 with open(auth_keys_file, 'a') as f:
3250 os.fchown(f.fileno(), ssh_uid, ssh_gid) # just in case we created it
3251 os.fchmod(f.fileno(), 0o600) # just in case we created it
3252 if add_newline:
3253 f.write('\n')
3254 f.write(ssh_pub.strip() + '\n')
3255
3256 host = get_hostname()
3257 logger.info('Adding host %s...' % host)
3258 try:
3259 cli(['orch', 'host', 'add', host])
3260 except RuntimeError as e:
3261 raise Error('Failed to add host <%s>: %s' % (host, e))
3262
3263 if not args.orphan_initial_daemons:
3264 for t in ['mon', 'mgr', 'crash']:
3265 logger.info('Deploying %s service with default placement...' % t)
3266 cli(['orch', 'apply', t])
3267
3268 if not args.skip_monitoring_stack:
3269 logger.info('Enabling mgr prometheus module...')
3270 cli(['mgr', 'module', 'enable', 'prometheus'])
3271 for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
3272 logger.info('Deploying %s service with default placement...' % t)
3273 cli(['orch', 'apply', t])
3274
3275 if args.registry_url and args.registry_username and args.registry_password:
3276 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_url', args.registry_url, '--force'])
3277 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_username', args.registry_username, '--force'])
3278 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_password', args.registry_password, '--force'])
3279
3280 cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(container_init), '--force'])
3281
3282 if not args.skip_dashboard:
3283         # Configure the SSL port (cephadm only allows configuring the dashboard SSL port);
3284         # users who do not want SSL can change this setting once the cluster is up
3285         cli(["config", "set", "mgr", "mgr/dashboard/ssl_server_port", str(args.ssl_dashboard_port)])
3286
3287 # configuring dashboard parameters
3288 logger.info('Enabling the dashboard module...')
3289 cli(['mgr', 'module', 'enable', 'dashboard'])
3290 wait_for_mgr_restart()
3291
3292 # dashboard crt and key
3293 if args.dashboard_key and args.dashboard_crt:
3294 logger.info('Using provided dashboard certificate...')
3295 mounts = {
3296 pathify(args.dashboard_crt.name): '/tmp/dashboard.crt:z',
3297 pathify(args.dashboard_key.name): '/tmp/dashboard.key:z'
3298 }
3299 cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts)
3300 cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts)
3301 else:
3302 logger.info('Generating a dashboard self-signed certificate...')
3303 cli(['dashboard', 'create-self-signed-cert'])
3304
3305 logger.info('Creating initial admin user...')
3306 password = args.initial_dashboard_password or generate_password()
3307 tmp_password_file = write_tmp(password, uid, gid)
3308 cmd = ['dashboard', 'ac-user-create', args.initial_dashboard_user, '-i', '/tmp/dashboard.pw', 'administrator', '--force-password']
3309 if not args.dashboard_password_noupdate:
3310 cmd.append('--pwd-update-required')
3311 cli(cmd, extra_mounts={pathify(tmp_password_file.name): '/tmp/dashboard.pw:z'})
3312 logger.info('Fetching dashboard port number...')
3313 out = cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
3314 port = int(out)
3315
3316 # Open dashboard port
3317 fw = Firewalld()
3318 fw.open_ports([port])
3319 fw.apply_rules()
3320
3321 logger.info('Ceph Dashboard is now available at:\n\n'
3322 '\t URL: https://%s:%s/\n'
3323 '\t User: %s\n'
3324 '\tPassword: %s\n' % (
3325 get_fqdn(), port,
3326 args.initial_dashboard_user,
3327 password))
3328
3329 if args.apply_spec:
3330 logger.info('Applying %s to cluster' % args.apply_spec)
3331
3332 with open(args.apply_spec) as f:
3333 for line in f:
3334 if 'hostname:' in line:
3335 line = line.replace('\n', '')
3336 split = line.split(': ')
3337 if split[1] != host:
3338 logger.info('Adding ssh key to %s' % split[1])
3339
3340 ssh_key = '/etc/ceph/ceph.pub'
3341 if args.ssh_public_key:
3342 ssh_key = args.ssh_public_key.name
3343 out, err, code = call_throws(['sudo', '-u', args.ssh_user, 'ssh-copy-id', '-f', '-i', ssh_key, '-o StrictHostKeyChecking=no', '%s@%s' % (args.ssh_user, split[1])])
3344
3345 mounts = {}
3346 mounts[pathify(args.apply_spec)] = '/tmp/spec.yml:z'
3347
3348 out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
3349 logger.info(out)
3350
3351 logger.info('You can access the Ceph CLI with:\n\n'
3352 '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
3353 sys.argv[0],
3354 fsid,
3355 args.output_config,
3356 args.output_keyring))
3357 logger.info('Please consider enabling telemetry to help improve Ceph:\n\n'
3358 '\tceph telemetry on\n\n'
3359 'For more information see:\n\n'
3360 '\thttps://docs.ceph.com/docs/master/mgr/telemetry/\n')
3361 logger.info('Bootstrap complete.')
3362 return 0
3363
3364 ##################################
3365
3366 def command_registry_login():
3367 if args.registry_json:
3368 logger.info("Pulling custom registry login info from %s." % args.registry_json)
3369 d = get_parm(args.registry_json)
3370 if d.get('url') and d.get('username') and d.get('password'):
3371 args.registry_url = d.get('url')
3372 args.registry_username = d.get('username')
3373 args.registry_password = d.get('password')
3374 registry_login(args.registry_url, args.registry_username, args.registry_password)
3375 else:
3376 raise Error("json provided for custom registry login did not include all necessary fields. "
3377                         "Please set up the json file as\n"
3378 "{\n"
3379 " \"url\": \"REGISTRY_URL\",\n"
3380 " \"username\": \"REGISTRY_USERNAME\",\n"
3381 " \"password\": \"REGISTRY_PASSWORD\"\n"
3382 "}\n")
3383 elif args.registry_url and args.registry_username and args.registry_password:
3384 registry_login(args.registry_url, args.registry_username, args.registry_password)
3385 else:
3386         raise Error("Invalid custom registry arguments received. To log in to a custom registry include "
3387 "--registry-url, --registry-username and --registry-password "
3388 "options or --registry-json option")
3389 return 0
3390
3391 def registry_login(url, username, password):
3392 logger.info("Logging into custom registry.")
3393 try:
3394 out, _, _ = call_throws([container_path, 'login',
3395 '-u', username,
3396 '-p', password,
3397 url])
3398     except Exception:
3399         raise Error("Failed to log in to custom registry @ %s as %s with given password" % (url, username))
3400
3401 ##################################
3402
3403
3404 def extract_uid_gid_monitoring(daemon_type):
3405 # type: (str) -> Tuple[int, int]
3406
3407 if daemon_type == 'prometheus':
3408 uid, gid = extract_uid_gid(file_path='/etc/prometheus')
3409 elif daemon_type == 'node-exporter':
3410 uid, gid = 65534, 65534
3411 elif daemon_type == 'grafana':
3412 uid, gid = extract_uid_gid(file_path='/var/lib/grafana')
3413 elif daemon_type == 'alertmanager':
3414 uid, gid = extract_uid_gid(file_path=['/etc/alertmanager', '/etc/prometheus'])
3415 else:
3416 raise Error("{} not implemented yet".format(daemon_type))
3417 return uid, gid
3418
3419
3420 @default_image
3421 def command_deploy():
3422 # type: () -> None
3423 daemon_type, daemon_id = args.name.split('.', 1)
3424
3425 l = FileLock(args.fsid)
3426 l.acquire()
3427
3428 if daemon_type not in get_supported_daemons():
3429 raise Error('daemon type %s not recognized' % daemon_type)
3430
3431 redeploy = False
3432 unit_name = get_unit_name(args.fsid, daemon_type, daemon_id)
3433 container_name = 'ceph-%s-%s.%s' % (args.fsid, daemon_type, daemon_id)
3434 (_, state, _) = check_unit(unit_name)
3435 if state == 'running' or is_container_running(container_name):
3436 redeploy = True
3437
3438 if args.reconfig:
3439 logger.info('%s daemon %s ...' % ('Reconfig', args.name))
3440 elif redeploy:
3441 logger.info('%s daemon %s ...' % ('Redeploy', args.name))
3442 else:
3443 logger.info('%s daemon %s ...' % ('Deploy', args.name))
3444
3445 # Get and check ports explicitly required to be opened
3446 daemon_ports = [] # type: List[int]
3447 if args.tcp_ports:
3448 daemon_ports = list(map(int, args.tcp_ports.split()))
3449
3450 if daemon_type in Ceph.daemons:
3451 config, keyring = get_config_and_keyring()
3452 uid, gid = extract_uid_gid()
3453 make_var_run(args.fsid, uid, gid)
3454
3455 c = get_container(args.fsid, daemon_type, daemon_id,
3456 ptrace=args.allow_ptrace)
3457 deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
3458 config=config, keyring=keyring,
3459 osd_fsid=args.osd_fsid,
3460 reconfig=args.reconfig,
3461 ports=daemon_ports)
3462
3463 elif daemon_type in Monitoring.components:
3464 # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
3465 # Default Checks
3466 if not args.reconfig and not redeploy:
3467 daemon_ports.extend(Monitoring.port_map[daemon_type])
3468
3469 # make sure provided config-json is sufficient
3470 config = get_parm(args.config_json) # type: ignore
3471 required_files = Monitoring.components[daemon_type].get('config-json-files', list())
3472 required_args = Monitoring.components[daemon_type].get('config-json-args', list())
3473 if required_files:
3474 if not config or not all(c in config.get('files', {}).keys() for c in required_files): # type: ignore
3475 raise Error("{} deployment requires config-json which must "
3476 "contain file content for {}".format(daemon_type.capitalize(), ', '.join(required_files)))
3477 if required_args:
3478 if not config or not all(c in config.keys() for c in required_args): # type: ignore
3479 raise Error("{} deployment requires config-json which must "
3480 "contain arg for {}".format(daemon_type.capitalize(), ', '.join(required_args)))
3481
3482 uid, gid = extract_uid_gid_monitoring(daemon_type)
3483 c = get_container(args.fsid, daemon_type, daemon_id)
3484 deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
3485 reconfig=args.reconfig,
3486 ports=daemon_ports)
3487
3488 elif daemon_type == NFSGanesha.daemon_type:
3489 if not args.reconfig and not redeploy:
3490 daemon_ports.extend(NFSGanesha.port_map.values())
3491
3492 config, keyring = get_config_and_keyring()
3493 # TODO: extract ganesha uid/gid (997, 994) ?
3494 uid, gid = extract_uid_gid()
3495 c = get_container(args.fsid, daemon_type, daemon_id)
3496 deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
3497 config=config, keyring=keyring,
3498 reconfig=args.reconfig,
3499 ports=daemon_ports)
3500
3501 elif daemon_type == CephIscsi.daemon_type:
3502 config, keyring = get_config_and_keyring()
3503 uid, gid = extract_uid_gid()
3504 c = get_container(args.fsid, daemon_type, daemon_id)
3505 deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
3506 config=config, keyring=keyring,
3507 reconfig=args.reconfig,
3508 ports=daemon_ports)
3509
3510 elif daemon_type == CustomContainer.daemon_type:
3511 cc = CustomContainer.init(args.fsid, daemon_id)
3512 if not args.reconfig and not redeploy:
3513 daemon_ports.extend(cc.ports)
3514 c = get_container(args.fsid, daemon_type, daemon_id,
3515 privileged=cc.privileged,
3516 ptrace=args.allow_ptrace)
3517 deploy_daemon(args.fsid, daemon_type, daemon_id, c,
3518 uid=cc.uid, gid=cc.gid, config=None,
3519 keyring=None, reconfig=args.reconfig,
3520 ports=daemon_ports)
3521
3522 else:
3523 raise Error('daemon type {} not implemented in command_deploy function'
3524 .format(daemon_type))
3525
3526 ##################################
3527
3528
3529 @infer_image
3530 def command_run():
3531 # type: () -> int
3532 (daemon_type, daemon_id) = args.name.split('.', 1)
3533 c = get_container(args.fsid, daemon_type, daemon_id)
3534 command = c.run_cmd()
3535 return call_timeout(command, args.timeout)
3536
3537 ##################################
3538
3539
3540 @infer_fsid
3541 @infer_config
3542 @infer_image
3543 def command_shell():
3544 # type: () -> int
3545 if args.fsid:
3546 make_log_dir(args.fsid)
3547 if args.name:
3548 if '.' in args.name:
3549 (daemon_type, daemon_id) = args.name.split('.', 1)
3550 else:
3551 daemon_type = args.name
3552 daemon_id = None
3553 else:
3554 daemon_type = 'osd' # get the most mounts
3555 daemon_id = None
3556
3557 if daemon_id and not args.fsid:
3558 raise Error('must pass --fsid to specify cluster')
3559
3560 # use /etc/ceph files by default, if present. we do this instead of
3561 # making these defaults in the arg parser because we don't want an error
3562 # if they don't exist.
3563 if not args.keyring and os.path.exists(SHELL_DEFAULT_KEYRING):
3564 args.keyring = SHELL_DEFAULT_KEYRING
3565
3566 container_args = [] # type: List[str]
3567 mounts = get_container_mounts(args.fsid, daemon_type, daemon_id,
3568 no_config=bool(args.config))
3569 binds = get_container_binds(args.fsid, daemon_type, daemon_id)
3570 if args.config:
3571 mounts[pathify(args.config)] = '/etc/ceph/ceph.conf:z'
3572 if args.keyring:
3573 mounts[pathify(args.keyring)] = '/etc/ceph/ceph.keyring:z'
3574 if args.mount:
3575 for _mount in args.mount:
3576 split_src_dst = _mount.split(':')
3577 mount = pathify(split_src_dst[0])
3578 filename = os.path.basename(split_src_dst[0])
3579 if len(split_src_dst) > 1:
3580 dst = split_src_dst[1] + ':z' if len(split_src_dst) == 3 else split_src_dst[1]
3581 mounts[mount] = dst
3582 else:
3583 mounts[mount] = '/mnt/{}:z'.format(filename)
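# Illustrative examples of the --mount forms handled above:
#   --mount /tmp/foo            -> mounted at /mnt/foo:z
#   --mount /tmp/foo:/opt/foo   -> mounted at /opt/foo
#   --mount /tmp/foo:/opt/foo:z -> mounted at /opt/foo:z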
3584 if args.command:
3585 command = args.command
3586 else:
3587 command = ['bash']
3588 container_args += [
3589 '-it',
3590 '-e', 'LANG=C',
3591 '-e', "PS1=%s" % CUSTOM_PS1,
3592 ]
3593 if args.fsid:
3594 home = os.path.join(args.data_dir, args.fsid, 'home')
3595 if not os.path.exists(home):
3596 logger.debug('Creating root home at %s' % home)
3597 makedirs(home, 0, 0, 0o660)
3598 if os.path.exists('/etc/skel'):
3599 for f in os.listdir('/etc/skel'):
3600 if f.startswith('.bash'):
3601 shutil.copyfile(os.path.join('/etc/skel', f),
3602 os.path.join(home, f))
3603 mounts[home] = '/root'
3604
3605 c = CephContainer(
3606 image=args.image,
3607 entrypoint='doesnotmatter',
3608 args=[],
3609 container_args=container_args,
3610 volume_mounts=mounts,
3611 bind_mounts=binds,
3612 envs=args.env,
3613 privileged=True)
3614 command = c.shell_cmd(command)
3615
3616 return call_timeout(command, args.timeout)
3617
3618 ##################################
3619
3620
3621 @infer_fsid
3622 def command_enter():
3623 # type: () -> int
3624 if not args.fsid:
3625 raise Error('must pass --fsid to specify cluster')
3626 (daemon_type, daemon_id) = args.name.split('.', 1)
3627 container_args = [] # type: List[str]
3628 if args.command:
3629 command = args.command
3630 else:
3631 command = ['sh']
3632 container_args += [
3633 '-it',
3634 '-e', 'LANG=C',
3635 '-e', "PS1=%s" % CUSTOM_PS1,
3636 ]
3637 c = CephContainer(
3638 image=args.image,
3639 entrypoint='doesnotmatter',
3640 container_args=container_args,
3641 cname='ceph-%s-%s.%s' % (args.fsid, daemon_type, daemon_id),
3642 )
3643 command = c.exec_cmd(command)
3644 return call_timeout(command, args.timeout)
3645
3646 ##################################
3647
3648
3649 @infer_fsid
3650 @infer_image
3651 def command_ceph_volume():
3652 # type: () -> None
3653 if args.fsid:
3654 make_log_dir(args.fsid)
3655
3656 l = FileLock(args.fsid)
3657 l.acquire()
3658
3659 (uid, gid) = (0, 0) # ceph-volume runs as root
3660 mounts = get_container_mounts(args.fsid, 'osd', None)
3661
3662 tmp_config = None
3663 tmp_keyring = None
3664
3665 (config, keyring) = get_config_and_keyring()
3666
3667 if config:
3668 # tmp config file
3669 tmp_config = write_tmp(config, uid, gid)
3670 mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'
3671
3672 if keyring:
3673 # tmp keyring file
3674 tmp_keyring = write_tmp(keyring, uid, gid)
3675 mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'
3676
3677 c = CephContainer(
3678 image=args.image,
3679 entrypoint='/usr/sbin/ceph-volume',
3680 envs=args.env,
3681 args=args.command,
3682 privileged=True,
3683 volume_mounts=mounts,
3684 )
3685 out, err, code = call_throws(c.run_cmd(), verbosity=CallVerbosity.VERBOSE)
3686 if not code:
3687 print(out)
3688
3689 ##################################
3690
3691
3692 @infer_fsid
3693 def command_unit():
3694 # type: () -> None
3695 if not args.fsid:
3696 raise Error('must pass --fsid to specify cluster')
3697
3698 unit_name = get_unit_name_by_daemon_name(args.fsid, args.name)
3699
3700 call_throws([
3701 'systemctl',
3702 args.command,
3703 unit_name],
3704 verbosity=CallVerbosity.VERBOSE,
3705 desc=''
3706 )
3707
3708 ##################################
3709
3710
3711 @infer_fsid
3712 def command_logs():
3713 # type: () -> None
3714 if not args.fsid:
3715 raise Error('must pass --fsid to specify cluster')
3716
3717 unit_name = get_unit_name_by_daemon_name(args.fsid, args.name)
3718
3719 cmd = [find_program('journalctl')]
3720 cmd.extend(['-u', unit_name])
3721 if args.command:
3722 cmd.extend(args.command)
3723
3724 # call this directly, without our wrapper, so that we get an unmolested
3725 # stdout with logger prefixing.
3726 logger.debug("Running command: %s" % ' '.join(cmd))
3727 subprocess.call(cmd) # type: ignore
3728
3729 ##################################
3730
3731
3732 def list_networks():
3733 # type: () -> Dict[str,List[str]]
3734
3735 ## sadly, 18.04's iproute2 4.15.0-2ubun doesn't support the -j flag,
3736 ## so we'll need to use a regex to parse 'ip' command output.
3737 #out, _, _ = call_throws(['ip', '-j', 'route', 'ls'])
3738 #j = json.loads(out)
3739 #for x in j:
3740
3741 res = _list_ipv4_networks()
3742 res.update(_list_ipv6_networks())
3743 return res
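# Illustrative return value (assuming one NIC with a single address):
#   {'10.1.2.0/24': ['10.1.2.3']}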
3744
3745
3746 def _list_ipv4_networks():
3747 out, _, _ = call_throws([find_executable('ip'), 'route', 'ls'])
3748 return _parse_ipv4_route(out)
3749
3750
3751 def _parse_ipv4_route(out):
3752 r = {} # type: Dict[str,List[str]]
3753 p = re.compile(r'^(\S+) (.*)scope link (.*)src (\S+)')
3754 for line in out.splitlines():
3755 m = p.findall(line)
3756 if not m:
3757 continue
3758 net = m[0][0]
3759 ip = m[0][3]
3760 if net not in r:
3761 r[net] = []
3762 r[net].append(ip)
3763 return r
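# Illustrative: a route line such as
#   '10.1.2.0/24 dev eth0 proto kernel scope link src 10.1.2.3'
# parses to {'10.1.2.0/24': ['10.1.2.3']}.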
3764
3765
3766 def _list_ipv6_networks():
3767 routes, _, _ = call_throws([find_executable('ip'), '-6', 'route', 'ls'])
3768 ips, _, _ = call_throws([find_executable('ip'), '-6', 'addr', 'ls'])
3769 return _parse_ipv6_route(routes, ips)
3770
3771
3772 def _parse_ipv6_route(routes, ips):
3773 r = {} # type: Dict[str,List[str]]
3774 route_p = re.compile(r'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$')
3775 ip_p = re.compile(r'^\s+inet6 (\S+)/(.*)scope (.*)$')
3776 for line in routes.splitlines():
3777 m = route_p.findall(line)
3778 if not m or m[0][0].lower() == 'default':
3779 continue
3780 net = m[0][0]
3781 if net not in r:
3782 r[net] = []
3783
3784 for line in ips.splitlines():
3785 m = ip_p.findall(line)
3786 if not m:
3787 continue
3788 ip = m[0][0]
3789 # find the network it belongs to
3790 net = [n for n in r.keys()
3791 if ipaddress.ip_address(unicode(ip)) in ipaddress.ip_network(unicode(n))]
3792 if net:
3793 r[net[0]].append(ip)
3794
3795 return r
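# Illustrative: a route line 'fe80::/64 dev eth0 proto kernel metric 256 pref medium'
# and an addr line '    inet6 fe80::1/64 scope link' parse to
# {'fe80::/64': ['fe80::1']}.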
3796
3797
3798 def command_list_networks():
3799 # type: () -> None
3800 r = list_networks()
3801 print(json.dumps(r, indent=4))
3802
3803 ##################################
3804
3805
3806 def command_ls():
3807 # type: () -> None
3808
3809 ls = list_daemons(detail=not args.no_detail,
3810 legacy_dir=args.legacy_dir)
3811 print(json.dumps(ls, indent=4))
3812
3813
3814 def list_daemons(detail=True, legacy_dir=None):
3815 # type: (bool, Optional[str]) -> List[Dict[str, str]]
3816 host_version = None
3817 ls = []
3818
3819 data_dir = args.data_dir
3820 if legacy_dir is not None:
3821 data_dir = os.path.abspath(legacy_dir + data_dir)
3822
3823 # keep track of ceph versions we see
3824 seen_versions = {} # type: Dict[str, Optional[str]]
3825
3826 # /var/lib/ceph
3827 if os.path.exists(data_dir):
3828 for i in os.listdir(data_dir):
3829 if i in ['mon', 'osd', 'mds', 'mgr']:
3830 daemon_type = i
3831 for j in os.listdir(os.path.join(data_dir, i)):
3832 if '-' not in j:
3833 continue
3834 (cluster, daemon_id) = j.split('-', 1)
3835 fsid = get_legacy_daemon_fsid(
3836 cluster, daemon_type, daemon_id,
3837 legacy_dir=legacy_dir)
3838 legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
3839 i = {
3840 'style': 'legacy',
3841 'name': '%s.%s' % (daemon_type, daemon_id),
3842 'fsid': fsid if fsid is not None else 'unknown',
3843 'systemd_unit': legacy_unit_name,
3844 }
3845 if detail:
3846 (i['enabled'], i['state'], _) = check_unit(legacy_unit_name)
3847 if not host_version:
3848 try:
3849 out, err, code = call(['ceph', '-v'])
3850 if not code and out.startswith('ceph version '):
3851 host_version = out.split(' ')[2]
3852 except Exception:
3853 pass
3854 i['host_version'] = host_version
3855 ls.append(i)
3856 elif is_fsid(i):
3857 fsid = str(i) # convince mypy that fsid is a str here
3858 for j in os.listdir(os.path.join(data_dir, i)):
3859 if '.' in j:
3860 name = j
3861 (daemon_type, daemon_id) = j.split('.', 1)
3862 unit_name = get_unit_name(fsid,
3863 daemon_type,
3864 daemon_id)
3865 else:
3866 continue
3867 i = {
3868 'style': 'cephadm:v1',
3869 'name': name,
3870 'fsid': fsid,
3871 'systemd_unit': unit_name,
3872 }
3873 if detail:
3874 # get container id
3875 (i['enabled'], i['state'], _) = check_unit(unit_name)
3876 container_id = None
3877 image_name = None
3878 image_id = None
3879 version = None
3880 start_stamp = None
3881
3882 if 'podman' in container_path and get_podman_version() < (1, 6, 2):
3883 image_field = '.ImageID'
3884 else:
3885 image_field = '.Image'
3886
3887 out, err, code = call(
3888 [
3889 container_path, 'inspect',
3890 '--format', '{{.Id}},{{.Config.Image}},{{%s}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}' % image_field,
3891 'ceph-%s-%s' % (fsid, j)
3892 ],
3893 verbosity=CallVerbosity.DEBUG)
3894 if not code:
3895 (container_id, image_name, image_id, start,
3896 version) = out.strip().split(',')
3897 image_id = normalize_container_id(image_id)
3898 daemon_type = name.split('.', 1)[0]
3899 start_stamp = try_convert_datetime(start)
3900 if not version or '.' not in version:
3901 version = seen_versions.get(image_id, None)
3902 if daemon_type == NFSGanesha.daemon_type:
3903 version = NFSGanesha.get_version(container_id)
3904 if daemon_type == CephIscsi.daemon_type:
3905 version = CephIscsi.get_version(container_id)
3906 elif not version:
3907 if daemon_type in Ceph.daemons:
3908 out, err, code = call(
3909 [container_path, 'exec', container_id,
3910 'ceph', '-v'])
3911 if not code and \
3912 out.startswith('ceph version '):
3913 version = out.split(' ')[2]
3914 seen_versions[image_id] = version
3915 elif daemon_type == 'grafana':
3916 out, err, code = call(
3917 [container_path, 'exec', container_id,
3918 'grafana-server', '-v'])
3919 if not code and \
3920 out.startswith('Version '):
3921 version = out.split(' ')[1]
3922 seen_versions[image_id] = version
3923 elif daemon_type in ['prometheus',
3924 'alertmanager',
3925 'node-exporter']:
3926 version = Monitoring.get_version(container_path, container_id, daemon_type)
3927 seen_versions[image_id] = version
3928 elif daemon_type == CustomContainer.daemon_type:
3929 # Because a custom container can contain
3930 # everything, we do not know which command
3931 # to execute to get the version.
3932 pass
3933 else:
3934 logger.warning('version for unknown daemon type %s' % daemon_type)
3935 else:
3936 vfile = os.path.join(data_dir, fsid, j, 'unit.image') # type: ignore
3937 try:
3938 with open(vfile, 'r') as f:
3939 image_name = f.read().strip() or None
3940 except IOError:
3941 pass
3942 i['container_id'] = container_id
3943 i['container_image_name'] = image_name
3944 i['container_image_id'] = image_id
3945 i['version'] = version
3946 i['started'] = start_stamp
3947 i['created'] = get_file_timestamp(
3948 os.path.join(data_dir, fsid, j, 'unit.created')
3949 )
3950 i['deployed'] = get_file_timestamp(
3951 os.path.join(data_dir, fsid, j, 'unit.image'))
3952 i['configured'] = get_file_timestamp(
3953 os.path.join(data_dir, fsid, j, 'unit.configured'))
3954
3955 ls.append(i)
3956
3957 return ls
3958
3959
3960 def get_daemon_description(fsid, name, detail=False, legacy_dir=None):
3961 # type: (str, str, bool, Optional[str]) -> Dict[str, str]
3962
3963 for d in list_daemons(detail=detail, legacy_dir=legacy_dir):
3964 if d['fsid'] != fsid:
3965 continue
3966 if d['name'] != name:
3967 continue
3968 return d
3969 raise Error('Daemon not found: {}. See `cephadm ls`'.format(name))
3970
3971
3972 ##################################
3973
3974 @default_image
3975 def command_adopt():
3976 # type: () -> None
3977
3978 if not args.skip_pull:
3979 _pull_image(args.image)
3980
3981 (daemon_type, daemon_id) = args.name.split('.', 1)
3982
3983 # legacy check
3984 if args.style != 'legacy':
3985 raise Error('adoption of style %s not implemented' % args.style)
3986
3987 # lock
3988 fsid = get_legacy_daemon_fsid(args.cluster,
3989 daemon_type,
3990 daemon_id,
3991 legacy_dir=args.legacy_dir)
3992 if not fsid:
3993 raise Error('could not detect legacy fsid; set fsid in ceph.conf')
3994 l = FileLock(fsid)
3995 l.acquire()
3996
3997 # call correct adoption
3998 if daemon_type in Ceph.daemons:
3999 command_adopt_ceph(daemon_type, daemon_id, fsid)
4000 elif daemon_type == 'prometheus':
4001 command_adopt_prometheus(daemon_id, fsid)
4002 elif daemon_type == 'grafana':
4003 command_adopt_grafana(daemon_id, fsid)
4004 elif daemon_type == 'node-exporter':
4005 raise Error('adoption of node-exporter not implemented')
4006 elif daemon_type == 'alertmanager':
4007 command_adopt_alertmanager(daemon_id, fsid)
4008 else:
4009 raise Error('daemon type %s not recognized' % daemon_type)
4010
4011
4012 class AdoptOsd(object):
4013 def __init__(self, osd_data_dir, osd_id):
4014 # type: (str, str) -> None
4015 self.osd_data_dir = osd_data_dir
4016 self.osd_id = osd_id
4017
4018 def check_online_osd(self):
4019 # type: () -> Tuple[Optional[str], Optional[str]]
4020
4021 osd_fsid, osd_type = None, None
4022
4023 path = os.path.join(self.osd_data_dir, 'fsid')
4024 try:
4025 with open(path, 'r') as f:
4026 osd_fsid = f.read().strip()
4027 logger.info("Found online OSD at %s" % path)
4028 except IOError:
4029 logger.info('Unable to read OSD fsid from %s' % path)
4030 if os.path.exists(os.path.join(self.osd_data_dir, 'type')):
4031 with open(os.path.join(self.osd_data_dir, 'type')) as f:
4032 osd_type = f.read().strip()
4033 else:
4034 logger.info('"type" file missing for OSD data dir')
4035
4036 return osd_fsid, osd_type
4037
4038 def check_offline_lvm_osd(self):
4039 # type: () -> Tuple[Optional[str], Optional[str]]
4040
4041 osd_fsid, osd_type = None, None
4042
4043 c = CephContainer(
4044 image=args.image,
4045 entrypoint='/usr/sbin/ceph-volume',
4046 args=['lvm', 'list', '--format=json'],
4047 privileged=True
4048 )
4049 out, err, code = call_throws(c.run_cmd())
4050 if not code:
4051 try:
4052 js = json.loads(out)
4053 if self.osd_id in js:
4054 logger.info("Found offline LVM OSD {}".format(self.osd_id))
4055 osd_fsid = js[self.osd_id][0]['tags']['ceph.osd_fsid']
4056 for device in js[self.osd_id]:
4057 if device['tags']['ceph.type'] == 'block':
4058 osd_type = 'bluestore'
4059 break
4060 if device['tags']['ceph.type'] == 'data':
4061 osd_type = 'filestore'
4062 break
4063 except ValueError as e:
4064 logger.info("Invalid JSON in ceph-volume lvm list: {}".format(e))
4065
4066 return osd_fsid, osd_type
4067
4068 def check_offline_simple_osd(self):
4069 # type: () -> Tuple[Optional[str], Optional[str]]
4070
4071 osd_fsid, osd_type = None, None
4072
4073 osd_file = glob("/etc/ceph/osd/{}-[a-f0-9-]*.json".format(self.osd_id))
4074 if len(osd_file) == 1:
4075 with open(osd_file[0], 'r') as f:
4076 try:
4077 js = json.loads(f.read())
4078 logger.info("Found offline simple OSD {}".format(self.osd_id))
4079 osd_fsid = js["fsid"]
4080 osd_type = js["type"]
4081 if osd_type != "filestore":
4082 # need this to be mounted for the adopt to work, as it
4083 # needs to move files from this directory
4084 call_throws(['mount', js["data"]["path"], self.osd_data_dir])
4085 except ValueError as e:
4086 logger.info("Invalid JSON in {}: {}".format(osd_file, e))
4087
4088 return osd_fsid, osd_type
4089
4090
4091 def command_adopt_ceph(daemon_type, daemon_id, fsid):
4092 # type: (str, str, str) -> None
4093
4094 (uid, gid) = extract_uid_gid()
4095
4096 data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
4097 (daemon_type, args.cluster, daemon_id))
4098 data_dir_src = os.path.abspath(args.legacy_dir + data_dir_src)
4099
4100 if not os.path.exists(data_dir_src):
4101 raise Error("{}.{} data directory '{}' does not exist. "
4102 "Incorrect ID specified, or daemon alrady adopted?".format(
4103 daemon_type, daemon_id, data_dir_src))
4104
4105 osd_fsid = None
4106 if daemon_type == 'osd':
4107 adopt_osd = AdoptOsd(data_dir_src, daemon_id)
4108 osd_fsid, osd_type = adopt_osd.check_online_osd()
4109 if not osd_fsid:
4110 osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
4111 if not osd_fsid:
4112 osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
4113 if not osd_fsid:
4114 raise Error('Unable to find OSD {}'.format(daemon_id))
4115 logger.info('objectstore_type is %s' % osd_type)
4116 assert osd_type
4117 if osd_type == 'filestore':
4118 raise Error('FileStore is not supported by cephadm')
4119
4120 # NOTE: implicit assumption here that the units correspond to the
4121 # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
4122 # CLUSTER field.
4123 unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
4124 (enabled, state, _) = check_unit(unit_name)
4125 if state == 'running':
4126 logger.info('Stopping old systemd unit %s...' % unit_name)
4127 call_throws(['systemctl', 'stop', unit_name])
4128 if enabled:
4129 logger.info('Disabling old systemd unit %s...' % unit_name)
4130 call_throws(['systemctl', 'disable', unit_name])
4131
4132 # data
4133 logger.info('Moving data...')
4134 data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
4135 uid=uid, gid=gid)
4136 move_files(glob(os.path.join(data_dir_src, '*')),
4137 data_dir_dst,
4138 uid=uid, gid=gid)
4139 logger.debug('Remove dir \'%s\'' % (data_dir_src))
4140 if os.path.ismount(data_dir_src):
4141 call_throws(['umount', data_dir_src])
4142 os.rmdir(data_dir_src)
4143
4144 logger.info('Chowning content...')
4145 call_throws(['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])
4146
4147 if daemon_type == 'mon':
4148 # rename *.ldb -> *.sst, in case they are coming from ubuntu
4149 store = os.path.join(data_dir_dst, 'store.db')
4150 num_renamed = 0
4151 if os.path.exists(store):
4152 for oldf in os.listdir(store):
4153 if oldf.endswith('.ldb'):
4154 newf = oldf.replace('.ldb', '.sst')
4155 oldp = os.path.join(store, oldf)
4156 newp = os.path.join(store, newf)
4157 logger.debug('Renaming %s -> %s' % (oldp, newp))
4158 os.rename(oldp, newp)
num_renamed += 1
4159 if num_renamed:
4160 logger.info('Renamed %d leveldb *.ldb files to *.sst',
4161 num_renamed)
4162 if daemon_type == 'osd':
4163 for n in ['block', 'block.db', 'block.wal']:
4164 p = os.path.join(data_dir_dst, n)
4165 if os.path.exists(p):
4166 logger.info('Chowning %s...' % p)
4167 os.chown(p, uid, gid)
4168 # disable the ceph-volume 'simple' mode files on the host
4169 simple_fn = os.path.join('/etc/ceph/osd',
4170 '%s-%s.json' % (daemon_id, osd_fsid))
4171 if os.path.exists(simple_fn):
4172 new_fn = simple_fn + '.adopted-by-cephadm'
4173 logger.info('Renaming %s -> %s', simple_fn, new_fn)
4174 os.rename(simple_fn, new_fn)
4175 logger.info('Disabling host unit ceph-volume@ simple unit...')
4176 call(['systemctl', 'disable',
4177 'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
4178 else:
4179 # assume this is an 'lvm' c-v for now, but don't error
4180 # out if it's not.
4181 logger.info('Disabling host unit ceph-volume@ lvm unit...')
4182 call(['systemctl', 'disable',
4183 'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])
4184
4185 # config
4186 config_src = '/etc/ceph/%s.conf' % (args.cluster)
4187 config_src = os.path.abspath(args.legacy_dir + config_src)
4188 config_dst = os.path.join(data_dir_dst, 'config')
4189 copy_files([config_src], config_dst, uid=uid, gid=gid)
4190
4191 # logs
4192 logger.info('Moving logs...')
4193 log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
4194 (args.cluster, daemon_type, daemon_id))
4195 log_dir_src = os.path.abspath(args.legacy_dir + log_dir_src)
4196 log_dir_dst = make_log_dir(fsid, uid=uid, gid=gid)
4197 move_files(glob(log_dir_src),
4198 log_dir_dst,
4199 uid=uid, gid=gid)
4200
4201 logger.info('Creating new units...')
4202 make_var_run(fsid, uid, gid)
4203 c = get_container(fsid, daemon_type, daemon_id)
4204 deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
4205 enable=True, # unconditionally enable the new unit
4206 start=(state == 'running' or args.force_start),
4207 osd_fsid=osd_fsid)
4208 update_firewalld(daemon_type)
4209
4210
4211 def command_adopt_prometheus(daemon_id, fsid):
4212 # type: (str, str) -> None
4213
4214 daemon_type = 'prometheus'
4215 (uid, gid) = extract_uid_gid_monitoring(daemon_type)
4216
4217 _stop_and_disable('prometheus')
4218
4219 data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
4220 uid=uid, gid=gid)
4221
4222 # config
4223 config_src = '/etc/prometheus/prometheus.yml'
4224 config_src = os.path.abspath(args.legacy_dir + config_src)
4225 config_dst = os.path.join(data_dir_dst, 'etc/prometheus')
4226 makedirs(config_dst, uid, gid, 0o755)
4227 copy_files([config_src], config_dst, uid=uid, gid=gid)
4228
4229 # data
4230 data_src = '/var/lib/prometheus/metrics/'
4231 data_src = os.path.abspath(args.legacy_dir + data_src)
4232 data_dst = os.path.join(data_dir_dst, 'data')
4233 copy_tree([data_src], data_dst, uid=uid, gid=gid)
4234
4235 make_var_run(fsid, uid, gid)
4236 c = get_container(fsid, daemon_type, daemon_id)
4237 deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid)
4238 update_firewalld(daemon_type)
4239
4240
4241 def command_adopt_grafana(daemon_id, fsid):
4242 # type: (str, str) -> None
4243
4244 daemon_type = 'grafana'
4245 (uid, gid) = extract_uid_gid_monitoring(daemon_type)
4246
4247 _stop_and_disable('grafana-server')
4248
4249 data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
4250 uid=uid, gid=gid)
4251
4252 # config
4253 config_src = '/etc/grafana/grafana.ini'
4254 config_src = os.path.abspath(args.legacy_dir + config_src)
4255 config_dst = os.path.join(data_dir_dst, 'etc/grafana')
4256 makedirs(config_dst, uid, gid, 0o755)
4257 copy_files([config_src], config_dst, uid=uid, gid=gid)
4258
4259 prov_src = '/etc/grafana/provisioning/'
4260 prov_src = os.path.abspath(args.legacy_dir + prov_src)
4261 prov_dst = os.path.join(data_dir_dst, 'etc/grafana')
4262 copy_tree([prov_src], prov_dst, uid=uid, gid=gid)
4263
4264 # cert
4265 cert = '/etc/grafana/grafana.crt'
4266 key = '/etc/grafana/grafana.key'
4267 if os.path.exists(cert) and os.path.exists(key):
4268 cert_src = '/etc/grafana/grafana.crt'
4269 cert_src = os.path.abspath(args.legacy_dir + cert_src)
4270 makedirs(os.path.join(data_dir_dst, 'etc/grafana/certs'), uid, gid, 0o755)
4271 cert_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_file')
4272 copy_files([cert_src], cert_dst, uid=uid, gid=gid)
4273
4274 key_src = '/etc/grafana/grafana.key'
4275 key_src = os.path.abspath(args.legacy_dir + key_src)
4276 key_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_key')
4277 copy_files([key_src], key_dst, uid=uid, gid=gid)
4278
4279 _adjust_grafana_ini(os.path.join(config_dst, 'grafana.ini'))
4280 else:
4281 logger.debug("Skipping ssl, missing cert {} or key {}".format(cert, key))
4282
4283 # data - possible custom dashboards/plugins
4284 data_src = '/var/lib/grafana/'
4285 data_src = os.path.abspath(args.legacy_dir + data_src)
4286 data_dst = os.path.join(data_dir_dst, 'data')
4287 copy_tree([data_src], data_dst, uid=uid, gid=gid)
4288
4289 make_var_run(fsid, uid, gid)
4290 c = get_container(fsid, daemon_type, daemon_id)
4291 deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid)
4292 update_firewalld(daemon_type)
4293
4294
4295 def command_adopt_alertmanager(daemon_id, fsid):
4296 # type: (str, str) -> None
4297
4298 daemon_type = 'alertmanager'
4299 (uid, gid) = extract_uid_gid_monitoring(daemon_type)
4300
4301 _stop_and_disable('prometheus-alertmanager')
4302
4303 data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
4304 uid=uid, gid=gid)
4305
4306 # config
4307 config_src = '/etc/prometheus/alertmanager.yml'
4308 config_src = os.path.abspath(args.legacy_dir + config_src)
4309 config_dst = os.path.join(data_dir_dst, 'etc/alertmanager')
4310 makedirs(config_dst, uid, gid, 0o755)
4311 copy_files([config_src], config_dst, uid=uid, gid=gid)
4312
4313 # data
4314 data_src = '/var/lib/prometheus/alertmanager/'
4315 data_src = os.path.abspath(args.legacy_dir + data_src)
4316 data_dst = os.path.join(data_dir_dst, 'etc/alertmanager/data')
4317 copy_tree([data_src], data_dst, uid=uid, gid=gid)
4318
4319 make_var_run(fsid, uid, gid)
4320 c = get_container(fsid, daemon_type, daemon_id)
4321 deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid)
4322 update_firewalld(daemon_type)
4323
4324
4325 def _adjust_grafana_ini(filename):
4326 # type: (str) -> None
4327
4328 # Update cert_file, cert_key pathnames in server section
4329 # ConfigParser does not preserve comments
4330 try:
4331 with open(filename, "r") as grafana_ini:
4332 lines = grafana_ini.readlines()
4333 with open("{}.new".format(filename), "w") as grafana_ini:
4334 server_section=False
4335 for line in lines:
4336 if line.startswith('['):
4337 server_section=False
4338 if line.startswith('[server]'):
4339 server_section=True
4340 if server_section:
4341 line = re.sub(r'^cert_file.*',
4342 'cert_file = /etc/grafana/certs/cert_file', line)
4343 line = re.sub(r'^cert_key.*',
4344 'cert_key = /etc/grafana/certs/cert_key', line)
4345 grafana_ini.write(line)
4346 os.rename("{}.new".format(filename), filename)
4347 except OSError as err:
4348 raise Error("Cannot update {}: {}".format(filename, err))
4349
4350
4351 def _stop_and_disable(unit_name):
4352 # type: (str) -> None
4353
4354 (enabled, state, _) = check_unit(unit_name)
4355 if state == 'running':
4356 logger.info('Stopping old systemd unit %s...' % unit_name)
4357 call_throws(['systemctl', 'stop', unit_name])
4358 if enabled:
4359 logger.info('Disabling old systemd unit %s...' % unit_name)
4360 call_throws(['systemctl', 'disable', unit_name])
4361
4362
4363 ##################################
4364
4365 def command_rm_daemon():
4366 # type: () -> None
4367
4368 l = FileLock(args.fsid)
4369 l.acquire()
4370
4371 unit_name = get_unit_name_by_daemon_name(args.fsid, args.name)
4372
4373 (daemon_type, daemon_id) = args.name.split('.', 1)
4374 if daemon_type in ['mon', 'osd'] and not args.force:
4375 raise Error('must pass --force to proceed: '
4376 'this command may destroy precious data!')
4377
4378 call(['systemctl', 'stop', unit_name],
4379 verbosity=CallVerbosity.DEBUG)
4380 call(['systemctl', 'reset-failed', unit_name],
4381 verbosity=CallVerbosity.DEBUG)
4382 call(['systemctl', 'disable', unit_name],
4383 verbosity=CallVerbosity.DEBUG)
4384 data_dir = get_data_dir(args.fsid, daemon_type, daemon_id)
4385 if daemon_type in ['mon', 'osd', 'prometheus'] and \
4386 not args.force_delete_data:
4387 # rename it out of the way -- do not delete
4388 backup_dir = os.path.join(args.data_dir, args.fsid, 'removed')
4389 if not os.path.exists(backup_dir):
4390 makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
4391 dirname = '%s.%s_%s' % (daemon_type, daemon_id,
4392 datetime.datetime.utcnow().strftime(DATEFMT))
4393 os.rename(data_dir,
4394 os.path.join(backup_dir, dirname))
4395 else:
4396 call_throws(['rm', '-rf', data_dir])
4397
4398 ##################################
4399
4400
4401 def command_rm_cluster():
4402 # type: () -> None
4403 if not args.force:
4404 raise Error('must pass --force to proceed: '
4405 'this command may destroy precious data!')
4406
4407 l = FileLock(args.fsid)
4408 l.acquire()
4409
4410 # stop + disable individual daemon units
4411 for d in list_daemons(detail=False):
4412 if d['fsid'] != args.fsid:
4413 continue
4414 if d['style'] != 'cephadm:v1':
4415 continue
4416 unit_name = get_unit_name(args.fsid, d['name'])
4417 call(['systemctl', 'stop', unit_name],
4418 verbosity=CallVerbosity.DEBUG)
4419 call(['systemctl', 'reset-failed', unit_name],
4420 verbosity=CallVerbosity.DEBUG)
4421 call(['systemctl', 'disable', unit_name],
4422 verbosity=CallVerbosity.DEBUG)
4423
4424 # cluster units
4425 for unit_name in ['ceph-%s.target' % args.fsid]:
4426 call(['systemctl', 'stop', unit_name],
4427 verbosity=CallVerbosity.DEBUG)
4428 call(['systemctl', 'reset-failed', unit_name],
4429 verbosity=CallVerbosity.DEBUG)
4430 call(['systemctl', 'disable', unit_name],
4431 verbosity=CallVerbosity.DEBUG)
4432
4433 slice_name = 'system-%s.slice' % (('ceph-%s' % args.fsid).replace('-',
4434 '\\x2d'))
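# systemd escapes '-' in unit names as \x2d, so an fsid like 'abc-123'
# (illustrative) yields 'system-ceph\x2dabc\x2d123.slice'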
4435 call(['systemctl', 'stop', slice_name],
4436 verbosity=CallVerbosity.DEBUG)
4437
4438 # rm units
4439 call_throws(['rm', '-f', args.unit_dir +
4440 '/ceph-%s@.service' % args.fsid])
4441 call_throws(['rm', '-f', args.unit_dir +
4442 '/ceph-%s.target' % args.fsid])
4443 call_throws(['rm', '-rf',
4444 args.unit_dir + '/ceph-%s.target.wants' % args.fsid])
4445 # rm data
4446 call_throws(['rm', '-rf', args.data_dir + '/' + args.fsid])
4447 # rm logs
4448 call_throws(['rm', '-rf', args.log_dir + '/' + args.fsid])
4449 call_throws(['rm', '-rf', args.log_dir +
4450 '/*.wants/ceph-%s@*' % args.fsid])
4451 # rm logrotate config
4452 call_throws(['rm', '-f', args.logrotate_dir + '/ceph-%s' % args.fsid])
4453
4454 # clean up config, keyring, and pub key files
4455 files = ['/etc/ceph/ceph.conf', '/etc/ceph/ceph.pub', '/etc/ceph/ceph.client.admin.keyring']
4456
4457 if os.path.exists(files[0]):
4458 valid_fsid = False
4459 with open(files[0]) as f:
4460 if args.fsid in f.read():
4461 valid_fsid = True
4462 if valid_fsid:
4463 for path in files:
4464 if os.path.exists(path):
4465 os.remove(path)
4466
4467
4468 ##################################
4469
4470 def check_time_sync(enabler=None):
4471 # type: (Optional[Packager]) -> bool
4472 units = [
4473 'chrony.service', # 18.04 (at least)
4474 'chronyd.service', # el / opensuse
4475 'systemd-timesyncd.service',
4476 'ntpd.service', # el7 (at least)
4477 'ntp.service', # 18.04 (at least)
4478 'ntpsec.service', # 20.04 (at least) / buster
4479 ]
4480 if not check_units(units, enabler):
4481 logger.warning('No time sync service is running; checked for %s' % units)
4482 return False
4483 return True
4484
4485
4486 def command_check_host():
4487 # type: () -> None
4488 global container_path
4489
4490 errors = []
4491 commands = ['systemctl', 'lvcreate']
4492
4493 if args.docker:
4494 container_path = find_program('docker')
4495 else:
4496 for i in CONTAINER_PREFERENCE:
4497 try:
4498 container_path = find_program(i)
4499 break
4500 except Exception as e:
4501 logger.debug('Could not locate %s: %s' % (i, e))
4502 if not container_path:
4503 errors.append('Unable to locate any of %s' % CONTAINER_PREFERENCE)
4504 else:
4505 logger.info('podman|docker (%s) is present' % container_path)
4506
4507 for command in commands:
4508 try:
4509 find_program(command)
4510 logger.info('%s is present' % command)
4511 except ValueError:
4512 errors.append('%s binary does not appear to be installed' % command)
4513
4514 # check for configured+running chronyd or ntp
4515 if not check_time_sync():
4516 errors.append('No time synchronization is active')
4517
4518 if 'expect_hostname' in args and args.expect_hostname:
4519 if get_hostname().lower() != args.expect_hostname.lower():
4520 errors.append('hostname "%s" does not match expected hostname "%s"' % (
4521 get_hostname(), args.expect_hostname))
else:
4522 logger.info('Hostname "%s" matches what is expected.',
4523 args.expect_hostname)
4524
4525 if errors:
4526 raise Error('\n'.join(errors))
4527
4528 logger.info('Host looks OK')
4529
4530 ##################################
4531
4532
4533 def command_prepare_host():
4534 # type: () -> None
4535 logger.info('Verifying podman|docker is present...')
4536 pkg = None
4537 if not container_path:
4538 if not pkg:
4539 pkg = create_packager()
4540 pkg.install_podman()
4541
4542 logger.info('Verifying lvm2 is present...')
4543 if not find_executable('lvcreate'):
4544 if not pkg:
4545 pkg = create_packager()
4546 pkg.install(['lvm2'])
4547
4548 logger.info('Verifying time synchronization is in place...')
4549 if not check_time_sync():
4550 if not pkg:
4551 pkg = create_packager()
4552 pkg.install(['chrony'])
4553 # check again, and this time try to enable
4554 # the service
4555 check_time_sync(enabler=pkg)
4556
4557 if 'expect_hostname' in args and args.expect_hostname and args.expect_hostname != get_hostname():
4558 logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), args.expect_hostname))
4559 call_throws(['hostname', args.expect_hostname])
4560 with open('/etc/hostname', 'w') as f:
4561 f.write(args.expect_hostname + '\n')
4562
4563 logger.info('Repeating the final host check...')
4564 command_check_host()
4565
4566 ##################################
4567
4568
4569 class CustomValidation(argparse.Action):
4570
4571 def _check_name(self, values):
4572 try:
4573 (daemon_type, daemon_id) = values.split('.', 1)
4574 except ValueError:
4575 raise argparse.ArgumentError(self,
4576 "must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com")
4577
4578 daemons = get_supported_daemons()
4579 if daemon_type not in daemons:
4580 raise argparse.ArgumentError(self,
4581 "name must declare the type of daemon e.g. "
4582 "{}".format(', '.join(daemons)))
4583
4584 def __call__(self, parser, namespace, values, option_string=None):
4585 if self.dest == "name":
4586 self._check_name(values)
4587 setattr(namespace, self.dest, values)
4588
4589 ##################################
4590
4591
4592 def get_distro():
4593 # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
4594 distro = None
4595 distro_version = None
4596 distro_codename = None
4597 with open('/etc/os-release', 'r') as f:
4598 for line in f.readlines():
4599 line = line.strip()
4600 if '=' not in line or line.startswith('#'):
4601 continue
4602 (var, val) = line.split('=', 1)
4603 if val[0] == '"' and val[-1] == '"':
4604 val = val[1:-1]
4605 if var == 'ID':
4606 distro = val.lower()
4607 elif var == 'VERSION_ID':
4608 distro_version = val.lower()
4609 elif var == 'VERSION_CODENAME':
4610 distro_codename = val.lower()
4611 return distro, distro_version, distro_codename
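# Illustrative: on Ubuntu 18.04 this returns ('ubuntu', '18.04', 'bionic');
# any field missing from /etc/os-release is returned as None.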
4612
4613
4614 class Packager(object):
4615 def __init__(self, stable=None, version=None, branch=None, commit=None):
4616 assert \
4617 (stable and not version and not branch and not commit) or \
4618 (not stable and version and not branch and not commit) or \
4619 (not stable and not version and branch) or \
4620 (not stable and not version and not branch and not commit)
4621 self.stable = stable
4622 self.version = version
4623 self.branch = branch
4624 self.commit = commit
4625
4626 def add_repo(self):
4627 raise NotImplementedError
4628
4629 def rm_repo(self):
4630 raise NotImplementedError
4631
4632 def query_shaman(self, distro, distro_version, branch, commit):
4633 # query shaman
4634 logger.info('Fetching repo metadata from shaman and chacra...')
4635 shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
4636 distro=distro,
4637 distro_version=distro_version,
4638 branch=branch,
4639 sha1=commit or 'latest',
4640 arch=get_arch()
4641 )
4642 try:
4643 shaman_response = urlopen(shaman_url)
4644 except HTTPError as err:
4645 logger.error('repository not found in shaman (might not be available yet)')
4646 raise Error('%s, failed to fetch %s' % (err, shaman_url))
4647 try:
4648 chacra_url = shaman_response.geturl()
4649 chacra_response = urlopen(chacra_url)
4650 except HTTPError as err:
4651 logger.error('repository not found in chacra (might not be available yet)')
4652 raise Error('%s, failed to fetch %s' % (err, chacra_url))
4653 return chacra_response.read().decode('utf-8')
4654
4655 def repo_gpgkey(self):
4656 if args.gpg_url:
4657 return args.gpg_url, 'manual' # (url, name) pair to match callers' unpacking; 'manual' labels user-supplied keys
4658 if self.stable or self.version:
4659 return 'https://download.ceph.com/keys/release.asc', 'release'
4660 else:
4661 return 'https://download.ceph.com/keys/autobuild.asc', 'autobuild'
4662
4663 def enable_service(self, service):
4664 """
4665 Start and enable the service (typically using systemd).
4666 """
4667 call_throws(['systemctl', 'enable', '--now', service])
4668
4669
4670 class Apt(Packager):
4671 DISTRO_NAMES = {
4672 'ubuntu': 'ubuntu',
4673 'debian': 'debian',
4674 }
4675
4676 def __init__(self, stable, version, branch, commit,
4677 distro, distro_version, distro_codename):
4678 super(Apt, self).__init__(stable=stable, version=version,
4679 branch=branch, commit=commit)
4680 self.distro = self.DISTRO_NAMES[distro]
4681 self.distro_codename = distro_codename
4682 self.distro_version = distro_version
4683
4684 def repo_path(self):
4685 return '/etc/apt/sources.list.d/ceph.list'
4686
4687 def add_repo(self):
4688 url, name = self.repo_gpgkey()
4689 logger.info('Installing repo GPG key from %s...' % url)
4690 try:
4691 response = urlopen(url)
4692 except HTTPError as err:
4693 logger.error('failed to fetch GPG repo key from %s: %s' % (
4694 url, err))
4695 raise Error('failed to fetch GPG key')
4696 key = response.read().decode('utf-8')
4697 with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'w') as f:
4698 f.write(key)
4699
4700 if self.version:
4701 content = 'deb %s/debian-%s/ %s main\n' % (
4702 args.repo_url, self.version, self.distro_codename)
4703 elif self.stable:
4704 content = 'deb %s/debian-%s/ %s main\n' % (
4705 args.repo_url, self.stable, self.distro_codename)
4706 else:
4707 content = self.query_shaman(self.distro, self.distro_codename, self.branch,
4708 self.commit)
4709
4710 logger.info('Installing repo file at %s...' % self.repo_path())
4711 with open(self.repo_path(), 'w') as f:
4712 f.write(content)
4713
4714 def rm_repo(self):
4715 for name in ['autobuild', 'release']:
4716 p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
4717 if os.path.exists(p):
4718 logger.info('Removing repo GPG key %s...' % p)
4719 os.unlink(p)
4720 if os.path.exists(self.repo_path()):
4721 logger.info('Removing repo at %s...' % self.repo_path())
4722 os.unlink(self.repo_path())
4723
4724 if self.distro == 'ubuntu':
4725 self.rm_kubic_repo()
4726
4727 def install(self, ls):
4728 logger.info('Installing packages %s...' % ls)
4729 call_throws(['apt-get', 'install', '-y'] + ls)
4730
4731 def install_podman(self):
4732 if self.distro == 'ubuntu':
4733 logger.info('Setting up repo for podman...')
4734 self.add_kubic_repo()
4735 call_throws(['apt-get', 'update'])
4736
4737 logger.info('Attempting podman install...')
4738 try:
4739 self.install(['podman'])
4740 except Error:
4741 logger.info('Podman did not work. Falling back to docker...')
4742 self.install(['docker.io'])
4743
4744 def kubic_repo_url(self):
4745 return 'https://download.opensuse.org/repositories/devel:/kubic:/' \
4746 'libcontainers:/stable/xUbuntu_%s/' % self.distro_version
4747
4748 def kubic_repo_path(self):
4749 return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list'
4750
4751 def kubic_repo_gpgkey_url(self):
4752 return '%s/Release.key' % self.kubic_repo_url()
4753
4754 def kubic_repo_gpgkey_path(self):
4755 return '/etc/apt/trusted.gpg.d/kubic.release.gpg'
4756
4757 def add_kubic_repo(self):
4758 url = self.kubic_repo_gpgkey_url()
4759 logger.info('Installing repo GPG key from %s...' % url)
4760 try:
4761 response = urlopen(url)
4762 except HTTPError as err:
4763 logger.error('failed to fetch GPG repo key from %s: %s' % (
4764 url, err))
4765 raise Error('failed to fetch GPG key')
4766 key = response.read().decode('utf-8')
4767 tmp_key = write_tmp(key, 0, 0)
4768 keyring = self.kubic_repo_gpgkey_path()
4769 call_throws(['apt-key', '--keyring', keyring, 'add', tmp_key.name])
4770
4771 logger.info('Installing repo file at %s...' % self.kubic_repo_path())
4772 content = 'deb %s /\n' % self.kubic_repo_url()
4773 with open(self.kubic_repo_path(), 'w') as f:
4774 f.write(content)
4775
4776 def rm_kubic_repo(self):
4777 keyring = self.kubic_repo_gpgkey_path()
4778 if os.path.exists(keyring):
4779 logger.info('Removing repo GPG key %s...' % keyring)
4780 os.unlink(keyring)
4781
4782 p = self.kubic_repo_path()
4783 if os.path.exists(p):
4784 logger.info('Removing repo at %s...' % p)
4785 os.unlink(p)
4786
4787
4788 class YumDnf(Packager):
4789 DISTRO_NAMES = {
4790 'centos': ('centos', 'el'),
4791 'rhel': ('centos', 'el'),
4792 'scientific': ('centos', 'el'),
4793 'fedora': ('fedora', 'fc'),
4794 }
4795
4796 def __init__(self, stable, version, branch, commit,
4797 distro, distro_version):
4798 super(YumDnf, self).__init__(stable=stable, version=version,
4799 branch=branch, commit=commit)
4800 self.major = int(distro_version.split('.')[0])
4801 self.distro_normalized = self.DISTRO_NAMES[distro][0]
4802 self.distro_code = self.DISTRO_NAMES[distro][1] + str(self.major)
4803 if (self.distro_code == 'fc' and self.major >= 30) or \
4804 (self.distro_code == 'el' and self.major >= 8):
4805 self.tool = 'dnf'
4806 else:
4807 self.tool = 'yum'
4808
4809 def custom_repo(self, **kw):
4810 """
4811 Repo files need special care in that a whole line should not be present
4812 if there is no value for it. Because we were using `format()` we could
4813 not conditionally add a line for a repo file. So the end result would
4814 contain a key with a missing value (say if we were passing `None`).
4815
4816 For example, it could look like::
4817
4818 [ceph repo]
4819 name= ceph repo
4820 proxy=
4821 gpgcheck=
4822
4823 Which breaks. This function allows us to conditionally add lines,
4824 preserving an order and be more careful.
4825
4826 Previously, and for historical purposes, this is how the template used
4827 to look::
4828
4829 custom_repo =
4830 [{repo_name}]
4831 name={name}
4832 baseurl={baseurl}
4833 enabled={enabled}
4834 gpgcheck={gpgcheck}
4835 type={_type}
4836 gpgkey={gpgkey}
4837 proxy={proxy}
4838
4839 """
4840 lines = []
4841
4842 # by using tuples (vs a dict) we preserve the order of what we want to
4843 # return, like starting with a [repo name]
4844 tmpl = (
4845 ('reponame', '[%s]'),
4846 ('name', 'name=%s'),
4847 ('baseurl', 'baseurl=%s'),
4848 ('enabled', 'enabled=%s'),
4849 ('gpgcheck', 'gpgcheck=%s'),
4850 ('_type', 'type=%s'),
4851 ('gpgkey', 'gpgkey=%s'),
4852 ('proxy', 'proxy=%s'),
4853 ('priority', 'priority=%s'),
4854 )
4855
4856 for line in tmpl:
4857 tmpl_key, tmpl_value = line # key values from tmpl
4858
4859 # ensure that there is an actual value (not None nor empty string)
4860 if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
4861 lines.append(tmpl_value % kw.get(tmpl_key))
4862
4863 return '\n'.join(lines)
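# Illustrative usage (hypothetical values):
#   self.custom_repo(reponame='ceph', name='Ceph packages',
#                    baseurl='https://example.com/rpm', enabled=1, gpgcheck=1)
# renders as:
#   [ceph]
#   name=Ceph packages
#   baseurl=https://example.com/rpm
#   enabled=1
#   gpgcheck=1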
4864
4865 def repo_path(self):
4866 return '/etc/yum.repos.d/ceph.repo'
4867
4868 def repo_baseurl(self):
4869 assert self.stable or self.version
4870 if self.version:
4871 return '%s/rpm-%s/%s' % (args.repo_url, self.version,
4872 self.distro_code)
4873 else:
4874 return '%s/rpm-%s/%s' % (args.repo_url, self.stable,
4875 self.distro_code)
4876
4877 def add_repo(self):
4878 if self.stable or self.version:
4879 content = ''
4880 for n, t in {
4881 'Ceph': '$basearch',
4882 'Ceph-noarch': 'noarch',
4883 'Ceph-source': 'SRPMS'}.items():
4884 content += '[%s]\n' % (n)
4885 content += self.custom_repo(
4886 name='Ceph %s' % t,
4887 baseurl=self.repo_baseurl() + '/' + t,
4888 enabled=1,
4889 gpgcheck=1,
4890 gpgkey=self.repo_gpgkey()[0],
4891 )
4892 content += '\n\n'
4893 else:
4894 content = self.query_shaman(self.distro_normalized, self.major,
4895 self.branch,
4896 self.commit)
4897
4898 logger.info('Writing repo to %s...' % self.repo_path())
4899 with open(self.repo_path(), 'w') as f:
4900 f.write(content)
4901
4902 if self.distro_code.startswith('el'):
4903 logger.info('Enabling EPEL...')
4904 call_throws([self.tool, 'install', '-y', 'epel-release'])
4905
4906 def rm_repo(self):
4907 if os.path.exists(self.repo_path()):
4908 os.unlink(self.repo_path())
4909
4910 def install(self, ls):
4911 logger.info('Installing packages %s...' % ls)
4912 call_throws([self.tool, 'install', '-y'] + ls)
4913
4914 def install_podman(self):
4915 self.install(['podman'])
4916
4917
4918 class Zypper(Packager):
4919 DISTRO_NAMES = [
4920 'sles',
4921 'opensuse-tumbleweed',
4922 'opensuse-leap'
4923 ]
4924
4925 def __init__(self, stable, version, branch, commit,
4926 distro, distro_version):
4927 super(Zypper, self).__init__(stable=stable, version=version,
4928 branch=branch, commit=commit)
4929 self.tool = 'zypper'
4930 self.distro = 'opensuse'
4931 self.distro_version = '15.1'
4932 if 'tumbleweed' not in distro and distro_version is not None:
4933 self.distro_version = distro_version
4934
4935 def custom_repo(self, **kw):
4936 """
4937 See YumDnf for format explanation.
4938 """
4939 lines = []
4940
4941 # by using tuples (vs a dict) we preserve the order of what we want to
4942 # return, like starting with a [repo name]
4943 tmpl = (
4944 ('reponame', '[%s]'),
4945 ('name', 'name=%s'),
4946 ('baseurl', 'baseurl=%s'),
4947 ('enabled', 'enabled=%s'),
4948 ('gpgcheck', 'gpgcheck=%s'),
4949 ('_type', 'type=%s'),
4950 ('gpgkey', 'gpgkey=%s'),
4951 ('proxy', 'proxy=%s'),
4952 ('priority', 'priority=%s'),
4953 )
4954
4955 for line in tmpl:
4956 tmpl_key, tmpl_value = line # key values from tmpl
4957
4958 # ensure that there is an actual value (not None nor empty string)
4959 if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
4960 lines.append(tmpl_value % kw.get(tmpl_key))
4961
4962 return '\n'.join(lines)
4963
4964 def repo_path(self):
4965 return '/etc/zypp/repos.d/ceph.repo'
4966
4967 def repo_baseurl(self):
4968 assert self.stable or self.version
4969 if self.version:
4970 return '%s/rpm-%s/%s' % (args.repo_url, self.version, self.distro)
4971 else:
4972 return '%s/rpm-%s/%s' % (args.repo_url, self.stable, self.distro)
4973
4974 def add_repo(self):
4975 if self.stable or self.version:
4976 content = ''
4977 for n, t in {
4978 'Ceph': '$basearch',
4979 'Ceph-noarch': 'noarch',
4980 'Ceph-source': 'SRPMS'}.items():
4981 content += '[%s]\n' % (n)
4982 content += self.custom_repo(
4983 name='Ceph %s' % t,
4984 baseurl=self.repo_baseurl() + '/' + t,
4985 enabled=1,
4986 gpgcheck=1,
4987 gpgkey=self.repo_gpgkey()[0],
4988 )
4989 content += '\n\n'
4990 else:
4991 content = self.query_shaman(self.distro, self.distro_version,
4992 self.branch,
4993 self.commit)
4994
4995 logger.info('Writing repo to %s...' % self.repo_path())
4996 with open(self.repo_path(), 'w') as f:
4997 f.write(content)
4998
4999 def rm_repo(self):
5000 if os.path.exists(self.repo_path()):
5001 os.unlink(self.repo_path())
5002
5003 def install(self, ls):
5004 logger.info('Installing packages %s...' % ls)
5005 call_throws([self.tool, 'in', '-y'] + ls)
5006
5007 def install_podman(self):
5008 self.install(['podman'])
5009
5010
5011 def create_packager(stable=None, version=None, branch=None, commit=None):
5012 distro, distro_version, distro_codename = get_distro()
5013 if distro in YumDnf.DISTRO_NAMES:
5014 return YumDnf(stable=stable, version=version,
5015 branch=branch, commit=commit,
5016 distro=distro, distro_version=distro_version)
5017 elif distro in Apt.DISTRO_NAMES:
5018 return Apt(stable=stable, version=version,
5019 branch=branch, commit=commit,
5020 distro=distro, distro_version=distro_version,
5021 distro_codename=distro_codename)
5022 elif distro in Zypper.DISTRO_NAMES:
5023 return Zypper(stable=stable, version=version,
5024 branch=branch, commit=commit,
5025 distro=distro, distro_version=distro_version)
5026 raise Error('Distro %s version %s not supported' % (distro, distro_version))
5027
5028
5029 def command_add_repo():
5030 if args.version and args.release:
5031 raise Error('you can specify either --release or --version but not both')
5032 if not args.version and not args.release and not args.dev and not args.dev_commit:
5033 raise Error('please supply a --release, --version, --dev or --dev-commit argument')
5034 if args.version:
5035 try:
5036 (x, y, z) = args.version.split('.')
5037 except Exception:
5038 raise Error('version must be in the form x.y.z (e.g., 15.2.0)')
5039
5040 pkg = create_packager(stable=args.release,
5041 version=args.version,
5042 branch=args.dev,
5043 commit=args.dev_commit)
5044 pkg.add_repo()
5045
5046
5047 def command_rm_repo():
5048 pkg = create_packager()
5049 pkg.rm_repo()
5050
5051
5052 def command_install():
5053 pkg = create_packager()
5054 pkg.install(args.packages)
5055
5056 ##################################
5057
5058 def get_ipv4_address(ifname):
5059 # type: (str) -> str
5060 def _extract(sock, offset):
5061 return socket.inet_ntop(
5062 socket.AF_INET,
5063 fcntl.ioctl(
5064 sock.fileno(),
5065 offset,
5066 struct.pack('256s', bytes(ifname[:15], 'utf-8'))
5067 )[20:24])
5068
5069 s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
5070 try:
5071 addr = _extract(s, 35093) # 0x8915 = SIOCGIFADDR
5072 dq_mask = _extract(s, 35099) # 0x891b = SIOCGIFNETMASK
5073 except OSError:
5074 # interface does not have an ipv4 address
5075 return ''
5076
5077 dec_mask = sum([bin(int(i)).count('1')
5078 for i in dq_mask.split('.')])
5079 return '{}/{}'.format(addr, dec_mask)
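# Illustrative: get_ipv4_address('eth0') might return '192.168.1.10/24'
# (the address plus a prefix length derived from the netmask), or '' if
# the interface has no IPv4 address.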
5080
5081
5082 def get_ipv6_address(ifname):
5083 # type: (str) -> str
5084 if not os.path.exists('/proc/net/if_inet6'):
5085 return ''
5086
5087 raw = read_file(['/proc/net/if_inet6'])
5088 data = raw.splitlines()
5089 # based on docs @ https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/ch11s04.html
5090 # field 0 is the address, field 2 is the prefix length (in hex)
5091 for iface_setting in data:
5092 field = iface_setting.split()
5093 if field[-1] == ifname:
5094 ipv6_raw = field[0]
5095 ipv6_fmtd = ":".join([ipv6_raw[_p:_p+4] for _p in range(0, len(ipv6_raw), 4)])
5096 # apply naming rules using ipaddress module
5097 ipv6 = ipaddress.ip_address(ipv6_fmtd)
5098 return "{}/{}".format(str(ipv6), int('0x{}'.format(field[2]), 16))
5099 return ''
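# Illustrative: an if_inet6 entry for eth0 with raw address
# 'fe800000000000000000000000000001' and prefix field '40' yields
# 'fe80::1/64'.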
5100
5101
5102 def bytes_to_human(num, mode='decimal'):
5103 # type: (float, str) -> str
5104 """Convert a bytes value into it's human-readable form.
5105
5106 :param num: number, in bytes, to convert
5107 :param mode: Either decimal (default) or binary to determine divisor
5108 :returns: string representing the bytes value in a more readable format
5109 """
5110 unit_list = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
5111 divisor = 1000.0
5112 yotta = "YB"
5113
5114 if mode == 'binary':
5115 unit_list = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
5116 divisor = 1024.0
5117 yotta = "YiB"
5118
5119 for unit in unit_list:
5120 if abs(num) < divisor:
5121 return "%3.1f%s" % (num, unit)
5122 num /= divisor
5123 return "%.1f%s" % (num, yotta)
5124
5125
5126 def read_file(path_list, file_name=''):
5127 # type: (List[str], str) -> str
5128 """Returns the content of the first file found within the `path_list`
5129
5130 :param path_list: list of file paths to search
5131 :param file_name: optional file_name to be applied to a file path
5132 :returns: content of the file or 'Unknown'
5133 """
5134 for path in path_list:
5135 if file_name:
5136 file_path = os.path.join(path, file_name)
5137 else:
5138 file_path = path
5139 if os.path.exists(file_path):
5140 with open(file_path, 'r') as f:
5141 try:
5142 content = f.read().strip()
5143 except OSError:
5144 # sysfs may populate the file, but for devices like
5145 # virtio reads can fail
5146 return "Unknown"
5147 else:
5148 return content
5149 return "Unknown"
5150
5151
5152 ##################################
5153 class HostFacts():
5154 _dmi_path_list = ['/sys/class/dmi/id']
5155 _nic_path_list = ['/sys/class/net']
5156 _selinux_path_list = ['/etc/selinux/config']
5157 _apparmor_path_list = ['/etc/apparmor']
5158 _disk_vendor_workarounds = {
5159 "0x1af4": "Virtio Block Device"
5160 }
5161
5162 def __init__(self):
5163 self.cpu_model = 'Unknown'
5164 self.cpu_count = 0
5165 self.cpu_cores = 0
5166 self.cpu_threads = 0
5167 self.interfaces = {}
5168
5169 self._meminfo = read_file(['/proc/meminfo']).splitlines()
5170 self._get_cpuinfo()
5171 self._process_nics()
5172 self.arch = platform.processor()
5173 self.kernel = platform.release()
5174
5175 def _get_cpuinfo(self):
5176 # type: () -> None
5177 """Determine cpu information via /proc/cpuinfo"""
5178 raw = read_file(['/proc/cpuinfo'])
5179 output = raw.splitlines()
5180 cpu_set = set()
5181
5182 for line in output:
5183 field = [l.strip() for l in line.split(':')]
5184 if "model name" in line:
5185 self.cpu_model = field[1]
5186 if "physical id" in line:
5187 cpu_set.add(field[1])
5188 if "siblings" in line:
5189 self.cpu_threads = int(field[1].strip())
5190 if "cpu cores" in line:
5191 self.cpu_cores = int(field[1].strip())
5193 self.cpu_count = len(cpu_set)
5194
5195 def _get_block_devs(self):
5196 # type: () -> List[str]
5197 """Determine the list of block devices by looking at /sys/block"""
5198 return [dev for dev in os.listdir('/sys/block')
5199 if not dev.startswith('dm')]
5200
5201 def _get_devs_by_type(self, rota='0'):
5202 # type: (str) -> List[str]
5203 """Filter block devices by a given rotational attribute (0=flash, 1=spinner)"""
5204 devs = list()
5205 for blk_dev in self._get_block_devs():
5206 rot_path = '/sys/block/{}/queue/rotational'.format(blk_dev)
5207 rot_value = read_file([rot_path])
5208 if rot_value == rota:
5209 devs.append(blk_dev)
5210 return devs
5211
5212 @property
5213 def operating_system(self):
5214 # type: () -> str
5215 """Determine OS version"""
5216 raw_info = read_file(['/etc/os-release'])
5217 os_release = raw_info.splitlines()
5218 rel_str = 'Unknown'
5219 rel_dict = dict()
5220
5221 for line in os_release:
5222 if "=" in line:
5223 var_name, var_value = line.split('=', 1)
5224 rel_dict[var_name] = var_value.strip('"')
5225
5226 # Would normally use PRETTY_NAME, but NAME and VERSION are more
5227 # consistent
5228 if all(_v in rel_dict for _v in ["NAME", "VERSION"]):
5229 rel_str = "{} {}".format(rel_dict['NAME'], rel_dict['VERSION'])
5230 return rel_str
5231
5232 @property
5233 def hostname(self):
5234 # type: () -> str
5235 """Return the hostname"""
5236 return platform.node()
5237
5238 @property
5239 def subscribed(self):
5240 # type: () -> str
5241 """Highlevel check to see if the host is subscribed to receive updates/support"""
5242 def _red_hat():
5243 # type: () -> str
5244 # RHEL 7 and RHEL 8
5245 entitlements_dir = '/etc/pki/entitlement'
5246 if os.path.exists(entitlements_dir):
5247 pems = glob('{}/*.pem'.format(entitlements_dir))
5248 if len(pems) >= 2:
5249 return "Yes"
5250
5251 return "No"
5252
5253 os_name = self.operating_system
5254 if os_name.upper().startswith("RED HAT"):
5255 return _red_hat()
5256
5257 return "Unknown"
5258
5259 @property
5260 def hdd_count(self):
5261 # type: () -> int
5262 """Return a count of HDDs (spinners)"""
5263 return len(self._get_devs_by_type(rota='1'))
5264
5265 def _get_capacity(self, dev):
5266 # type: (str) -> int
5267 """Determine the size of a given device"""
5268 size_path = os.path.join('/sys/block', dev, 'size')
5269 size_blocks = int(read_file([size_path]))
5270 blk_path = os.path.join('/sys/block', dev, 'queue', 'logical_block_size')
5271 blk_count = int(read_file([blk_path]))
5272 return size_blocks * blk_count
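# Illustrative: a device reporting 2097152 sectors with a 512-byte logical
# block size has a capacity of 2097152 * 512 = 1073741824 bytes (1 GiB).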
5273
5274 def _get_capacity_by_type(self, rota='0'):
5275 # type: (str) -> int
5276 """Return the total capacity of a category of device (flash or hdd)"""
5277 devs = self._get_devs_by_type(rota=rota)
5278 capacity = 0
5279 for dev in devs:
5280 capacity += self._get_capacity(dev)
5281 return capacity
5282
5283 def _dev_list(self, dev_list):
5284 # type: (List[str]) -> List[Dict[str, object]]
5285 """Return a 'pretty' name list for each device in the `dev_list`"""
5286 disk_list = list()
5287
5288 for dev in dev_list:
5289 disk_model = read_file(['/sys/block/{}/device/model'.format(dev)]).strip()
5290 disk_rev = read_file(['/sys/block/{}/device/rev'.format(dev)]).strip()
5291 disk_wwid = read_file(['/sys/block/{}/device/wwid'.format(dev)]).strip()
5292 vendor = read_file(['/sys/block/{}/device/vendor'.format(dev)]).strip()
5293 disk_vendor = HostFacts._disk_vendor_workarounds.get(vendor, vendor)
5294 disk_size_bytes = self._get_capacity(dev)
5295 disk_list.append({
5296 "description": "{} {} ({})".format(disk_vendor, disk_model, bytes_to_human(disk_size_bytes)),
5297 "vendor": disk_vendor,
5298 "model": disk_model,
5299 "rev": disk_rev,
5300 "wwid": disk_wwid,
5301 "dev_name": dev,
5302 "disk_size_bytes": disk_size_bytes,
5303 }
5304 )
5305 return disk_list
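# Each entry is shaped like the following (hypothetical 4TB device):
#   {"description": "ATA WDC WD40EFRX-68N (4.0TB)", "vendor": "ATA",
#    "model": "WDC WD40EFRX-68N", "rev": "0A82", "wwid": "...",
#    "dev_name": "sda", "disk_size_bytes": 4000787030016}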
5306
5307 @property
5308 def hdd_list(self):
5309 # type: () -> List[Dict[str, object]]
5310 """Return a list of devices that are HDDs (spinners)"""
5311 devs = self._get_devs_by_type(rota='1')
5312 return self._dev_list(devs)
5313
5314 @property
5315 def flash_list(self):
5316 # type: () -> List[Dict[str, object]]
5317 """Return a list of devices that are flash based (SSD, NVMe)"""
5318 devs = self._get_devs_by_type(rota='0')
5319 return self._dev_list(devs)
5320
5321 @property
5322 def hdd_capacity_bytes(self):
5323 # type: () -> int
5324 """Return the total capacity for all HDD devices (bytes)"""
5325 return self._get_capacity_by_type(rota='1')
5326
5327 @property
5328 def hdd_capacity(self):
5329 # type: () -> str
5330 """Return the total capacity for all HDD devices (human readable format)"""
5331 return bytes_to_human(self.hdd_capacity_bytes)
5332
5333 @property
5334 def cpu_load(self):
5335 # type: () -> Dict[str, float]
5336 """Return the cpu load average data for the host"""
5337 raw = read_file(['/proc/loadavg']).strip()
5338 data = raw.split()
5339 return {
5340 "1min": float(data[0]),
5341 "5min": float(data[1]),
5342 "15min": float(data[2]),
5343 }
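# e.g. a /proc/loadavg of '0.41 0.33 0.28 2/556 12345' maps to
# {"1min": 0.41, "5min": 0.33, "15min": 0.28}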
5344
5345 @property
5346 def flash_count(self):
5347 # type: () -> int
5348 """Return the number of flash devices in the system (SSD, NVMe)"""
5349 return len(self._get_devs_by_type(rota='0'))
5350
5351 @property
5352 def flash_capacity_bytes(self):
5353 # type: () -> int
5354 """Return the total capacity for all flash devices (bytes)"""
5355 return self._get_capacity_by_type(rota='0')
5356
5357 @property
5358 def flash_capacity(self):
5359 # type: () -> str
5360 """Return the total capacity for all Flash devices (human readable format)"""
5361 return bytes_to_human(self.flash_capacity_bytes)
5362
5363 def _process_nics(self):
5364 # type: () -> None
5365 """Look at the NIC devices and extract network related metadata"""
5366 # from https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
5367 hw_lookup = {
5368 "1": "ethernet",
5369 "32": "infiniband",
5370 "772": "loopback",
5371 }
5372
5373 for nic_path in HostFacts._nic_path_list:
5374 if not os.path.exists(nic_path):
5375 continue
5376 for iface in os.listdir(nic_path):
5377
5378 lower_devs_list = [os.path.basename(link.replace("lower_", "")) for link in glob(os.path.join(nic_path, iface, "lower_*"))]
5379 upper_devs_list = [os.path.basename(link.replace("upper_", "")) for link in glob(os.path.join(nic_path, iface, "upper_*"))]
5380
5381 try:
5382 mtu = int(read_file([os.path.join(nic_path, iface, 'mtu')]))
5383 except ValueError:
5384 mtu = 0
5385
5386 operstate = read_file([os.path.join(nic_path, iface, 'operstate')])
5387 try:
5388 speed = int(read_file([os.path.join(nic_path, iface, 'speed')]))
5389 except (OSError, ValueError):
5390 # OSError : device doesn't support the ethtool get_link_ksettings call
5391 # ValueError : the read fails and read_file returns 'Unknown', which int() rejects
5392 #
5393 # Either way, report -1 when the speed isn't available
5394 speed = -1
5395
5396 if os.path.exists(os.path.join(nic_path, iface, 'bridge')):
5397 nic_type = "bridge"
5398 elif os.path.exists(os.path.join(nic_path, iface, 'bonding')):
5399 nic_type = "bonding"
5400 else:
5401 nic_type = hw_lookup.get(read_file([os.path.join(nic_path, iface, 'type')]), "Unknown")
5402
5403 dev_link = os.path.join(nic_path, iface, 'device')
5404 if os.path.exists(dev_link):
5405 iftype = 'physical'
5406 driver_path = os.path.join(dev_link, 'driver')
5407 if os.path.exists(driver_path):
5408 driver = os.path.basename(
5409 os.path.realpath(driver_path))
5410 else:
5411 driver = 'Unknown'
5412
5413 else:
5414 iftype = 'logical'
5415 driver = ''
5416
5417 self.interfaces[iface] = {
5418 "mtu": mtu,
5419 "upper_devs_list": upper_devs_list,
5420 "lower_devs_list": lower_devs_list,
5421 "operstate": operstate,
5422 "iftype": iftype,
5423 "nic_type": nic_type,
5424 "driver": driver,
5425 "speed": speed,
5426 "ipv4_address": get_ipv4_address(iface),
5427 "ipv6_address": get_ipv6_address(iface),
5428 }
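# A resulting entry, for a hypothetical physical 10GbE port, would
# look like:
#   self.interfaces['eth0'] = {"mtu": 1500, "upper_devs_list": [],
#       "lower_devs_list": [], "operstate": "up", "iftype": "physical",
#       "nic_type": "ethernet", "driver": "ixgbe", "speed": 10000,
#       "ipv4_address": ..., "ipv6_address": ...}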
5429
5430 @property
5431 def nic_count(self):
5432 # type: () -> int
5433 """Return a total count of all physical NICs detected in the host"""
5434 phys_devs = []
5435 for iface in self.interfaces:
5436 if self.interfaces[iface]["iftype"] == 'physical':
5437 phys_devs.append(iface)
5438 return len(phys_devs)
5439
5440
5441 def _get_mem_data(self, field_name):
5442 # type: (str) -> int
5443 for line in self._meminfo:
5444 if line.startswith(field_name):
5445 _d = line.split()
5446 return int(_d[1])
5447 return 0
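# e.g. a /proc/meminfo line of 'MemTotal:  32594144 kB' makes
# _get_mem_data('MemTotal') return 32594144; fields that are not
# present return 0.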
5448
5449 @property
5450 def memory_total_kb(self):
5451 # type: () -> int
5452 """Determine the memory installed (kb)"""
5453 return self._get_mem_data('MemTotal')
5454
5455 @property
5456 def memory_free_kb(self):
5457 # type: () -> int
5458 """Determine the memory free (not cache, immediately usable)"""
5459 return self._get_mem_data('MemFree')
5460
5461 @property
5462 def memory_available_kb(self):
5463 # type: () -> int
5464 """Determine the memory available to new applications without swapping"""
5465 return self._get_mem_data('MemAvailable')
5466
5467 @property
5468 def vendor(self):
5469 # type: () -> str
5470 """Determine server vendor from DMI data in sysfs"""
5471 return read_file(HostFacts._dmi_path_list, "sys_vendor")
5472
5473 @property
5474 def model(self):
5475 # type: () -> str
5476 """Determine server model information from DMI data in sysfs"""
5477 family = read_file(HostFacts._dmi_path_list, "product_family")
5478 product = read_file(HostFacts._dmi_path_list, "product_name")
5479 if family == 'Unknown' and product:
5480 return "{}".format(product)
5481
5482 return "{} ({})".format(family, product)
5483
5484 @property
5485 def bios_version(self):
5486 # type: () -> str
5487 """Determine server BIOS version from DMI data in sysfs"""
5488 return read_file(HostFacts._dmi_path_list, "bios_version")
5489
5490 @property
5491 def bios_date(self):
5492 # type: () -> str
5493 """Determine server BIOS date from DMI data in sysfs"""
5494 return read_file(HostFacts._dmi_path_list, "bios_date")
5495
5496 @property
5497 def timestamp(self):
5498 # type: () -> float
5499 """Return the current time as Epoch seconds"""
5500 return time.time()
5501
5502 @property
5503 def system_uptime(self):
5504 # type: () -> float
5505 """Return the system uptime (in secs)"""
5506 raw_time = read_file(['/proc/uptime'])
5507 up_secs, _ = raw_time.split()
5508 return float(up_secs)
5509
5510 def kernel_security(self):
5511 # type: () -> Dict[str, str]
5512 """Determine the security features enabled in the kernel - SELinux, AppArmor"""
5513 def _fetch_selinux():
5514 """Read the selinux config file to determine state"""
5515 security = {}
5516 for selinux_path in HostFacts._selinux_path_list:
5517 if os.path.exists(selinux_path):
5518 selinux_config = read_file([selinux_path]).splitlines()
5519 security['type'] = 'SELinux'
5520 for line in selinux_config:
5521 if line.strip().startswith('#') or '=' not in line:
5522 continue
5523 k, v = line.split('=', 1)
5524 security[k] = v.strip()
5525 if security.get('SELINUX', '').lower() == "disabled":
5526 security['description'] = "SELinux: Disabled"
5527 else:
5528 security['description'] = "SELinux: Enabled({}, {})".format(security.get('SELINUX'), security.get('SELINUXTYPE'))
5529 return security
5530
5531 def _fetch_apparmor():
5532 """Read the apparmor profiles directly, returning an overview of AppArmor status"""
5533 security = {}
5534 for apparmor_path in HostFacts._apparmor_path_list:
5535 if os.path.exists(apparmor_path):
5536 security['type'] = "AppArmor"
5537 security['description'] = "AppArmor: Enabled"
5538 try:
5539 profiles = read_file(['/sys/kernel/security/apparmor/profiles'])
5540 except OSError:
5541 pass
5542 else:
5543 summary = {} # type: Dict[str, int]
5544 for line in profiles.split('\n'):
5545 if ' ' not in line:
5546 continue
5547 item, mode = line.rsplit(' ', 1)
5548 mode = mode.strip('()')
5549 # count every profile, including the first seen for a mode
5550 summary[mode] = summary.get(mode, 0) + 1
5551 summary_str = ",".join(["{} {}".format(v, k) for k, v in summary.items()])
5552 security = {**security, **summary} # type: ignore
5553 security['description'] += "({})".format(summary_str)
5554
5555 return security
5556
5557 if os.path.exists('/sys/kernel/security/lsm'):
5558 lsm = read_file(['/sys/kernel/security/lsm']).strip()
5559 if 'selinux' in lsm:
5560 return _fetch_selinux()
5561 elif 'apparmor' in lsm:
5562 return _fetch_apparmor()
5563 else:
5564 return {
5565 "type": "Unknown",
5566 "description": "Linux Security Module framework is active, but is not using SELinux or AppArmor"
5567 }
5568
5569 return {
5570 "type": "None",
5571 "description": "Linux Security Module framework is not available"
5572 }
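# Illustrative return values for kernel_security():
#   {'type': 'SELinux', 'SELINUX': 'enforcing',
#    'SELINUXTYPE': 'targeted',
#    'description': 'SELinux: Enabled(enforcing, targeted)', ...}
#   {'type': 'AppArmor', 'enforce': 10,
#    'description': 'AppArmor: Enabled(10 enforce)'}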
5573
5574 @property
5575 def kernel_parameters(self):
5576 # type: () -> Dict[str, str]
5577 """Get kernel parameters required/used in Ceph clusters"""
5578
5579 k_param = {}
5580 out, _, _ = call_throws(['sysctl', '-a'], verbosity=CallVerbosity.SILENT)
5581 if out:
5582 param_list = out.split('\n')
5583 param_dict = {param.split(" = ")[0]: param.split(" = ")[-1] for param in param_list}
5584
5585 # return only desired parameters
5586 if 'net.ipv4.ip_nonlocal_bind' in param_dict:
5587 k_param['net.ipv4.ip_nonlocal_bind'] = param_dict['net.ipv4.ip_nonlocal_bind']
5588
5589 return k_param
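# e.g. a sysctl line of 'net.ipv4.ip_nonlocal_bind = 0' produces
# {'net.ipv4.ip_nonlocal_bind': '0'}; all other parameters are
# filtered out.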
5590
5591 def dump(self):
5592 # type: () -> str
5593 """Return the attributes of this HostFacts object as json"""
5594 data = {k: getattr(self, k) for k in dir(self)
5595 if not k.startswith('_') and
5596 isinstance(getattr(self, k),
5597 (float, int, str, list, dict, tuple))
5598 }
5599 return json.dumps(data, indent=2, sort_keys=True)
5600
5601 ##################################
5602
5603 def command_gather_facts():
5604 """gather_facts is intended to provide host related metadata to the caller"""
5605 host = HostFacts()
5606 print(host.dump())
5607
5608
5609 ##################################
5610
5611
5612 def _get_parser():
5613 # type: () -> argparse.ArgumentParser
5614 parser = argparse.ArgumentParser(
5615 description='Bootstrap Ceph daemons with systemd and containers.',
5616 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
5617 parser.add_argument(
5618 '--image',
5619 help='container image. Can also be set via the "CEPHADM_IMAGE" '
5620 'env var')
5621 parser.add_argument(
5622 '--docker',
5623 action='store_true',
5624 help='use docker instead of podman')
5625 parser.add_argument(
5626 '--data-dir',
5627 default=DATA_DIR,
5628 help='base directory for daemon data')
5629 parser.add_argument(
5630 '--log-dir',
5631 default=LOG_DIR,
5632 help='base directory for daemon logs')
5633 parser.add_argument(
5634 '--logrotate-dir',
5635 default=LOGROTATE_DIR,
5636 help='location of logrotate configuration files')
5637 parser.add_argument(
5638 '--unit-dir',
5639 default=UNIT_DIR,
5640 help='base directory for systemd units')
5641 parser.add_argument(
5642 '--verbose', '-v',
5643 action='store_true',
5644 help='Show debug-level log messages')
5645 parser.add_argument(
5646 '--timeout',
5647 type=int,
5648 default=DEFAULT_TIMEOUT,
5649 help='timeout in seconds')
5650 parser.add_argument(
5651 '--retry',
5652 type=int,
5653 default=DEFAULT_RETRY,
5654 help='max number of retries')
5655 parser.add_argument(
5656 '--env', '-e',
5657 action='append',
5658 default=[],
5659 help='set environment variable')
5660 parser.add_argument(
5661 '--no-container-init',
5662 action='store_true',
5663 default=not CONTAINER_INIT,
5664 help='Do not run podman/docker with `--init`')
5665
5666 subparsers = parser.add_subparsers(help='sub-command')
5667
5668 parser_version = subparsers.add_parser(
5669 'version', help='get ceph version from container')
5670 parser_version.set_defaults(func=command_version)
5671
5672 parser_pull = subparsers.add_parser(
5673 'pull', help='pull latest image version')
5674 parser_pull.set_defaults(func=command_pull)
5675
5676 parser_inspect_image = subparsers.add_parser(
5677 'inspect-image', help='inspect local container image')
5678 parser_inspect_image.set_defaults(func=command_inspect_image)
5679
5680 parser_ls = subparsers.add_parser(
5681 'ls', help='list daemon instances on this host')
5682 parser_ls.set_defaults(func=command_ls)
5683 parser_ls.add_argument(
5684 '--no-detail',
5685 action='store_true',
5686 help='Do not include daemon status')
5687 parser_ls.add_argument(
5688 '--legacy-dir',
5689 default='/',
5690 help='base directory for legacy daemon data')
5691
5692 parser_list_networks = subparsers.add_parser(
5693 'list-networks', help='list IP networks')
5694 parser_list_networks.set_defaults(func=command_list_networks)
5695
5696 parser_adopt = subparsers.add_parser(
5697 'adopt', help='adopt daemon deployed with a different tool')
5698 parser_adopt.set_defaults(func=command_adopt)
5699 parser_adopt.add_argument(
5700 '--name', '-n',
5701 required=True,
5702 help='daemon name (type.id)')
5703 parser_adopt.add_argument(
5704 '--style',
5705 required=True,
5706 help='deployment style (legacy, ...)')
5707 parser_adopt.add_argument(
5708 '--cluster',
5709 default='ceph',
5710 help='cluster name')
5711 parser_adopt.add_argument(
5712 '--legacy-dir',
5713 default='/',
5714 help='base directory for legacy daemon data')
5715 parser_adopt.add_argument(
5716 '--config-json',
5717 help='Additional configuration information in JSON format')
5718 parser_adopt.add_argument(
5719 '--skip-firewalld',
5720 action='store_true',
5721 help='Do not configure firewalld')
5722 parser_adopt.add_argument(
5723 '--skip-pull',
5724 action='store_true',
5725 help='do not pull the latest image before adopting')
5726 parser_adopt.add_argument(
5727 '--force-start',
5728 action='store_true',
5729 help="start newly adopted daemon, even if it wasn't running previously")
5730 parser_adopt.add_argument(
5731 '--container-init',
5732 action='store_true',
5733 default=CONTAINER_INIT,
5734 help=argparse.SUPPRESS)
5735
5736 parser_rm_daemon = subparsers.add_parser(
5737 'rm-daemon', help='remove daemon instance')
5738 parser_rm_daemon.set_defaults(func=command_rm_daemon)
5739 parser_rm_daemon.add_argument(
5740 '--name', '-n',
5741 required=True,
5742 action=CustomValidation,
5743 help='daemon name (type.id)')
5744 parser_rm_daemon.add_argument(
5745 '--fsid',
5746 required=True,
5747 help='cluster FSID')
5748 parser_rm_daemon.add_argument(
5749 '--force',
5750 action='store_true',
5751 help='proceed, even though this may destroy valuable data')
5752 parser_rm_daemon.add_argument(
5753 '--force-delete-data',
5754 action='store_true',
5755 help='delete valuable daemon data instead of making a backup')
5756
5757 parser_rm_cluster = subparsers.add_parser(
5758 'rm-cluster', help='remove all daemons for a cluster')
5759 parser_rm_cluster.set_defaults(func=command_rm_cluster)
5760 parser_rm_cluster.add_argument(
5761 '--fsid',
5762 required=True,
5763 help='cluster FSID')
5764 parser_rm_cluster.add_argument(
5765 '--force',
5766 action='store_true',
5767 help='proceed, even though this may destroy valuable data')
5768
5769 parser_run = subparsers.add_parser(
5770 'run', help='run a ceph daemon, in a container, in the foreground')
5771 parser_run.set_defaults(func=command_run)
5772 parser_run.add_argument(
5773 '--name', '-n',
5774 required=True,
5775 help='daemon name (type.id)')
5776 parser_run.add_argument(
5777 '--fsid',
5778 required=True,
5779 help='cluster FSID')
5780
5781 parser_shell = subparsers.add_parser(
5782 'shell', help='run an interactive shell inside a daemon container')
5783 parser_shell.set_defaults(func=command_shell)
5784 parser_shell.add_argument(
5785 '--fsid',
5786 help='cluster FSID')
5787 parser_shell.add_argument(
5788 '--name', '-n',
5789 help='daemon name (type.id)')
5790 parser_shell.add_argument(
5791 '--config', '-c',
5792 help='ceph.conf to pass through to the container')
5793 parser_shell.add_argument(
5794 '--keyring', '-k',
5795 help='ceph.keyring to pass through to the container')
5796 parser_shell.add_argument(
5797 '--mount', '-m',
5798 help=("mount a file or directory in the container. "
5799 "Supports multiple mounts, "
5800 "e.g.: `--mount /foo /bar:/bar`. "
5801 "When no destination is given, the default is /mnt"),
5802 nargs='+')
5803 parser_shell.add_argument(
5804 '--env', '-e',
5805 action='append',
5806 default=[],
5807 help='set environment variable')
5808 parser_shell.add_argument(
5809 'command', nargs=argparse.REMAINDER,
5810 help='command (optional)')
5811
5812 parser_enter = subparsers.add_parser(
5813 'enter', help='run an interactive shell inside a running daemon container')
5814 parser_enter.set_defaults(func=command_enter)
5815 parser_enter.add_argument(
5816 '--fsid',
5817 help='cluster FSID')
5818 parser_enter.add_argument(
5819 '--name', '-n',
5820 required=True,
5821 help='daemon name (type.id)')
5822 parser_enter.add_argument(
5823 'command', nargs=argparse.REMAINDER,
5824 help='command')
5825
5826 parser_ceph_volume = subparsers.add_parser(
5827 'ceph-volume', help='run ceph-volume inside a container')
5828 parser_ceph_volume.set_defaults(func=command_ceph_volume)
5829 parser_ceph_volume.add_argument(
5830 '--fsid',
5831 help='cluster FSID')
5832 parser_ceph_volume.add_argument(
5833 '--config-json',
5834 help='JSON file with config and (client.bootstrap-osd) key')
5835 parser_ceph_volume.add_argument(
5836 '--config', '-c',
5837 help='ceph conf file')
5838 parser_ceph_volume.add_argument(
5839 '--keyring', '-k',
5840 help='ceph.keyring to pass through to the container')
5841 parser_ceph_volume.add_argument(
5842 'command', nargs=argparse.REMAINDER,
5843 help='command')
5844
5845 parser_unit = subparsers.add_parser(
5846 'unit', help='operate on the daemon\'s systemd unit')
5847 parser_unit.set_defaults(func=command_unit)
5848 parser_unit.add_argument(
5849 'command',
5850 help='systemd command (start, stop, restart, enable, disable, ...)')
5851 parser_unit.add_argument(
5852 '--fsid',
5853 help='cluster FSID')
5854 parser_unit.add_argument(
5855 '--name', '-n',
5856 required=True,
5857 help='daemon name (type.id)')
5858
5859 parser_logs = subparsers.add_parser(
5860 'logs', help='print journald logs for a daemon container')
5861 parser_logs.set_defaults(func=command_logs)
5862 parser_logs.add_argument(
5863 '--fsid',
5864 help='cluster FSID')
5865 parser_logs.add_argument(
5866 '--name', '-n',
5867 required=True,
5868 help='daemon name (type.id)')
5869 parser_logs.add_argument(
5870 'command', nargs='*',
5871 help='additional journalctl args')
5872
5873 parser_bootstrap = subparsers.add_parser(
5874 'bootstrap', help='bootstrap a cluster (mon + mgr daemons)')
5875 parser_bootstrap.set_defaults(func=command_bootstrap)
5876 parser_bootstrap.add_argument(
5877 '--config', '-c',
5878 help='ceph conf file to incorporate')
5879 parser_bootstrap.add_argument(
5880 '--mon-id',
5881 required=False,
5882 help='mon id (default: local hostname)')
5883 parser_bootstrap.add_argument(
5884 '--mon-addrv',
5885 help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])')
5886 parser_bootstrap.add_argument(
5887 '--mon-ip',
5888 help='mon IP')
5889 parser_bootstrap.add_argument(
5890 '--mgr-id',
5891 required=False,
5892 help='mgr id (default: randomly generated)')
5893 parser_bootstrap.add_argument(
5894 '--fsid',
5895 help='cluster FSID')
5896 parser_bootstrap.add_argument(
5897 '--output-dir',
5898 default='/etc/ceph',
5899 help='directory to write config, keyring, and pub key files')
5900 parser_bootstrap.add_argument(
5901 '--output-keyring',
5902 help='location to write keyring file with new cluster admin and mon keys')
5903 parser_bootstrap.add_argument(
5904 '--output-config',
5905 help='location to write conf file to connect to new cluster')
5906 parser_bootstrap.add_argument(
5907 '--output-pub-ssh-key',
5908 help='location to write the cluster\'s public SSH key')
5909 parser_bootstrap.add_argument(
5910 '--skip-ssh',
5911 action='store_true',
5912 help='skip setup of ssh key on local host')
5913 parser_bootstrap.add_argument(
5914 '--initial-dashboard-user',
5915 default='admin',
5916 help='Initial user for the dashboard')
5917 parser_bootstrap.add_argument(
5918 '--initial-dashboard-password',
5919 help='Initial password for the initial dashboard user')
5920 parser_bootstrap.add_argument(
5921 '--ssl-dashboard-port',
5922 type=int,
5923 default=8443,
5924 help='Port number used to connect with dashboard using SSL')
5925 parser_bootstrap.add_argument(
5926 '--dashboard-key',
5927 type=argparse.FileType('r'),
5928 help='Dashboard key')
5929 parser_bootstrap.add_argument(
5930 '--dashboard-crt',
5931 type=argparse.FileType('r'),
5932 help='Dashboard certificate')
5933
5934 parser_bootstrap.add_argument(
5935 '--ssh-config',
5936 type=argparse.FileType('r'),
5937 help='SSH config')
5938 parser_bootstrap.add_argument(
5939 '--ssh-private-key',
5940 type=argparse.FileType('r'),
5941 help='SSH private key')
5942 parser_bootstrap.add_argument(
5943 '--ssh-public-key',
5944 type=argparse.FileType('r'),
5945 help='SSH public key')
5946 parser_bootstrap.add_argument(
5947 '--ssh-user',
5948 default='root',
5949 help='set user for SSHing to cluster hosts; passwordless sudo is needed for non-root users')
5950
5951 parser_bootstrap.add_argument(
5952 '--skip-mon-network',
5953 action='store_true',
5954 help='do not set mon public_network based on the bootstrap mon ip')
5955 parser_bootstrap.add_argument(
5956 '--skip-dashboard',
5957 action='store_true',
5958 help='do not enable the Ceph Dashboard')
5959 parser_bootstrap.add_argument(
5960 '--dashboard-password-noupdate',
5961 action='store_true',
5962 help='stop forced dashboard password change')
5963 parser_bootstrap.add_argument(
5964 '--no-minimize-config',
5965 action='store_true',
5966 help='do not assimilate and minimize the config file')
5967 parser_bootstrap.add_argument(
5968 '--skip-ping-check',
5969 action='store_true',
5970 help='do not verify that mon IP is pingable')
5971 parser_bootstrap.add_argument(
5972 '--skip-pull',
5973 action='store_true',
5974 help='do not pull the latest image before bootstrapping')
5975 parser_bootstrap.add_argument(
5976 '--skip-firewalld',
5977 action='store_true',
5978 help='Do not configure firewalld')
5979 parser_bootstrap.add_argument(
5980 '--allow-overwrite',
5981 action='store_true',
5982 help='allow overwrite of existing --output-* config/keyring/ssh files')
5983 parser_bootstrap.add_argument(
5984 '--allow-fqdn-hostname',
5985 action='store_true',
5986 help='allow hostname that is fully-qualified (contains ".")')
5987 parser_bootstrap.add_argument(
5988 '--skip-prepare-host',
5989 action='store_true',
5990 help='Do not prepare host')
5991 parser_bootstrap.add_argument(
5992 '--orphan-initial-daemons',
5993 action='store_true',
5994 help='Do not create initial mon, mgr, and crash service specs')
5995 parser_bootstrap.add_argument(
5996 '--skip-monitoring-stack',
5997 action='store_true',
5998 help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
5999 parser_bootstrap.add_argument(
6000 '--apply-spec',
6001 help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
6002
6003 parser_bootstrap.add_argument(
6004 '--shared_ceph_folder',
6005 metavar='CEPH_SOURCE_FOLDER',
6006 help='Development mode: map several container folders to sub-folders of the given ceph source folder')
6007
6008 parser_bootstrap.add_argument(
6009 '--registry-url',
6010 help='url for custom registry')
6011 parser_bootstrap.add_argument(
6012 '--registry-username',
6013 help='username for custom registry')
6014 parser_bootstrap.add_argument(
6015 '--registry-password',
6016 help='password for custom registry')
6017 parser_bootstrap.add_argument(
6018 '--registry-json',
6019 help='json file with custom registry login info (URL, Username, Password)')
6020 parser_bootstrap.add_argument(
6021 '--container-init',
6022 action='store_true',
6023 default=CONTAINER_INIT,
6024 help=argparse.SUPPRESS)
6025
6026 parser_deploy = subparsers.add_parser(
6027 'deploy', help='deploy a daemon')
6028 parser_deploy.set_defaults(func=command_deploy)
6029 parser_deploy.add_argument(
6030 '--name',
6031 required=True,
6032 action=CustomValidation,
6033 help='daemon name (type.id)')
6034 parser_deploy.add_argument(
6035 '--fsid',
6036 required=True,
6037 help='cluster FSID')
6038 parser_deploy.add_argument(
6039 '--config', '-c',
6040 help='config file for new daemon')
6041 parser_deploy.add_argument(
6042 '--config-json',
6043 help='Additional configuration information in JSON format')
6044 parser_deploy.add_argument(
6045 '--keyring',
6046 help='keyring for new daemon')
6047 parser_deploy.add_argument(
6048 '--key',
6049 help='key for new daemon')
6050 parser_deploy.add_argument(
6051 '--osd-fsid',
6052 help='OSD uuid, if creating an OSD container')
6053 parser_deploy.add_argument(
6054 '--skip-firewalld',
6055 action='store_true',
6056 help='Do not configure firewalld')
6057 parser_deploy.add_argument(
6058 '--tcp-ports',
6059 help='List of tcp ports to open in the host firewall')
6060 parser_deploy.add_argument(
6061 '--reconfig',
6062 action='store_true',
6063 help='Reconfigure a previously deployed daemon')
6064 parser_deploy.add_argument(
6065 '--allow-ptrace',
6066 action='store_true',
6067 help='Allow SYS_PTRACE on daemon container')
6068 parser_deploy.add_argument(
6069 '--container-init',
6070 action='store_true',
6071 default=CONTAINER_INIT,
6072 help=argparse.SUPPRESS)
6073
6074 parser_check_host = subparsers.add_parser(
6075 'check-host', help='check host configuration')
6076 parser_check_host.set_defaults(func=command_check_host)
6077 parser_check_host.add_argument(
6078 '--expect-hostname',
6079 help='Check that hostname matches an expected value')
6080
6081 parser_prepare_host = subparsers.add_parser(
6082 'prepare-host', help='prepare a host for cephadm use')
6083 parser_prepare_host.set_defaults(func=command_prepare_host)
6084 parser_prepare_host.add_argument(
6085 '--expect-hostname',
6086 help='Set hostname')
6087
6088 parser_add_repo = subparsers.add_parser(
6089 'add-repo', help='configure package repository')
6090 parser_add_repo.set_defaults(func=command_add_repo)
6091 parser_add_repo.add_argument(
6092 '--release',
6093 help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE))
6094 parser_add_repo.add_argument(
6095 '--version',
6096 help='use specific upstream version (x.y.z)')
6097 parser_add_repo.add_argument(
6098 '--dev',
6099 help='use specified bleeding edge build from git branch or tag')
6100 parser_add_repo.add_argument(
6101 '--dev-commit',
6102 help='use specified bleeding edge build from git commit')
6103 parser_add_repo.add_argument(
6104 '--gpg-url',
6105 help='specify alternative GPG key location')
6106 parser_add_repo.add_argument(
6107 '--repo-url',
6108 default='https://download.ceph.com',
6109 help='specify alternative repo location')
6110 # TODO: proxy?
6111
6112 parser_rm_repo = subparsers.add_parser(
6113 'rm-repo', help='remove package repository configuration')
6114 parser_rm_repo.set_defaults(func=command_rm_repo)
6115
6116 parser_install = subparsers.add_parser(
6117 'install', help='install ceph package(s)')
6118 parser_install.set_defaults(func=command_install)
6119 parser_install.add_argument(
6120 'packages', nargs='*',
6121 default=['cephadm'],
6122 help='packages')
6123
6124 parser_registry_login = subparsers.add_parser(
6125 'registry-login', help='log host into authenticated registry')
6126 parser_registry_login.set_defaults(func=command_registry_login)
6127 parser_registry_login.add_argument(
6128 '--registry-url',
6129 help='url for custom registry')
6130 parser_registry_login.add_argument(
6131 '--registry-username',
6132 help='username for custom registry')
6133 parser_registry_login.add_argument(
6134 '--registry-password',
6135 help='password for custom registry')
6136 parser_registry_login.add_argument(
6137 '--registry-json',
6138 help='json file with custom registry login info (URL, Username, Password)')
6139 parser_registry_login.add_argument(
6140 '--fsid',
6141 help='cluster FSID')
6142
6143 parser_gather_facts = subparsers.add_parser(
6144 'gather-facts', help='gather and return host related information (JSON format)')
6145 parser_gather_facts.set_defaults(func=command_gather_facts)
6146
6147 return parser
6148
6149
6150 def _parse_args(av):
6151 parser = _get_parser()
6152
6153 args = parser.parse_args(av)
6154 if 'command' in args and args.command and args.command[0] == "--":
6155 args.command.pop(0)
6156
6157 # workaround argparse to deprecate the subparser `--container-init` flag
6158 # container_init and no_container_init must always be mutually exclusive
6159 container_init_args = ('--container-init', '--no-container-init')
6160 if set(container_init_args).issubset(av):
6161 parser.error('argument %s: not allowed with argument %s' % container_init_args)
6162 elif '--container-init' in av:
6163 args.no_container_init = not args.container_init
6164 else:
6165 args.container_init = not args.no_container_init
6166 assert args.container_init is not args.no_container_init
6167
6168 return args
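# Sketch of the exclusion above: 'cephadm adopt --container-init ...'
# leaves args.container_init True and args.no_container_init False;
# combining --container-init with --no-container-init aborts via
# parser.error(); with neither flag, container_init simply follows
# the --no-container-init default (i.e. the CONTAINER_INIT constant).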
6169
6170
6171 if __name__ == "__main__":
6172
6173 # root?
6174 if os.geteuid() != 0:
6175 sys.stderr.write('ERROR: cephadm should be run as root\n')
6176 sys.exit(1)
6177
6178 # Logger configuration
6179 if not os.path.exists(LOG_DIR):
6180 os.makedirs(LOG_DIR)
6181 dictConfig(logging_config)
6182 logger = logging.getLogger()
6183
6184 # allow argv to be injected
6185 try:
6186 av = injected_argv # type: ignore
6187 except NameError:
6188 av = sys.argv[1:]
6189 logger.debug("%s\ncephadm %s" % ("-" * 80, av))
6190 args = _parse_args(av)
6191
6192 # More verbose console output
6193 if args.verbose:
6194 for handler in logger.handlers:
6195 if handler.name == "console":
6196 handler.setLevel(logging.DEBUG)
6197
6198 if 'func' not in args:
6199 sys.stderr.write('No command specified; pass -h or --help for usage\n')
6200 sys.exit(1)
6201
6202 # podman or docker?
6203 if args.func != command_check_host:
6204 if args.docker:
6205 container_path = find_program('docker')
6206 else:
6207 for i in CONTAINER_PREFERENCE:
6208 try:
6209 container_path = find_program(i)
6210 break
6211 except Exception as e:
6212 logger.debug('Could not locate %s: %s' % (i, e))
6213 if not container_path and args.func != command_prepare_host \
6214 and args.func != command_add_repo:
6215 sys.stderr.write('Unable to locate any of %s\n' % CONTAINER_PREFERENCE)
6216 sys.exit(1)
6217
6218 # container-init?
6219 container_init = args.container_init
6220 logger.debug('container_init=%s' % (container_init))
6221
6222 try:
6223 r = args.func()
6224 except Error as e:
6225 if args.verbose:
6226 raise
6227 sys.stderr.write('ERROR: %s\n' % e)
6228 sys.exit(1)
6229 if not r:
6230 r = 0
6231 sys.exit(r)