]> git.proxmox.com Git - ceph.git/blame - ceph/src/cephadm/cephadm
import ceph 15.2.14
[ceph.git] / ceph / src / cephadm / cephadm
CommitLineData
9f95a23c
TL
#!/usr/bin/python3

# Container image used when no --image is given on the command line.
DEFAULT_IMAGE='docker.io/ceph/ceph:v15'
# Whether DEFAULT_IMAGE refers to a master/devel build rather than a release
# (consumed elsewhere in this script -- not shown here).
DEFAULT_IMAGE_IS_MASTER=False
# Name of the most recent stable Ceph release this script knows about.
LATEST_STABLE_RELEASE = 'octopus'
# Host filesystem locations used for daemon data, logs, locks,
# logrotate snippets and systemd unit files.
DATA_DIR = '/var/lib/ceph'
LOG_DIR = '/var/log/ceph'
LOCK_DIR = '/run/cephadm'
LOGROTATE_DIR = '/etc/logrotate.d'
UNIT_DIR = '/etc/systemd/system'
# Permission modes applied when creating the log/data directories.
LOG_DIR_MODE = 0o770
DATA_DIR_MODE = 0o700
CONTAINER_INIT=False
CONTAINER_PREFERENCE = ['podman', 'docker']  # prefer podman to docker
# Shell prompt used inside the 'cephadm shell' container.
CUSTOM_PS1 = r'[ceph: \u@\h \W]\$ '
DEFAULT_TIMEOUT = None # in seconds
DEFAULT_RETRY = 10
# Default config/keyring paths used by the 'shell' command.
SHELL_DEFAULT_CONF = '/etc/ceph/ceph.conf'
SHELL_DEFAULT_KEYRING = '/etc/ceph/ceph.client.admin.keyring'
9f95a23c
TL
20
21"""
22You can invoke cephadm in two ways:
23
241. The normal way, at the command line.
25
262. By piping the script to the python3 binary. In this latter case, you should
27 prepend one or more lines to the beginning of the script.
28
29 For arguments,
30
31 injected_argv = [...]
32
33 e.g.,
34
35 injected_argv = ['ls']
36
37 For reading stdin from the '--config-json -' argument,
38
39 injected_stdin = '...'
40"""
9f95a23c
TL
41import argparse
42import datetime
43import fcntl
f6b5b4d7 44import ipaddress
9f95a23c
TL
45import json
46import logging
f91f0fd5 47from logging.config import dictConfig
9f95a23c
TL
48import os
49import platform
f6b5b4d7 50import pwd
9f95a23c 51import random
9f95a23c 52import select
ec96510d 53import shlex
9f95a23c
TL
54import shutil
55import socket
56import string
57import subprocess
58import sys
59import tempfile
60import time
61import errno
f91f0fd5 62import struct
adb31ebb 63from enum import Enum
9f95a23c 64try:
f6b5b4d7 65 from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, Callable, IO
9f95a23c
TL
66except ImportError:
67 pass
f91f0fd5
TL
68
69import re
9f95a23c
TL
70import uuid
71
72from functools import wraps
73from glob import glob
74from threading import Thread
75
76if sys.version_info >= (3, 0):
77 from io import StringIO
78else:
79 from StringIO import StringIO
80
81if sys.version_info >= (3, 2):
82 from configparser import ConfigParser
83else:
84 from ConfigParser import SafeConfigParser
85
86if sys.version_info >= (3, 0):
87 from urllib.request import urlopen
88 from urllib.error import HTTPError
89else:
90 from urllib2 import urlopen, HTTPError
91
f6b5b4d7
TL
92if sys.version_info > (3, 0):
93 unicode = str
94
9f95a23c
TL
# Absolute path of the container engine binary (podman or docker);
# populated elsewhere in this script before containers are run.
container_path = ''
# Cache for stdin content (read once, reused); managed elsewhere.
cached_stdin = None

# strftime/strptime format for timestamps written/parsed by this script.
DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ'
9f95a23c 99
f91f0fd5
TL
# Log and console output config
logging_config = {
    'version': 1,
    'disable_existing_loggers': True,
    'formatters': {
        'cephadm': {
            'format': '%(asctime)s %(levelname)s %(message)s'
        },
    },
    'handlers': {
        # terminal: INFO and above, default (bare) formatting
        'console':{
            'level':'INFO',
            'class':'logging.StreamHandler',
        },
        # file: full DEBUG detail, rotated at ~1 MB keeping one backup
        'log_file': {
            'level': 'DEBUG',
            'class': 'logging.handlers.RotatingFileHandler',
            'formatter': 'cephadm',
            'filename': '%s/cephadm.log' % LOG_DIR,
            'maxBytes': 1024000,
            'backupCount': 1,
        }
    },
    'loggers': {
        # root logger fans out to both handlers
        '': {
            'level': 'DEBUG',
            'handlers': ['console', 'log_file'],
        }
    }
}
e306af50
TL
130
class termcolor:
    """ANSI escape sequences used to colorize terminal output."""
    yellow = '\x1b[93m'
    red = '\x1b[31m'
    end = '\x1b[0m'  # reset attributes
135
f6b5b4d7 136
9f95a23c
TL
class Error(Exception):
    """Base class for all errors raised by this script."""
139
f6b5b4d7 140
9f95a23c
TL
class TimeoutExpired(Error):
    """Raised when a timed operation exceeds its deadline."""
143
144##################################
145
f6b5b4d7 146
9f95a23c
TL
class Ceph(object):
    """Daemon types deployed from the main Ceph container image."""
    daemons = ('mon', 'mgr', 'mds', 'osd', 'rgw', 'rbd-mirror', 'crash')
150
151##################################
152
f6b5b4d7 153
9f95a23c
TL
class Monitoring(object):
    """Define the configs for the monitoring containers"""

    # host ports each monitoring daemon listens on
    port_map = {
        'prometheus': [9095],  # Avoid default 9090, due to conflict with cockpit UI
        'node-exporter': [9100],
        'grafana': [3000],
        'alertmanager': [9093, 9094],
    }

    # per-component image, resource hints, daemon args and the
    # files/args expected in the supplied config-json
    components = {
        'prometheus': {
            'image': 'docker.io/prom/prometheus:v2.18.1',
            'cpus': '2',
            'memory': '4GB',
            'args': [
                '--config.file=/etc/prometheus/prometheus.yml',
                '--storage.tsdb.path=/prometheus',
                '--web.listen-address=:{}'.format(port_map['prometheus'][0]),
            ],
            'config-json-files': [
                'prometheus.yml',
            ],
        },
        'node-exporter': {
            'image': 'docker.io/prom/node-exporter:v0.18.1',
            'cpus': '1',
            'memory': '1GB',
            'args': [
                '--no-collector.timex',
            ],
        },
        'grafana': {
            'image': 'docker.io/ceph/ceph-grafana:6.7.4',
            'cpus': '2',
            'memory': '4GB',
            'args': [],
            'config-json-files': [
                'grafana.ini',
                'provisioning/datasources/ceph-dashboard.yml',
                'certs/cert_file',
                'certs/cert_key',
            ],
        },
        'alertmanager': {
            'image': 'docker.io/prom/alertmanager:v0.20.0',
            'cpus': '2',
            'memory': '2GB',
            'args': [
                '--web.listen-address=:{}'.format(port_map['alertmanager'][0]),
                '--cluster.listen-address=:{}'.format(port_map['alertmanager'][1]),
            ],
            'config-json-files': [
                'alertmanager.yml',
            ],
            'config-json-args': [
                'peers',
            ],
        },
    }  # type: ignore

    @staticmethod
    def get_version(container_path, container_id, daemon_type):
        # type: (str, str, str) -> str
        """
        :param: daemon_type Either "prometheus", "alertmanager" or "node-exporter"
        """
        assert daemon_type in ('prometheus', 'alertmanager', 'node-exporter')
        cmd = daemon_type.replace('-', '_')
        code = -1
        err = ''
        version = ''
        if daemon_type == 'alertmanager':
            # the binary name differs across packagings; try both quietly
            for cmd in ['alertmanager', 'prometheus-alertmanager']:
                _, err, code = call(
                    [container_path, 'exec', container_id, cmd, '--version'],
                    verbosity=CallVerbosity.SILENT)
                if code == 0:
                    break
            cmd = 'alertmanager'  # reset cmd for version extraction
        else:
            _, err, code = call(
                [container_path, 'exec', container_id, cmd, '--version'])
        # version line looks like '<cmd>, version X.Y.Z ...' on stderr
        if code == 0 and err.startswith('%s, version ' % cmd):
            version = err.split(' ')[2]
        return version
243
9f95a23c
TL
244##################################
245
f6b5b4d7 246
9f95a23c
TL
class NFSGanesha(object):
    """Defines a NFS-Ganesha container"""

    daemon_type = 'nfs'
    entrypoint = '/usr/bin/ganesha.nfsd'
    # run in the foreground, log to stderr (collected by the container runtime)
    daemon_args = ['-F', '-L', 'STDERR']

    required_files = ['ganesha.conf']

    port_map = {
        "nfs" : 2049,
    }

    def __init__(self,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (str, Union[int, str], Dict, str) -> None
        """Store identity/image and extract daemon settings from config_json.

        Raises Error (via validate()) if required settings are missing.
        """
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.pool = dict_get(config_json, 'pool', require=True)
        self.namespace = dict_get(config_json, 'namespace')
        self.userid = dict_get(config_json, 'userid')
        self.extra_args = dict_get(config_json, 'extra_args', [])
        self.files = dict_get(config_json, 'files', {})
        self.rgw = dict_get(config_json, 'rgw', {})

        # validate the supplied args
        self.validate()

    @classmethod
    def init(cls, fsid, daemon_id):
        # type: (str, Union[int, str]) -> NFSGanesha
        """Construct from the global command-line args (config-json, image)."""
        return cls(fsid, daemon_id, get_parm(args.config_json), args.image)

    def get_container_mounts(self, data_dir):
        # type: (str) -> Dict[str, str]
        """Map host paths under data_dir to in-container mount points."""
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z'
        if self.rgw:
            cluster = self.rgw.get('cluster', 'ceph')
            rgw_user = self.rgw.get('user', 'admin')
            mounts[os.path.join(data_dir, 'keyring.rgw')] = \
                    '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user)
        return mounts

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        """Environment variables to set inside the container."""
        envs = [
            'CEPH_CONF=%s' % ('/etc/ceph/ceph.conf')
        ]
        return envs

    @staticmethod
    def get_version(container_id):
        # type: (str) -> Optional[str]
        """Return the ganesha release parsed from '-v' output, or None."""
        version = None
        out, err, code = call(
            [container_path, 'exec', container_id,
             NFSGanesha.entrypoint, '-v'])
        if code == 0:
            match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
            if match:
                version = match.group(1)
        return version

    def validate(self):
        # type: () -> None
        """Raise Error if fsid, daemon_id, image or config-json are invalid."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

        # check for an RGW config
        if self.rgw:
            if not self.rgw.get('keyring'):
                raise Error('RGW keyring is missing')
            if not self.rgw.get('user'):
                raise Error('RGW user is missing')

    def get_daemon_name(self):
        # type: () -> str
        """Return '<daemon_type>.<daemon_id>'."""
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        """Return the container name, optionally suffixed with desc."""
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def get_daemon_args(self):
        # type: () -> List[str]
        """Arguments passed to the ganesha daemon itself."""
        return self.daemon_args + self.extra_args

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ganesha config...')

        # create the ganesha conf dir
        config_dir = os.path.join(data_dir, 'etc/ganesha')
        makedirs(config_dir, uid, gid, 0o755)

        # populate files from the config-json
        for fname in self.files:
            config_file = os.path.join(config_dir, fname)
            config_content = dict_get_join(self.files, fname)
            logger.info('Write file: %s' % (config_file))
            with open(config_file, 'w') as f:
                # owner/mode set before writing so content is never world-readable
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)
                f.write(config_content)

        # write the RGW keyring
        if self.rgw:
            keyring_path = os.path.join(data_dir, 'keyring.rgw')
            with open(keyring_path, 'w') as f:
                os.fchmod(f.fileno(), 0o600)
                os.fchown(f.fileno(), uid, gid)
                f.write(self.rgw.get('keyring', ''))

    def get_rados_grace_container(self, action):
        # type: (str) -> CephContainer
        """Container for a ganesha action on the grace db"""
        entrypoint = '/usr/bin/ganesha-rados-grace'

        assert self.pool
        args=['--pool', self.pool]
        if self.namespace:
            args += ['--ns', self.namespace]
        if self.userid:
            args += ['--userid', self.userid]
        args += [action, self.get_daemon_name()]

        data_dir = get_data_dir(self.fsid, self.daemon_type, self.daemon_id)
        volume_mounts = self.get_container_mounts(data_dir)
        envs = self.get_container_envs()

        logger.info('Creating RADOS grace for action: %s' % action)
        c = CephContainer(
            image=self.image,
            entrypoint=entrypoint,
            args=args,
            volume_mounts=volume_mounts,
            cname=self.get_container_name(desc='grace-%s' % action),
            envs=envs
        )
        return c
414
415##################################
416
f6b5b4d7 417
1911f103
TL
class CephIscsi(object):
    """Defines a Ceph-Iscsi container"""

    daemon_type = 'iscsi'
    entrypoint = '/usr/bin/rbd-target-api'

    required_files = ['iscsi-gateway.cfg']

    def __init__(self,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (str, Union[int, str], Dict, str) -> None
        """Store identity/image and extract file payloads from config_json."""
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        # validate the supplied args
        self.validate()

    @classmethod
    def init(cls, fsid, daemon_id):
        # type: (str, Union[int, str]) -> CephIscsi
        """Construct from the global command-line args (config-json, image)."""
        return cls(fsid, daemon_id, get_parm(args.config_json), args.image)

    @staticmethod
    def get_container_mounts(data_dir, log_dir):
        # type: (str, str) -> Dict[str, str]
        """Map host paths to in-container mount points."""
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
        mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
        mounts[log_dir] = '/var/log/rbd-target-api:z'
        mounts['/dev'] = '/dev'
        return mounts

    @staticmethod
    def get_container_binds():
        # type: () -> List[List[str]]
        """Bind-mount /lib/modules into the container, read-only."""
        binds = []
        lib_modules = ['type=bind',
                       'source=/lib/modules',
                       'destination=/lib/modules',
                       'ro=true']
        binds.append(lib_modules)
        return binds

    @staticmethod
    def get_version(container_id):
        # type: (str) -> Optional[str]
        """Return the installed ceph_iscsi package version, or None."""
        version = None
        out, err, code = call(
            [container_path, 'exec', container_id,
             '/usr/bin/python3', '-c', "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"])
        if code == 0:
            version = out.strip()
        return version

    def validate(self):
        # type: () -> None
        """Raise Error if fsid, daemon_id, image or required files are invalid."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        """Return '<daemon_type>.<daemon_id>'."""
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        """Return the container name, optionally suffixed with desc."""
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ceph-iscsi config...')
        configfs_dir = os.path.join(data_dir, 'configfs')
        makedirs(configfs_dir, uid, gid, 0o755)

        # populate files from the config-json
        for fname in self.files:
            config_file = os.path.join(data_dir, fname)
            config_content = dict_get_join(self.files, fname)
            logger.info('Write file: %s' % (config_file))
            with open(config_file, 'w') as f:
                # owner/mode set before writing so content is never world-readable
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)
                f.write(config_content)

    @staticmethod
    def configfs_mount_umount(data_dir, mount=True):
        # type: (str, bool) -> List[str]
        """Return a (whitespace-split) shell command that mounts or unmounts
        the configfs directory, guarded by a /proc/mounts check.

        NOTE(review): the naive str.split() would mangle a data_dir
        containing spaces -- presumably never the case here; confirm.
        """
        mount_path = os.path.join(data_dir, 'configfs')
        if mount:
            cmd = "if ! grep -qs {0} /proc/mounts; then " \
                  "mount -t configfs none {0}; fi".format(mount_path)
        else:
            cmd = "if grep -qs {0} /proc/mounts; then " \
                  "umount {0}; fi".format(mount_path)
        return cmd.split()

    def get_tcmu_runner_container(self):
        # type: () -> CephContainer
        """Return a sidecar container that runs tcmu-runner instead of
        the rbd-target-api entrypoint."""
        tcmu_container = get_container(self.fsid, self.daemon_type, self.daemon_id)
        tcmu_container.entrypoint = "/usr/bin/tcmu-runner"
        tcmu_container.cname = self.get_container_name(desc='tcmu')
        # remove extra container args for tcmu container.
        # extra args could cause issue with forking service type
        tcmu_container.container_args = []
        return tcmu_container
548
1911f103
TL
549##################################
550
f6b5b4d7 551
f91f0fd5
TL
class CustomContainer(object):
    """Defines a custom container"""
    daemon_type = 'container'

    def __init__(self, fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        """Store identity/image and extract all container settings
        from config_json (all optional except via external callers)."""
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.entrypoint = dict_get(config_json, 'entrypoint')
        self.uid = dict_get(config_json, 'uid', 65534) # nobody
        self.gid = dict_get(config_json, 'gid', 65534) # nobody
        self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
        self.args = dict_get(config_json, 'args', [])
        self.envs = dict_get(config_json, 'envs', [])
        self.privileged = dict_get(config_json, 'privileged', False)
        self.bind_mounts = dict_get(config_json, 'bind_mounts', [])
        self.ports = dict_get(config_json, 'ports', [])
        self.dirs = dict_get(config_json, 'dirs', [])
        self.files = dict_get(config_json, 'files', {})

    @classmethod
    def init(cls, fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer':
        """Construct from the global command-line args (config-json, image)."""
        return cls(fsid, daemon_id, get_parm(args.config_json), args.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """
        Create dirs/files below the container data directory.
        """
        logger.info('Creating custom container configuration '
                    'dirs/files in {} ...'.format(data_dir))

        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % data_dir)

        for dir_path in self.dirs:
            logger.info('Creating directory: {}'.format(dir_path))
            dir_path = os.path.join(data_dir, dir_path.strip('/'))
            makedirs(dir_path, uid, gid, 0o755)

        for file_path in self.files:
            logger.info('Creating file: {}'.format(file_path))
            content = dict_get_join(self.files, file_path)
            file_path = os.path.join(data_dir, file_path.strip('/'))
            with open(file_path, 'w', encoding='utf-8') as f:
                # owner/mode set before writing so content is never world-readable
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)
                f.write(content)

    def get_daemon_args(self) -> List[str]:
        """Custom containers take no daemon-level arguments."""
        return []

    def get_container_args(self) -> List[str]:
        """Extra arguments passed to the container runtime."""
        return self.args

    def get_container_envs(self) -> List[str]:
        """Environment variables to set inside the container."""
        return self.envs

    def get_container_mounts(self, data_dir: str) -> Dict[str, str]:
        """
        Get the volume mounts. Relative source paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

        Example:
        {
            /foo/conf: /conf
            foo/conf: /conf
        }
        becomes
        {
            /foo/conf: /conf
            /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
        }
        """
        mounts = {}
        for source, destination in self.volume_mounts.items():
            # os.path.join leaves absolute sources untouched
            source = os.path.join(data_dir, source)
            mounts[source] = destination
        return mounts

    def get_container_binds(self, data_dir: str) -> List[List[str]]:
        """
        Get the bind mounts. Relative `source=...` paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

        Example:
        [
            'type=bind',
            'source=lib/modules',
            'destination=/lib/modules',
            'ro=true'
        ]
        becomes
        [
            ...
            'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
            ...
        ]
        """
        # NOTE(review): .copy() is shallow, so the inner lists of
        # self.bind_mounts are rewritten in place here; harmless in
        # practice because re-joining an absolute path is a no-op,
        # but worth confirming if this is ever called with varying
        # data_dir values.
        binds = self.bind_mounts.copy()
        for bind in binds:
            for index, value in enumerate(bind):
                match = re.match(r'^source=(.+)$', value)
                if match:
                    bind[index] = 'source={}'.format(os.path.join(
                        data_dir, match.group(1)))
        return binds
661
662##################################
663
664
def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
    """
    Helper function to get a key from a dictionary.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :param default: The default value in case the key does not
        exist. Default is `None`.
    :param require: Set to `True` if the key is required. An
        exception will be raised if the key does not exist in
        the given dictionary.
    :return: Returns the value of the given key.
    :raises: :exc:`self.Error` if the given key does not exist
        and `require` is set to `True`.
    """
    # idiomatic membership test -- 'key not in d' instead of 'd.keys()'
    if require and key not in d:
        raise Error('{} missing from dict'.format(key))
    return d.get(key, default)
682
683##################################
684
685
def dict_get_join(d: Dict, key: str) -> Any:
    """
    Helper function to get the value of a given key from a dictionary.
    `List` values will be converted to a string by joining them with a
    line break.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :return: Returns the value of the given key. If it was a `list`, it
        will be joining with a line break.
    """
    value = d.get(key)
    if isinstance(value, list):
        return '\n'.join(map(str, value))
    return value
700
701##################################
702
703
def get_supported_daemons():
    # type: () -> List[str]
    """Return every daemon type this script knows how to deploy."""
    supported_daemons = list(Ceph.daemons)
    supported_daemons += list(Monitoring.components)
    supported_daemons += [NFSGanesha.daemon_type,
                          CephIscsi.daemon_type,
                          CustomContainer.daemon_type]
    # daemon type names must be globally unique
    assert len(supported_daemons) == len(set(supported_daemons))
    return supported_daemons
713
714##################################
715
ec96510d
FG
class PortOccupiedError(Error):
    """Raised by attempt_bind() when the requested port is already bound."""
718
f6b5b4d7 719
def attempt_bind(s, address, port):
    # type: (socket.socket, str, int) -> None
    """Try to bind socket *s* to (address, port).

    The socket is always closed before returning.  Raises
    PortOccupiedError when the address is already in use; any
    other bind failure is re-raised unchanged.
    """
    try:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((address, port))
    except (socket.error, OSError) as e:  # py2 and py3
        if e.errno != errno.EADDRINUSE:
            raise e
        msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
        logger.warning(msg)
        raise PortOccupiedError(msg)
    finally:
        s.close()
734
f6b5b4d7 735
def port_in_use(port_num):
    # type: (int) -> bool
    """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
    logger.info('Verifying port %d ...' % port_num)

    def _port_in_use(af, address):
        # type: (socket.AddressFamily, str) -> bool
        try:
            sock = socket.socket(af, socket.SOCK_STREAM)
            attempt_bind(sock, address, port_num)
        except PortOccupiedError:
            return True
        except OSError as e:
            # One of the two address families may be intentionally
            # disabled on this host; that is not an error.
            if e.errno not in (errno.EAFNOSUPPORT, errno.EADDRNOTAVAIL):
                raise e
            return False
        return False

    return any(_port_in_use(af, address) for af, address in (
        (socket.AF_INET, '0.0.0.0'),
        (socket.AF_INET6, '::')
    ))
9f95a23c 760
f6b5b4d7 761
def check_ip_port(ip, port):
    # type: (str, int) -> None
    """Verify ip:port can be bound locally, unless --skip-ping-check."""
    if args.skip_ping_check:
        return
    logger.info('Verifying IP %s port %d ...' % (ip, port))
    if is_ipv6(ip):
        s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
        ip = unwrap_ipv6(ip)
    else:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    attempt_bind(s, ip, port)
9f95a23c
TL
772
773##################################
774
775# this is an abbreviated version of
776# https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py
777# that drops all of the compatibility (this is Unix/Linux only).
778
# Python 2 has no builtin TimeoutError; alias it to OSError so the
# Timeout/FileLock classes below work on both major versions.
try:
    TimeoutError
except NameError:
    TimeoutError = OSError
783
f6b5b4d7 784
9f95a23c
TL
class Timeout(TimeoutError):
    """
    Raised when the lock could not be acquired in *timeout*
    seconds.
    """

    def __init__(self, lock_file):
        """Remember which lock file timed out."""
        #: The path of the file lock.
        self.lock_file = lock_file

    def __str__(self):
        return "The file lock '{}' could not be acquired.".format(self.lock_file)
802
803
804class _Acquire_ReturnProxy(object):
805 def __init__(self, lock):
806 self.lock = lock
807 return None
808
809 def __enter__(self):
810 return self.lock
811
812 def __exit__(self, exc_type, exc_value, traceback):
813 self.lock.release()
814 return None
815
816
class FileLock(object):
    """flock(2)-based advisory inter-process lock with nested (counted)
    acquire/release semantics.  Lock files are created under LOCK_DIR
    and are deliberately never deleted (see _release())."""

    def __init__(self, name, timeout=-1):
        # Lock files live in LOCK_DIR; create it on first use.
        if not os.path.exists(LOCK_DIR):
            os.mkdir(LOCK_DIR, 0o700)
        self._lock_file = os.path.join(LOCK_DIR, name + '.lock')

        # The file descriptor for the *_lock_file* as it is returned by the
        # os.open() function.
        # This file lock is only NOT None, if the object currently holds the
        # lock.
        self._lock_file_fd = None
        self.timeout = timeout
        # The lock counter is used for implementing the nested locking
        # mechanism. Whenever the lock is acquired, the counter is increased and
        # the lock is only released, when this value is 0 again.
        self._lock_counter = 0
        return None

    @property
    def is_locked(self):
        # True while this object holds the flock.
        return self._lock_file_fd is not None

    def acquire(self, timeout=None, poll_intervall=0.05):
        """
        Acquires the file lock or fails with a :exc:`Timeout` error.
        .. code-block:: python
            # You can use this method in the context manager (recommended)
            with lock.acquire():
                pass
            # Or use an equivalent try-finally construct:
            lock.acquire()
            try:
                pass
            finally:
                lock.release()
        :arg float timeout:
            The maximum time waited for the file lock.
            If ``timeout < 0``, there is no timeout and this method will
            block until the lock could be acquired.
            If ``timeout`` is None, the default :attr:`~timeout` is used.
        :arg float poll_intervall:
            We check once in *poll_intervall* seconds if we can acquire the
            file lock.
        :raises Timeout:
            if the lock could not be acquired in *timeout* seconds.
        .. versionchanged:: 2.0.0
            This method returns now a *proxy* object instead of *self*,
            so that it can be used in a with statement without side effects.
        """
        # Use the default timeout, if no timeout is provided.
        if timeout is None:
            timeout = self.timeout

        # Increment the number right at the beginning.
        # We can still undo it, if something fails.
        self._lock_counter += 1

        lock_id = id(self)
        lock_filename = self._lock_file
        start_time = time.time()
        try:
            # Poll until the flock succeeds or the deadline passes.
            while True:
                if not self.is_locked:
                    logger.debug('Acquiring lock %s on %s', lock_id,
                                 lock_filename)
                    self._acquire()

                if self.is_locked:
                    logger.debug('Lock %s acquired on %s', lock_id,
                                 lock_filename)
                    break
                elif timeout >= 0 and time.time() - start_time > timeout:
                    logger.warning('Timeout acquiring lock %s on %s', lock_id,
                                   lock_filename)
                    raise Timeout(self._lock_file)
                else:
                    logger.debug(
                        'Lock %s not acquired on %s, waiting %s seconds ...',
                        lock_id, lock_filename, poll_intervall
                    )
                    time.sleep(poll_intervall)
        except: # noqa
            # Something did go wrong, so decrement the counter.
            self._lock_counter = max(0, self._lock_counter - 1)

            raise
        return _Acquire_ReturnProxy(lock = self)

    def release(self, force=False):
        """
        Releases the file lock.
        Please note, that the lock is only completly released, if the lock
        counter is 0.
        Also note, that the lock file itself is not automatically deleted.
        :arg bool force:
            If true, the lock counter is ignored and the lock is released in
            every case.
        """
        if self.is_locked:
            self._lock_counter -= 1

            if self._lock_counter == 0 or force:
                lock_id = id(self)
                lock_filename = self._lock_file

                logger.debug('Releasing lock %s on %s', lock_id, lock_filename)
                self._release()
                self._lock_counter = 0
                logger.debug('Lock %s released on %s', lock_id, lock_filename)

        return None

    def __enter__(self):
        self.acquire()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.release()
        return None

    def __del__(self):
        # Force-release on garbage collection so a dropped FileLock
        # never leaves the flock held.
        self.release(force=True)
        return None

    def _acquire(self):
        # Take the flock non-blocking; on failure leave _lock_file_fd
        # as None so the acquire() loop retries.
        open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
        fd = os.open(self._lock_file, open_mode)

        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except (IOError, OSError):
            os.close(fd)
        else:
            self._lock_file_fd = fd
        return None

    def _release(self):
        # Do not remove the lockfile:
        #
        #   https://github.com/benediktschmitt/py-filelock/issues/31
        #   https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
        fd = self._lock_file_fd
        self._lock_file_fd = None
        fcntl.flock(fd, fcntl.LOCK_UN)  # type: ignore
        os.close(fd)  # type: ignore
        return None
963
964
965##################################
966# Popen wrappers, lifted from ceph-volume
967
adb31ebb
TL
class CallVerbosity(Enum):
    """How much of a subprocess's stdout/stderr the call() helper logs."""
    # no stdout/stderr logging at all
    SILENT = 0
    # log stdout/stderr to logger.debug
    DEBUG = 1
    # On a non-zero exit status, it will forcefully set
    # logging ON for the terminal
    VERBOSE_ON_FAILURE = 2
    # log at info (instead of debug) level.
    VERBOSE = 3
977
978
def call(command: List[str],
         desc: Optional[str] = None,
         verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
         timeout: Optional[int] = DEFAULT_TIMEOUT,
         **kwargs) -> Tuple[str, str, int]:
    """
    Wrap subprocess.Popen to

    - log stdout/stderr to a logger,
    - decode utf-8
    - cleanly return out, err, returncode

    :param command: command and arguments, passed to Popen without a shell
    :param desc: human-readable prefix for log lines (defaults to command[0])
    :param verbosity: log level policy for the child's output lines
    :param timeout: timeout in seconds
    :param kwargs: forwarded verbatim to subprocess.Popen
    :return: (stdout, stderr, returncode), output decoded as utf-8
    """
    if desc is None:
        desc = command[0]
    if desc:
        desc += ': '
    # fall back to the global --timeout argument when none was given
    timeout = timeout or args.timeout

    logger.debug("Running command: %s" % ' '.join(command))
    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        close_fds=True,
        **kwargs
    )
    # get current p.stdout flags, add O_NONBLOCK
    # (reads below must not block so one loop can honor the timeout and
    # drain both pipes as data arrives)
    assert process.stdout is not None
    assert process.stderr is not None
    stdout_flags = fcntl.fcntl(process.stdout, fcntl.F_GETFL)
    stderr_flags = fcntl.fcntl(process.stderr, fcntl.F_GETFL)
    fcntl.fcntl(process.stdout, fcntl.F_SETFL, stdout_flags | os.O_NONBLOCK)
    fcntl.fcntl(process.stderr, fcntl.F_SETFL, stderr_flags | os.O_NONBLOCK)

    out = ''
    err = ''
    reads = None
    stop = False
    out_buffer = ''  # partial line (no newline yet)
    err_buffer = ''  # partial line (no newline yet)
    start_time = time.time()
    end_time = None
    if timeout:
        end_time = start_time + timeout
    while not stop:
        if end_time and (time.time() >= end_time):
            stop = True
            if process.poll() is None:
                logger.info(desc + 'timeout after %s seconds' % timeout)
                process.kill()
        if reads and process.poll() is not None:
            # we want to stop, but first read off anything remaining
            # on stdout/stderr
            stop = True
        else:
            reads, _, _ = select.select(
                [process.stdout.fileno(), process.stderr.fileno()],
                [], [], timeout
            )
        for fd in reads:
            try:
                message_b = os.read(fd, 1024)
                if isinstance(message_b, bytes):
                    message = message_b.decode('utf-8')
                if isinstance(message_b, str):
                    message = message_b
                if stop and message:
                    # process has terminated, but have more to read still, so not stopping yet
                    # (os.read returns '' when it encounters EOF)
                    stop = False
                if not message:
                    continue
                if fd == process.stdout.fileno():
                    out += message
                    # log only complete lines; keep the trailing partial line
                    # buffered until its newline (or EOF) arrives
                    message = out_buffer + message
                    lines = message.split('\n')
                    out_buffer = lines.pop()
                    for line in lines:
                        if verbosity == CallVerbosity.VERBOSE:
                            logger.info(desc + 'stdout ' + line)
                        elif verbosity != CallVerbosity.SILENT:
                            logger.debug(desc + 'stdout ' + line)
                elif fd == process.stderr.fileno():
                    err += message
                    message = err_buffer + message
                    lines = message.split('\n')
                    err_buffer = lines.pop()
                    for line in lines:
                        if verbosity == CallVerbosity.VERBOSE:
                            logger.info(desc + 'stderr ' + line)
                        elif verbosity != CallVerbosity.SILENT:
                            logger.debug(desc + 'stderr ' + line)
                else:
                    assert False
            except (IOError, OSError):
                # non-blocking read may raise EAGAIN; just retry next loop
                pass
        if verbosity == CallVerbosity.VERBOSE:
            logger.debug(desc + 'profile rt=%s, stop=%s, exit=%s, reads=%s'
                         % (time.time()-start_time, stop, process.poll(), reads))

    returncode = process.wait()

    # flush any final partial line left in the buffers
    if out_buffer != '':
        if verbosity == CallVerbosity.VERBOSE:
            logger.info(desc + 'stdout ' + out_buffer)
        elif verbosity != CallVerbosity.SILENT:
            logger.debug(desc + 'stdout ' + out_buffer)
    if err_buffer != '':
        if verbosity == CallVerbosity.VERBOSE:
            logger.info(desc + 'stderr ' + err_buffer)
        elif verbosity != CallVerbosity.SILENT:
            logger.debug(desc + 'stderr ' + err_buffer)

    if returncode != 0 and verbosity == CallVerbosity.VERBOSE_ON_FAILURE:
        # dump stdout + stderr
        logger.info('Non-zero exit code %d from %s' % (returncode, ' '.join(command)))
        for line in out.splitlines():
            logger.info(desc + 'stdout ' + line)
        for line in err.splitlines():
            logger.info(desc + 'stderr ' + line)

    return out, err, returncode
1103
1104
adb31ebb
TL
def call_throws(command: List[str],
                desc: Optional[str] = None,
                verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
                timeout: Optional[int] = DEFAULT_TIMEOUT,
                **kwargs) -> Tuple[str, str, int]:
    """Run *command* via call() and raise RuntimeError on a non-zero exit."""
    result = call(command, desc, verbosity, timeout, **kwargs)
    returncode = result[2]
    if returncode != 0:
        raise RuntimeError('Failed command: %s' % ' '.join(command))
    return result
1114
1115
def call_timeout(command, timeout):
    # type: (List[str], int) -> int
    """Run *command* directly (no output capture), enforcing a timeout.

    :raises TimeoutExpired: if the command runs longer than *timeout* seconds
    :return: the command's exit code
    """
    logger.debug('Running command (timeout=%s): %s'
                 % (timeout, ' '.join(command)))

    def raise_timeout(command, timeout):
        # type: (List[str], int) -> NoReturn
        msg = 'Command \'%s\' timed out after %s seconds' % (command, timeout)
        logger.debug(msg)
        raise TimeoutExpired(msg)

    def call_timeout_py2(command, timeout):
        # type: (List[str], int) -> int
        # py2 subprocess lacks a timeout argument; emulate one by waiting
        # for the child in a helper thread and killing it if it outlives
        # the deadline.
        proc = subprocess.Popen(command)
        thread = Thread(target=proc.wait)
        thread.start()
        thread.join(timeout)
        if thread.is_alive():
            proc.kill()
            thread.join()
            raise_timeout(command, timeout)
        return proc.returncode

    def call_timeout_py3(command, timeout):
        # type: (List[str], int) -> int
        try:
            return subprocess.call(command, timeout=timeout)
        except subprocess.TimeoutExpired as e:
            raise_timeout(command, timeout)

    ret = 1
    if sys.version_info >= (3, 3):
        ret = call_timeout_py3(command, timeout)
    else:
        # py2 subprocess has no timeout arg
        ret = call_timeout_py2(command, timeout)
    return ret
1154
1155##################################
1156
f6b5b4d7 1157
9f95a23c
TL
def is_available(what, func):
    # type: (str, Callable[[], bool]) -> None
    """
    Wait for a service to become available

    :param what: the name of the service
    :param func: the callable object that determines availability
    :raises Error: if the service is still unavailable after args.retry tries
    """
    retry = args.retry
    logger.info('Waiting for %s...' % what)
    num = 1
    while True:
        if func():
            logger.info('%s is available'
                        % what)
            break
        elif num > retry:
            raise Error('%s not available after %s tries'
                        % (what, retry))

        logger.info('%s not available, waiting (%s/%s)...'
                    % (what, num, retry))

        num += 1
        # poll once per second until the retry budget is exhausted
        time.sleep(1)
1183
1184
def read_config(fn):
    # type: (Optional[str]) -> ConfigParser
    """Parse an INI-style ceph config file.

    :param fn: path to the config file, or None for an empty parser
    :return: a ConfigParser loaded with the (whitespace-normalized) contents
    """
    # bend over backwards here because py2's ConfigParser doesn't like
    # whitespace before config option names (e.g., '\n foo = bar\n').
    # Yeesh!
    if sys.version_info >= (3, 2):
        cp = ConfigParser()
    else:
        cp = SafeConfigParser()

    if fn:
        with open(fn, 'r') as f:
            raw_conf = f.read()
        # strip leading whitespace from every line so the parser copes
        nice_conf = re.sub(r'\n(\s)+', r'\n', raw_conf)
        s_io = StringIO(nice_conf)
        if sys.version_info >= (3, 2):
            cp.read_file(s_io)
        else:
            cp.readfp(s_io)

    return cp
1206
f6b5b4d7 1207
9f95a23c
TL
def pathify(p):
    # type: (str) -> str
    """Return *p* with '~' expanded and normalized to an absolute path."""
    return os.path.abspath(os.path.expanduser(p))
9f95a23c 1212
f6b5b4d7 1213
def get_file_timestamp(fn):
    # type: (str) -> Optional[str]
    """Return *fn*'s mtime rendered as DATEFMT in UTC, or None on any error."""
    try:
        mtime = os.path.getmtime(fn)
        stamp = datetime.datetime.fromtimestamp(
            mtime, tz=datetime.timezone.utc)
        return stamp.strftime(DATEFMT)
    except Exception:
        # missing file, permission problem, ... -- caller treats as unknown
        return None
1223
f6b5b4d7 1224
def try_convert_datetime(s):
    # type: (str) -> Optional[str]
    """Normalize a docker/podman timestamp string to DATEFMT in UTC.

    :return: the normalized timestamp, or None if *s* cannot be parsed
    """
    # This is super irritating because
    #  1) podman and docker use different formats
    #  2) python's strptime can't parse either one
    #
    # I've seen:
    #  docker 18.09.7:  2020-03-03T09:21:43.636153304Z
    #  podman 1.7.0:    2020-03-03T15:52:30.136257504-06:00
    #                   2020-03-03 15:52:30.136257504 -0600 CST
    # (In the podman case, there is a different string format for
    # 'inspect' and 'inspect --format {{.Created}}'!!)

    # In *all* cases, the 9 digit second precision is too much for
    # python's strptime.  Shorten it to 6 digits.
    p = re.compile(r'(\.[\d]{6})[\d]*')
    s = p.sub(r'\1', s)

    # replace trailing Z with -0000, since (on python 3.6.8) it won't parse
    if s and s[-1] == 'Z':
        s = s[:-1] + '-0000'

    # cut off the redundant 'CST' part that strptime can't parse, if
    # present.
    v = s.split(' ')
    s = ' '.join(v[0:3])

    # try parsing with several format strings
    fmts = [
        '%Y-%m-%dT%H:%M:%S.%f%z',
        '%Y-%m-%d %H:%M:%S.%f %z',
    ]
    for f in fmts:
        try:
            # return timestamp normalized to UTC, rendered as DATEFMT.
            return datetime.datetime.strptime(s, f).astimezone(tz=datetime.timezone.utc).strftime(DATEFMT)
        except ValueError:
            pass
    return None
1264
f6b5b4d7 1265
9f95a23c
TL
def get_podman_version():
    # type: () -> Tuple[int, ...]
    """Return the podman version as a tuple of ints.

    :raises ValueError: if the configured container engine is not podman
    """
    if 'podman' not in container_path:
        raise ValueError('not using podman')
    version_out = call_throws([container_path, '--version'])[0]
    return _parse_podman_version(version_out)
1272
f6b5b4d7 1273
9f95a23c
TL
1274def _parse_podman_version(out):
1275 # type: (str) -> Tuple[int, ...]
1276 _, _, version_str = out.strip().split()
1277
1278 def to_int(val, org_e=None):
1279 if not val and org_e:
1280 raise org_e
1281 try:
1282 return int(val)
1283 except ValueError as e:
1284 return to_int(val[0:-1], org_e or e)
1285
1286 return tuple(map(to_int, version_str.split('.')))
1287
1288
1289def get_hostname():
1290 # type: () -> str
1291 return socket.gethostname()
1292
f6b5b4d7 1293
9f95a23c
TL
def get_fqdn():
    # type: () -> str
    """Return the fully-qualified domain name, falling back to the hostname."""
    return socket.getfqdn() or socket.gethostname()
1297
f6b5b4d7 1298
9f95a23c
TL
def get_arch():
    # type: () -> str
    """Return the machine architecture (e.g. 'x86_64')."""
    return platform.uname().machine
1302
f6b5b4d7 1303
9f95a23c
TL
def generate_service_id():
    # type: () -> str
    """Return '<hostname>.<6 random lowercase letters>'."""
    suffix = ''.join(random.choice(string.ascii_lowercase) for _ in range(6))
    return get_hostname() + '.' + suffix
1308
f6b5b4d7 1309
9f95a23c
TL
def generate_password():
    # type: () -> str
    """Return a random 10-character password of lowercase letters and digits.

    Uses random.SystemRandom (backed by the OS CSPRNG) rather than the
    default Mersenne-Twister generator, since the result is used as a
    credential and must not be predictable.
    """
    rng = random.SystemRandom()
    return ''.join(rng.choice(string.ascii_lowercase + string.digits)
                   for _ in range(10))
1314
f6b5b4d7 1315
9f95a23c
TL
def normalize_container_id(i):
    # type: (str) -> str
    """Strip a leading 'sha256:' from a container id, if present.

    docker adds the sha256: prefix while podman does not; both always
    use sha256, so dropping the prefix keeps ids comparable.
    """
    prefix = 'sha256:'
    return i[len(prefix):] if i.startswith(prefix) else i
1326
f6b5b4d7 1327
9f95a23c
TL
def make_fsid():
    # type: () -> str
    """Generate a new cluster fsid (a time-based UUID1, as a string)."""
    return str(uuid.uuid1())
1331
f6b5b4d7 1332
9f95a23c
TL
def is_fsid(s):
    # type: (str) -> bool
    """Return True if *s* parses as a UUID."""
    try:
        uuid.UUID(s)
        return True
    except ValueError:
        return False
1340
f6b5b4d7 1341
9f95a23c
TL
def infer_fsid(func):
    """
    Decorator: if we only find a single fsid in /var/lib/ceph/*, use that

    Sets args.fsid as a side effect; raises Error when multiple candidate
    fsids exist and none was specified.
    """
    @wraps(func)
    def _infer_fsid():
        if args.fsid:
            logger.debug('Using specified fsid: %s' % args.fsid)
            return func()

        fsids_set = set()
        daemon_list = list_daemons(detail=False)
        for daemon in daemon_list:
            if not is_fsid(daemon['fsid']):
                # 'unknown' fsid
                continue
            elif 'name' not in args or not args.name:
                # args.name not specified
                fsids_set.add(daemon['fsid'])
            elif daemon['name'] == args.name:
                # args.name is a match
                fsids_set.add(daemon['fsid'])
        fsids = sorted(fsids_set)

        if not fsids:
            # some commands do not always require an fsid
            pass
        elif len(fsids) == 1:
            logger.info('Inferring fsid %s' % fsids[0])
            args.fsid = fsids[0]
        else:
            raise Error('Cannot infer an fsid, one must be specified: %s' % fsids)
        return func()

    return _infer_fsid
1377
f6b5b4d7 1378
e306af50
TL
def infer_config(func):
    """
    Decorator: if we find a MON daemon, use the config from that container

    Falls back to SHELL_DEFAULT_CONF when present; sets args.config as a
    side effect.
    """
    @wraps(func)
    def _infer_config():
        if args.config:
            logger.debug('Using specified config: %s' % args.config)
            return func()
        config = None
        if args.fsid:
            name = args.name
            if not name:
                # pick any mon daemon of this cluster
                daemon_list = list_daemons(detail=False)
                for daemon in daemon_list:
                    if daemon['name'].startswith('mon.'):
                        name = daemon['name']
                        break
            if name:
                config = '/var/lib/ceph/{}/{}/config'.format(args.fsid, name)
        if config:
            logger.info('Inferring config %s' % config)
            args.config = config
        elif os.path.exists(SHELL_DEFAULT_CONF):
            logger.debug('Using default config: %s' % SHELL_DEFAULT_CONF)
            args.config = SHELL_DEFAULT_CONF
        return func()

    return _infer_config
1408
f6b5b4d7 1409
1911f103
TL
def _get_default_image():
    """Return DEFAULT_IMAGE, warning loudly when it is a development build."""
    if DEFAULT_IMAGE_IS_MASTER:
        warn = '''This is a development version of cephadm.
For information regarding the latest stable release:
  https://docs.ceph.com/docs/{}/cephadm/install
'''.format(LATEST_STABLE_RELEASE)
        for line in warn.splitlines():
            logger.warning('{}{}{}'.format(termcolor.yellow, line, termcolor.end))
    return DEFAULT_IMAGE
1419
f6b5b4d7 1420
9f95a23c
TL
def infer_image(func):
    """
    Decorator: use the most recent ceph image

    Resolution order: existing args.image, CEPHADM_IMAGE environment
    variable, most recent locally pulled image, built-in default.
    """
    @wraps(func)
    def _infer_image():
        if not args.image:
            args.image = os.environ.get('CEPHADM_IMAGE')
            if not args.image:
                args.image = get_last_local_ceph_image() or _get_default_image()
        return func()

    return _infer_image
1436
f6b5b4d7 1437
9f95a23c
TL
def default_image(func):
    """Decorator: populate args.image from the daemon type, environment,
    or the built-in default.

    Monitoring daemons (prometheus, grafana, ...) get their component
    image; everything else falls back to CEPHADM_IMAGE, then the default.
    """
    @wraps(func)
    def _default_image():
        if not args.image:
            if 'name' in args and args.name:
                type_ = args.name.split('.', 1)[0]
                if type_ in Monitoring.components:
                    args.image = Monitoring.components[type_]['image']
            if not args.image:
                args.image = os.environ.get('CEPHADM_IMAGE')
            if not args.image:
                args.image = _get_default_image()

        return func()

    return _default_image
1454
f6b5b4d7 1455
9f95a23c
TL
def get_last_local_ceph_image():
    """
    :return: The most recent local ceph image (already pulled)
    """
    # list only non-dangling images that carry the ceph=True label,
    # rendered as repo@digest so the result is pullable by digest
    out, _, _ = call_throws(
        [container_path, 'images',
         '--filter', 'label=ceph=True',
         '--filter', 'dangling=false',
         '--format', '{{.Repository}}@{{.Digest}}'])
    return _filter_last_local_ceph_image(out)
1466
1467
def _filter_last_local_ceph_image(out):
    # str -> Optional[str]
    """Pick the first usable image from `images --format repo@digest` output.

    Lines ending in '@' have no digest and are skipped.
    """
    for image in out.splitlines():
        if not image or image.endswith('@'):
            continue
        logger.info('Using recent ceph image %s' % image)
        return image
    return None
1475
f6b5b4d7 1476
def write_tmp(s, uid, gid):
    # type: (str, int, int) -> Any
    """Write *s* to a NamedTemporaryFile owned by uid:gid and return it.

    The handle stays open (the file disappears once it is closed or
    garbage-collected), so the caller must keep a reference.
    """
    tmp_f = tempfile.NamedTemporaryFile(mode='w', prefix='ceph-tmp')
    os.fchown(tmp_f.fileno(), uid, gid)
    tmp_f.write(s)
    tmp_f.flush()
    return tmp_f
1486
f6b5b4d7 1487
9f95a23c
TL
def makedirs(dir, uid, gid, mode):
    # type: (str, int, int, int) -> None
    """Create *dir* if needed and force its ownership and mode."""
    if os.path.exists(dir):
        os.chmod(dir, mode)
    else:
        os.makedirs(dir, mode=mode)
    os.chown(dir, uid, gid)
    # os.makedirs applies the umask, so set the mode explicitly again
    os.chmod(dir, mode)
1496
f6b5b4d7 1497
9f95a23c
TL
def get_data_dir(fsid, t, n):
    # type: (str, str, Union[int, str]) -> str
    """Return a daemon's data dir: <data_dir>/<fsid>/<type>.<id>."""
    return os.path.join(args.data_dir, fsid, '{}.{}'.format(t, n))
1501
f6b5b4d7 1502
9f95a23c
TL
def get_log_dir(fsid):
    # type: (str) -> str
    """Return the per-cluster log directory: <log_dir>/<fsid>."""
    return os.path.join(args.log_dir, fsid)
1506
f6b5b4d7 1507
9f95a23c
TL
def make_data_dir_base(fsid, uid, gid):
    # type: (str, int, int) -> str
    """Create <data_dir>/<fsid> plus its crash/ subdirs; return the base path."""
    data_dir_base = os.path.join(args.data_dir, fsid)
    makedirs(data_dir_base, uid, gid, DATA_DIR_MODE)
    makedirs(os.path.join(data_dir_base, 'crash'), uid, gid, DATA_DIR_MODE)
    makedirs(os.path.join(data_dir_base, 'crash', 'posted'), uid, gid,
             DATA_DIR_MODE)
    return data_dir_base
1516
f6b5b4d7 1517
def make_data_dir(fsid, daemon_type, daemon_id, uid=None, gid=None):
    # type: (str, str, Union[int, str], Optional[int], Optional[int]) -> str
    """Create the data dir for one daemon; return its path.

    uid/gid default to the ceph ids extracted from the container image.
    """
    if uid is None or gid is None:
        uid, gid = extract_uid_gid()
    make_data_dir_base(fsid, uid, gid)
    data_dir = get_data_dir(fsid, daemon_type, daemon_id)
    makedirs(data_dir, uid, gid, DATA_DIR_MODE)
    return data_dir
1526
f6b5b4d7 1527
def make_log_dir(fsid, uid=None, gid=None):
    # type: (str, Optional[int], Optional[int]) -> str
    """Create the per-cluster log dir; return its path.

    uid/gid default to the ceph ids extracted from the container image.
    """
    if uid is None or gid is None:
        uid, gid = extract_uid_gid()
    log_dir = get_log_dir(fsid)
    makedirs(log_dir, uid, gid, LOG_DIR_MODE)
    return log_dir
1535
f6b5b4d7 1536
9f95a23c
TL
def make_var_run(fsid, uid, gid):
    # type: (str, int, int) -> None
    """Create /var/run/ceph/<fsid> (mode 0770, owned uid:gid) via install(1)."""
    call_throws(['install', '-d', '-m0770', '-o', str(uid), '-g', str(gid),
                 '/var/run/ceph/%s' % fsid])
1541
f6b5b4d7 1542
def copy_tree(src, dst, uid=None, gid=None):
    # type: (List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy a directory tree from src to dst

    Each source dir is copied (replacing any existing copy) and the whole
    result is chowned to uid:gid, which default to the ceph container ids.
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid()

    for src_dir in src:
        dst_dir = dst
        if os.path.isdir(dst):
            dst_dir = os.path.join(dst, os.path.basename(src_dir))

        logger.debug('copy directory \'%s\' -> \'%s\'' % (src_dir, dst_dir))
        # copytree requires the target to be absent, so remove stale copies
        shutil.rmtree(dst_dir, ignore_errors=True)
        shutil.copytree(src_dir, dst_dir)  # dirs_exist_ok needs python 3.8

        for dirpath, dirnames, filenames in os.walk(dst_dir):
            logger.debug('chown %s:%s \'%s\'' % (uid, gid, dirpath))
            os.chown(dirpath, uid, gid)
            for filename in filenames:
                logger.debug('chown %s:%s \'%s\'' % (uid, gid, filename))
                os.chown(os.path.join(dirpath, filename), uid, gid)
1566
1567
def copy_files(src, dst, uid=None, gid=None):
    # type: (List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy a files from src to dst

    Each file ends up owned by uid:gid, which default to the ceph ids
    extracted from the container image.
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid()

    for src_file in src:
        if os.path.isdir(dst):
            dst_file = os.path.join(dst, os.path.basename(src_file))
        else:
            dst_file = dst

        logger.debug('copy file \'%s\' -> \'%s\'' % (src_file, dst_file))
        shutil.copyfile(src_file, dst_file)

        logger.debug('chown %s:%s \'%s\'' % (uid, gid, dst_file))
        os.chown(dst_file, uid, gid)
1586
f6b5b4d7 1587
def move_files(src, dst, uid=None, gid=None):
    # type: (List[str], str, Optional[int], Optional[int]) -> None
    """
    Move files from src to dst

    Symlinks are recreated (not followed) at the destination; regular
    files are moved and chowned to uid:gid.
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid()

    for src_file in src:
        dst_file = dst
        if os.path.isdir(dst):
            dst_file = os.path.join(dst, os.path.basename(src_file))

        if os.path.islink(src_file):
            # shutil.move() in py2 does not handle symlinks correctly
            src_rl = os.readlink(src_file)
            logger.debug("symlink '%s' -> '%s'" % (dst_file, src_rl))
            os.symlink(src_rl, dst_file)
            os.unlink(src_file)
        else:
            logger.debug("move file '%s' -> '%s'" % (src_file, dst_file))
            shutil.move(src_file, dst_file)
            logger.debug('chown %s:%s \'%s\'' % (uid, gid, dst_file))
            os.chown(dst_file, uid, gid)
1612
f6b5b4d7 1613
9f95a23c
TL
1614## copied from distutils ##
def find_executable(executable, path=None):
    """Tries to find 'executable' in the directories listed in 'path'.
    A string listing directories separated by 'os.pathsep'; defaults to
    os.environ['PATH']. Returns the complete filename or None if not found.
    """
    _, ext = os.path.splitext(executable)
    if (sys.platform == 'win32') and (ext != '.exe'):
        executable = executable + '.exe'

    # an existing path (absolute or relative) wins outright
    if os.path.isfile(executable):
        return executable

    if path is None:
        path = os.environ.get('PATH', None)
        if path is None:
            try:
                path = os.confstr("CS_PATH")
            except (AttributeError, ValueError):
                # os.confstr() or CS_PATH is not available
                path = os.defpath
        # bpo-35755: Don't use os.defpath if the PATH environment variable is
        # set to an empty string

    # PATH='' doesn't match, whereas PATH=':' looks in the current directory
    if not path:
        return None

    for directory in path.split(os.pathsep):
        candidate = os.path.join(directory, executable)
        if os.path.isfile(candidate):
            # the file exists, we have a shot at spawn working
            return candidate
    return None
1649
f6b5b4d7 1650
9f95a23c
TL
def find_program(filename):
    # type: (str) -> str
    """Like find_executable(), but raise ValueError instead of returning None."""
    location = find_executable(filename)
    if location is None:
        raise ValueError('%s not found' % filename)
    return location
1657
f6b5b4d7 1658
9f95a23c
TL
def get_unit_name(fsid, daemon_type, daemon_id=None):
    # type: (str, str, Optional[Union[int, str]]) -> str
    """Return the systemd unit name: ceph-<fsid>@<type>[.<id>].

    Accepts either a bare daemon type or a type plus id.
    """
    if daemon_id is None:
        return 'ceph-%s@%s' % (fsid, daemon_type)
    return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
1666
f6b5b4d7 1667
e306af50
TL
def get_unit_name_by_daemon_name(fsid, name):
    """Look up the systemd unit recorded for an existing daemon.

    :raises Error: if the daemon description lacks a 'systemd_unit' field
    """
    daemon = get_daemon_description(fsid, name)
    try:
        return daemon['systemd_unit']
    except KeyError:
        raise Error('Failed to get unit name for {}'.format(daemon))
1674
f6b5b4d7 1675
9f95a23c
TL
def check_unit(unit_name):
    # type: (str) -> Tuple[bool, str, bool]
    """Query systemd for a unit's enablement and run state.

    :return: (enabled, state, installed) where state is one of
             'running', 'stopped', 'error', 'unknown'
    """
    # NOTE: we ignore the exit code here because systemctl outputs
    # various exit codes based on the state of the service, but the
    # string result is more explicit (and sufficient).
    enabled = False
    installed = False
    try:
        out, err, code = call(['systemctl', 'is-enabled', unit_name],
                              verbosity=CallVerbosity.DEBUG)
        if code == 0:
            enabled = True
            installed = True
        elif "disabled" in out:
            installed = True
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        enabled = False
        installed = False

    state = 'unknown'
    try:
        out, err, code = call(['systemctl', 'is-active', unit_name],
                              verbosity=CallVerbosity.DEBUG)
        out = out.strip()
        if out in ['active']:
            state = 'running'
        elif out in ['inactive']:
            state = 'stopped'
        elif out in ['failed', 'auto-restart']:
            state = 'error'
        else:
            state = 'unknown'
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        state = 'unknown'
    return (enabled, state, installed)
1713
f6b5b4d7 1714
9f95a23c
TL
def check_units(units, enabler=None):
    # type: (List[str], Optional[Packager]) -> bool
    """Return True if any unit in *units* is enabled and running.

    Otherwise optionally enable each installed unit via *enabler* and
    return False.
    """
    for unit in units:
        enabled, state, installed = check_unit(unit)
        if enabled and state == 'running':
            logger.info('Unit %s is enabled and running' % unit)
            return True
        if enabler is not None and installed:
            logger.info('Enabling unit %s' % unit)
            enabler.enable_service(unit)
    return False
1727
f6b5b4d7 1728
7f7e6c64
TL
def is_container_running(name: str) -> bool:
    """Return True if a container named exactly *name* is running.

    Compares whole lines of `ps --format {{.Names}}` output (one name per
    line); the previous plain substring test could false-positive on
    containers whose names merely contain *name* (e.g. 'mon.a' vs 'mon.ab').
    """
    out, err, ret = call_throws([
        container_path, 'ps',
        '--format', '{{.Names}}'])
    return name in out.splitlines()
1734
1735
def get_legacy_config_fsid(cluster, legacy_dir=None):
    # type: (str, Optional[str]) -> Optional[str]
    """Read the fsid from /etc/ceph/<cluster>.conf, if present.

    :param legacy_dir: optional alternate root (testing/adoption)
    """
    config_file = '/etc/ceph/%s.conf' % cluster
    if legacy_dir is not None:
        config_file = os.path.abspath(legacy_dir + config_file)

    if os.path.exists(config_file):
        config = read_config(config_file)
        if config.has_section('global') and config.has_option('global', 'fsid'):
            return config.get('global', 'fsid')
    return None
1747
f6b5b4d7 1748
def get_legacy_daemon_fsid(cluster, daemon_type, daemon_id, legacy_dir=None):
    # type: (str, str, Union[int, str], Optional[str]) -> Optional[str]
    """Determine the fsid of a legacy (pre-cephadm) daemon.

    For OSDs, prefer the per-daemon 'ceph_fsid' file; otherwise fall back
    to the cluster config file.
    """
    fsid = None
    if daemon_type == 'osd':
        try:
            fsid_file = os.path.join(args.data_dir,
                                     daemon_type,
                                     'ceph-%s' % daemon_id,
                                     'ceph_fsid')
            if legacy_dir is not None:
                fsid_file = os.path.abspath(legacy_dir + fsid_file)
            with open(fsid_file, 'r') as f:
                fsid = f.read().strip()
        except IOError:
            # no per-osd fsid file; fall through to the config file
            pass
    if not fsid:
        fsid = get_legacy_config_fsid(cluster, legacy_dir=legacy_dir)
    return fsid
1767
f6b5b4d7 1768
9f95a23c
TL
def get_daemon_args(fsid, daemon_type, daemon_id):
    # type: (str, str, Union[int, str]) -> List[str]
    """Build the extra command-line arguments for a daemon's container."""
    r = list()  # type: List[str]

    if daemon_type in Ceph.daemons and daemon_type != 'crash':
        # route all logging to stderr so the container runtime captures it
        r += [
            '--setuser', 'ceph',
            '--setgroup', 'ceph',
            '--default-log-to-file=false',
            '--default-log-to-stderr=true',
            '--default-log-stderr-prefix=debug ',
        ]
        if daemon_type == 'mon':
            r += [
                '--default-mon-cluster-log-to-file=false',
                '--default-mon-cluster-log-to-stderr=true',
            ]
    elif daemon_type in Monitoring.components:
        metadata = Monitoring.components[daemon_type]
        r += metadata.get('args', list())
        if daemon_type == 'alertmanager':
            config = get_parm(args.config_json)
            peers = config.get('peers', list())  # type: ignore
            for peer in peers:
                r += ["--cluster.peer={}".format(peer)]
            # some alertmanager, by default, look elsewhere for a config
            r += ["--config.file=/etc/alertmanager/alertmanager.yml"]
    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
        r += nfs_ganesha.get_daemon_args()
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(fsid, daemon_id)
        r.extend(cc.get_daemon_args())

    return r
1804
f6b5b4d7 1805
def create_daemon_dirs(fsid, daemon_type, daemon_id, uid, gid,
                       config=None, keyring=None):
    # type: (str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None
    """Create the data/log directory layout for one daemon.

    Writes the daemon's config and keyring (mode 0600, owned uid:gid)
    when provided, and performs daemon-type specific directory/config
    setup (monitoring stack, nfs-ganesha, iscsi, custom containers).
    """
    data_dir = make_data_dir(fsid, daemon_type, daemon_id, uid=uid, gid=gid)
    make_log_dir(fsid, uid=uid, gid=gid)

    if config:
        config_path = os.path.join(data_dir, 'config')
        with open(config_path, 'w') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config)

    if keyring:
        keyring_path = os.path.join(data_dir, 'keyring')
        with open(keyring_path, 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write(keyring)

    if daemon_type in Monitoring.components.keys():
        config_json: Dict[str, Any] = get_parm(args.config_json)

        # Set up directories specific to the monitoring component
        config_dir = ''
        if daemon_type == 'prometheus':
            data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
            config_dir = 'etc/prometheus'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'alerting'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
        elif daemon_type == 'grafana':
            data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
            config_dir = 'etc/grafana'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
        elif daemon_type == 'alertmanager':
            data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
            config_dir = 'etc/alertmanager'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755)

        # populate the config directory for the component from the config-json
        if 'files' in config_json:
            for fname in config_json['files']:
                content = dict_get_join(config_json['files'], fname)
                # absolute paths are rooted under the daemon's data dir
                if os.path.isabs(fname):
                    fpath = os.path.join(data_dir_root, fname.lstrip(os.path.sep))
                else:
                    fpath = os.path.join(data_dir_root, config_dir, fname)
                with open(fpath, 'w', encoding='utf-8') as f:
                    os.fchown(f.fileno(), uid, gid)
                    os.fchmod(f.fileno(), 0o600)
                    f.write(content)

    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
        nfs_ganesha.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CephIscsi.daemon_type:
        ceph_iscsi = CephIscsi.init(fsid, daemon_id)
        ceph_iscsi.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(fsid, daemon_id)
        cc.create_daemon_dirs(data_dir, uid, gid)
f6b5b4d7 1875
9f95a23c
TL
def get_parm(option):
    # type: (str) -> Dict[str, str]
    """Parse a --config-json option: '-' (stdin), inline JSON, or a file path.

    :raises Error: if the file is missing or the JSON is invalid
    """
    if not option:
        return dict()

    global cached_stdin
    if option == '-':
        if cached_stdin is not None:
            # stdin can only be consumed once; reuse the cached copy
            j = cached_stdin
        else:
            try:
                j = injected_stdin  # type: ignore
            except NameError:
                j = sys.stdin.read()
                cached_stdin = j
    else:
        # inline json string
        if option[0] == '{' and option[-1] == '}':
            j = option
        # json file
        elif os.path.exists(option):
            with open(option, 'r') as f:
                j = f.read()
        else:
            raise Error("Config file {} not found".format(option))

    try:
        js = json.loads(j)
    except ValueError as e:
        raise Error("Invalid JSON in {}: {}".format(option, e))
    else:
        return js
1909
f6b5b4d7 1910
def get_config_and_keyring():
    # type: () -> Tuple[Optional[str], Optional[str]]
    """Assemble config and keyring text from the parsed CLI args.

    Values from --config-json are overridden by explicit --config,
    --key, or --keyring options.
    """
    config = None
    keyring = None

    if 'config_json' in args and args.config_json:
        d = get_parm(args.config_json)
        config = d.get('config')
        keyring = d.get('keyring')

    if 'config' in args and args.config:
        with open(args.config, 'r') as f:
            config = f.read()

    if 'key' in args and args.key:
        keyring = '[%s]\n\tkey = %s\n' % (args.name, args.key)
    elif 'keyring' in args and args.keyring:
        with open(args.keyring, 'r') as f:
            keyring = f.read()

    return config, keyring
1932
1933
def get_container_binds(fsid, daemon_type, daemon_id):
    # type: (str, str, Union[int, str, None]) -> List[List[str]]
    """Return extra container bind-mount specs for special daemon types."""
    binds = list()

    if daemon_type == CephIscsi.daemon_type:
        binds.extend(CephIscsi.get_container_binds())
    elif daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(fsid, daemon_id)
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        binds.extend(cc.get_container_binds(data_dir))

    return binds
1947
9f95a23c
TL
1948
def get_container_mounts(fsid, daemon_type, daemon_id,
                         no_config=False):
    # type: (str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str]
    """Build the host-path -> container-path volume map for one daemon.

    :param fsid: cluster fsid; fsid-scoped mounts are skipped when empty
    :param daemon_type: e.g. 'mon', 'osd', 'prometheus', 'nfs', 'iscsi', ...
    :param daemon_id: daemon instance id; per-daemon mounts are only added
        when this is set
    :param no_config: when True, do not bind the daemon's config file over
        /etc/ceph/ceph.conf inside the container
    :return: dict mapping host paths to container paths (the ':z'/':Z'/':ro'
        suffixes carry bind options such as SELinux relabeling)
    """
    mounts = dict()

    # Core ceph daemons share the cluster's run/log/crash directories.
    if daemon_type in Ceph.daemons:
        if fsid:
            run_path = os.path.join('/var/run/ceph', fsid);
            if os.path.exists(run_path):
                mounts[run_path] = '/var/run/ceph:z'
            log_dir = get_log_dir(fsid)
            mounts[log_dir] = '/var/log/ceph:z'
            crash_dir = '/var/lib/ceph/%s/crash' % fsid
            if os.path.exists(crash_dir):
                mounts[crash_dir] = '/var/lib/ceph/crash:z'

    # Per-daemon data directory, mapped to the conventional in-container path.
    if daemon_type in Ceph.daemons and daemon_id:
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        if daemon_type == 'rgw':
            cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id)
        else:
            cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id)
        if daemon_type != 'crash':
            mounts[data_dir] = cdata_dir + ':z'
        if not no_config:
            mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
        if daemon_type == 'rbd-mirror' or daemon_type == 'crash':
            # these do not search for their keyrings in a data directory
            mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id)

    if daemon_type in ['mon', 'osd']:
        mounts['/dev'] = '/dev'  # FIXME: narrow this down?
        mounts['/run/udev'] = '/run/udev'
    if daemon_type == 'osd':
        mounts['/sys'] = '/sys'  # for numa.cc, pick_address, cgroups, ...
        mounts['/run/lvm'] = '/run/lvm'
        mounts['/run/lock/lvm'] = '/run/lock/lvm'

    # args.shared_ceph_folder is only defined by some subparsers;
    # AttributeError is treated as "option not given".
    try:
        if args.shared_ceph_folder:  # make easy manager modules/ceph-volume development
            ceph_folder = pathify(args.shared_ceph_folder)
            if os.path.exists(ceph_folder):
                mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
                mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
                mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
                mounts[ceph_folder + '/monitoring/grafana/dashboards'] = '/etc/grafana/dashboards/ceph-dashboard'
                mounts[ceph_folder + '/monitoring/prometheus/alerts'] = '/etc/prometheus/ceph'
            else:
                logger.error('{}{}{}'.format(termcolor.red,
                                             'Ceph shared source folder does not exist.',
                                             termcolor.end))
    except AttributeError:
        pass

    # Monitoring stack daemons get their own config/data mounts.
    if daemon_type in Monitoring.components and daemon_id:
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        if daemon_type == 'prometheus':
            mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z'
            mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z'
        elif daemon_type == 'node-exporter':
            # node-exporter reads host metrics through read-only binds
            mounts['/proc'] = '/host/proc:ro'
            mounts['/sys'] = '/host/sys:ro'
            mounts['/'] = '/rootfs:ro'
        elif daemon_type == "grafana":
            mounts[os.path.join(data_dir, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
        elif daemon_type == 'alertmanager':
            mounts[os.path.join(data_dir, 'etc/alertmanager')] = '/etc/alertmanager:Z'

    # Gateways and custom containers delegate to their own helper classes.
    if daemon_type == NFSGanesha.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
        mounts.update(nfs_ganesha.get_container_mounts(data_dir))

    if daemon_type == CephIscsi.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        log_dir = get_log_dir(fsid)
        mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir))

    if daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(fsid, daemon_id)
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        mounts.update(cc.get_container_mounts(data_dir))

    return mounts
2038
f6b5b4d7 2039
f91f0fd5
TL
def get_container(fsid: str, daemon_type: str, daemon_id: Union[int, str],
                  privileged: bool = False,
                  ptrace: bool = False,
                  container_args: Optional[List[str]] = None) -> 'CephContainer':
    """Assemble the CephContainer used to run one daemon.

    Chooses the entrypoint, ceph name (-n), environment and engine arguments
    based on the daemon type, then wires in the volume/bind mounts from
    get_container_mounts()/get_container_binds().

    :param privileged: request a privileged container (mon/osd/iscsi force
        this on regardless of the flag)
    :param ptrace: allow SYS_PTRACE for debugging
    :param container_args: extra engine arguments; note this list may be
        extended in place below
    """
    entrypoint: str = ''
    name: str = ''
    ceph_args: List[str] = []
    envs: List[str] = []
    host_network: bool = True

    if container_args is None:
        container_args = []
    if daemon_type in ['mon', 'osd']:
        # mon and osd need privileged in order for libudev to query devices
        privileged = True
    if daemon_type == 'rgw':
        entrypoint = '/usr/bin/radosgw'
        name = 'client.rgw.%s' % daemon_id
    elif daemon_type == 'rbd-mirror':
        entrypoint = '/usr/bin/rbd-mirror'
        name = 'client.rbd-mirror.%s' % daemon_id
    elif daemon_type == 'crash':
        entrypoint = '/usr/bin/ceph-crash'
        name = 'client.crash.%s' % daemon_id
    elif daemon_type in ['mon', 'mgr', 'mds', 'osd']:
        entrypoint = '/usr/bin/ceph-' + daemon_type
        name = '%s.%s' % (daemon_type, daemon_id)
    elif daemon_type in Monitoring.components:
        # monitoring images define their own entrypoint
        entrypoint = ''
    elif daemon_type == NFSGanesha.daemon_type:
        entrypoint = NFSGanesha.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        envs.extend(NFSGanesha.get_container_envs())
    elif daemon_type == CephIscsi.daemon_type:
        entrypoint = CephIscsi.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        # So the container can modprobe iscsi_target_mod and have write perms
        # to configfs we need to make this a privileged container.
        privileged = True
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(fsid, daemon_id)
        entrypoint = cc.entrypoint
        host_network = False
        envs.extend(cc.get_container_envs())
        container_args.extend(cc.get_container_args())

    if daemon_type in Monitoring.components:
        # monitoring containers run as the image's own (non-root) user
        uid, gid = extract_uid_gid_monitoring(daemon_type)
        monitoring_args = [
            '--user',
            str(uid),
            # FIXME: disable cpu/memory limits for the time being (not supported
            # by ubuntu 18.04 kernel!)
        ]
        container_args.extend(monitoring_args)
    elif daemon_type == 'crash':
        ceph_args = ['-n', name]
    elif daemon_type in Ceph.daemons:
        # -f keeps the daemon in the foreground for the container engine
        ceph_args = ['-n', name, '-f']

    # if using podman, set -d, --conmon-pidfile & --cidfile flags
    # so service can have Type=Forking
    if 'podman' in container_path:
        runtime_dir = '/run'
        container_args.extend(['-d',
                               '--conmon-pidfile',
                               runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, daemon_id),
                               '--cidfile',
                               runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, daemon_id)])

    return CephContainer(
        image=args.image,
        entrypoint=entrypoint,
        args=ceph_args + get_daemon_args(fsid, daemon_type, daemon_id),
        container_args=container_args,
        volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
        bind_mounts=get_container_binds(fsid, daemon_type, daemon_id),
        cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id),
        envs=envs,
        privileged=privileged,
        ptrace=ptrace,
        host_network=host_network,
    )
2123
f6b5b4d7 2124
9f95a23c 2125def extract_uid_gid(img='', file_path='/var/lib/ceph'):
f6b5b4d7 2126 # type: (str, Union[str, List[str]]) -> Tuple[int, int]
9f95a23c
TL
2127
2128 if not img:
2129 img = args.image
2130
f6b5b4d7
TL
2131 if isinstance(file_path, str):
2132 paths = [file_path]
2133 else:
2134 paths = file_path
2135
2136 for fp in paths:
2137 try:
2138 out = CephContainer(
2139 image=img,
2140 entrypoint='stat',
2141 args=['-c', '%u %g', fp]
2142 ).run()
2143 uid, gid = out.split(' ')
2144 return int(uid), int(gid)
2145 except RuntimeError:
2146 pass
2147 raise RuntimeError('uid/gid not found')
2148
9f95a23c
TL
2149
def deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid,
                  config=None, keyring=None,
                  osd_fsid=None,
                  reconfig=False,
                  ports=None):
    # type: (str, str, Union[int, str], CephContainer, int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None
    """Deploy (or reconfigure) one daemon on this host.

    Creates the daemon's data dir, config and keyring, writes the systemd
    unit files via deploy_daemon_units(), and opens firewall ports. For a
    brand-new mon it additionally runs `ceph-mon --mkfs` first.

    :param c: the runtime container for the daemon
    :param uid/gid: ownership applied to created files/dirs
    :param osd_fsid: OSD-specific fsid, forwarded to the unit files
    :param reconfig: only refresh config/keyring; requires an existing
        data dir and does not (re)create the units
    :param ports: TCP ports the daemon requires; must be free
    :raises Error: when a required port is in use or a reconfig is requested
        for a daemon whose data dir does not exist
    """
    ports = ports or []
    if any([port_in_use(port) for port in ports]):
        raise Error("TCP Port(s) '{}' required for {} already in use".format(",".join(map(str, ports)), daemon_type))

    data_dir = get_data_dir(fsid, daemon_type, daemon_id)
    if reconfig and not os.path.exists(data_dir):
        raise Error('cannot reconfig, data path %s does not exist' % data_dir)
    if daemon_type == 'mon' and not os.path.exists(data_dir):
        # first-time mon deployment: run --mkfs with config/keyring passed
        # through temp files before the data dir exists
        assert config
        assert keyring
        # tmp keyring file
        tmp_keyring = write_tmp(keyring, uid, gid)

        # tmp config file
        tmp_config = write_tmp(config, uid, gid)

        # --mkfs
        create_daemon_dirs(fsid, daemon_type, daemon_id, uid, gid)
        mon_dir = get_data_dir(fsid, 'mon', daemon_id)
        log_dir = get_log_dir(fsid)
        out = CephContainer(
            image=args.image,
            entrypoint='/usr/bin/ceph-mon',
            args=['--mkfs',
                  '-i', str(daemon_id),
                  '--fsid', fsid,
                  '-c', '/tmp/config',
                  '--keyring', '/tmp/keyring',
            ] + get_daemon_args(fsid, 'mon', daemon_id),
            volume_mounts={
                log_dir: '/var/log/ceph:z',
                mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id),
                tmp_keyring.name: '/tmp/keyring:z',
                tmp_config.name: '/tmp/config:z',
            },
        ).run()

        # write conf
        with open(mon_dir + '/config', 'w') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config)
    else:
        # dirs, conf, keyring
        create_daemon_dirs(
            fsid, daemon_type, daemon_id,
            uid, gid,
            config, keyring)

    if not reconfig:
        deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
                            osd_fsid=osd_fsid)

    # unit.created records when the deployment first happened (write once)
    if not os.path.exists(data_dir + '/unit.created'):
        with open(data_dir + '/unit.created', 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write('mtime is time the daemon deployment was created\n')

    # unit.configured is refreshed on every (re)configure
    with open(data_dir + '/unit.configured', 'w') as f:
        f.write('mtime is time we were last configured\n')
        os.fchmod(f.fileno(), 0o600)
        os.fchown(f.fileno(), uid, gid)

    update_firewalld(daemon_type)

    # Open ports explicitly required for the daemon
    if ports:
        fw = Firewalld()
        fw.open_ports(ports)
        fw.apply_rules()

    if reconfig and daemon_type not in Ceph.daemons:
        # ceph daemons do not need a restart; others (presumably) do to pick
        # up the new config
        call_throws(['systemctl', 'reset-failed',
                     get_unit_name(fsid, daemon_type, daemon_id)])
        call_throws(['systemctl', 'restart',
                     get_unit_name(fsid, daemon_type, daemon_id)])
2236
f6b5b4d7
TL
def _write_container_cmd_to_bash(file_obj, container, comment=None, background=False):
    # type: (IO[str], CephContainer, Optional[str], Optional[bool]) -> None
    """Append the shell lines that clean up and then start `container`.

    :param file_obj: open text file (a unit.run-style script) to append to
    :param comment: optional '# ...' header for readability
    :param background: when True, the run command is backgrounded with '&'
    """
    if comment:
        # A leading comment helps when several containers share one unit file.
        file_obj.write('# {}\n'.format(comment))

    # `--rm` on the run command is not always enough; force-remove any stale
    # container of the same name first (failure tolerated, stderr discarded).
    file_obj.write('! {} 2> /dev/null\n'.format(' '.join(container.rm_cmd())))
    if 'podman' in container_path:
        # `podman rm` can miss the container; retry with --storage.
        storage_rm = ' '.join(shlex.quote(a) for a in container.rm_cmd(storage=True))
        file_obj.write('! {} 2> /dev/null\n'.format(storage_rm))

    # The actual container run command.
    run_line = ' '.join(shlex.quote(a) for a in container.run_cmd())
    if background:
        run_line += ' &'
    file_obj.write(run_line + '\n')
f6b5b4d7 2256
f91f0fd5 2257
9f95a23c
TL
def deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True, start=True,
                        osd_fsid=None):
    # type: (str, int, int, str, Union[int, str], CephContainer, bool, bool, Optional[str]) -> None
    """Write the daemon's runtime scripts and systemd units, then start it.

    Produces unit.run (start script), unit.poststop (cleanup script) and
    unit.image in the daemon's data dir, installs the ceph.target /
    ceph-$fsid.target base units plus the ceph-$fsid@.service template, and
    finally enables/starts the daemon's unit.

    All files are written to a '.new' path and renamed into place so a
    partial write never replaces a working file.
    """
    # cmd
    data_dir = get_data_dir(fsid, daemon_type, daemon_id)
    with open(data_dir + '/unit.run.new', 'w') as f:
        f.write('set -e\n')

        if daemon_type in Ceph.daemons:
            # make sure the per-cluster run dir exists with the right owner
            install_path = find_program('install')
            f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))

        # pre-start cmd(s)
        if daemon_type == 'osd':
            # osds have a pre-start step
            assert osd_fsid
            simple_fn = os.path.join('/etc/ceph/osd',
                                     '%s-%s.json.adopted-by-cephadm' % (daemon_id, osd_fsid))
            if os.path.exists(simple_fn):
                f.write('# Simple OSDs need chown on startup:\n')
                for n in ['block', 'block.db', 'block.wal']:
                    p = os.path.join(data_dir, n)
                    f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
            else:
                prestart = CephContainer(
                    image=args.image,
                    entrypoint='/usr/sbin/ceph-volume',
                    args=[
                        'lvm', 'activate',
                        str(daemon_id), osd_fsid,
                        '--no-systemd'
                    ],
                    privileged=True,
                    volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
                    bind_mounts=get_container_binds(fsid, daemon_type, daemon_id),
                    cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
                )
                _write_container_cmd_to_bash(f, prestart, 'LVM OSDs use ceph-volume lvm activate')
        elif daemon_type == NFSGanesha.daemon_type:
            # add nfs to the rados grace db
            nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
            prestart = nfs_ganesha.get_rados_grace_container('add')
            _write_container_cmd_to_bash(f, prestart, 'add daemon to rados grace')
        elif daemon_type == CephIscsi.daemon_type:
            # mount configfs, then run the tcmu-runner sidecar in the background
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
            ceph_iscsi = CephIscsi.init(fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            _write_container_cmd_to_bash(f, tcmu_container, 'iscsi tcmu-runnter container', background=True)

        _write_container_cmd_to_bash(f, c, '%s.%s' % (daemon_type, str(daemon_id)))
        os.fchmod(f.fileno(), 0o600)
    os.rename(data_dir + '/unit.run.new',
              data_dir + '/unit.run')

    # post-stop command(s)
    with open(data_dir + '/unit.poststop.new', 'w') as f:
        if daemon_type == 'osd':
            assert osd_fsid
            poststop = CephContainer(
                image=args.image,
                entrypoint='/usr/sbin/ceph-volume',
                args=[
                    'lvm', 'deactivate',
                    str(daemon_id), osd_fsid,
                ],
                privileged=True,
                volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
                bind_mounts=get_container_binds(fsid, daemon_type, daemon_id),
                cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type,
                                                    daemon_id),
            )
            _write_container_cmd_to_bash(f, poststop, 'deactivate osd')
        elif daemon_type == NFSGanesha.daemon_type:
            # remove nfs from the rados grace db
            nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
            poststop = nfs_ganesha.get_rados_grace_container('remove')
            _write_container_cmd_to_bash(f, poststop, 'remove daemon from rados grace')
        elif daemon_type == CephIscsi.daemon_type:
            # make sure we also stop the tcmu container
            ceph_iscsi = CephIscsi.init(fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            f.write('! '+ ' '.join(tcmu_container.stop_cmd()) + '\n')
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
        os.fchmod(f.fileno(), 0o600)
    os.rename(data_dir + '/unit.poststop.new',
              data_dir + '/unit.poststop')

    # record which image this daemon was deployed with
    with open(data_dir + '/unit.image.new', 'w') as f:
        f.write(c.image + '\n')
        os.fchmod(f.fileno(), 0o600)
    os.rename(data_dir + '/unit.image.new',
              data_dir + '/unit.image')

    # systemd
    install_base_units(fsid)
    unit = get_unit_file(fsid)
    unit_file = 'ceph-%s@.service' % (fsid)
    with open(args.unit_dir + '/' + unit_file + '.new', 'w') as f:
        f.write(unit)
    os.rename(args.unit_dir + '/' + unit_file + '.new',
              args.unit_dir + '/' + unit_file)
    call_throws(['systemctl', 'daemon-reload'])

    # stop/reset any previous incarnation before (re)enabling it
    unit_name = get_unit_name(fsid, daemon_type, daemon_id)
    call(['systemctl', 'stop', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(['systemctl', 'reset-failed', unit_name],
         verbosity=CallVerbosity.DEBUG)
    if enable:
        call_throws(['systemctl', 'enable', unit_name])
    if start:
        call_throws(['systemctl', 'start', unit_name])
2371
9f95a23c 2372
f6b5b4d7
TL
2373
class Firewalld(object):
    """Thin wrapper around the firewall-cmd CLI.

    All operations become no-ops when firewalld is not installed, not
    enabled, or not running (see check()).
    """

    def __init__(self):
        # type: () -> None
        # available: whether firewall-cmd exists and firewalld.service is
        # enabled and running
        self.available = self.check()

    def check(self):
        # type: () -> bool
        """Return True when firewall-cmd is usable on this host."""
        self.cmd = find_executable('firewall-cmd')
        if not self.cmd:
            logger.debug('firewalld does not appear to be present')
            return False
        (enabled, state, _) = check_unit('firewalld.service')
        if not enabled:
            logger.debug('firewalld.service is not enabled')
            return False
        if state != "running":
            logger.debug('firewalld.service is not running')
            return False

        logger.info("firewalld ready")
        return True

    def enable_service_for(self, daemon_type):
        # type: (str) -> None
        """Permanently enable the firewalld service for a daemon type.

        Only mon, mgr/mds/osd and nfs map to firewalld services; other
        daemon types are silently ignored. Requires apply_rules() (reload)
        to take effect.
        """
        if not self.available:
            logger.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type)
            return

        if daemon_type == 'mon':
            svc = 'ceph-mon'
        elif daemon_type in ['mgr', 'mds', 'osd']:
            svc = 'ceph'
        elif daemon_type == NFSGanesha.daemon_type:
            svc = 'nfs'
        else:
            return

        # query first to keep the add idempotent
        out, err, ret = call([self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG)
        if ret:
            logger.info('Enabling firewalld service %s in current zone...' % svc)
            out, err, ret = call([self.cmd, '--permanent', '--add-service', svc])
            if ret:
                raise RuntimeError(
                    'unable to add service %s to current zone: %s' % (svc, err))
        else:
            logger.debug('firewalld service %s is enabled in current zone' % svc)

    def open_ports(self, fw_ports):
        # type: (List[int]) -> None
        """Permanently open the given TCP ports (idempotent per port)."""
        if not self.available:
            logger.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports)
            return

        for port in fw_ports:
            tcp_port = str(port) + '/tcp'
            out, err, ret = call([self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
            if ret:
                logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
                out, err, ret = call([self.cmd, '--permanent', '--add-port', tcp_port])
                if ret:
                    raise RuntimeError('unable to add port %s to current zone: %s' %
                                       (tcp_port, err))
            else:
                logger.debug('firewalld port %s is enabled in current zone' % tcp_port)

    def apply_rules(self):
        # type: () -> None
        """Reload firewalld so --permanent changes become active."""
        if not self.available:
            return

        call_throws([self.cmd, '--reload'])
2445
2446
def update_firewalld(daemon_type):
    # type: (str) -> None
    """Open the firewalld service and any fixed ports for a daemon type."""
    fw = Firewalld()
    fw.enable_service_for(daemon_type)

    # monitoring daemons (prometheus etc) publish well-known TCP ports
    ports = list(Monitoring.port_map.get(daemon_type, []))

    fw.open_ports(ports)
    fw.apply_rules()
9f95a23c
TL
2460
def install_base_units(fsid):
    # type: (str) -> None
    """
    Set up ceph.target and ceph-$fsid.target units.

    Also writes the per-cluster logrotate configuration. Unit files are
    written to a '.new' path and renamed into place; targets are only
    enabled/started when the unit file did not exist before.
    """
    # global unit
    existed = os.path.exists(args.unit_dir + '/ceph.target')
    with open(args.unit_dir + '/ceph.target.new', 'w') as f:
        f.write('[Unit]\n'
                'Description=All Ceph clusters and services\n'
                '\n'
                '[Install]\n'
                'WantedBy=multi-user.target\n')
    os.rename(args.unit_dir + '/ceph.target.new',
              args.unit_dir + '/ceph.target')
    if not existed:
        # we disable before enable in case a different ceph.target
        # (from the traditional package) is present; while newer
        # systemd is smart enough to disable the old
        # (/lib/systemd/...) and enable the new (/etc/systemd/...),
        # some older versions of systemd error out with EEXIST.
        call_throws(['systemctl', 'disable', 'ceph.target'])
        call_throws(['systemctl', 'enable', 'ceph.target'])
        call_throws(['systemctl', 'start', 'ceph.target'])

    # cluster unit
    existed = os.path.exists(args.unit_dir + '/ceph-%s.target' % fsid)
    with open(args.unit_dir + '/ceph-%s.target.new' % fsid, 'w') as f:
        f.write('[Unit]\n'
                'Description=Ceph cluster {fsid}\n'
                'PartOf=ceph.target\n'
                'Before=ceph.target\n'
                '\n'
                '[Install]\n'
                'WantedBy=multi-user.target ceph.target\n'.format(
                    fsid=fsid)
                )
    os.rename(args.unit_dir + '/ceph-%s.target.new' % fsid,
              args.unit_dir + '/ceph-%s.target' % fsid)
    if not existed:
        call_throws(['systemctl', 'enable', 'ceph-%s.target' % fsid])
        call_throws(['systemctl', 'start', 'ceph-%s.target' % fsid])

    # logrotate for the cluster
    with open(args.logrotate_dir + '/ceph-%s' % fsid, 'w') as f:
        """
        This is a bit sloppy in that the killall/pkill will touch all ceph daemons
        in all containers, but I don't see an elegant way to send SIGHUP *just* to
        the daemons for this cluster. (1) systemd kill -s will get the signal to
        podman, but podman will exit. (2) podman kill will get the signal to the
        first child (bash), but that isn't the ceph daemon. This is simpler and
        should be harmless.
        """
        f.write("""# created by cephadm
/var/log/ceph/%s/*.log {
    rotate 7
    daily
    compress
    sharedscripts
    postrotate
        killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror || pkill -1 -x "ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror" || true
    endscript
    missingok
    notifempty
    su root root
}
""" % fsid)
2528
f6b5b4d7 2529
1911f103
TL
def get_unit_file(fsid):
    # type: (str) -> str
    """Render the ceph-$fsid@.service systemd unit template.

    With podman the service is Type=forking with pid/cid files (the run
    command is detached, see get_container()); with docker the unit gains
    an ordering/requirement dependency on docker.service.
    """
    extra_args = ''
    if 'podman' in container_path:
        extra_args = ('ExecStartPre=-/bin/rm -f /%t/%n-pid /%t/%n-cid\n'
                      'ExecStopPost=-/bin/rm -f /%t/%n-pid /%t/%n-cid\n'
                      'Type=forking\n'
                      'PIDFile=/%t/%n-pid\n')

    docker = 'docker' in container_path
    u = """# generated by cephadm
[Unit]
Description=Ceph %i for {fsid}

# According to:
# http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
# these can be removed once ceph-mon will dynamically change network
# configuration.
After=network-online.target local-fs.target time-sync.target{docker_after}
Wants=network-online.target local-fs.target time-sync.target
{docker_requires}
PartOf=ceph-{fsid}.target
Before=ceph-{fsid}.target

[Service]
LimitNOFILE=1048576
LimitNPROC=1048576
EnvironmentFile=-/etc/environment
ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
ExecStop=-{container_path} stop ceph-{fsid}-%i
ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
KillMode=none
Restart=on-failure
RestartSec=10s
TimeoutStartSec=120
TimeoutStopSec=120
StartLimitInterval=30min
StartLimitBurst=5
{extra_args}
[Install]
WantedBy=ceph-{fsid}.target
""".format(
    container_path=container_path,
    fsid=fsid,
    data_dir=args.data_dir,
    extra_args=extra_args,
    # if docker, we depend on docker.service
    docker_after=' docker.service' if docker else '',
    docker_requires='Requires=docker.service\n' if docker else '',
)

    return u
2583
2584##################################
2585
f6b5b4d7 2586
9f95a23c
TL
class CephContainer:
    """Builder for podman/docker command lines for a single container.

    Instances only assemble argv lists; nothing is executed until run()
    (or a caller of the *_cmd() methods) actually invokes the engine.
    """

    def __init__(self,
                 image: str,
                 entrypoint: str,
                 args: Optional[List[str]] = None,
                 volume_mounts: Optional[Dict[str, str]] = None,
                 cname: str = '',
                 container_args: Optional[List[str]] = None,
                 envs: Optional[List[str]] = None,
                 privileged: bool = False,
                 ptrace: bool = False,
                 bind_mounts: Optional[List[List[str]]] = None,
                 init: Optional[bool] = None,
                 host_network: bool = True,
                 ) -> None:
        self.image = image
        self.entrypoint = entrypoint
        # FIX: these defaults used to be shared mutable literals ([] / {});
        # use None sentinels and per-instance fresh containers instead.
        self.args = args if args is not None else []
        self.volume_mounts = volume_mounts if volume_mounts is not None else {}
        self.cname = cname
        self.container_args = container_args if container_args is not None else []
        self.envs = envs
        self.privileged = privileged
        self.ptrace = ptrace
        self.bind_mounts = bind_mounts if bind_mounts else []
        # fall back to the file-level container_init default when unset
        self.init = init if init else container_init
        self.host_network = host_network

    def run_cmd(self) -> List[str]:
        """Return the full argv for `<engine> run` of this container."""
        cmd_args: List[str] = [
            str(container_path),
            'run',
            '--rm',
            '--ipc=host',
        ]
        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        vols: List[str] = []
        binds: List[str] = []

        if self.host_network:
            cmd_args.append('--net=host')
        if self.entrypoint:
            cmd_args.extend(['--entrypoint', self.entrypoint])
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk'])
        if self.ptrace and not self.privileged:
            # if privileged, the SYS_PTRACE cap is already added
            # in addition, --cap-add and --privileged are mutually
            # exclusive since podman >= 2.0
            cmd_args.append('--cap-add=SYS_PTRACE')
        if self.init:
            cmd_args.append('--init')
            envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
        if self.cname:
            cmd_args.extend(['--name', self.cname])
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        binds = sum([['--mount', '{}'.format(','.join(bind))]
                     for bind in self.bind_mounts], [])

        return cmd_args + self.container_args + envs + vols + binds + [
            self.image,
        ] + self.args  # type: ignore

    def shell_cmd(self, cmd: List[str]) -> List[str]:
        """Return argv running `cmd` in a fresh container (cmd[0] is the
        entrypoint; this container's own entrypoint/name are not used)."""
        cmd_args: List[str] = [
            str(container_path),
            'run',
            '--rm',
            '--ipc=host',
        ]
        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        vols: List[str] = []
        binds: List[str] = []

        if self.host_network:
            cmd_args.append('--net=host')
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk',
            ])
        if self.init:
            cmd_args.append('--init')
            envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        binds = sum([['--mount', '{}'.format(','.join(bind))]
                     for bind in self.bind_mounts], [])

        return cmd_args + self.container_args + envs + vols + binds + [
            '--entrypoint', cmd[0],
            self.image,
        ] + cmd[1:]

    def exec_cmd(self, cmd):
        # type: (List[str]) -> List[str]
        """Return argv that execs `cmd` inside the running named container."""
        return [
            str(container_path),
            'exec',
        ] + self.container_args + [
            self.cname,
        ] + cmd

    def rm_cmd(self, storage=False):
        # type: (bool) -> List[str]
        """Return argv that force-removes this container by name.

        :param storage: also pass --storage (podman) to remove containers
            that the engine no longer tracks
        """
        ret = [
            str(container_path),
            'rm', '-f',
        ]
        if storage:
            ret.append('--storage')
        ret.append(self.cname)
        return ret

    def stop_cmd(self):
        # type: () -> List[str]
        # FIX: type comment was malformed ('# type () -> ...')
        """Return argv that stops this container by name."""
        ret = [
            str(container_path),
            'stop', self.cname,
        ]
        return ret

    def run(self, timeout=DEFAULT_TIMEOUT):
        # type: (Optional[int]) -> str
        """Execute run_cmd() via call_throws() and return its stdout."""
        out, _, _ = call_throws(
            self.run_cmd(), desc=self.entrypoint, timeout=timeout)
        return out
2735
2736##################################
2737
f6b5b4d7 2738
9f95a23c
TL
@infer_image
def command_version():
    # type: () -> int
    """Print `ceph --version` as reported by the (possibly inferred) image."""
    out = CephContainer(args.image, 'ceph', ['--version']).run()
    print(out.strip())
    return 0
2745
2746##################################
2747
f6b5b4d7 2748
9f95a23c
TL
@infer_image
def command_pull():
    # type: () -> int
    """Pull args.image and then print its inspect info (exit status of
    command_inspect_image is returned)."""

    _pull_image(args.image)
    return command_inspect_image()
2755
f6b5b4d7
TL
2756
def _pull_image(image):
    # type: (str) -> None
    """Pull a container image, retrying known-transient failures.

    :raises RuntimeError: on a non-transient pull error, or when all
        retries are exhausted.
    """
    logger.info('Pulling container image %s...' % image)

    # error substrings considered transient and worth retrying
    ignorelist = [
        "error creating read-write layer with ID",
        "net/http: TLS handshake timeout",
        "Digest did not match, expected",
    ]

    cmd = [container_path, 'pull', image]
    cmd_str = ' '.join(cmd)

    # back off between attempts (the final sleep is wasted but harmless)
    for sleep_secs in [1, 4, 25]:
        out, err, ret = call(cmd)
        if not ret:
            return

        if not any(pattern in err for pattern in ignorelist):
            raise RuntimeError('Failed command: %s' % cmd_str)

        # FIX: message used to open with a stray, unbalanced double quote
        logger.info('%s failed transiently. Retrying. waiting %s seconds...' % (cmd_str, sleep_secs))
        time.sleep(sleep_secs)

    raise RuntimeError('Failed command: %s: maximum retries reached' % cmd_str)
9f95a23c
TL
2782##################################
2783
f6b5b4d7 2784
9f95a23c
TL
@infer_image
def command_inspect_image():
    # type: () -> int
    """Inspect args.image and print its id/digest/ceph version as JSON.

    Returns errno.ENOENT when the engine cannot inspect the image.
    """
    out, err, ret = call_throws([
        container_path, 'inspect',
        '--format', '{{.ID}},{{.RepoDigests}}',
        args.image])
    if ret:
        return errno.ENOENT
    info_from = get_image_info_from_inspect(out.strip(), args.image)

    # the ceph version is obtained by actually running the image
    ver = CephContainer(args.image, 'ceph', ['--version']).run().strip()
    info_from['ceph_version'] = ver

    print(json.dumps(info_from, indent=4, sort_keys=True))
    return 0
2801
2802
def get_image_info_from_inspect(out, image):
    # type: (str, str) -> Dict[str, str]
    """Parse `<engine> inspect --format '{{.ID}},{{.RepoDigests}}'` output.

    :param out: raw inspect output, of the form 'IMAGE_ID,[digest1 digest2 ...]'
    :param image: image name, used only in the error message
    :return: dict with 'image_id' and, when available, 'repo_digest'
    :raises Error: when `out` is empty
    """
    # FIX: validate before splitting; the old order unpacked first, so empty
    # input raised a bare ValueError instead of the intended Error.
    if not out:
        raise Error('inspect {}: empty result'.format(image))
    image_id, digests = out.split(',', 1)
    r = {
        'image_id': normalize_container_id(image_id)
    }
    if digests:
        # RepoDigests renders as '[digest1 digest2 ...]'; strip brackets and
        # keep the first entry. NOTE(review): an empty '[]' still yields ['']
        # here and records an empty repo_digest — behavior preserved as-is.
        json_digests = digests[1:-1].split(' ')
        if json_digests:
            r['repo_digest'] = json_digests[0]
    return r
2816
9f95a23c
TL
2817
2818##################################
2819
f91f0fd5 2820
f6b5b4d7
TL
def unwrap_ipv6(address):
    # type: (str) -> str
    """Strip the surrounding square brackets from a bracketed IPv6 address;
    any other string is returned unchanged."""
    is_bracketed = address.startswith('[') and address.endswith(']')
    return address[1:-1] if is_bracketed else address
2826
2827
f91f0fd5
TL
def wrap_ipv6(address):
    # type: (str) -> str
    """Wrap a bare IPv6 address in square brackets.

    Hostnames, IPv4 addresses and already-bracketed strings do not parse as
    an IP address — ip_address() raises ValueError — and are returned
    unchanged.
    """
    # FIX: dropped the py2-only unicode() wrapper; ipaddress.ip_address()
    # accepts str natively on python3 (this script is python3-only).
    try:
        if ipaddress.ip_address(address).version == 6:
            return f"[{address}]"
    except ValueError:
        pass

    return address
2841
2842
f6b5b4d7
TL
def is_ipv6(address):
    # type: (str) -> bool
    """Return True when `address` (optionally bracket-wrapped) is a valid
    IPv6 address; invalid addresses log a warning and return False."""
    address = unwrap_ipv6(address)
    # FIX: dropped the py2-only unicode() wrapper; ip_address() takes str
    # natively on python3.
    try:
        return ipaddress.ip_address(address).version == 6
    except ValueError:
        logger.warning("Address: {} isn't a valid IP address".format(address))
        return False
2851
2852
9f95a23c
TL
@default_image
def command_bootstrap():
    # type: () -> int
    """Bootstrap a new Ceph cluster on this host.

    Creates the initial mon and mgr daemons, writes the admin keyring, a
    minimal ceph.conf and the cluster SSH public key to the output dir,
    then (unless skipped via flags) wires up the cephadm mgr module, the
    dashboard and the monitoring stack through the orchestrator.

    Returns 0 on success; raises Error for unrecoverable problems.
    """

    # default the three output paths under --output-dir when not given
    if not args.output_config:
        args.output_config = os.path.join(args.output_dir, 'ceph.conf')
    if not args.output_keyring:
        args.output_keyring = os.path.join(args.output_dir,
                                           'ceph.client.admin.keyring')
    if not args.output_pub_ssh_key:
        args.output_pub_ssh_key = os.path.join(args.output_dir, 'ceph.pub')

    # verify output files
    for f in [args.output_config, args.output_keyring, args.output_pub_ssh_key]:
        if not args.allow_overwrite:
            if os.path.exists(f):
                raise Error('%s already exists; delete or pass '
                            '--allow-overwrite to overwrite' % f)
        dirname = os.path.dirname(f)
        if dirname and not os.path.exists(dirname):
            fname = os.path.basename(f)
            logger.info(f"Creating directory {dirname} for {fname}")
            try:
                # use makedirs to create intermediate missing dirs
                os.makedirs(dirname, 0o755)
            except PermissionError:
                raise Error(f"Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.")

    if not args.skip_prepare_host:
        command_prepare_host()
    else:
        logger.info('Skip prepare_host')

    # initial vars
    fsid = args.fsid or make_fsid()
    hostname = get_hostname()
    if '.' in hostname and not args.allow_fqdn_hostname:
        raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0]))
    mon_id = args.mon_id or hostname
    mgr_id = args.mgr_id or generate_service_id()
    logger.info('Cluster fsid: %s' % fsid)
    ipv6 = False

    # per-fsid lock so concurrent cephadm invocations don't race
    l = FileLock(fsid)
    l.acquire()

    # ip: derive the mon addrvec either from --mon-ip or --mon-addrv
    r = re.compile(r':(\d+)$')  # trailing ':<port>' matcher
    base_ip = ''
    if args.mon_ip:
        ipv6 = is_ipv6(args.mon_ip)
        if ipv6:
            args.mon_ip = wrap_ipv6(args.mon_ip)
        hasport = r.findall(args.mon_ip)
        if hasport:
            # explicit port: pick msgr v1 vs v2 by the well-known ports,
            # defaulting unrecognized ports to v2
            port = int(hasport[0])
            if port == 6789:
                addr_arg = '[v1:%s]' % args.mon_ip
            elif port == 3300:
                addr_arg = '[v2:%s]' % args.mon_ip
            else:
                logger.warning('Using msgr2 protocol for unrecognized port %d' %
                               port)
                addr_arg = '[v2:%s]' % args.mon_ip
            base_ip = args.mon_ip[0:-(len(str(port)))-1]
            check_ip_port(base_ip, port)
        else:
            # no port: advertise both v2 (3300) and v1 (6789)
            base_ip = args.mon_ip
            addr_arg = '[v2:%s:3300,v1:%s:6789]' % (args.mon_ip, args.mon_ip)
            check_ip_port(args.mon_ip, 3300)
            check_ip_port(args.mon_ip, 6789)
    elif args.mon_addrv:
        addr_arg = args.mon_addrv
        if addr_arg[0] != '[' or addr_arg[-1] != ']':
            raise Error('--mon-addrv value %s must use square backets' %
                        addr_arg)
        # more than one '[' implies bracketed IPv6 literals inside the vector
        ipv6 = addr_arg.count('[') > 1
        for addr in addr_arg[1:-1].split(','):
            hasport = r.findall(addr)
            if not hasport:
                raise Error('--mon-addrv value %s must include port number' %
                            addr_arg)
            port = int(hasport[0])
            # strip off v1: or v2: prefix
            addr = re.sub(r'^\w+:', '', addr)
            base_ip = addr[0:-(len(str(port)))-1]
            check_ip_port(base_ip, port)
    else:
        raise Error('must specify --mon-ip or --mon-addrv')
    logger.debug('Base mon IP is %s, final addrv is %s' % (base_ip, addr_arg))

    mon_network = None
    if not args.skip_mon_network:
        # make sure IP is configured locally, and then figure out the
        # CIDR network
        for net, ips in list_networks().items():
            if ipaddress.ip_address(unicode(unwrap_ipv6(base_ip))) in \
                    [ipaddress.ip_address(unicode(ip)) for ip in ips]:
                mon_network = net
                logger.info('Mon IP %s is in CIDR network %s' % (base_ip,
                                                                 mon_network))
                break
        if not mon_network:
            raise Error('Failed to infer CIDR network for mon ip %s; pass '
                        '--skip-mon-network to configure it later' % base_ip)

    # config: seed from --config (if any) and inject bootstrap settings
    cp = read_config(args.config)
    if not cp.has_section('global'):
        cp.add_section('global')
    cp.set('global', 'fsid', fsid);
    cp.set('global', 'mon_host', addr_arg)
    cp.set('global', 'container_image', args.image)
    if not cp.has_section('mon'):
        cp.add_section('mon')
    # harden new clusters against CVE-2021-20288 unless the user already
    # set this option (either spelling) themselves
    if (
            not cp.has_option('mon', 'auth_allow_insecure_global_id_reclaim')
            and not cp.has_option('mon', 'auth allow insecure global id reclaim')
    ):
        cp.set('mon', 'auth_allow_insecure_global_id_reclaim', 'false')
    cpf = StringIO()
    cp.write(cpf)
    config = cpf.getvalue()

    if args.registry_json or args.registry_url:
        command_registry_login()

    if not args.skip_pull:
        _pull_image(args.image)

    logger.info('Extracting ceph user uid/gid from container image...')
    (uid, gid) = extract_uid_gid()

    # create some initial keys
    logger.info('Creating initial keys...')
    mon_key = CephContainer(
        image=args.image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()
    admin_key = CephContainer(
        image=args.image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()
    mgr_key = CephContainer(
        image=args.image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()

    # bootstrap keyring: mon. key, client.admin, and the first mgr
    keyring = ('[mon.]\n'
               '\tkey = %s\n'
               '\tcaps mon = allow *\n'
               '[client.admin]\n'
               '\tkey = %s\n'
               '\tcaps mon = allow *\n'
               '\tcaps mds = allow *\n'
               '\tcaps mgr = allow *\n'
               '\tcaps osd = allow *\n'
               '[mgr.%s]\n'
               '\tkey = %s\n'
               '\tcaps mon = profile mgr\n'
               '\tcaps mds = allow *\n'
               '\tcaps osd = allow *\n'
               % (mon_key, admin_key, mgr_id, mgr_key))

    # tmp keyring file
    tmp_bootstrap_keyring = write_tmp(keyring, uid, gid)

    # create initial monmap, tmp monmap file
    logger.info('Creating initial monmap...')
    tmp_monmap = write_tmp('', 0, 0)
    out = CephContainer(
        image=args.image,
        entrypoint='/usr/bin/monmaptool',
        args=['--create',
              '--clobber',
              '--fsid', fsid,
              '--addv', mon_id, addr_arg,
              '/tmp/monmap'
        ],
        volume_mounts={
            tmp_monmap.name: '/tmp/monmap:z',
        },
    ).run()

    # pass monmap file to ceph user for use by ceph-mon --mkfs below
    os.fchown(tmp_monmap.fileno(), uid, gid)

    # create mon
    logger.info('Creating mon...')
    create_daemon_dirs(fsid, 'mon', mon_id, uid, gid)
    mon_dir = get_data_dir(fsid, 'mon', mon_id)
    log_dir = get_log_dir(fsid)
    out = CephContainer(
        image=args.image,
        entrypoint='/usr/bin/ceph-mon',
        args=['--mkfs',
              '-i', mon_id,
              '--fsid', fsid,
              '-c', '/dev/null',
              '--monmap', '/tmp/monmap',
              '--keyring', '/tmp/keyring',
        ] + get_daemon_args(fsid, 'mon', mon_id),
        volume_mounts={
            log_dir: '/var/log/ceph:z',
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
            tmp_bootstrap_keyring.name: '/tmp/keyring:z',
            tmp_monmap.name: '/tmp/monmap:z',
        },
    ).run()

    # drop the bootstrap config into the mon data dir, owned by the ceph user
    with open(mon_dir + '/config', 'w') as f:
        os.fchown(f.fileno(), uid, gid)
        os.fchmod(f.fileno(), 0o600)
        f.write(config)

    make_var_run(fsid, uid, gid)
    mon_c = get_container(fsid, 'mon', mon_id)
    deploy_daemon(fsid, 'mon', mon_id, mon_c, uid, gid,
                  config=None, keyring=None)

    # client.admin key + config to issue various CLI commands
    tmp_admin_keyring = write_tmp('[client.admin]\n'
                                  '\tkey = ' + admin_key + '\n',
                                  uid, gid)
    tmp_config = write_tmp(config, uid, gid)

    # a CLI helper to reduce our typing
    def cli(cmd, extra_mounts={}, timeout=DEFAULT_TIMEOUT):
        # type: (List[str], Dict[str, str], Optional[int]) -> str
        # NOTE: the mutable default {} is never mutated here, so it is safe.
        mounts = {
            log_dir: '/var/log/ceph:z',
            tmp_admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
            tmp_config.name: '/etc/ceph/ceph.conf:z',
        }
        for k, v in extra_mounts.items():
            mounts[k] = v
        timeout = timeout or args.timeout
        return CephContainer(
            image=args.image,
            entrypoint='/usr/bin/ceph',
            args=cmd,
            volume_mounts=mounts,
        ).run(timeout=timeout)

    logger.info('Waiting for mon to start...')
    c = CephContainer(
        image=args.image,
        entrypoint='/usr/bin/ceph',
        args=[
            'status'],
        volume_mounts={
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
            tmp_admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
            tmp_config.name: '/etc/ceph/ceph.conf:z',
        },
    )

    # wait for the service to become available
    def is_mon_available():
        # type: () -> bool
        timeout=args.timeout if args.timeout else 60  # seconds
        out, err, ret = call(c.run_cmd(),
                             desc=c.entrypoint,
                             timeout=timeout)
        return ret == 0
    is_available('mon', is_mon_available)

    # assimilate and minimize config
    if not args.no_minimize_config:
        logger.info('Assimilating anything we can from ceph.conf...')
        cli([
            'config', 'assimilate-conf',
            '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
        ], {
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
        })
        logger.info('Generating new minimal ceph.conf...')
        cli([
            'config', 'generate-minimal-conf',
            '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
        ], {
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
        })
        # re-read our minimized config
        with open(mon_dir + '/config', 'r') as f:
            config = f.read()
        logger.info('Restarting the monitor...')
        call_throws([
            'systemctl',
            'restart',
            get_unit_name(fsid, 'mon', mon_id)
        ])

    if mon_network:
        logger.info('Setting mon public_network...')
        cli(['config', 'set', 'mon', 'public_network', mon_network])

    if ipv6:
        logger.info('Enabling IPv6 (ms_bind_ipv6)')
        cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])

    # create mgr
    logger.info('Creating mgr...')
    mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key)
    mgr_c = get_container(fsid, 'mgr', mgr_id)
    # Note:the default port used by the Prometheus node exporter is opened in fw
    deploy_daemon(fsid, 'mgr', mgr_id, mgr_c, uid, gid,
                  config=config, keyring=mgr_keyring, ports=[9283])

    # output files
    with open(args.output_keyring, 'w') as f:
        os.fchmod(f.fileno(), 0o600)
        f.write('[client.admin]\n'
                '\tkey = ' + admin_key + '\n')
    logger.info('Wrote keyring to %s' % args.output_keyring)

    with open(args.output_config, 'w') as f:
        f.write(config)
    logger.info('Wrote config to %s' % args.output_config)

    # wait for the service to become available
    logger.info('Waiting for mgr to start...')
    def is_mgr_available():
        # type: () -> bool
        timeout=args.timeout if args.timeout else 60  # seconds
        try:
            out = cli(['status', '-f', 'json-pretty'], timeout=timeout)
            j = json.loads(out)
            return j.get('mgrmap', {}).get('available', False)
        except Exception as e:
            logger.debug('status failed: %s' % e)
            return False
    is_available('mgr', is_mgr_available)

    # wait for mgr to restart (after enabling a module)
    def wait_for_mgr_restart():
        # first get latest mgrmap epoch from the mon
        out = cli(['mgr', 'dump'])
        j = json.loads(out)
        epoch = j['epoch']
        # wait for mgr to have it
        logger.info('Waiting for the mgr to restart...')
        def mgr_has_latest_epoch():
            # type: () -> bool
            try:
                out = cli(['tell', 'mgr', 'mgr_status'])
                j = json.loads(out)
                return j['mgrmap_epoch'] >= epoch
            except Exception as e:
                logger.debug('tell mgr mgr_status failed: %s' % e)
                return False
        is_available('Mgr epoch %d' % epoch, mgr_has_latest_epoch)

    # ssh
    if not args.skip_ssh:
        cli(['config-key', 'set', 'mgr/cephadm/ssh_user', args.ssh_user])

        logger.info('Enabling cephadm module...')
        cli(['mgr', 'module', 'enable', 'cephadm'])
        wait_for_mgr_restart()

        logger.info('Setting orchestrator backend to cephadm...')
        cli(['orch', 'set', 'backend', 'cephadm'])

        if args.ssh_config:
            logger.info('Using provided ssh config...')
            mounts = {
                pathify(args.ssh_config.name): '/tmp/cephadm-ssh-config:z',
            }
            cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts=mounts)

        if args.ssh_private_key and args.ssh_public_key:
            logger.info('Using provided ssh keys...')
            mounts = {
                pathify(args.ssh_private_key.name): '/tmp/cephadm-ssh-key:z',
                pathify(args.ssh_public_key.name): '/tmp/cephadm-ssh-key.pub:z'
            }
            cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts)
            cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts=mounts)
        else:
            logger.info('Generating ssh key...')
            cli(['cephadm', 'generate-key'])
        ssh_pub = cli(['cephadm', 'get-pub-key'])

        with open(args.output_pub_ssh_key, 'w') as f:
            f.write(ssh_pub)
        logger.info('Wrote public SSH key to to %s' % args.output_pub_ssh_key)

        logger.info('Adding key to %s@localhost\'s authorized_keys...' % args.ssh_user)
        try:
            s_pwd = pwd.getpwnam(args.ssh_user)
        except KeyError as e:
            raise Error('Cannot find uid/gid for ssh-user: %s' % (args.ssh_user))
        ssh_uid = s_pwd.pw_uid
        ssh_gid = s_pwd.pw_gid
        ssh_dir = os.path.join(s_pwd.pw_dir, '.ssh')

        if not os.path.exists(ssh_dir):
            makedirs(ssh_dir, ssh_uid, ssh_gid, 0o700)

        auth_keys_file = '%s/authorized_keys' % ssh_dir
        add_newline = False

        # only append a separating newline when the file exists, is
        # non-empty, and does not already end with one
        if os.path.exists(auth_keys_file):
            with open(auth_keys_file, 'r') as f:
                f.seek(0, os.SEEK_END)
                if f.tell() > 0:
                    f.seek(f.tell()-1, os.SEEK_SET)  # go to last char
                    if f.read() != '\n':
                        add_newline = True

        with open(auth_keys_file, 'a') as f:
            os.fchown(f.fileno(), ssh_uid, ssh_gid)  # just in case we created it
            os.fchmod(f.fileno(), 0o600)  # just in case we created it
            if add_newline:
                f.write('\n')
            f.write(ssh_pub.strip() + '\n')

        host = get_hostname()
        logger.info('Adding host %s...' % host)
        try:
            cli(['orch', 'host', 'add', host])
        except RuntimeError as e:
            raise Error('Failed to add host <%s>: %s' % (host, e))

        if not args.orphan_initial_daemons:
            for t in ['mon', 'mgr', 'crash']:
                logger.info('Deploying %s service with default placement...' % t)
                cli(['orch', 'apply', t])

        if not args.skip_monitoring_stack:
            logger.info('Enabling mgr prometheus module...')
            cli(['mgr', 'module', 'enable', 'prometheus'])
            for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
                logger.info('Deploying %s service with default placement...' % t)
                cli(['orch', 'apply', t])

    # persist registry credentials so the mgr can pull from the same registry
    if args.registry_url and args.registry_username and args.registry_password:
        cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_url', args.registry_url, '--force'])
        cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_username', args.registry_username, '--force'])
        cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_password', args.registry_password, '--force'])

    # NOTE(review): container_init is presumably a module-level flag derived
    # from CONTAINER_INIT / CLI args — confirm where it is assigned.
    cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(container_init), '--force'])

    if not args.skip_dashboard:
        # Configure SSL port (cephadm only allows to configure dashboard SSL port)
        # if the user does not want to use SSL he can change this setting once the cluster is up
        cli(["config", "set", "mgr", "mgr/dashboard/ssl_server_port" , str(args.ssl_dashboard_port)])

        # configuring dashboard parameters
        logger.info('Enabling the dashboard module...')
        cli(['mgr', 'module', 'enable', 'dashboard'])
        wait_for_mgr_restart()

        # dashboard crt and key
        if args.dashboard_key and args.dashboard_crt:
            logger.info('Using provided dashboard certificate...')
            mounts = {
                pathify(args.dashboard_crt.name): '/tmp/dashboard.crt:z',
                pathify(args.dashboard_key.name): '/tmp/dashboard.key:z'
            }
            cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts)
            cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts)
        else:
            logger.info('Generating a dashboard self-signed certificate...')
            cli(['dashboard', 'create-self-signed-cert'])

        logger.info('Creating initial admin user...')
        password = args.initial_dashboard_password or generate_password()
        # pass the password via a mounted tmp file rather than the command line
        tmp_password_file = write_tmp(password, uid, gid)
        cmd = ['dashboard', 'ac-user-create', args.initial_dashboard_user, '-i', '/tmp/dashboard.pw', 'administrator', '--force-password']
        if not args.dashboard_password_noupdate:
            cmd.append('--pwd-update-required')
        cli(cmd, extra_mounts={pathify(tmp_password_file.name): '/tmp/dashboard.pw:z'})
        logger.info('Fetching dashboard port number...')
        out = cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
        port = int(out)

        # Open dashboard port
        fw = Firewalld()
        fw.open_ports([port])
        fw.apply_rules()

        logger.info('Ceph Dashboard is now available at:\n\n'
                    '\t     URL: https://%s:%s/\n'
                    '\t    User: %s\n'
                    '\tPassword: %s\n' % (
                        get_fqdn(), port,
                        args.initial_dashboard_user,
                        password))

    if args.apply_spec:
        logger.info('Applying %s to cluster' % args.apply_spec)

        # NOTE(review): 'host' is only bound inside the skip_ssh block above;
        # --apply-spec combined with --skip-ssh looks like it would raise
        # NameError here — confirm intended behavior.
        with open(args.apply_spec) as f:
            for line in f:
                if 'hostname:' in line:
                    line = line.replace('\n', '')
                    split = line.split(': ')
                    if split[1] != host:
                        logger.info('Adding ssh key to %s' % split[1])

                        ssh_key = '/etc/ceph/ceph.pub'
                        if args.ssh_public_key:
                            ssh_key = args.ssh_public_key.name
                        out, err, code = call_throws(['sudo', '-u', args.ssh_user, 'ssh-copy-id', '-f', '-i', ssh_key, '-o StrictHostKeyChecking=no', '%s@%s' % (args.ssh_user, split[1])])

        mounts = {}
        mounts[pathify(args.apply_spec)] = '/tmp/spec.yml:z'

        out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
        logger.info(out)

    logger.info('You can access the Ceph CLI with:\n\n'
                '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
                    sys.argv[0],
                    fsid,
                    args.output_config,
                    args.output_keyring))
    logger.info('Please consider enabling telemetry to help improve Ceph:\n\n'
                '\tceph telemetry on\n\n'
                'For more information see:\n\n'
                '\thttps://docs.ceph.com/docs/master/mgr/telemetry/\n')
    logger.info('Bootstrap complete.')
    return 0
3382
3383##################################
3384
f6b5b4d7
TL
def command_registry_login():
    """Log in to a custom container registry.

    Credentials come either from --registry-json (a file with url/username/
    password keys) or from the individual --registry-* flags; anything else
    is an error.
    """
    if args.registry_json:
        logger.info("Pulling custom registry login info from %s." % args.registry_json)
        login_info = get_parm(args.registry_json)
        complete = login_info.get('url') and login_info.get('username') and login_info.get('password')
        if not complete:
            raise Error("json provided for custom registry login did not include all necessary fields. "
                    "Please setup json file as\n"
                    "{\n"
                      " \"url\": \"REGISTRY_URL\",\n"
                      " \"username\": \"REGISTRY_USERNAME\",\n"
                      " \"password\": \"REGISTRY_PASSWORD\"\n"
                    "}\n")
        # remember the values on args so later bootstrap steps can reuse them
        args.registry_url = login_info.get('url')
        args.registry_username = login_info.get('username')
        args.registry_password = login_info.get('password')
        registry_login(args.registry_url, args.registry_username, args.registry_password)
    elif args.registry_url and args.registry_username and args.registry_password:
        registry_login(args.registry_url, args.registry_username, args.registry_password)
    else:
        raise Error("Invalid custom registry arguments received. To login to a custom registry include "
                    "--registry-url, --registry-username and --registry-password "
                    "options or --registry-json option")
    return 0
3409
def registry_login(url, username, password):
    """Run '<engine> login' against the given registry.

    :param url: registry URL/host to log in to
    :param username: registry account name
    :param password: registry account password
    :raises Error: when the login command fails for any reason
    """
    logger.info("Logging into custom registry.")
    try:
        out, _, _ = call_throws([container_path, 'login',
                                 '-u', username,
                                 '-p', password,
                                 url])
    # Fix: was a bare 'except:' which also swallows SystemExit and
    # KeyboardInterrupt; catch Exception instead.
    except Exception:
        # Fix: report the values this function was actually called with
        # rather than the args.registry_* globals, which may differ.
        raise Error("Failed to login to custom registry @ %s as %s with given password" % (url, username))
3418 raise Error("Failed to login to custom registry @ %s as %s with given password" % (args.registry_url, args.registry_username))
3419
3420##################################
3421
3422
9f95a23c
TL
def extract_uid_gid_monitoring(daemon_type):
    # type: (str) -> Tuple[int, int]
    """Return the (uid, gid) a monitoring daemon runs as inside its image.

    node-exporter uses the fixed 'nobody' ids; the other daemons are probed
    by inspecting well-known paths in their container images.
    """
    if daemon_type == 'node-exporter':
        return 65534, 65534

    # path(s) whose ownership identifies the in-container user
    probe_paths = {
        'prometheus': '/etc/prometheus',
        'grafana': '/var/lib/grafana',
        'alertmanager': ['/etc/alertmanager', '/etc/prometheus'],
    }
    if daemon_type not in probe_paths:
        raise Error("{} not implemented yet".format(daemon_type))
    uid, gid = extract_uid_gid(file_path=probe_paths[daemon_type])
    return uid, gid
3437
3438
@default_image
def command_deploy():
    # type: () -> None
    """Deploy (or reconfigure/redeploy) a single daemon on this host.

    Dispatches on the daemon type parsed from --name ('<type>.<id>'):
    core ceph daemons, monitoring stack daemons, NFS ganesha, iSCSI and
    custom containers each have their own credential/port requirements.
    """
    daemon_type, daemon_id = args.name.split('.', 1)

    # serialize all cephadm operations on this fsid
    l = FileLock(args.fsid)
    l.acquire()

    if daemon_type not in get_supported_daemons():
        raise Error('daemon type %s not recognized' % daemon_type)

    # a daemon counts as a redeploy when its unit or container already runs
    redeploy = False
    unit_name = get_unit_name(args.fsid, daemon_type, daemon_id)
    container_name = 'ceph-%s-%s.%s' % (args.fsid, daemon_type, daemon_id)
    (_, state, _) = check_unit(unit_name)
    if state == 'running' or is_container_running(container_name):
        redeploy = True

    if args.reconfig:
        logger.info('%s daemon %s ...' % ('Reconfig', args.name))
    elif redeploy:
        logger.info('%s daemon %s ...' % ('Redeploy', args.name))
    else:
        logger.info('%s daemon %s ...' % ('Deploy', args.name))

    # Get and check ports explicitly required to be opened
    daemon_ports = []  # type: List[int]
    if args.tcp_ports:
        daemon_ports = list(map(int, args.tcp_ports.split()))

    if daemon_type in Ceph.daemons:
        config, keyring = get_config_and_keyring()
        uid, gid = extract_uid_gid()
        make_var_run(args.fsid, uid, gid)

        c = get_container(args.fsid, daemon_type, daemon_id,
                          ptrace=args.allow_ptrace)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      osd_fsid=args.osd_fsid,
                      reconfig=args.reconfig,
                      ports=daemon_ports)

    elif daemon_type in Monitoring.components:
        # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
        # Default Checks
        # only open the default ports on a fresh deploy, not reconfig/redeploy
        if not args.reconfig and not redeploy:
            daemon_ports.extend(Monitoring.port_map[daemon_type])

        # make sure provided config-json is sufficient
        config = get_parm(args.config_json)  # type: ignore
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())
        required_args = Monitoring.components[daemon_type].get('config-json-args', list())
        if required_files:
            if not config or not all(c in config.get('files', {}).keys() for c in required_files):  # type: ignore
                raise Error("{} deployment requires config-json which must "
                            "contain file content for {}".format(daemon_type.capitalize(), ', '.join(required_files)))
        if required_args:
            if not config or not all(c in config.keys() for c in required_args):  # type: ignore
                raise Error("{} deployment requires config-json which must "
                            "contain arg for {}".format(daemon_type.capitalize(), ', '.join(required_args)))

        uid, gid = extract_uid_gid_monitoring(daemon_type)
        c = get_container(args.fsid, daemon_type, daemon_id)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=args.reconfig,
                      ports=daemon_ports)

    elif daemon_type == NFSGanesha.daemon_type:
        if not args.reconfig and not redeploy:
            daemon_ports.extend(NFSGanesha.port_map.values())

        config, keyring = get_config_and_keyring()
        # TODO: extract ganesha uid/gid (997, 994) ?
        uid, gid = extract_uid_gid()
        c = get_container(args.fsid, daemon_type, daemon_id)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=args.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CephIscsi.daemon_type:
        config, keyring = get_config_and_keyring()
        uid, gid = extract_uid_gid()
        c = get_container(args.fsid, daemon_type, daemon_id)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=args.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CustomContainer.daemon_type:
        # custom containers carry their own uid/gid/ports/privilege settings
        cc = CustomContainer.init(args.fsid, daemon_id)
        if not args.reconfig and not redeploy:
            daemon_ports.extend(cc.ports)
        c = get_container(args.fsid, daemon_type, daemon_id,
                          privileged=cc.privileged,
                          ptrace=args.allow_ptrace)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c,
                      uid=cc.uid, gid=cc.gid, config=None,
                      keyring=None, reconfig=args.reconfig,
                      ports=daemon_ports)

    else:
        raise Error('daemon type {} not implemented in command_deploy function'
                    .format(daemon_type))
9f95a23c
TL
3544
3545##################################
3546
f6b5b4d7 3547
9f95a23c
TL
@infer_image
def command_run():
    # type: () -> int
    """Run a daemon's container in the foreground; return its exit code."""
    daemon_type, daemon_id = args.name.split('.', 1)
    ctr = get_container(args.fsid, daemon_type, daemon_id)
    return call_timeout(ctr.run_cmd(), args.timeout)
3555
3556##################################
3557
f6b5b4d7 3558
@infer_fsid
@infer_config
@infer_image
def command_shell():
    # type: () -> int
    """Open an interactive shell (or run a command) in a ceph container.

    Mount selection mimics the named daemon ('<type>' or '<type>.<id>'),
    defaulting to 'osd' which gets the broadest set of mounts. Returns the
    container's exit code.
    """
    if args.fsid:
        make_log_dir(args.fsid)
    if args.name:
        if '.' in args.name:
            (daemon_type, daemon_id) = args.name.split('.', 1)
        else:
            daemon_type = args.name
            daemon_id = None
    else:
        daemon_type = 'osd'  # get the most mounts
        daemon_id = None

    if daemon_id and not args.fsid:
        raise Error('must pass --fsid to specify cluster')

    # use /etc/ceph files by default, if present. we do this instead of
    # making these defaults in the arg parser because we don't want an error
    # if they don't exist.
    if not args.keyring and os.path.exists(SHELL_DEFAULT_KEYRING):
        args.keyring = SHELL_DEFAULT_KEYRING

    container_args = []  # type: List[str]
    mounts = get_container_mounts(args.fsid, daemon_type, daemon_id,
                                  no_config=True if args.config else False)
    binds = get_container_binds(args.fsid, daemon_type, daemon_id)
    if args.config:
        mounts[pathify(args.config)] = '/etc/ceph/ceph.conf:z'
    if args.keyring:
        mounts[pathify(args.keyring)] = '/etc/ceph/ceph.keyring:z'
    if args.mount:
        # each --mount is 'src[:dst[:z]]'; bare sources land under /mnt
        for _mount in args.mount:
            split_src_dst = _mount.split(':')
            mount = pathify(split_src_dst[0])
            filename = os.path.basename(split_src_dst[0])
            if len(split_src_dst) > 1:
                dst = split_src_dst[1] + ':z' if len(split_src_dst) == 3 else split_src_dst[1]
                mounts[mount] = dst
            else:
                mounts[mount] = '/mnt/{}:z'.format(filename)
    if args.command:
        command = args.command
    else:
        # interactive shell: allocate a tty and set a recognizable prompt
        command = ['bash']
        container_args += [
            '-it',
            '-e', 'LANG=C',
            '-e', "PS1=%s" % CUSTOM_PS1,
        ]
    if args.fsid:
        # persist a home dir per cluster so shell history etc. survives
        home = os.path.join(args.data_dir, args.fsid, 'home')
        if not os.path.exists(home):
            logger.debug('Creating root home at %s' % home)
            makedirs(home, 0, 0, 0o660)
        if os.path.exists('/etc/skel'):
            for f in os.listdir('/etc/skel'):
                if f.startswith('.bash'):
                    shutil.copyfile(os.path.join('/etc/skel', f),
                                    os.path.join(home, f))
        mounts[home] = '/root'

    c = CephContainer(
        image=args.image,
        entrypoint='doesnotmatter',
        args=[],
        container_args=container_args,
        volume_mounts=mounts,
        bind_mounts=binds,
        envs=args.env,
        privileged=True)
    command = c.shell_cmd(command)

    return call_timeout(command, args.timeout)
3636
3637##################################
3638
f6b5b4d7 3639
9f95a23c
TL
@infer_fsid
def command_enter():
    # type: () -> int
    """Exec a command (default: interactive 'sh') inside a running daemon's
    container; return the exec'd process exit code."""
    if not args.fsid:
        raise Error('must pass --fsid to specify cluster')
    daemon_type, daemon_id = args.name.split('.', 1)
    container_args = []  # type: List[str]
    command = args.command
    if not command:
        # no explicit command: attach an interactive shell with a prompt
        command = ['sh']
        container_args.extend([
            '-it',
            '-e', 'LANG=C',
            '-e', "PS1=%s" % CUSTOM_PS1,
        ])
    c = CephContainer(
        image=args.image,
        entrypoint='doesnotmatter',
        container_args=container_args,
        cname='ceph-%s-%s.%s' % (args.fsid, daemon_type, daemon_id),
    )
    return call_timeout(c.exec_cmd(command), args.timeout)
3664
3665##################################
3666
f6b5b4d7 3667
9f95a23c
TL
@infer_fsid
@infer_image
def command_ceph_volume():
    # type: () -> None
    """Run ceph-volume inside a privileged container with OSD mounts.

    Config and keyring (when available) are passed in via temp files mounted
    at the paths ceph-volume expects; stdout is echoed on success.
    """
    if args.fsid:
        make_log_dir(args.fsid)

        # serialize against other cephadm operations on this cluster
        l = FileLock(args.fsid)
        l.acquire()

    (uid, gid) = (0, 0)  # ceph-volume runs as root
    mounts = get_container_mounts(args.fsid, 'osd', None)

    tmp_config = None
    tmp_keyring = None

    (config, keyring) = get_config_and_keyring()

    if config:
        # tmp config file
        tmp_config = write_tmp(config, uid, gid)

        # mount the config file read by ceph-volume's internal ceph calls
        mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'

    if keyring:
        # tmp keyring file
        tmp_keyring = write_tmp(keyring, uid, gid)

        # ceph-volume expects the bootstrap-osd keyring at this path
        mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'

    c = CephContainer(
        image=args.image,
        entrypoint='/usr/sbin/ceph-volume',
        envs=args.env,
        args=args.command,
        privileged=True,
        volume_mounts=mounts,
    )
    out, err, code = call_throws(c.run_cmd(), verbosity=CallVerbosity.VERBOSE)
    if not code:
        print(out)
3707
3708##################################
3709
f6b5b4d7 3710
9f95a23c
TL
@infer_fsid
def command_unit():
    # type: () -> None
    """Run a systemctl subcommand (start/stop/...) on a daemon's unit."""
    if not args.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(args.fsid, args.name)

    cmd = ['systemctl', args.command, unit_name]
    call_throws(cmd,
                verbosity=CallVerbosity.VERBOSE,
                desc='')
9f95a23c
TL
3726
3727##################################
3728
f6b5b4d7 3729
9f95a23c
TL
@infer_fsid
def command_logs():
    # type: () -> None
    """Stream journalctl output for a daemon's systemd unit."""
    if not args.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(args.fsid, args.name)

    cmd = [find_program('journalctl'), '-u', unit_name]
    if args.command:
        cmd.extend(args.command)

    # call this directly, without our wrapper, so that we get an unmolested
    # stdout with logger prefixing.
    logger.debug("Running command: %s" % ' '.join(cmd))
    subprocess.call(cmd)  # type: ignore
3747
3748##################################
3749
f6b5b4d7 3750
9f95a23c
TL
def list_networks():
    # type: () -> Dict[str,List[str]]
    """Return {CIDR network: [local IPs in it]} for IPv4 and IPv6.

    18.04's iproute2 (4.15.0) doesn't support 'ip -j', so the helpers parse
    the plain-text 'ip' output with regexes instead of JSON.
    """
    networks = _list_ipv4_networks()
    networks.update(_list_ipv6_networks())
    return networks
3763
3764
def _list_ipv4_networks():
    """List IPv4 networks by parsing `ip route ls` output."""
    route_output = call_throws([find_executable('ip'), 'route', 'ls'])[0]
    return _parse_ipv4_route(route_output)
3768
9f95a23c 3769
def _parse_ipv4_route(out):
    """Parse `ip route ls` output into {network: [local source addresses]}.

    Only directly-connected ('scope link') routes carry a src address and
    are considered; everything else is ignored.
    """
    nets = {}  # type: Dict[str,List[str]]
    pattern = re.compile(r'^(\S+) (.*)scope link (.*)src (\S+)')
    for line in out.splitlines():
        found = pattern.findall(line)
        if found:
            net, addr = found[0][0], found[0][3]
            nets.setdefault(net, []).append(addr)
    return nets
3783
f6b5b4d7
TL
3784
def _list_ipv6_networks():
    """List IPv6 networks by combining `ip -6 route ls` and `ip -6 addr ls`."""
    route_output = call_throws([find_executable('ip'), '-6', 'route', 'ls'])[0]
    addr_output = call_throws([find_executable('ip'), '-6', 'addr', 'ls'])[0]
    return _parse_ipv6_route(route_output, addr_output)
3789
3790
def _parse_ipv6_route(routes, ips):
    """Parse `ip -6 route ls` plus `ip -6 addr ls` output.

    The route listing supplies the known networks (the 'default' route is
    skipped); the address listing supplies local addresses, each of which
    is attached to the first network that contains it.

    Returns {network CIDR: [addresses]}.
    """
    r = {}  # type: Dict[str,List[str]]
    route_p = re.compile(r'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$')
    ip_p = re.compile(r'^\s+inet6 (\S+)/(.*)scope (.*)$')
    for line in routes.splitlines():
        m = route_p.findall(line)
        if not m or m[0][0].lower() == 'default':
            continue
        net = m[0][0]
        if net not in r:
            r[net] = []

    for line in ips.splitlines():
        m = ip_p.findall(line)
        if not m:
            continue
        ip = m[0][0]
        # find the network it belongs to.
        # FIX: the original wrapped ip/n in the Python 2 builtin unicode(),
        # which raises NameError under python3 (this file's interpreter);
        # str is already unicode in python3, so pass the values directly.
        net = [n for n in r.keys()
               if ipaddress.ip_address(ip) in ipaddress.ip_network(n)]
        if net:
            r[net[0]].append(ip)

    return r
3815
3816
9f95a23c
TL
def command_list_networks():
    # type: () -> None
    """Print this host's networks and addresses as indented JSON."""
    print(json.dumps(list_networks(), indent=4))
3821
3822##################################
3823
f6b5b4d7 3824
9f95a23c
TL
def command_ls():
    # type: () -> None
    """Print every daemon on this host (legacy and cephadm-managed) as JSON."""
    daemons = list_daemons(detail=not args.no_detail,
                           legacy_dir=args.legacy_dir)
    print(json.dumps(daemons, indent=4))
3831
f6b5b4d7 3832
9f95a23c
TL
def list_daemons(detail=True, legacy_dir=None):
    # type: (bool, Optional[str]) -> List[Dict[str, str]]
    """Enumerate all ceph daemons found under the data dir on this host.

    Recognizes both 'legacy' daemons (<data_dir>/<type>/<cluster>-<id>)
    and cephadm-managed ones (<data_dir>/<fsid>/<type>.<id>), returning
    one dict per daemon.  With detail=True, systemd unit state is queried
    and the daemon's container is inspected for image/version/start-time
    information.  legacy_dir, when given, is prepended to host paths
    (used during adoption and in tests).
    """
    host_version = None
    ls = []

    data_dir = args.data_dir
    if legacy_dir is not None:
        data_dir = os.path.abspath(legacy_dir + data_dir)

    # keep track of ceph versions we see, by container image id, so we only
    # have to exec `ceph -v` once per distinct image
    seen_versions = {}  # type: Dict[str, Optional[str]]

    # /var/lib/ceph
    if os.path.exists(data_dir):
        for i in os.listdir(data_dir):
            if i in ['mon', 'osd', 'mds', 'mgr']:
                # legacy layout: <data_dir>/<type>/<cluster>-<id>
                daemon_type = i
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '-' not in j:
                        continue
                    (cluster, daemon_id) = j.split('-', 1)
                    fsid = get_legacy_daemon_fsid(
                        cluster, daemon_type, daemon_id,
                        legacy_dir=legacy_dir)
                    legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
                    # NOTE: loop variable i is deliberately rebound to the
                    # result dict from here on
                    i = {
                        'style': 'legacy',
                        'name': '%s.%s' % (daemon_type, daemon_id),
                        'fsid': fsid if fsid is not None else 'unknown',
                        'systemd_unit': legacy_unit_name,
                    }
                    if detail:
                        (i['enabled'], i['state'], _) = check_unit(legacy_unit_name)
                        # legacy daemons run the host's ceph; probe it once
                        if not host_version:
                            try:
                                out, err, code = call(['ceph', '-v'])
                                if not code and out.startswith('ceph version '):
                                    host_version = out.split(' ')[2]
                            except Exception:
                                pass
                        i['host_version'] = host_version
                    ls.append(i)
            elif is_fsid(i):
                # cephadm layout: <data_dir>/<fsid>/<type>.<id>
                fsid = str(i)  # convince mypy that fsid is a str here
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '.' in j:
                        name = j
                        (daemon_type, daemon_id) = j.split('.', 1)
                        unit_name = get_unit_name(fsid,
                                                  daemon_type,
                                                  daemon_id)
                    else:
                        continue
                    # NOTE: loop variable i is deliberately rebound to the
                    # result dict from here on
                    i = {
                        'style': 'cephadm:v1',
                        'name': name,
                        'fsid': fsid,
                        'systemd_unit': unit_name,
                    }
                    if detail:
                        # get container id
                        (i['enabled'], i['state'], _) = check_unit(unit_name)
                        container_id = None
                        image_name = None
                        image_id = None
                        version = None
                        start_stamp = None

                        # old podman reports only .ImageID in inspect output
                        if 'podman' in container_path and get_podman_version() < (1, 6, 2):
                            image_field = '.ImageID'
                        else:
                            image_field = '.Image'

                        out, err, code = call(
                            [
                                container_path, 'inspect',
                                '--format', '{{.Id}},{{.Config.Image}},{{%s}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}' % image_field,
                                'ceph-%s-%s' % (fsid, j)
                            ],
                            verbosity=CallVerbosity.DEBUG)
                        if not code:
                            # container exists (running or stopped)
                            (container_id, image_name, image_id, start,
                             version) = out.strip().split(',')
                            image_id = normalize_container_id(image_id)
                            daemon_type = name.split('.', 1)[0]
                            start_stamp = try_convert_datetime(start)
                            # fall back to a previously-seen version for
                            # this image if the label is absent/garbled
                            if not version or '.' not in version:
                                version = seen_versions.get(image_id, None)
                            if daemon_type == NFSGanesha.daemon_type:
                                version = NFSGanesha.get_version(container_id)
                            if daemon_type == CephIscsi.daemon_type:
                                version = CephIscsi.get_version(container_id)
                            elif not version:
                                # exec a daemon-type-specific command in the
                                # container to discover the version
                                if daemon_type in Ceph.daemons:
                                    out, err, code = call(
                                        [container_path, 'exec', container_id,
                                         'ceph', '-v'])
                                    if not code and \
                                       out.startswith('ceph version '):
                                        version = out.split(' ')[2]
                                        seen_versions[image_id] = version
                                elif daemon_type == 'grafana':
                                    out, err, code = call(
                                        [container_path, 'exec', container_id,
                                         'grafana-server', '-v'])
                                    if not code and \
                                       out.startswith('Version '):
                                        version = out.split(' ')[1]
                                        seen_versions[image_id] = version
                                elif daemon_type in ['prometheus',
                                                     'alertmanager',
                                                     'node-exporter']:
                                    version = Monitoring.get_version(container_path, container_id, daemon_type)
                                    seen_versions[image_id] = version
                                elif daemon_type == CustomContainer.daemon_type:
                                    # Because a custom container can contain
                                    # everything, we do not know which command
                                    # to execute to get the version.
                                    pass
                                else:
                                    logger.warning('version for unknown daemon type %s' % daemon_type)
                        else:
                            # no container: report the deployed image name
                            # recorded at deploy time, if any
                            vfile = os.path.join(data_dir, fsid, j, 'unit.image')  # type: ignore
                            try:
                                with open(vfile, 'r') as f:
                                    image_name = f.read().strip() or None
                            except IOError:
                                pass
                        i['container_id'] = container_id
                        i['container_image_name'] = image_name
                        i['container_image_id'] = image_id
                        i['version'] = version
                        i['started'] = start_stamp
                        i['created'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.created')
                        )
                        i['deployed'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.image'))
                        i['configured'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.configured'))

                    ls.append(i)

    return ls
3977
3978
e306af50
TL
def get_daemon_description(fsid, name, detail=False, legacy_dir=None):
    # type: (str, str, bool, Optional[str]) -> Dict[str, str]
    """Return the `cephadm ls` entry for fsid+name, or raise Error."""
    for entry in list_daemons(detail=detail, legacy_dir=legacy_dir):
        if entry['fsid'] == fsid and entry['name'] == name:
            return entry
    raise Error('Daemon not found: {}. See `cephadm ls`'.format(name))
3989
3990
9f95a23c
TL
3991##################################
3992
@default_image
def command_adopt():
    # type: () -> None
    """Adopt a legacy (pre-cephadm) daemon into cephadm management."""
    if not args.skip_pull:
        _pull_image(args.image)

    (daemon_type, daemon_id) = args.name.split('.', 1)

    # only legacy-style daemons can be adopted
    if args.style != 'legacy':
        raise Error('adoption of style %s not implemented' % args.style)

    # determine the cluster fsid and serialize against other cephadm ops
    fsid = get_legacy_daemon_fsid(args.cluster,
                                  daemon_type,
                                  daemon_id,
                                  legacy_dir=args.legacy_dir)
    if not fsid:
        raise Error('could not detect legacy fsid; set fsid in ceph.conf')
    lock = FileLock(fsid)
    lock.acquire()

    # dispatch to the daemon-type-specific adoption routine
    if daemon_type in Ceph.daemons:
        command_adopt_ceph(daemon_type, daemon_id, fsid)
    elif daemon_type == 'prometheus':
        command_adopt_prometheus(daemon_id, fsid)
    elif daemon_type == 'grafana':
        command_adopt_grafana(daemon_id, fsid)
    elif daemon_type == 'node-exporter':
        raise Error('adoption of node-exporter not implemented')
    elif daemon_type == 'alertmanager':
        command_adopt_alertmanager(daemon_id, fsid)
    else:
        raise Error('daemon type %s not recognized' % daemon_type)
4029
4030
1911f103
TL
class AdoptOsd(object):
    """Probe a legacy OSD being adopted for its fsid and objectstore type.

    Three probes are tried by the caller in order: an online OSD (data dir
    mounted and readable), an offline ceph-volume 'lvm' OSD, and an
    offline ceph-volume 'simple' OSD.  Each probe returns
    (osd_fsid, osd_type), either of which may be None if not found.
    """

    def __init__(self, osd_data_dir, osd_id):
        # type: (str, str) -> None
        self.osd_data_dir = osd_data_dir  # legacy data dir, e.g. /var/lib/ceph/osd/<cluster>-<id>
        self.osd_id = osd_id

    def check_online_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Read fsid and type from the (mounted) data dir of a running OSD."""
        osd_fsid, osd_type = None, None

        path = os.path.join(self.osd_data_dir, 'fsid')
        try:
            with open(path, 'r') as f:
                osd_fsid = f.read().strip()
            logger.info("Found online OSD at %s" % path)
        except IOError:
            logger.info('Unable to read OSD fsid from %s' % path)
        if os.path.exists(os.path.join(self.osd_data_dir, 'type')):
            with open(os.path.join(self.osd_data_dir, 'type')) as f:
                osd_type = f.read().strip()
        else:
            logger.info('"type" file missing for OSD data dir')

        return osd_fsid, osd_type

    def check_offline_lvm_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Look for the OSD in `ceph-volume lvm list` (run in a container)."""
        osd_fsid, osd_type = None, None

        c = CephContainer(
            image=args.image,
            entrypoint='/usr/sbin/ceph-volume',
            args=['lvm', 'list', '--format=json'],
            privileged=True
        )
        out, err, code = call_throws(c.run_cmd())
        if not code:
            try:
                js = json.loads(out)
                if self.osd_id in js:
                    logger.info("Found offline LVM OSD {}".format(self.osd_id))
                    osd_fsid = js[self.osd_id][0]['tags']['ceph.osd_fsid']
                    # infer the objectstore from the device role tags:
                    # a 'block' device means bluestore, 'data' means filestore
                    for device in js[self.osd_id]:
                        if device['tags']['ceph.type'] == 'block':
                            osd_type = 'bluestore'
                            break
                        if device['tags']['ceph.type'] == 'data':
                            osd_type = 'filestore'
                            break
            except ValueError as e:
                logger.info("Invalid JSON in ceph-volume lvm list: {}".format(e))

        return osd_fsid, osd_type

    def check_offline_simple_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Look for a ceph-volume 'simple' metadata file for this OSD id."""
        osd_fsid, osd_type = None, None

        osd_file = glob("/etc/ceph/osd/{}-[a-f0-9-]*.json".format(self.osd_id))
        # only proceed when exactly one metadata file matches the id
        if len(osd_file) == 1:
            with open(osd_file[0], 'r') as f:
                try:
                    js = json.loads(f.read())
                    logger.info("Found offline simple OSD {}".format(self.osd_id))
                    osd_fsid = js["fsid"]
                    osd_type = js["type"]
                    if osd_type != "filestore":
                        # need this to be mounted for the adopt to work, as it
                        # needs to move files from this directory
                        call_throws(['mount', js["data"]["path"], self.osd_data_dir])
                except ValueError as e:
                    logger.info("Invalid JSON in {}: {}".format(osd_file, e))

        return osd_fsid, osd_type
4108
9f95a23c
TL
4109
def command_adopt_ceph(daemon_type, daemon_id, fsid):
    # type: (str, str, str) -> None
    """Adopt a legacy ceph daemon (mon/mgr/mds/osd/...) into cephadm.

    Stops and disables the legacy systemd unit, moves its data and logs
    into the cephadm layout under /var/lib/ceph/<fsid>/, fixes ownership,
    and deploys new cephadm-managed units (restarting the daemon if it was
    running before, or if --force-start was given).
    """
    (uid, gid) = extract_uid_gid()

    data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
                    (daemon_type, args.cluster, daemon_id))
    data_dir_src = os.path.abspath(args.legacy_dir + data_dir_src)

    if not os.path.exists(data_dir_src):
        # FIX: corrected 'alrady' typo in the error message
        raise Error("{}.{} data directory '{}' does not exist. "
                    "Incorrect ID specified, or daemon already adopted?".format(
                        daemon_type, daemon_id, data_dir_src))

    osd_fsid = None
    if daemon_type == 'osd':
        # try the online, offline-lvm, and offline-simple probes in order
        adopt_osd = AdoptOsd(data_dir_src, daemon_id)
        osd_fsid, osd_type = adopt_osd.check_online_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
        if not osd_fsid:
            raise Error('Unable to find OSD {}'.format(daemon_id))
        logger.info('objectstore_type is %s' % osd_type)
        assert osd_type
        if osd_type == 'filestore':
            raise Error('FileStore is not supported by cephadm')

    # NOTE: implicit assumption here that the units correspond to the
    # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
    # CLUSTER field.
    unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
    (enabled, state, _) = check_unit(unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(['systemctl', 'disable', unit_name])

    # data
    logger.info('Moving data...')
    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)
    move_files(glob(os.path.join(data_dir_src, '*')),
               data_dir_dst,
               uid=uid, gid=gid)
    logger.debug('Remove dir \'%s\'' % (data_dir_src))
    if os.path.ismount(data_dir_src):
        call_throws(['umount', data_dir_src])
    os.rmdir(data_dir_src)

    logger.info('Chowning content...')
    call_throws(['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])

    if daemon_type == 'mon':
        # rename *.ldb -> *.sst, in case they are coming from ubuntu
        store = os.path.join(data_dir_dst, 'store.db')
        num_renamed = 0
        if os.path.exists(store):
            for oldf in os.listdir(store):
                if oldf.endswith('.ldb'):
                    newf = oldf.replace('.ldb', '.sst')
                    oldp = os.path.join(store, oldf)
                    newp = os.path.join(store, newf)
                    logger.debug('Renaming %s -> %s' % (oldp, newp))
                    os.rename(oldp, newp)
                    # FIX: the counter was never incremented, so the summary
                    # log below could never fire
                    num_renamed += 1
        if num_renamed:
            logger.info('Renamed %d leveldb *.ldb files to *.sst',
                        num_renamed)
    if daemon_type == 'osd':
        for n in ['block', 'block.db', 'block.wal']:
            p = os.path.join(data_dir_dst, n)
            if os.path.exists(p):
                logger.info('Chowning %s...' % p)
                os.chown(p, uid, gid)
        # disable the ceph-volume 'simple' mode files on the host
        simple_fn = os.path.join('/etc/ceph/osd',
                                 '%s-%s.json' % (daemon_id, osd_fsid))
        if os.path.exists(simple_fn):
            new_fn = simple_fn + '.adopted-by-cephadm'
            logger.info('Renaming %s -> %s', simple_fn, new_fn)
            os.rename(simple_fn, new_fn)
            logger.info('Disabling host unit ceph-volume@ simple unit...')
            call(['systemctl', 'disable',
                  'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
        else:
            # assume this is an 'lvm' c-v for now, but don't error
            # out if it's not.
            logger.info('Disabling host unit ceph-volume@ lvm unit...')
            call(['systemctl', 'disable',
                  'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])

    # config
    config_src = '/etc/ceph/%s.conf' % (args.cluster)
    config_src = os.path.abspath(args.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'config')
    copy_files([config_src], config_dst, uid=uid, gid=gid)

    # logs
    logger.info('Moving logs...')
    log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
                   (args.cluster, daemon_type, daemon_id))
    log_dir_src = os.path.abspath(args.legacy_dir + log_dir_src)
    log_dir_dst = make_log_dir(fsid, uid=uid, gid=gid)
    move_files(glob(log_dir_src),
               log_dir_dst,
               uid=uid, gid=gid)

    logger.info('Creating new units...')
    make_var_run(fsid, uid, gid)
    c = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True,  # unconditionally enable the new unit
                        start=(state == 'running' or args.force_start),
                        osd_fsid=osd_fsid)
    update_firewalld(daemon_type)
4228
4229
def command_adopt_prometheus(daemon_id, fsid):
    # type: (str, str) -> None
    """Adopt a legacy prometheus: stop it, migrate config and data, redeploy."""
    daemon_type = 'prometheus'
    uid, gid = extract_uid_gid_monitoring(daemon_type)

    _stop_and_disable('prometheus')

    dst_dir = make_data_dir(fsid, daemon_type, daemon_id, uid=uid, gid=gid)

    # config
    conf_src = os.path.abspath(args.legacy_dir + '/etc/prometheus/prometheus.yml')
    conf_dst = os.path.join(dst_dir, 'etc/prometheus')
    makedirs(conf_dst, uid, gid, 0o755)
    copy_files([conf_src], conf_dst, uid=uid, gid=gid)

    # data
    metrics_src = os.path.abspath(args.legacy_dir + '/var/lib/prometheus/metrics/')
    metrics_dst = os.path.join(dst_dir, 'data')
    copy_tree([metrics_src], metrics_dst, uid=uid, gid=gid)

    make_var_run(fsid, uid, gid)
    ctr = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon(fsid, daemon_type, daemon_id, ctr, uid, gid)
    update_firewalld(daemon_type)
4258
f6b5b4d7 4259
9f95a23c
TL
def command_adopt_grafana(daemon_id, fsid):
    # type: (str, str) -> None
    """Adopt a legacy grafana: stop it, migrate config/certs/data, redeploy."""
    daemon_type = 'grafana'
    uid, gid = extract_uid_gid_monitoring(daemon_type)

    _stop_and_disable('grafana-server')

    dst_dir = make_data_dir(fsid, daemon_type, daemon_id, uid=uid, gid=gid)

    # config
    ini_src = os.path.abspath(args.legacy_dir + '/etc/grafana/grafana.ini')
    ini_dst = os.path.join(dst_dir, 'etc/grafana')
    makedirs(ini_dst, uid, gid, 0o755)
    copy_files([ini_src], ini_dst, uid=uid, gid=gid)

    prov_src = os.path.abspath(args.legacy_dir + '/etc/grafana/provisioning/')
    prov_dst = os.path.join(dst_dir, 'etc/grafana')
    copy_tree([prov_src], prov_dst, uid=uid, gid=gid)

    # cert: only migrated when both the cert and its key are present
    cert = '/etc/grafana/grafana.crt'
    key = '/etc/grafana/grafana.key'
    if os.path.exists(cert) and os.path.exists(key):
        makedirs(os.path.join(dst_dir, 'etc/grafana/certs'), uid, gid, 0o755)

        cert_src = os.path.abspath(args.legacy_dir + cert)
        cert_dst = os.path.join(dst_dir, 'etc/grafana/certs/cert_file')
        copy_files([cert_src], cert_dst, uid=uid, gid=gid)

        key_src = os.path.abspath(args.legacy_dir + key)
        key_dst = os.path.join(dst_dir, 'etc/grafana/certs/cert_key')
        copy_files([key_src], key_dst, uid=uid, gid=gid)

        # point [server] cert_file/cert_key at the migrated locations
        _adjust_grafana_ini(os.path.join(ini_dst, 'grafana.ini'))
    else:
        logger.debug("Skipping ssl, missing cert {} or key {}".format(cert, key))

    # data - possible custom dashboards/plugins
    data_src = os.path.abspath(args.legacy_dir + '/var/lib/grafana/')
    data_dst = os.path.join(dst_dir, 'data')
    copy_tree([data_src], data_dst, uid=uid, gid=gid)

    make_var_run(fsid, uid, gid)
    ctr = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon(fsid, daemon_type, daemon_id, ctr, uid, gid)
    update_firewalld(daemon_type)
4312
f6b5b4d7 4313
801d1391
TL
def command_adopt_alertmanager(daemon_id, fsid):
    # type: (str, str) -> None
    """Adopt a legacy alertmanager: stop it, migrate config and data, redeploy."""
    daemon_type = 'alertmanager'
    uid, gid = extract_uid_gid_monitoring(daemon_type)

    _stop_and_disable('prometheus-alertmanager')

    dst_dir = make_data_dir(fsid, daemon_type, daemon_id, uid=uid, gid=gid)

    # config
    conf_src = os.path.abspath(args.legacy_dir + '/etc/prometheus/alertmanager.yml')
    conf_dst = os.path.join(dst_dir, 'etc/alertmanager')
    makedirs(conf_dst, uid, gid, 0o755)
    copy_files([conf_src], conf_dst, uid=uid, gid=gid)

    # data
    data_src = os.path.abspath(args.legacy_dir + '/var/lib/prometheus/alertmanager/')
    data_dst = os.path.join(dst_dir, 'etc/alertmanager/data')
    copy_tree([data_src], data_dst, uid=uid, gid=gid)

    make_var_run(fsid, uid, gid)
    ctr = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon(fsid, daemon_type, daemon_id, ctr, uid, gid)
    update_firewalld(daemon_type)
4342
f6b5b4d7 4343
9f95a23c
TL
def _adjust_grafana_ini(filename):
    # type: (str) -> None
    """Rewrite cert_file/cert_key in the [server] section of a grafana ini.

    A line-based rewrite is used instead of ConfigParser so that comments
    in the file are preserved.  The file is rewritten via a temporary
    sibling and renamed into place.
    """
    tmp_name = "{}.new".format(filename)
    try:
        with open(filename, "r") as src:
            original_lines = src.readlines()
        with open(tmp_name, "w") as dst:
            in_server_section = False
            for line in original_lines:
                if line.startswith('['):
                    # entering a new section; track whether it is [server]
                    in_server_section = line.startswith('[server]')
                if in_server_section:
                    line = re.sub(r'^cert_file.*',
                                  'cert_file = /etc/grafana/certs/cert_file', line)
                    line = re.sub(r'^cert_key.*',
                                  'cert_key = /etc/grafana/certs/cert_key', line)
                dst.write(line)
        os.rename(tmp_name, filename)
    except OSError as err:
        raise Error("Cannot update {}: {}".format(filename, err))
4368
4369
def _stop_and_disable(unit_name):
    # type: (str) -> None
    """Stop a systemd unit if running, and disable it if enabled."""
    (is_enabled, run_state, _) = check_unit(unit_name)
    if run_state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(['systemctl', 'stop', unit_name])
    if is_enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(['systemctl', 'disable', unit_name])
4380
4381
4382##################################
4383
def command_rm_daemon():
    # type: () -> None
    """Remove one daemon from this host (unit + data dir).

    For mon/osd/prometheus the data dir is renamed into
    <data_dir>/<fsid>/removed/ rather than deleted, unless
    --force-delete-data was given.
    """
    lock = FileLock(args.fsid)
    lock.acquire()

    unit_name = get_unit_name_by_daemon_name(args.fsid, args.name)

    (daemon_type, daemon_id) = args.name.split('.', 1)
    if daemon_type in ['mon', 'osd'] and not args.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    for action in ['stop', 'reset-failed', 'disable']:
        call(['systemctl', action, unit_name],
             verbosity=CallVerbosity.DEBUG)

    data_dir = get_data_dir(args.fsid, daemon_type, daemon_id)
    if daemon_type in ['mon', 'osd', 'prometheus'] and \
       not args.force_delete_data:
        # rename it out of the way -- do not delete
        backup_dir = os.path.join(args.data_dir, args.fsid, 'removed')
        if not os.path.exists(backup_dir):
            makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
        dirname = '%s.%s_%s' % (daemon_type, daemon_id,
                                datetime.datetime.utcnow().strftime(DATEFMT))
        os.rename(data_dir,
                  os.path.join(backup_dir, dirname))
    else:
        call_throws(['rm', '-rf', data_dir])
4416
4417##################################
4418
f6b5b4d7 4419
9f95a23c
TL
def command_rm_cluster():
    # type: () -> None
    """Remove all daemons, units, data, logs, and config for args.fsid."""
    if not args.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    lock = FileLock(args.fsid)
    lock.acquire()

    def tear_down_unit(unit):
        # type: (str) -> None
        # stop, clear any failed state, and disable a single systemd unit
        for action in ['stop', 'reset-failed', 'disable']:
            call(['systemctl', action, unit],
                 verbosity=CallVerbosity.DEBUG)

    # stop + disable individual daemon units
    for d in list_daemons(detail=False):
        if d['fsid'] != args.fsid or d['style'] != 'cephadm:v1':
            continue
        tear_down_unit(get_unit_name(args.fsid, d['name']))

    # cluster-wide target unit
    tear_down_unit('ceph-%s.target' % args.fsid)

    # the systemd slice that grouped the cluster's units
    slice_name = 'system-%s.slice' % (('ceph-%s' % args.fsid).replace('-',
                                                                     '\\x2d'))
    call(['systemctl', 'stop', slice_name],
         verbosity=CallVerbosity.DEBUG)

    # rm units
    call_throws(['rm', '-f', args.unit_dir +
                 '/ceph-%s@.service' % args.fsid])
    call_throws(['rm', '-f', args.unit_dir +
                 '/ceph-%s.target' % args.fsid])
    call_throws(['rm', '-rf',
                 args.unit_dir + '/ceph-%s.target.wants' % args.fsid])
    # rm data
    call_throws(['rm', '-rf', args.data_dir + '/' + args.fsid])
    # rm logs
    call_throws(['rm', '-rf', args.log_dir + '/' + args.fsid])
    # NOTE(review): this glob is passed to `rm` without a shell, so it is
    # matched literally; also log_dir looks odd here (`.wants` dirs normally
    # live under unit_dir) -- confirm before changing.
    call_throws(['rm', '-rf', args.log_dir +
                 '/*.wants/ceph-%s@*' % args.fsid])
    # rm logrotate config
    call_throws(['rm', '-f', args.logrotate_dir + '/ceph-%s' % args.fsid])

    # clean up config, keyring, and pub key files -- but only if the conf
    # on disk actually references this fsid
    files = ['/etc/ceph/ceph.conf', '/etc/ceph/ceph.pub', '/etc/ceph/ceph.client.admin.keyring']
    if os.path.exists(files[0]):
        valid_fsid = False
        with open(files[0]) as f:
            if args.fsid in f.read():
                valid_fsid = True
        if valid_fsid:
            for path in files:
                if os.path.exists(path):
                    os.remove(path)
4485
9f95a23c
TL
4486
4487##################################
4488
def check_time_sync(enabler=None):
    # type: (Optional[Packager]) -> bool
    """Return True if one of the known time-sync systemd units is active."""
    units = [
        'chrony.service',  # 18.04 (at least)
        'chronyd.service',  # el / opensuse
        'systemd-timesyncd.service',
        'ntpd.service',  # el7 (at least)
        'ntp.service',  # 18.04 (at least)
        'ntpsec.service',  # 20.04 (at least) / buster
    ]
    if check_units(units, enabler):
        return True
    logger.warning('No time sync service is running; checked for %s' % units)
    return False
4503
f6b5b4d7 4504
9f95a23c
TL
def command_check_host():
    # type: () -> None
    """Verify host prerequisites: container engine, required binaries,
    time sync, and (optionally) the expected hostname.

    Collects all problems and raises a single Error listing them; also
    sets the module-global container_path as a side effect.
    """
    global container_path

    errors = []
    commands = ['systemctl', 'lvcreate']

    if args.docker:
        container_path = find_program('docker')
    else:
        # prefer podman, fall back to docker (CONTAINER_PREFERENCE order)
        for i in CONTAINER_PREFERENCE:
            try:
                container_path = find_program(i)
                break
            except Exception as e:
                logger.debug('Could not locate %s: %s' % (i, e))
        if not container_path:
            errors.append('Unable to locate any of %s' % CONTAINER_PREFERENCE)
        else:
            logger.info('podman|docker (%s) is present' % container_path)

    for command in commands:
        try:
            find_program(command)
            logger.info('%s is present' % command)
        except ValueError:
            errors.append('%s binary does not appear to be installed' % command)

    # check for configured+running chronyd or ntp
    if not check_time_sync():
        errors.append('No time synchronization is active')

    if 'expect_hostname' in args and args.expect_hostname:
        if get_hostname().lower() != args.expect_hostname.lower():
            errors.append('hostname "%s" does not match expected hostname "%s"' % (
                get_hostname(), args.expect_hostname))
        else:
            # FIX: this was previously logged unconditionally, claiming a
            # match even when a mismatch had just been recorded above
            logger.info('Hostname "%s" matches what is expected.',
                        args.expect_hostname)

    if errors:
        raise Error('\n'.join(errors))

    logger.info('Host looks OK')
4548
4549##################################
4550
f6b5b4d7 4551
9f95a23c
TL
def command_prepare_host():
    # type: () -> None
    """Install anything missing (container engine, lvm2, chrony), fix the
    hostname if requested, then re-run the host check."""
    pkg = None

    logger.info('Verifying podman|docker is present...')
    if not container_path:
        if not pkg:
            pkg = create_packager()
        pkg.install_podman()

    logger.info('Verifying lvm2 is present...')
    if not find_executable('lvcreate'):
        if not pkg:
            pkg = create_packager()
        pkg.install(['lvm2'])

    logger.info('Verifying time synchronization is in place...')
    if not check_time_sync():
        if not pkg:
            pkg = create_packager()
        pkg.install(['chrony'])
        # check again, and this time try to enable
        # the service
        check_time_sync(enabler=pkg)

    if 'expect_hostname' in args and args.expect_hostname and args.expect_hostname != get_hostname():
        logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), args.expect_hostname))
        call_throws(['hostname', args.expect_hostname])
        with open('/etc/hostname', 'w') as f:
            f.write(args.expect_hostname + '\n')

    logger.info('Repeating the final host check...')
    command_check_host()
4584
4585##################################
4586
f6b5b4d7 4587
9f95a23c
TL
class CustomValidation(argparse.Action):
    """argparse action that validates daemon names of the form <type>.<id>."""

    def _check_name(self, values):
        # require a '.' separating the daemon type from its id
        if '.' not in values:
            raise argparse.ArgumentError(self,
                                         "must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com")
        daemon_type = values.split('.', 1)[0]

        daemons = get_supported_daemons()
        if daemon_type not in daemons:
            raise argparse.ArgumentError(self,
                                         "name must declare the type of daemon e.g. "
                                         "{}".format(', '.join(daemons)))

    def __call__(self, parser, namespace, values, option_string=None):
        # only the 'name' option gets the extra validation
        if self.dest == "name":
            self._check_name(values)
        setattr(namespace, self.dest, values)
4607
4608##################################
4609
f6b5b4d7 4610
def get_distro():
    # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
    """Parse /etc/os-release and return (ID, VERSION_ID, VERSION_CODENAME),
    all lowercased; each element is None if the field is absent."""
    distro = None
    distro_version = None
    distro_codename = None
    with open('/etc/os-release', 'r') as f:
        for line in f.readlines():
            line = line.strip()
            if '=' not in line or line.startswith('#'):
                continue
            (var, val) = line.split('=', 1)
            # strip surrounding double quotes.
            # FIX: guard against an empty value (e.g. a bare `VARIANT=` line),
            # which previously raised IndexError on val[0]
            if val and val[0] == '"' and val[-1] == '"':
                val = val[1:-1]
            if var == 'ID':
                distro = val.lower()
            elif var == 'VERSION_ID':
                distro_version = val.lower()
            elif var == 'VERSION_CODENAME':
                distro_codename = val.lower()
    return distro, distro_version, distro_codename
4631
f6b5b4d7 4632
9f95a23c
TL
class Packager(object):
    """Base class for distro package-repo management (Apt/Yum/etc subclasses).

    Exactly one source may be selected: a stable release name, a specific
    version, a branch (optionally pinned to a commit), or nothing at all
    (the __init__ assertion enforces this).
    """

    def __init__(self, stable=None, version=None, branch=None, commit=None):
        # allowed combinations: stable-only, version-only, branch(+commit),
        # or none of them
        assert \
            (stable and not version and not branch and not commit) or \
            (not stable and version and not branch and not commit) or \
            (not stable and not version and branch) or \
            (not stable and not version and not branch and not commit)
        self.stable = stable
        self.version = version
        self.branch = branch
        self.commit = commit

    def add_repo(self):
        # implemented by distro-specific subclasses
        raise NotImplementedError

    def rm_repo(self):
        # implemented by distro-specific subclasses
        raise NotImplementedError

    def query_shaman(self, distro, distro_version, branch, commit):
        """Resolve a dev build's repo file via shaman, then fetch it from chacra.

        Returns the repo file contents as a str; raises Error if either
        service does not know the requested build.
        """
        # query shaman
        logger.info('Fetching repo metadata from shaman and chacra...')
        shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
            distro=distro,
            distro_version=distro_version,
            branch=branch,
            sha1=commit or 'latest',
            arch=get_arch()
        )
        try:
            shaman_response = urlopen(shaman_url)
        except HTTPError as err:
            logger.error('repository not found in shaman (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, shaman_url))
        try:
            # shaman redirects to the chacra URL that serves the repo file
            chacra_url = shaman_response.geturl()
            chacra_response = urlopen(chacra_url)
        except HTTPError as err:
            logger.error('repository not found in chacra (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, chacra_url))
        return chacra_response.read().decode('utf-8')

    def repo_gpgkey(self):
        """Return the GPG key (url, name) for the selected source.

        NOTE(review): when args.gpg_url is set this returns a single value
        rather than the (url, name) tuple the other branches return --
        callers appear to rely on the tuple shape; confirm before changing.
        """
        if args.gpg_url:
            return args.gpg_url
        if self.stable or self.version:
            return 'https://download.ceph.com/keys/release.asc', 'release'
        else:
            return 'https://download.ceph.com/keys/autobuild.asc', 'autobuild'

    def enable_service(self, service):
        """
        Start and enable the service (typically using systemd).
        """
        call_throws(['systemctl', 'enable', '--now', service])
4688
class Apt(Packager):
    """Packager implementation for Debian-family distros using apt."""

    DISTRO_NAMES = {
        'ubuntu': 'ubuntu',
        'debian': 'debian',
    }

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version, distro_codename):
        super(Apt, self).__init__(stable=stable, version=version,
                                  branch=branch, commit=commit)
        self.distro = self.DISTRO_NAMES[distro]
        self.distro_codename = distro_codename
        self.distro_version = distro_version

    def repo_path(self):
        # apt sources entry managed by cephadm
        return '/etc/apt/sources.list.d/ceph.list'

    def add_repo(self):
        """Install the repo GPG key and write the apt sources entry."""
        url, name = self.repo_gpgkey()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'w') as f:
            f.write(key)

        if self.version:
            content = 'deb %s/debian-%s/ %s main\n' % (
                args.repo_url, self.version, self.distro_codename)
        elif self.stable:
            content = 'deb %s/debian-%s/ %s main\n' % (
                args.repo_url, self.stable, self.distro_codename)
        else:
            content = self.query_shaman(self.distro, self.distro_codename, self.branch,
                                        self.commit)

        logger.info('Installing repo file at %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        """Remove the ceph apt repo, its GPG keys, and (on Ubuntu) the kubic repo."""
        for name in ['autobuild', 'release']:
            p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
            if os.path.exists(p):
                logger.info('Removing repo GPG key %s...' % p)
                os.unlink(p)
        if os.path.exists(self.repo_path()):
            logger.info('Removing repo at %s...' % self.repo_path())
            os.unlink(self.repo_path())

        if self.distro == 'ubuntu':
            self.rm_kubic_repo()

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws(['apt-get', 'install', '-y'] + ls)

    def install_podman(self):
        """Install podman (from the kubic repo on Ubuntu), falling back to docker."""
        if self.distro == 'ubuntu':
            logger.info('Setting up repo for podman...')
            self.add_kubic_repo()
            call_throws(['apt-get', 'update'])

        logger.info('Attempting podman install...')
        try:
            self.install(['podman'])
        except Error:
            # bug fix: the bound exception variable ('as e') was never used
            logger.info('Podman did not work. Falling back to docker...')
            self.install(['docker.io'])

    def kubic_repo_url(self):
        return 'https://download.opensuse.org/repositories/devel:/kubic:/' \
               'libcontainers:/stable/xUbuntu_%s/' % self.distro_version

    def kubic_repo_path(self):
        return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list'

    def kubric_repo_gpgkey_url(self):
        # NOTE: "kubric" (sic) kept for interface compatibility
        return '%s/Release.key' % self.kubic_repo_url()

    def kubric_repo_gpgkey_path(self):
        # NOTE: "kubric" (sic) kept for interface compatibility
        return '/etc/apt/trusted.gpg.d/kubic.release.gpg'

    def add_kubic_repo(self):
        """Add the openSUSE kubic repo (source of podman packages on Ubuntu)."""
        url = self.kubric_repo_gpgkey_url()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        tmp_key = write_tmp(key, 0, 0)
        keyring = self.kubric_repo_gpgkey_path()
        call_throws(['apt-key', '--keyring', keyring, 'add', tmp_key.name])

        logger.info('Installing repo file at %s...' % self.kubic_repo_path())
        content = 'deb %s /\n' % self.kubic_repo_url()
        with open(self.kubic_repo_path(), 'w') as f:
            f.write(content)

    def rm_kubic_repo(self):
        keyring = self.kubric_repo_gpgkey_path()
        if os.path.exists(keyring):
            logger.info('Removing repo GPG key %s...' % keyring)
            os.unlink(keyring)

        p = self.kubic_repo_path()
        if os.path.exists(p):
            logger.info('Removing repo at %s...' % p)
            os.unlink(p)
4805
f6b5b4d7 4806
9f95a23c
TL
class YumDnf(Packager):
    """Packager implementation for RPM-based distros driven by yum or dnf."""

    DISTRO_NAMES = {
        'centos': ('centos', 'el'),
        'rhel': ('centos', 'el'),
        'scientific': ('centos', 'el'),
        'fedora': ('fedora', 'fc'),
    }

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version):
        super(YumDnf, self).__init__(stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.major = int(distro_version.split('.')[0])
        normalized, code_prefix = self.DISTRO_NAMES[distro]
        self.distro_normalized = normalized
        self.distro_code = code_prefix + str(self.major)
        # dnf replaced yum on Fedora >= 30 and EL >= 8
        uses_dnf = (self.distro_code == 'fc' and self.major >= 30) or \
                   (self.distro_code == 'el' and self.major >= 8)
        self.tool = 'dnf' if uses_dnf else 'yum'

    def custom_repo(self, **kw):
        """Render a repo-file section from the given keyword values.

        Only keys carrying a real value (not None, not an empty string)
        produce a line; a naive ``format()`` template would leave dangling
        ``key=`` entries that break the repo file. The tuple below fixes the
        line order, starting with the ``[reponame]`` header.
        """
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )
        rendered = [fmt % kw.get(key)
                    for key, fmt in tmpl
                    if key in kw and kw.get(key) not in (None, '')]
        return '\n'.join(rendered)

    def repo_path(self):
        # repo file managed by cephadm
        return '/etc/yum.repos.d/ceph.repo'

    def repo_baseurl(self):
        """Base URL on download.ceph.com for the selected stable/version repo."""
        assert self.stable or self.version
        if self.version:
            return '%s/rpm-%s/%s' % (args.repo_url, self.version,
                                     self.distro_code)
        else:
            return '%s/rpm-%s/%s' % (args.repo_url, self.stable,
                                     self.distro_code)

    def add_repo(self):
        """Write the ceph repo file (and enable EPEL on EL distros)."""
        if self.stable or self.version:
            sections = []
            for repo_name, arch in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                body = self.custom_repo(
                    name='Ceph %s' % arch,
                    baseurl=self.repo_baseurl() + '/' + arch,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                sections.append('[%s]\n%s\n\n' % (repo_name, body))
            content = ''.join(sections)
        else:
            content = self.query_shaman(self.distro_normalized, self.major,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        if self.distro_code.startswith('el'):
            logger.info('Enabling EPEL...')
            call_throws([self.tool, 'install', '-y', 'epel-release'])

    def rm_repo(self):
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws([self.tool, 'install', '-y'] + ls)

    def install_podman(self):
        self.install(['podman'])
4935
4936
class Zypper(Packager):
    """Packager implementation for SUSE distros using zypper."""

    DISTRO_NAMES = [
        'sles',
        'opensuse-tumbleweed',
        'opensuse-leap'
    ]

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version):
        super(Zypper, self).__init__(stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.tool = 'zypper'
        self.distro = 'opensuse'
        # default to Leap 15.1 repos; Tumbleweed has no versioned repo path
        self.distro_version = '15.1'
        if 'tumbleweed' not in distro and distro_version is not None:
            self.distro_version = distro_version

    def custom_repo(self, **kw):
        """
        See YumDnf for format explanation.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self):
        # repo file managed by cephadm
        return '/etc/zypp/repos.d/ceph.repo'

    def repo_baseurl(self):
        """Base URL on download.ceph.com for the selected stable/version repo."""
        assert self.stable or self.version
        if self.version:
            # bug fix: this branch previously interpolated self.stable, which
            # is None when --version is given, yielding an 'rpm-None' URL
            return '%s/rpm-%s/%s' % (args.repo_url, self.version, self.distro)
        else:
            return '%s/rpm-%s/%s' % (args.repo_url, self.stable, self.distro)

    def add_repo(self):
        """Write the ceph repo file for zypper."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro, self.distro_version,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws([self.tool, 'in', '-y'] + ls)

    def install_podman(self):
        self.install(['podman'])
5028
5029
def create_packager(stable=None, version=None, branch=None, commit=None):
    """Instantiate the Packager subclass matching the host distribution.

    :raises Error: when the detected distro is not supported.
    """
    distro, distro_version, distro_codename = get_distro()
    source = dict(stable=stable, version=version, branch=branch, commit=commit)
    if distro in YumDnf.DISTRO_NAMES:
        return YumDnf(distro=distro, distro_version=distro_version, **source)
    if distro in Apt.DISTRO_NAMES:
        return Apt(distro=distro, distro_version=distro_version,
                   distro_codename=distro_codename, **source)
    if distro in Zypper.DISTRO_NAMES:
        return Zypper(distro=distro, distro_version=distro_version, **source)
    raise Error('Distro %s version %s not supported' % (distro, distro_version))
5046
5047
def command_add_repo():
    # type: () -> None
    """Validate the repo-selection arguments and install the package repo.

    :raises Error: on conflicting/missing arguments or a malformed --version.
    """
    if args.version and args.release:
        raise Error('you can specify either --release or --version but not both')
    if not args.version and not args.release and not args.dev and not args.dev_commit:
        raise Error('please supply a --release, --version, --dev or --dev-commit argument')
    if args.version:
        # bug fix: replaced a broad `except Exception` around an unused
        # (x, y, z) unpack with an explicit shape check
        if len(args.version.split('.')) != 3:
            raise Error('version must be in the form x.y.z (e.g., 15.2.0)')

    pkg = create_packager(stable=args.release,
                          version=args.version,
                          branch=args.dev,
                          commit=args.dev_commit)
    pkg.add_repo()
5064
f6b5b4d7 5065
9f95a23c
TL
def command_rm_repo():
    # type: () -> None
    """Remove the ceph package repository configured by 'add-repo'."""
    create_packager().rm_repo()
5069
f6b5b4d7 5070
9f95a23c
TL
def command_install():
    # type: () -> None
    """Install the requested packages using the distro's package manager."""
    create_packager().install(args.packages)
5074
5075##################################
5076
f91f0fd5
TL
def get_ipv4_address(ifname):
    # type: (str) -> str
    """Return 'addr/prefixlen' for the interface's IPv4 address.

    :param ifname: interface name (truncated to 15 chars, the kernel limit)
    :returns: e.g. '192.168.1.10/24', or '' if the interface has no IPv4
              address (or does not exist)
    """
    def _extract(sock, offset):
        return socket.inet_ntop(
            socket.AF_INET,
            fcntl.ioctl(
                sock.fileno(),
                offset,
                struct.pack('256s', bytes(ifname[:15], 'utf-8'))
            )[20:24])

    # bug fix: the socket was never closed (leaked one fd per call);
    # the context manager closes it on every path
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        try:
            addr = _extract(s, 35093)     # '0x8915' = SIOCGIFADDR
            dq_mask = _extract(s, 35099)  # 0x891b = SIOCGIFNETMASK
        except OSError:
            # interface does not have an ipv4 address
            return ''

    # count the set bits of the dotted-quad netmask to get the prefix length
    dec_mask = sum([bin(int(i)).count('1')
                    for i in dq_mask.split('.')])
    return '{}/{}'.format(addr, dec_mask)
5099
5100
def get_ipv6_address(ifname):
    # type: (str) -> str
    """Return 'addr/scope' for the interface's IPv6 address, or '' if none.

    Parses /proc/net/if_inet6; see
    https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/ch11s04.html
    (field 0 is the raw ipv6 address, field 2 the scope, last field the name).
    """
    if not os.path.exists('/proc/net/if_inet6'):
        return ''

    for entry in read_file(['/proc/net/if_inet6']).splitlines():
        fields = entry.split()
        if fields[-1] != ifname:
            continue
        raw_addr = fields[0]
        # re-insert the ':' separators every 4 hex digits
        grouped = ":".join(raw_addr[pos:pos + 4]
                           for pos in range(0, len(raw_addr), 4))
        # let the ipaddress module apply canonical formatting rules
        addr = ipaddress.ip_address(grouped)
        return "{}/{}".format(str(addr), int(fields[2], 16))
    return ''
5119
5120
def bytes_to_human(num, mode='decimal'):
    # type: (float, str) -> str
    """Convert a byte count into its human-readable form.

    :param num: number, in bytes, to convert
    :param mode: 'decimal' (powers of 1000, default) or 'binary' (powers of 1024)
    :returns: readable representation such as '1.5GB' or '1.4GiB'
    """
    if mode == 'binary':
        suffixes = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
        step = 1024.0
        top_unit = "YiB"
    else:
        suffixes = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
        step = 1000.0
        top_unit = "YB"

    value = num
    for suffix in suffixes:
        if abs(value) < step:
            return "%3.1f%s" % (value, suffix)
        value /= step
    # fell through every listed unit: report in yottabytes
    return "%.1f%s" % (value, top_unit)
5143
5144
def read_file(path_list, file_name=''):
    # type: (List[str], str) -> str
    """Return the stripped content of the first existing file in `path_list`.

    :param path_list: candidate paths to search, in order
    :param file_name: optional file name joined onto each candidate path
    :returns: file content, or 'Unknown' if no file exists or the read fails
    """
    for candidate in path_list:
        target = os.path.join(candidate, file_name) if file_name else candidate
        if not os.path.exists(target):
            continue
        with open(target, 'r') as fh:
            try:
                return fh.read().strip()
            except OSError:
                # sysfs may expose the file, but for devices like virtio
                # the read itself can still fail
                return "Unknown"
    return "Unknown"
5169
5170
5171##################################
class HostFacts():
    """Collects host hardware/OS metadata from /proc, /sys and DMI for
    reporting back to the orchestrator (see command_gather_facts)."""

    _dmi_path_list = ['/sys/class/dmi/id']
    _nic_path_list = ['/sys/class/net']
    _selinux_path_list = ['/etc/selinux/config']
    _apparmor_path_list = ['/etc/apparmor']
    # some devices report an opaque vendor id; map the known ones to a name
    _disk_vendor_workarounds = {
        "0x1af4": "Virtio Block Device"
    }

    def __init__(self):
        self.cpu_model = 'Unknown'
        self.cpu_count = 0
        self.cpu_cores = 0
        self.cpu_threads = 0
        self.interfaces = {}

        self._meminfo = read_file(['/proc/meminfo']).splitlines()
        self._get_cpuinfo()
        self._process_nics()
        self.arch = platform.processor()
        self.kernel = platform.release()

    def _get_cpuinfo(self):
        # type: () -> None
        """Determine cpu information via /proc/cpuinfo"""
        raw = read_file(['/proc/cpuinfo'])
        output = raw.splitlines()
        cpu_set = set()

        for line in output:
            field = [l.strip() for l in line.split(':')]
            if "model name" in line:
                self.cpu_model = field[1]
            if "physical id" in line:
                # one entry per socket; the set de-duplicates per-core lines
                cpu_set.add(field[1])
            if "siblings" in line:
                self.cpu_threads = int(field[1].strip())
            if "cpu cores" in line:
                self.cpu_cores = int(field[1].strip())
        self.cpu_count = len(cpu_set)

    def _get_block_devs(self):
        # type: () -> List[str]
        """Determine the list of block devices by looking at /sys/block"""
        return [dev for dev in os.listdir('/sys/block')
                if not dev.startswith('dm')]

    def _get_devs_by_type(self, rota='0'):
        # type: (str) -> List[str]
        """Filter block devices by a given rotational attribute (0=flash, 1=spinner)"""
        devs = list()
        for blk_dev in self._get_block_devs():
            rot_path = '/sys/block/{}/queue/rotational'.format(blk_dev)
            rot_value = read_file([rot_path])
            if rot_value == rota:
                devs.append(blk_dev)
        return devs

    @property
    def operating_system(self):
        # type: () -> str
        """Determine OS version"""
        raw_info = read_file(['/etc/os-release'])
        os_release = raw_info.splitlines()
        rel_str = 'Unknown'
        rel_dict = dict()

        for line in os_release:
            if "=" in line:
                # bug fix: maxsplit=1 so values containing '=' don't crash
                var_name, var_value = line.split('=', 1)
                rel_dict[var_name] = var_value.strip('"')

        # Would normally use PRETTY_NAME, but NAME and VERSION are more
        # consistent
        if all(_v in rel_dict for _v in ["NAME", "VERSION"]):
            rel_str = "{} {}".format(rel_dict['NAME'], rel_dict['VERSION'])
        return rel_str

    @property
    def hostname(self):
        # type: () -> str
        """Return the hostname"""
        return platform.node()

    @property
    def subscribed(self):
        # type: () -> str
        """Highlevel check to see if the host is subscribed to receive updates/support"""
        def _red_hat():
            # type: () -> str
            # RHEL 7 and RHEL 8
            entitlements_dir = '/etc/pki/entitlement'
            if os.path.exists(entitlements_dir):
                pems = glob('{}/*.pem'.format(entitlements_dir))
                if len(pems) >= 2:
                    return "Yes"

            return "No"

        os_name = self.operating_system
        if os_name.upper().startswith("RED HAT"):
            return _red_hat()

        return "Unknown"

    @property
    def hdd_count(self):
        # type: () -> int
        """Return a count of HDDs (spinners)"""
        return len(self._get_devs_by_type(rota='1'))

    def _get_capacity(self, dev):
        # type: (str) -> int
        """Determine the size of a given device (bytes)"""
        size_path = os.path.join('/sys/block', dev, 'size')
        size_blocks = int(read_file([size_path]))
        blk_path = os.path.join('/sys/block', dev, 'queue', 'logical_block_size')
        blk_count = int(read_file([blk_path]))
        return size_blocks * blk_count

    def _get_capacity_by_type(self, rota='0'):
        # type: (str) -> int
        """Return the total capacity of a category of device (flash or hdd)"""
        devs = self._get_devs_by_type(rota=rota)
        capacity = 0
        for dev in devs:
            capacity += self._get_capacity(dev)
        return capacity

    def _dev_list(self, dev_list):
        # type: (List[str]) -> List[Dict[str, object]]
        """Return a 'pretty' name list for each device in the `dev_list`"""
        disk_list = list()

        for dev in dev_list:
            disk_model = read_file(['/sys/block/{}/device/model'.format(dev)]).strip()
            disk_rev = read_file(['/sys/block/{}/device/rev'.format(dev)]).strip()
            disk_wwid = read_file(['/sys/block/{}/device/wwid'.format(dev)]).strip()
            vendor = read_file(['/sys/block/{}/device/vendor'.format(dev)]).strip()
            disk_vendor = HostFacts._disk_vendor_workarounds.get(vendor, vendor)
            disk_size_bytes = self._get_capacity(dev)
            disk_list.append({
                "description": "{} {} ({})".format(disk_vendor, disk_model, bytes_to_human(disk_size_bytes)),
                "vendor": disk_vendor,
                "model": disk_model,
                "rev": disk_rev,
                "wwid": disk_wwid,
                "dev_name": dev,
                "disk_size_bytes": disk_size_bytes,
                }
            )
        return disk_list

    @property
    def hdd_list(self):
        # type: () -> List[Dict[str, object]]
        """Return a list of devices that are HDDs (spinners)"""
        devs = self._get_devs_by_type(rota='1')
        return self._dev_list(devs)

    @property
    def flash_list(self):
        # type: () -> List[Dict[str, object]]
        """Return a list of devices that are flash based (SSD, NVMe)"""
        devs = self._get_devs_by_type(rota='0')
        return self._dev_list(devs)

    @property
    def hdd_capacity_bytes(self):
        # type: () -> int
        """Return the total capacity for all HDD devices (bytes)"""
        return self._get_capacity_by_type(rota='1')

    @property
    def hdd_capacity(self):
        # type: () -> str
        """Return the total capacity for all HDD devices (human readable format)"""
        return bytes_to_human(self.hdd_capacity_bytes)

    @property
    def cpu_load(self):
        # type: () -> Dict[str, float]
        """Return the cpu load average data for the host"""
        raw = read_file(['/proc/loadavg']).strip()
        data = raw.split()
        return {
            "1min": float(data[0]),
            "5min": float(data[1]),
            "15min": float(data[2]),
        }

    @property
    def flash_count(self):
        # type: () -> int
        """Return the number of flash devices in the system (SSD, NVMe)"""
        return len(self._get_devs_by_type(rota='0'))

    @property
    def flash_capacity_bytes(self):
        # type: () -> int
        """Return the total capacity for all flash devices (bytes)"""
        return self._get_capacity_by_type(rota='0')

    @property
    def flash_capacity(self):
        # type: () -> str
        """Return the total capacity for all Flash devices (human readable format)"""
        return bytes_to_human(self.flash_capacity_bytes)

    def _process_nics(self):
        # type: () -> None
        """Look at the NIC devices and extract network related metadata"""
        # from https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
        hw_lookup = {
            "1": "ethernet",
            "32": "infiniband",
            "772": "loopback",
        }

        for nic_path in HostFacts._nic_path_list:
            if not os.path.exists(nic_path):
                continue
            for iface in os.listdir(nic_path):

                lower_devs_list = [os.path.basename(link.replace("lower_", "")) for link in glob(os.path.join(nic_path, iface, "lower_*"))]
                upper_devs_list = [os.path.basename(link.replace("upper_", "")) for link in glob(os.path.join(nic_path, iface, "upper_*"))]

                try:
                    mtu = int(read_file([os.path.join(nic_path, iface, 'mtu')]))
                except ValueError:
                    mtu = 0

                operstate = read_file([os.path.join(nic_path, iface, 'operstate')])
                try:
                    speed = int(read_file([os.path.join(nic_path, iface, 'speed')]))
                except (OSError, ValueError):
                    # OSError : device doesn't support the ethtool get_link_ksettings
                    # ValueError : raised when the read fails, and returns Unknown
                    #
                    # Either way, we show a -1 when speed isn't available
                    speed = -1

                if os.path.exists(os.path.join(nic_path, iface, 'bridge')):
                    nic_type = "bridge"
                elif os.path.exists(os.path.join(nic_path, iface, 'bonding')):
                    nic_type = "bonding"
                else:
                    nic_type = hw_lookup.get(read_file([os.path.join(nic_path, iface, 'type')]), "Unknown")

                dev_link = os.path.join(nic_path, iface, 'device')
                if os.path.exists(dev_link):
                    iftype = 'physical'
                    driver_path = os.path.join(dev_link, 'driver')
                    if os.path.exists(driver_path):
                        driver = os.path.basename(
                            os.path.realpath(driver_path))
                    else:
                        driver = 'Unknown'

                else:
                    iftype = 'logical'
                    driver = ''

                self.interfaces[iface] = {
                    "mtu": mtu,
                    "upper_devs_list": upper_devs_list,
                    "lower_devs_list": lower_devs_list,
                    "operstate": operstate,
                    "iftype": iftype,
                    "nic_type": nic_type,
                    "driver": driver,
                    "speed": speed,
                    "ipv4_address": get_ipv4_address(iface),
                    "ipv6_address": get_ipv6_address(iface),
                }

    @property
    def nic_count(self):
        # type: () -> int
        """Return a total count of all physical NICs detected in the host"""
        phys_devs = []
        for iface in self.interfaces:
            if self.interfaces[iface]["iftype"] == 'physical':
                phys_devs.append(iface)
        return len(phys_devs)

    def _get_mem_data(self, field_name):
        # type: (str) -> int
        """Return the value (kB) of a /proc/meminfo field, or 0 if absent."""
        for line in self._meminfo:
            if line.startswith(field_name):
                _d = line.split()
                return int(_d[1])
        return 0

    @property
    def memory_total_kb(self):
        # type: () -> int
        """Determine the memory installed (kb)"""
        return self._get_mem_data('MemTotal')

    @property
    def memory_free_kb(self):
        # type: () -> int
        """Determine the memory free (not cache, immediately usable)"""
        return self._get_mem_data('MemFree')

    @property
    def memory_available_kb(self):
        # type: () -> int
        """Determine the memory available to new applications without swapping"""
        return self._get_mem_data('MemAvailable')

    @property
    def vendor(self):
        # type: () -> str
        """Determine server vendor from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, "sys_vendor")

    @property
    def model(self):
        # type: () -> str
        """Determine server model information from DMI data in sysfs"""
        family = read_file(HostFacts._dmi_path_list, "product_family")
        product = read_file(HostFacts._dmi_path_list, "product_name")
        if family == 'Unknown' and product:
            return "{}".format(product)

        return "{} ({})".format(family, product)

    @property
    def bios_version(self):
        # type: () -> str
        """Determine server BIOS version from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, "bios_version")

    @property
    def bios_date(self):
        # type: () -> str
        """Determine server BIOS date from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, "bios_date")

    @property
    def timestamp(self):
        # type: () -> float
        """Return the current time as Epoch seconds"""
        return time.time()

    @property
    def system_uptime(self):
        # type: () -> float
        """Return the system uptime (in secs)"""
        raw_time = read_file(['/proc/uptime'])
        up_secs, _ = raw_time.split()
        return float(up_secs)

    def kernel_security(self):
        # type: () -> Dict[str, str]
        """Determine the security features enabled in the kernel - SELinux, AppArmor"""
        def _fetch_selinux():
            """Read the selinux config file to determine state"""
            security = {}
            for selinux_path in HostFacts._selinux_path_list:
                if os.path.exists(selinux_path):
                    selinux_config = read_file([selinux_path]).splitlines()
                    security['type'] = 'SELinux'
                    for line in selinux_config:
                        if line.strip().startswith('#'):
                            continue
                        # bug fix: blank lines crashed the 'k, v' unpack, and
                        # values containing '=' crashed split('=')
                        if '=' not in line:
                            continue
                        k, v = line.split('=', 1)
                        security[k] = v
                    if security['SELINUX'].lower() == "disabled":
                        security['description'] = "SELinux: Disabled"
                    else:
                        security['description'] = "SELinux: Enabled({}, {})".format(security['SELINUX'], security['SELINUXTYPE'])
            return security

        def _fetch_apparmor():
            """Read the apparmor profiles directly, returning an overview of AppArmor status"""
            security = {}
            for apparmor_path in HostFacts._apparmor_path_list:
                if os.path.exists(apparmor_path):
                    security['type'] = "AppArmor"
                    security['description'] = "AppArmor: Enabled"
                    try:
                        profiles = read_file(['/sys/kernel/security/apparmor/profiles'])
                    except OSError:
                        pass
                    else:
                        summary = {}  # type: Dict[str, int]
                        for line in profiles.split('\n'):
                            # bug fix: blank lines crashed the 2-way unpack;
                            # rsplit keeps profile names containing spaces intact
                            if ' ' not in line:
                                continue
                            item, mode = line.rsplit(' ', 1)
                            mode = mode.strip('()')
                            # bug fix: the first profile seen in a mode was
                            # counted as 0, undercounting every mode by one
                            summary[mode] = summary.get(mode, 0) + 1
                        summary_str = ",".join(["{} {}".format(v, k) for k, v in summary.items()])
                        security = {**security, **summary}  # type: ignore
                        security['description'] += "({})".format(summary_str)

            return security

        if os.path.exists('/sys/kernel/security/lsm'):
            lsm = read_file(['/sys/kernel/security/lsm']).strip()
            if 'selinux' in lsm:
                return _fetch_selinux()
            elif 'apparmor' in lsm:
                return _fetch_apparmor()
            else:
                return {
                    "type": "Unknown",
                    "description": "Linux Security Module framework is active, but is not using SELinux or AppArmor"
                }

        return {
            "type": "None",
            "description": "Linux Security Module framework is not available"
        }

    @property
    def kernel_parameters(self):
        # type: () -> Dict[str, str]
        """Get kernel parameters required/used in Ceph clusters"""

        k_param = {}
        out, _, _ = call_throws(['sysctl', '-a'], verbosity=CallVerbosity.SILENT)
        if out:
            param_list = out.split('\n')
            param_dict = { param.split(" = ")[0]:param.split(" = ")[-1] for param in param_list}

            # return only desired parameters
            if 'net.ipv4.ip_nonlocal_bind' in param_dict:
                k_param['net.ipv4.ip_nonlocal_bind'] = param_dict['net.ipv4.ip_nonlocal_bind']

        return k_param

    def dump(self):
        # type: () -> str
        """Return the attributes of this HostFacts object as json"""
        # only plain data attributes/properties are serialized; methods
        # (e.g. kernel_security) and private fields are skipped
        data = {k: getattr(self, k) for k in dir(self)
                if not k.startswith('_') and
                isinstance(getattr(self, k),
                           (float, int, str, list, dict, tuple))
                }
        return json.dumps(data, indent=2, sort_keys=True)
5619
5620##################################
5621
def command_gather_facts():
    # type: () -> None
    """Print host-related metadata (facts) as JSON for the caller."""
    print(HostFacts().dump())
5626
5627
5628##################################
5629
f6b5b4d7 5630
9f95a23c
TL
def _get_parser():
    # type: () -> argparse.ArgumentParser
    """Build the cephadm argument parser.

    One subparser is registered per cephadm command; each sets ``func`` to
    its handler via ``set_defaults`` so the main entry point can dispatch
    with ``args.func()``.

    :return: fully configured :class:`argparse.ArgumentParser`
    """
    parser = argparse.ArgumentParser(
        description='Bootstrap Ceph daemons with systemd and containers.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--image',
        help='container image. Can also be set via the "CEPHADM_IMAGE" '
        'env var')
    parser.add_argument(
        '--docker',
        action='store_true',
        help='use docker instead of podman')
    parser.add_argument(
        '--data-dir',
        default=DATA_DIR,
        help='base directory for daemon data')
    parser.add_argument(
        '--log-dir',
        default=LOG_DIR,
        help='base directory for daemon logs')
    parser.add_argument(
        '--logrotate-dir',
        default=LOGROTATE_DIR,
        help='location of logrotate configuration files')
    parser.add_argument(
        '--unit-dir',
        default=UNIT_DIR,
        help='base directory for systemd units')
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Show debug-level log messages')
    parser.add_argument(
        '--timeout',
        type=int,
        default=DEFAULT_TIMEOUT,
        help='timeout in seconds')
    parser.add_argument(
        '--retry',
        type=int,
        default=DEFAULT_RETRY,
        help='max number of retries')
    parser.add_argument(
        '--env', '-e',
        action='append',
        default=[],
        help='set environment variable')
    # Only the negative form is public; `--container-init` is kept hidden on
    # the subparsers for backward compatibility (see _parse_args workaround).
    parser.add_argument(
        '--no-container-init',
        action='store_true',
        default=not CONTAINER_INIT,
        help='Do not run podman/docker with `--init`')

    subparsers = parser.add_subparsers(help='sub-command')

    parser_version = subparsers.add_parser(
        'version', help='get ceph version from container')
    parser_version.set_defaults(func=command_version)

    parser_pull = subparsers.add_parser(
        'pull', help='pull latest image version')
    parser_pull.set_defaults(func=command_pull)

    parser_inspect_image = subparsers.add_parser(
        'inspect-image', help='inspect local container image')
    parser_inspect_image.set_defaults(func=command_inspect_image)

    parser_ls = subparsers.add_parser(
        'ls', help='list daemon instances on this host')
    parser_ls.set_defaults(func=command_ls)
    parser_ls.add_argument(
        '--no-detail',
        action='store_true',
        help='Do not include daemon status')
    parser_ls.add_argument(
        '--legacy-dir',
        default='/',
        help='base directory for legacy daemon data')

    parser_list_networks = subparsers.add_parser(
        'list-networks', help='list IP networks')
    parser_list_networks.set_defaults(func=command_list_networks)

    parser_adopt = subparsers.add_parser(
        'adopt', help='adopt daemon deployed with a different tool')
    parser_adopt.set_defaults(func=command_adopt)
    parser_adopt.add_argument(
        '--name', '-n',
        required=True,
        help='daemon name (type.id)')
    parser_adopt.add_argument(
        '--style',
        required=True,
        help='deployment style (legacy, ...)')
    parser_adopt.add_argument(
        '--cluster',
        default='ceph',
        help='cluster name')
    parser_adopt.add_argument(
        '--legacy-dir',
        default='/',
        help='base directory for legacy daemon data')
    parser_adopt.add_argument(
        '--config-json',
        help='Additional configuration information in JSON format')
    parser_adopt.add_argument(
        '--skip-firewalld',
        action='store_true',
        help='Do not configure firewalld')
    parser_adopt.add_argument(
        '--skip-pull',
        action='store_true',
        help='do not pull the latest image before adopting')
    parser_adopt.add_argument(
        '--force-start',
        action='store_true',
        help="start newly adopted daemon, even if it wasn't running previously")
    parser_adopt.add_argument(
        '--container-init',
        action='store_true',
        default=CONTAINER_INIT,
        help=argparse.SUPPRESS)

    parser_rm_daemon = subparsers.add_parser(
        'rm-daemon', help='remove daemon instance')
    parser_rm_daemon.set_defaults(func=command_rm_daemon)
    parser_rm_daemon.add_argument(
        '--name', '-n',
        required=True,
        action=CustomValidation,
        help='daemon name (type.id)')
    parser_rm_daemon.add_argument(
        '--fsid',
        required=True,
        help='cluster FSID')
    parser_rm_daemon.add_argument(
        '--force',
        action='store_true',
        help='proceed, even though this may destroy valuable data')
    parser_rm_daemon.add_argument(
        '--force-delete-data',
        action='store_true',
        help='delete valuable daemon data instead of making a backup')

    parser_rm_cluster = subparsers.add_parser(
        'rm-cluster', help='remove all daemons for a cluster')
    parser_rm_cluster.set_defaults(func=command_rm_cluster)
    parser_rm_cluster.add_argument(
        '--fsid',
        required=True,
        help='cluster FSID')
    parser_rm_cluster.add_argument(
        '--force',
        action='store_true',
        help='proceed, even though this may destroy valuable data')

    parser_run = subparsers.add_parser(
        'run', help='run a ceph daemon, in a container, in the foreground')
    parser_run.set_defaults(func=command_run)
    parser_run.add_argument(
        '--name', '-n',
        required=True,
        help='daemon name (type.id)')
    parser_run.add_argument(
        '--fsid',
        required=True,
        help='cluster FSID')

    parser_shell = subparsers.add_parser(
        'shell', help='run an interactive shell inside a daemon container')
    parser_shell.set_defaults(func=command_shell)
    parser_shell.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_shell.add_argument(
        '--name', '-n',
        help='daemon name (type.id)')
    parser_shell.add_argument(
        '--config', '-c',
        help='ceph.conf to pass through to the container')
    parser_shell.add_argument(
        '--keyring', '-k',
        help='ceph.keyring to pass through to the container')
    parser_shell.add_argument(
        '--mount', '-m',
        help=("mount a file or directory in the container. "
              "Support multiple mounts. "
              "ie: `--mount /foo /bar:/bar`. "
              "When no destination is passed, default is /mnt"),
        nargs='+')
    parser_shell.add_argument(
        '--env', '-e',
        action='append',
        default=[],
        help='set environment variable')
    parser_shell.add_argument(
        'command', nargs=argparse.REMAINDER,
        help='command (optional)')

    parser_enter = subparsers.add_parser(
        'enter', help='run an interactive shell inside a running daemon container')
    parser_enter.set_defaults(func=command_enter)
    parser_enter.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_enter.add_argument(
        '--name', '-n',
        required=True,
        help='daemon name (type.id)')
    parser_enter.add_argument(
        'command', nargs=argparse.REMAINDER,
        help='command')

    parser_ceph_volume = subparsers.add_parser(
        'ceph-volume', help='run ceph-volume inside a container')
    parser_ceph_volume.set_defaults(func=command_ceph_volume)
    parser_ceph_volume.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_ceph_volume.add_argument(
        '--config-json',
        help='JSON file with config and (client.bootstrap-osd) key')
    parser_ceph_volume.add_argument(
        '--config', '-c',
        help='ceph conf file')
    parser_ceph_volume.add_argument(
        '--keyring', '-k',
        help='ceph.keyring to pass through to the container')
    parser_ceph_volume.add_argument(
        'command', nargs=argparse.REMAINDER,
        help='command')

    parser_unit = subparsers.add_parser(
        'unit', help='operate on the daemon\'s systemd unit')
    parser_unit.set_defaults(func=command_unit)
    parser_unit.add_argument(
        'command',
        help='systemd command (start, stop, restart, enable, disable, ...)')
    parser_unit.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_unit.add_argument(
        '--name', '-n',
        required=True,
        help='daemon name (type.id)')

    parser_logs = subparsers.add_parser(
        'logs', help='print journald logs for a daemon container')
    parser_logs.set_defaults(func=command_logs)
    parser_logs.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_logs.add_argument(
        '--name', '-n',
        required=True,
        help='daemon name (type.id)')
    parser_logs.add_argument(
        'command', nargs='*',
        help='additional journalctl args')

    parser_bootstrap = subparsers.add_parser(
        'bootstrap', help='bootstrap a cluster (mon + mgr daemons)')
    parser_bootstrap.set_defaults(func=command_bootstrap)
    parser_bootstrap.add_argument(
        '--config', '-c',
        help='ceph conf file to incorporate')
    parser_bootstrap.add_argument(
        '--mon-id',
        required=False,
        help='mon id (default: local hostname)')
    parser_bootstrap.add_argument(
        '--mon-addrv',
        help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])')
    parser_bootstrap.add_argument(
        '--mon-ip',
        help='mon IP')
    parser_bootstrap.add_argument(
        '--mgr-id',
        required=False,
        help='mgr id (default: randomly generated)')
    parser_bootstrap.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_bootstrap.add_argument(
        '--output-dir',
        default='/etc/ceph',
        help='directory to write config, keyring, and pub key files')
    parser_bootstrap.add_argument(
        '--output-keyring',
        help='location to write keyring file with new cluster admin and mon keys')
    parser_bootstrap.add_argument(
        '--output-config',
        help='location to write conf file to connect to new cluster')
    parser_bootstrap.add_argument(
        '--output-pub-ssh-key',
        help='location to write the cluster\'s public SSH key')
    parser_bootstrap.add_argument(
        '--skip-ssh',
        action='store_true',
        help='skip setup of ssh key on local host')
    parser_bootstrap.add_argument(
        '--initial-dashboard-user',
        default='admin',
        help='Initial user for the dashboard')
    parser_bootstrap.add_argument(
        '--initial-dashboard-password',
        help='Initial password for the initial dashboard user')
    parser_bootstrap.add_argument(
        '--ssl-dashboard-port',
        type=int,
        default=8443,
        help='Port number used to connect with dashboard using SSL')
    parser_bootstrap.add_argument(
        '--dashboard-key',
        type=argparse.FileType('r'),
        help='Dashboard key')
    parser_bootstrap.add_argument(
        '--dashboard-crt',
        type=argparse.FileType('r'),
        help='Dashboard certificate')

    parser_bootstrap.add_argument(
        '--ssh-config',
        type=argparse.FileType('r'),
        help='SSH config')
    parser_bootstrap.add_argument(
        '--ssh-private-key',
        type=argparse.FileType('r'),
        help='SSH private key')
    parser_bootstrap.add_argument(
        '--ssh-public-key',
        type=argparse.FileType('r'),
        help='SSH public key')
    parser_bootstrap.add_argument(
        '--ssh-user',
        default='root',
        help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users')

    parser_bootstrap.add_argument(
        '--skip-mon-network',
        action='store_true',
        help='set mon public_network based on bootstrap mon ip')
    parser_bootstrap.add_argument(
        '--skip-dashboard',
        action='store_true',
        help='do not enable the Ceph Dashboard')
    parser_bootstrap.add_argument(
        '--dashboard-password-noupdate',
        action='store_true',
        help='stop forced dashboard password change')
    parser_bootstrap.add_argument(
        '--no-minimize-config',
        action='store_true',
        help='do not assimilate and minimize the config file')
    parser_bootstrap.add_argument(
        '--skip-ping-check',
        action='store_true',
        help='do not verify that mon IP is pingable')
    parser_bootstrap.add_argument(
        '--skip-pull',
        action='store_true',
        help='do not pull the latest image before bootstrapping')
    parser_bootstrap.add_argument(
        '--skip-firewalld',
        action='store_true',
        help='Do not configure firewalld')
    parser_bootstrap.add_argument(
        '--allow-overwrite',
        action='store_true',
        help='allow overwrite of existing --output-* config/keyring/ssh files')
    parser_bootstrap.add_argument(
        '--allow-fqdn-hostname',
        action='store_true',
        help='allow hostname that is fully-qualified (contains ".")')
    parser_bootstrap.add_argument(
        '--skip-prepare-host',
        action='store_true',
        help='Do not prepare host')
    parser_bootstrap.add_argument(
        '--orphan-initial-daemons',
        action='store_true',
        help='Do not create initial mon, mgr, and crash service specs')
    parser_bootstrap.add_argument(
        '--skip-monitoring-stack',
        action='store_true',
        help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
    parser_bootstrap.add_argument(
        '--apply-spec',
        help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')

    parser_bootstrap.add_argument(
        '--shared_ceph_folder',
        metavar='CEPH_SOURCE_FOLDER',
        help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')

    parser_bootstrap.add_argument(
        '--registry-url',
        help='url for custom registry')
    parser_bootstrap.add_argument(
        '--registry-username',
        help='username for custom registry')
    parser_bootstrap.add_argument(
        '--registry-password',
        help='password for custom registry')
    parser_bootstrap.add_argument(
        '--registry-json',
        help='json file with custom registry login info (URL, Username, Password)')
    parser_bootstrap.add_argument(
        '--container-init',
        action='store_true',
        default=CONTAINER_INIT,
        help=argparse.SUPPRESS)

    parser_deploy = subparsers.add_parser(
        'deploy', help='deploy a daemon')
    parser_deploy.set_defaults(func=command_deploy)
    parser_deploy.add_argument(
        '--name',
        required=True,
        action=CustomValidation,
        help='daemon name (type.id)')
    parser_deploy.add_argument(
        '--fsid',
        required=True,
        help='cluster FSID')
    parser_deploy.add_argument(
        '--config', '-c',
        help='config file for new daemon')
    parser_deploy.add_argument(
        '--config-json',
        help='Additional configuration information in JSON format')
    parser_deploy.add_argument(
        '--keyring',
        help='keyring for new daemon')
    parser_deploy.add_argument(
        '--key',
        help='key for new daemon')
    parser_deploy.add_argument(
        '--osd-fsid',
        help='OSD uuid, if creating an OSD container')
    parser_deploy.add_argument(
        '--skip-firewalld',
        action='store_true',
        help='Do not configure firewalld')
    parser_deploy.add_argument(
        '--tcp-ports',
        help='List of tcp ports to open in the host firewall')
    parser_deploy.add_argument(
        '--reconfig',
        action='store_true',
        help='Reconfigure a previously deployed daemon')
    parser_deploy.add_argument(
        '--allow-ptrace',
        action='store_true',
        help='Allow SYS_PTRACE on daemon container')
    parser_deploy.add_argument(
        '--container-init',
        action='store_true',
        default=CONTAINER_INIT,
        help=argparse.SUPPRESS)

    parser_check_host = subparsers.add_parser(
        'check-host', help='check host configuration')
    parser_check_host.set_defaults(func=command_check_host)
    parser_check_host.add_argument(
        '--expect-hostname',
        help='Check that hostname matches an expected value')

    parser_prepare_host = subparsers.add_parser(
        'prepare-host', help='prepare a host for cephadm use')
    parser_prepare_host.set_defaults(func=command_prepare_host)
    parser_prepare_host.add_argument(
        '--expect-hostname',
        help='Set hostname')

    parser_add_repo = subparsers.add_parser(
        'add-repo', help='configure package repository')
    parser_add_repo.set_defaults(func=command_add_repo)
    parser_add_repo.add_argument(
        '--release',
        help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE))
    parser_add_repo.add_argument(
        '--version',
        help='use specific upstream version (x.y.z)')
    parser_add_repo.add_argument(
        '--dev',
        help='use specified bleeding edge build from git branch or tag')
    parser_add_repo.add_argument(
        '--dev-commit',
        help='use specified bleeding edge build from git commit')
    parser_add_repo.add_argument(
        '--gpg-url',
        help='specify alternative GPG key location')
    parser_add_repo.add_argument(
        '--repo-url',
        default='https://download.ceph.com',
        help='specify alternative repo location')
    # TODO: proxy?

    parser_rm_repo = subparsers.add_parser(
        'rm-repo', help='remove package repository configuration')
    parser_rm_repo.set_defaults(func=command_rm_repo)

    parser_install = subparsers.add_parser(
        'install', help='install ceph package(s)')
    parser_install.set_defaults(func=command_install)
    parser_install.add_argument(
        'packages', nargs='*',
        default=['cephadm'],
        help='packages')

    parser_registry_login = subparsers.add_parser(
        'registry-login', help='log host into authenticated registry')
    parser_registry_login.set_defaults(func=command_registry_login)
    parser_registry_login.add_argument(
        '--registry-url',
        help='url for custom registry')
    parser_registry_login.add_argument(
        '--registry-username',
        help='username for custom registry')
    parser_registry_login.add_argument(
        '--registry-password',
        help='password for custom registry')
    parser_registry_login.add_argument(
        '--registry-json',
        help='json file with custom registry login info (URL, Username, Password)')
    parser_registry_login.add_argument(
        '--fsid',
        help='cluster FSID')

    parser_gather_facts = subparsers.add_parser(
        'gather-facts', help='gather and return host related information (JSON format)')
    parser_gather_facts.set_defaults(func=command_gather_facts)

    return parser
6167
f6b5b4d7 6168
9f95a23c
TL
def _parse_args(av):
    """Parse the cephadm argument vector *av* and normalize the result.

    Handles two quirks that plain argparse does not: a leading ``--``
    separator in a trailing REMAINDER command, and the mutually exclusive
    legacy ``--container-init`` / ``--no-container-init`` flags.
    """
    parser = _get_parser()

    args = parser.parse_args(av)

    # Drop a leading "--" that users may pass to separate the sub-command's
    # own arguments (argparse keeps it in the REMAINDER list).
    if 'command' in args and args.command and args.command[0] == "--":
        args.command.pop(0)

    # workaround argparse to deprecate the subparser `--container-init` flag
    # container_init and no_container_init must always be mutually exclusive
    init_flags = ('--container-init', '--no-container-init')
    if all(flag in av for flag in init_flags):
        parser.error('argument %s: not allowed with argument %s' % (init_flags))
    if '--container-init' in av:
        args.no_container_init = not args.container_init
    else:
        args.container_init = not args.no_container_init
    assert args.container_init is not args.no_container_init

    return args
9f95a23c 6188
f6b5b4d7 6189
if __name__ == "__main__":

    # cephadm manages /var/lib/ceph, systemd units, firewall rules, etc.,
    # so refuse to run as anyone but root.
    if os.geteuid() != 0:
        sys.stderr.write('ERROR: cephadm should be run as root\n')
        sys.exit(1)

    # Logger configuration: the log directory must exist before dictConfig
    # instantiates the file handler that writes into it.
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    dictConfig(logging_config)
    logger = logging.getLogger()

    # allow argv to be injected (the script may be piped to python3 with an
    # `injected_argv = [...]` line prepended; see module docstring)
    try:
        av = injected_argv  # type: ignore
    except NameError:
        av = sys.argv[1:]
    logger.debug("%s\ncephadm %s" % ("-" * 80, av))
    args = _parse_args(av)

    # More verbose console output
    if args.verbose:
        for handler in logger.handlers:
            if handler.name == "console":
                handler.setLevel(logging.DEBUG)

    if 'func' not in args:
        sys.stderr.write('No command specified; pass -h or --help for usage\n')
        sys.exit(1)

    # podman or docker?
    if args.func != command_check_host:
        if args.docker:
            container_path = find_program('docker')
        else:
            # Initialize before the loop so the emptiness check below cannot
            # hit a NameError when every candidate runtime fails to resolve.
            container_path = ''
            for i in CONTAINER_PREFERENCE:
                try:
                    container_path = find_program(i)
                    break
                except Exception as e:
                    logger.debug('Could not locate %s: %s' % (i, e))
            # prepare-host/add-repo can run without a container runtime
            if not container_path and args.func != command_prepare_host\
               and args.func != command_add_repo:
                sys.stderr.write('Unable to locate any of %s\n' % CONTAINER_PREFERENCE)
                sys.exit(1)

    # container-init?
    container_init = args.container_init
    logger.debug('container_init=%s' % (container_init))

    try:
        r = args.func()
    except Error as e:
        # with --verbose, surface the full traceback instead of a one-liner
        if args.verbose:
            raise
        sys.stderr.write('ERROR: %s\n' % e)
        sys.exit(1)
    if not r:
        r = 0
    sys.exit(r)