]> git.proxmox.com Git - ceph.git/blame - ceph/src/cephadm/cephadm
import ceph 15.2.10
[ceph.git] / ceph / src / cephadm / cephadm
CommitLineData
9f95a23c
TL
#!/usr/bin/python3

# Container image pulled when the user does not specify one, and whether
# that default tracks the master branch (False: it is a release tag).
DEFAULT_IMAGE='docker.io/ceph/ceph:v15'
DEFAULT_IMAGE_IS_MASTER=False
LATEST_STABLE_RELEASE = 'octopus'
# Well-known host paths used throughout this script.
DATA_DIR = '/var/lib/ceph'
LOG_DIR = '/var/log/ceph'
LOCK_DIR = '/run/cephadm'  # FileLock lock files live here
LOGROTATE_DIR = '/etc/logrotate.d'
UNIT_DIR = '/etc/systemd/system'
LOG_DIR_MODE = 0o770
DATA_DIR_MODE = 0o700
CONTAINER_PREFERENCE = ['podman', 'docker'] # prefer podman to docker
CUSTOM_PS1 = r'[ceph: \u@\h \W]\$ '
DEFAULT_TIMEOUT = None # in seconds
DEFAULT_RETRY = 10
SHELL_DEFAULT_CONF = '/etc/ceph/ceph.conf'
SHELL_DEFAULT_KEYRING = '/etc/ceph/ceph.client.admin.keyring'
9f95a23c
TL
19
20"""
21You can invoke cephadm in two ways:
22
231. The normal way, at the command line.
24
252. By piping the script to the python3 binary. In this latter case, you should
26 prepend one or more lines to the beginning of the script.
27
28 For arguments,
29
30 injected_argv = [...]
31
32 e.g.,
33
34 injected_argv = ['ls']
35
36 For reading stdin from the '--config-json -' argument,
37
38 injected_stdin = '...'
39"""
9f95a23c
TL
40import argparse
41import datetime
42import fcntl
f6b5b4d7 43import ipaddress
9f95a23c
TL
44import json
45import logging
f91f0fd5 46from logging.config import dictConfig
9f95a23c
TL
47import os
48import platform
f6b5b4d7 49import pwd
9f95a23c 50import random
9f95a23c
TL
51import select
52import shutil
53import socket
54import string
55import subprocess
56import sys
57import tempfile
58import time
59import errno
f91f0fd5 60import struct
adb31ebb 61from enum import Enum
9f95a23c 62try:
f6b5b4d7 63 from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, Callable, IO
9f95a23c
TL
64except ImportError:
65 pass
f91f0fd5
TL
66
67import re
9f95a23c
TL
68import uuid
69
70from functools import wraps
71from glob import glob
72from threading import Thread
73
74if sys.version_info >= (3, 0):
75 from io import StringIO
76else:
77 from StringIO import StringIO
78
79if sys.version_info >= (3, 2):
80 from configparser import ConfigParser
81else:
82 from ConfigParser import SafeConfigParser
83
84if sys.version_info >= (3, 0):
85 from urllib.request import urlopen
86 from urllib.error import HTTPError
87else:
88 from urllib2 import urlopen, HTTPError
89
f6b5b4d7
TL
90if sys.version_info > (3, 0):
91 unicode = str
92
9f95a23c
TL
# Path of the container engine binary (podman or docker); used as argv[0]
# for 'exec' commands below.  Set elsewhere at startup — empty until then.
container_path = ''
# Cache for stdin content (used with '--config-json -'); populated elsewhere.
cached_stdin = None

# Timestamp format with a literal 'Z' suffix (UTC-style ISO 8601).
DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ'

# Log and console output config:
# everything (DEBUG+) goes to a rotating file under LOG_DIR, while the
# console only shows INFO and above, unformatted.
logging_config = {
    'version': 1,
    'disable_existing_loggers': True,
    'formatters': {
        'cephadm': {
            'format': '%(asctime)s %(levelname)s %(message)s'
        },
    },
    'handlers': {
        'console':{
            'level':'INFO',
            'class':'logging.StreamHandler',
        },
        'log_file': {
            'level': 'DEBUG',
            'class': 'logging.handlers.RotatingFileHandler',
            'formatter': 'cephadm',
            'filename': '%s/cephadm.log' % LOG_DIR,
            'maxBytes': 1024000,
            'backupCount': 1,
        }
    },
    'loggers': {
        # root logger: fan out to both handlers
        '': {
            'level': 'DEBUG',
            'handlers': ['console', 'log_file'],
        }
    }
}
e306af50
TL
128
class termcolor:
    """ANSI escape sequences used to colorize terminal output."""
    yellow = '\033[93m'
    red = '\033[31m'
    end = '\033[0m'  # reset attributes
133
f6b5b4d7 134
9f95a23c
TL
class Error(Exception):
    """Base class for cephadm-specific errors raised in this script."""
    pass
137
f6b5b4d7 138
9f95a23c
TL
class TimeoutExpired(Error):
    """Raised when a command did not finish within its timeout."""
    pass
141
142##################################
143
f6b5b4d7 144
9f95a23c
TL
class Ceph(object):
    """Core Ceph daemon types that this script can deploy."""
    daemons = ('mon', 'mgr', 'mds', 'osd', 'rgw', 'rbd-mirror',
               'crash')
148
149##################################
150
f6b5b4d7 151
9f95a23c
TL
class Monitoring(object):
    """Define the configs for the monitoring containers"""

    # Default listening port(s) per monitoring component.
    port_map = {
        "prometheus": [9095],  # Avoid default 9090, due to conflict with cockpit UI
        "node-exporter": [9100],
        "grafana": [3000],
        "alertmanager": [9093, 9094],
    }

    # Per-component container image, resource hints, daemon arguments and
    # the config-json files/args the component expects.
    components = {
        "prometheus": {
            "image": "docker.io/prom/prometheus:v2.18.1",
            "cpus": '2',
            "memory": '4GB',
            "args": [
                "--config.file=/etc/prometheus/prometheus.yml",
                "--storage.tsdb.path=/prometheus",
                "--web.listen-address=:{}".format(port_map['prometheus'][0]),
            ],
            "config-json-files": [
                "prometheus.yml",
            ],
        },
        "node-exporter": {
            "image": "docker.io/prom/node-exporter:v0.18.1",
            "cpus": "1",
            "memory": "1GB",
            "args": [
                "--no-collector.timex",
            ],
        },
        "grafana": {
            "image": "docker.io/ceph/ceph-grafana:6.7.4",
            "cpus": "2",
            "memory": "4GB",
            "args": [],
            "config-json-files": [
                "grafana.ini",
                "provisioning/datasources/ceph-dashboard.yml",
                "certs/cert_file",
                "certs/cert_key",
            ],
        },
        "alertmanager": {
            "image": "docker.io/prom/alertmanager:v0.20.0",
            "cpus": "2",
            "memory": "2GB",
            "args": [
                "--web.listen-address=:{}".format(port_map['alertmanager'][0]),
                "--cluster.listen-address=:{}".format(port_map['alertmanager'][1]),
            ],
            "config-json-files": [
                "alertmanager.yml",
            ],
            "config-json-args": [
                "peers",
            ],
        },
    } # type: ignore
212
213##################################
214
f6b5b4d7 215
9f95a23c
TL
class NFSGanesha(object):
    """Defines a NFS-Ganesha container"""

    daemon_type = 'nfs'
    entrypoint = '/usr/bin/ganesha.nfsd'
    # run in the foreground and log to stderr
    daemon_args = ['-F', '-L', 'STDERR']

    # files that must be present in the supplied config-json
    required_files = ['ganesha.conf']

    port_map = {
        "nfs" : 2049,
    }

    def __init__(self,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (str, Union[int, str], Dict, str) -> None
        """Build a ganesha daemon description from config-json and validate it."""
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.pool = dict_get(config_json, 'pool', require=True)
        self.namespace = dict_get(config_json, 'namespace')
        self.userid = dict_get(config_json, 'userid')
        self.extra_args = dict_get(config_json, 'extra_args', [])
        self.files = dict_get(config_json, 'files', {})
        self.rgw = dict_get(config_json, 'rgw', {})

        # validate the supplied args
        self.validate()

    @classmethod
    def init(cls, fsid, daemon_id):
        # type: (str, Union[int, str]) -> NFSGanesha
        """Alternate constructor pulling config-json/image from the global CLI args."""
        return cls(fsid, daemon_id, get_parm(args.config_json), args.image)

    def get_container_mounts(self, data_dir):
        # type: (str) -> Dict[str, str]
        """Map host paths under data_dir to in-container mount points."""
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z'
        if self.rgw:
            cluster = self.rgw.get('cluster', 'ceph')
            rgw_user = self.rgw.get('user', 'admin')
            mounts[os.path.join(data_dir, 'keyring.rgw')] = \
                    '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user)
        return mounts

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        """Environment variables passed into the container."""
        envs = [
            'CEPH_CONF=%s' % ('/etc/ceph/ceph.conf')
        ]
        return envs

    @staticmethod
    def get_version(container_id):
        # type: (str) -> Optional[str]
        """Return the ganesha release reported inside a running container,
        or None if it cannot be determined."""
        version = None
        out, err, code = call(
            [container_path, 'exec', container_id,
             NFSGanesha.entrypoint, '-v'])
        if code == 0:
            match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
            if match:
                version = match.group(1)
        return version

    def validate(self):
        # type: () -> None
        """Raise Error if any constructor argument or config-json entry is invalid."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

        # check for an RGW config
        if self.rgw:
            if not self.rgw.get('keyring'):
                raise Error('RGW keyring is missing')
            if not self.rgw.get('user'):
                raise Error('RGW user is missing')

    def get_daemon_name(self):
        # type: () -> str
        """Return '<daemon_type>.<daemon_id>'."""
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        """Return the container name, optionally suffixed with desc."""
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def get_daemon_args(self):
        # type: () -> List[str]
        """Entrypoint arguments: the defaults plus any config-json extra_args."""
        return self.daemon_args + self.extra_args

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ganesha config...')

        # create the ganesha conf dir
        config_dir = os.path.join(data_dir, 'etc/ganesha')
        makedirs(config_dir, uid, gid, 0o755)

        # populate files from the config-json
        for fname in self.files:
            config_file = os.path.join(config_dir, fname)
            config_content = dict_get_join(self.files, fname)
            logger.info('Write file: %s' % (config_file))
            with open(config_file, 'w') as f:
                # chown/chmod before writing so the content is never
                # readable with the wrong permissions
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)
                f.write(config_content)

        # write the RGW keyring
        if self.rgw:
            keyring_path = os.path.join(data_dir, 'keyring.rgw')
            with open(keyring_path, 'w') as f:
                os.fchmod(f.fileno(), 0o600)
                os.fchown(f.fileno(), uid, gid)
                f.write(self.rgw.get('keyring', ''))

    def get_rados_grace_container(self, action):
        # type: (str) -> CephContainer
        """Container for a ganesha action on the grace db"""
        entrypoint = '/usr/bin/ganesha-rados-grace'

        assert self.pool
        args=['--pool', self.pool]
        if self.namespace:
            args += ['--ns', self.namespace]
        if self.userid:
            args += ['--userid', self.userid]
        args += [action, self.get_daemon_name()]

        data_dir = get_data_dir(self.fsid, self.daemon_type, self.daemon_id)
        volume_mounts = self.get_container_mounts(data_dir)
        envs = self.get_container_envs()

        logger.info('Creating RADOS grace for action: %s' % action)
        c = CephContainer(
            image=self.image,
            entrypoint=entrypoint,
            args=args,
            volume_mounts=volume_mounts,
            cname=self.get_container_name(desc='grace-%s' % action),
            envs=envs
        )
        return c
383
384##################################
385
f6b5b4d7 386
1911f103
TL
class CephIscsi(object):
    """Defines a Ceph-Iscsi container"""

    daemon_type = 'iscsi'
    entrypoint = '/usr/bin/rbd-target-api'

    # files that must be present in the supplied config-json
    required_files = ['iscsi-gateway.cfg']

    def __init__(self,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (str, Union[int, str], Dict, str) -> None
        """Build an iscsi daemon description from config-json and validate it."""
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        # validate the supplied args
        self.validate()

    @classmethod
    def init(cls, fsid, daemon_id):
        # type: (str, Union[int, str]) -> CephIscsi
        """Alternate constructor pulling config-json/image from the global CLI args."""
        return cls(fsid, daemon_id, get_parm(args.config_json), args.image)

    @staticmethod
    def get_container_mounts(data_dir, log_dir):
        # type: (str, str) -> Dict[str, str]
        """Map host paths to in-container mount points."""
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
        mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
        mounts[log_dir] = '/var/log/rbd-target-api:z'
        mounts['/dev'] = '/dev'
        return mounts

    @staticmethod
    def get_container_binds():
        # type: () -> List[List[str]]
        """Bind-mount the host kernel modules read-only into the container."""
        binds = []
        lib_modules = ['type=bind',
                       'source=/lib/modules',
                       'destination=/lib/modules',
                       'ro=true']
        binds.append(lib_modules)
        return binds

    @staticmethod
    def get_version(container_id):
        # type: (str) -> Optional[str]
        """Return the ceph_iscsi package version inside a running container,
        or None if it cannot be determined."""
        version = None
        out, err, code = call(
            [container_path, 'exec', container_id,
             '/usr/bin/python3', '-c', "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"])
        if code == 0:
            version = out.strip()
        return version

    def validate(self):
        # type: () -> None
        """Raise Error if any constructor argument or config-json entry is invalid."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        """Return '<daemon_type>.<daemon_id>'."""
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        """Return the container name, optionally suffixed with desc."""
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ceph-iscsi config...')
        configfs_dir = os.path.join(data_dir, 'configfs')
        makedirs(configfs_dir, uid, gid, 0o755)

        # populate files from the config-json
        for fname in self.files:
            config_file = os.path.join(data_dir, fname)
            config_content = dict_get_join(self.files, fname)
            logger.info('Write file: %s' % (config_file))
            with open(config_file, 'w') as f:
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)
                f.write(config_content)

    @staticmethod
    def configfs_mount_umount(data_dir, mount=True):
        # type: (str, bool) -> List[str]
        """Shell snippet (tokenized) to idempotently mount or unmount the
        configfs directory under data_dir."""
        mount_path = os.path.join(data_dir, 'configfs')
        if mount:
            cmd = "if ! grep -qs {0} /proc/mounts; then " \
                  "mount -t configfs none {0}; fi".format(mount_path)
        else:
            cmd = "if grep -qs {0} /proc/mounts; then " \
                  "umount {0}; fi".format(mount_path)
        # NOTE(review): split() tokenizes the snippet on whitespace; this
        # assumes mount_path contains no spaces and that the caller feeds
        # the tokens back through a shell — confirm at the call site.
        return cmd.split()

    def get_tcmu_runner_container(self):
        # type: () -> CephContainer
        """Sidecar container running tcmu-runner alongside rbd-target-api."""
        tcmu_container = get_container(self.fsid, self.daemon_type, self.daemon_id)
        tcmu_container.entrypoint = "/usr/bin/tcmu-runner"
        tcmu_container.cname = self.get_container_name(desc='tcmu')
        # remove extra container args for tcmu container.
        # extra args could cause issue with forking service type
        tcmu_container.container_args = []
        return tcmu_container
517
1911f103
TL
518##################################
519
f6b5b4d7 520
f91f0fd5
TL
class CustomContainer(object):
    """Defines a custom container"""
    daemon_type = 'container'

    def __init__(self, fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        """Build a custom container description from config-json options."""
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.entrypoint = dict_get(config_json, 'entrypoint')
        self.uid = dict_get(config_json, 'uid', 65534)  # nobody
        self.gid = dict_get(config_json, 'gid', 65534)  # nobody
        self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
        self.args = dict_get(config_json, 'args', [])
        self.envs = dict_get(config_json, 'envs', [])
        self.privileged = dict_get(config_json, 'privileged', False)
        self.bind_mounts = dict_get(config_json, 'bind_mounts', [])
        self.ports = dict_get(config_json, 'ports', [])
        self.dirs = dict_get(config_json, 'dirs', [])
        self.files = dict_get(config_json, 'files', {})

    @classmethod
    def init(cls, fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer':
        """Alternate constructor pulling config-json/image from the global CLI args."""
        return cls(fsid, daemon_id, get_parm(args.config_json), args.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """
        Create dirs/files below the container data directory.
        """
        logger.info('Creating custom container configuration '
                    'dirs/files in {} ...'.format(data_dir))

        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % data_dir)

        for dir_path in self.dirs:
            logger.info('Creating directory: {}'.format(dir_path))
            dir_path = os.path.join(data_dir, dir_path.strip('/'))
            makedirs(dir_path, uid, gid, 0o755)

        for file_path in self.files:
            logger.info('Creating file: {}'.format(file_path))
            content = dict_get_join(self.files, file_path)
            file_path = os.path.join(data_dir, file_path.strip('/'))
            with open(file_path, 'w', encoding='utf-8') as f:
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)
                f.write(content)

    def get_daemon_args(self) -> List[str]:
        """Custom containers take no implicit daemon arguments."""
        return []

    def get_container_args(self) -> List[str]:
        """Container-level arguments supplied via config-json 'args'."""
        return self.args

    def get_container_envs(self) -> List[str]:
        """Environment variables supplied via config-json 'envs'."""
        return self.envs

    def get_container_mounts(self, data_dir: str) -> Dict[str, str]:
        """
        Get the volume mounts. Relative source paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

        Example:
        {
            /foo/conf: /conf
            foo/conf: /conf
        }
        becomes
        {
            /foo/conf: /conf
            /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
        }
        """
        mounts = {}
        for source, destination in self.volume_mounts.items():
            # os.path.join ignores data_dir when source is absolute
            source = os.path.join(data_dir, source)
            mounts[source] = destination
        return mounts

    def get_container_binds(self, data_dir: str) -> List[List[str]]:
        """
        Get the bind mounts. Relative `source=...` paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

        Example:
        [
            'type=bind',
            'source=lib/modules',
            'destination=/lib/modules',
            'ro=true'
        ]
        becomes
        [
            ...
            'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
            ...
        ]
        """
        # shallow copy: the inner lists are rewritten in place
        binds = self.bind_mounts.copy()
        for bind in binds:
            for index, value in enumerate(bind):
                match = re.match(r'^source=(.+)$', value)
                if match:
                    bind[index] = 'source={}'.format(os.path.join(
                        data_dir, match.group(1)))
        return binds
630
631##################################
632
633
def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
    """
    Helper function to get a key from a dictionary.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :param default: The default value in case the key does not
        exist. Default is `None`.
    :param require: Set to `True` if the key is required. An
        exception will be raised if the key does not exist in
        the given dictionary.
    :return: Returns the value of the given key.
    :raises: :exc:`Error` if the given key does not exist
        and `require` is set to `True`.
    """
    # Membership test directly on the dict (was 'key not in d.keys()',
    # which needlessly builds a keys view).
    if require and key not in d:
        raise Error('{} missing from dict'.format(key))
    return d.get(key, default)
651
652##################################
653
654
def dict_get_join(d: Dict, key: str) -> Any:
    """
    Fetch *key* from *d*, flattening list values into a single string.

    A list value is converted to one string by joining its stringified
    items with line breaks; any other value (including a missing key,
    which yields ``None``) is returned unchanged.

    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :return: The (possibly joined) value for *key*.
    """
    value = d.get(key)
    if not isinstance(value, list):
        return value
    return '\n'.join(str(item) for item in value)
669
670##################################
671
672
def get_supported_daemons():
    # type: () -> List[str]
    """Return every daemon type this script knows how to deploy."""
    supported = list(Ceph.daemons) + list(Monitoring.components)
    supported += [NFSGanesha.daemon_type,
                  CephIscsi.daemon_type,
                  CustomContainer.daemon_type]
    # daemon type names must be unique across all providers
    assert len(supported) == len(set(supported))
    return supported
682
683##################################
684
f6b5b4d7 685
def attempt_bind(s, address, port):
    # type: (socket.socket, str, int) -> None
    """
    Try to bind *s* to (address, port); the socket is always closed.

    Raises OSError when the port is already in use.  Other bind failures
    (notably EADDRNOTAVAIL) are logged but tolerated.
    """
    try:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((address, port))
    except (socket.error, OSError) as e:  # py2 and py3
        msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
        logger.warning(msg)
        if e.errno == errno.EADDRINUSE:
            raise OSError(msg)
        # EADDRNOTAVAIL (and any other errno) falls through silently,
        # matching the original elif/pass behavior
    finally:
        s.close()
700
f6b5b4d7 701
def port_in_use(port_num):
    # type: (int) -> bool
    """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
    logger.info('Verifying port %d ...' % port_num)
    try:
        # probe the wildcard address of each family; attempt_bind()
        # raises OSError when the port is already taken
        for family, wildcard in ((socket.AF_INET, '0.0.0.0'),
                                 (socket.AF_INET6, '::')):
            probe = socket.socket(family, socket.SOCK_STREAM)
            attempt_bind(probe, wildcard, port_num)
    except OSError:
        return True
    return False
716
f6b5b4d7 717
def check_ip_port(ip, port):
    # type: (str, int) -> None
    """Verify ip:port can be bound locally; raise Error when it cannot."""
    if args.skip_ping_check:
        return
    logger.info('Verifying IP %s port %d ...' % (ip, port))
    if is_ipv6(ip):
        s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
        ip = unwrap_ipv6(ip)
    else:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        attempt_bind(s, ip, port)
    except OSError as e:
        raise Error(e)
731
732##################################
733
734# this is an abbreviated version of
735# https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py
736# that drops all of the compatibility (this is Unix/Linux only).
737
# Python 2 has no builtin TimeoutError; alias it to OSError there so the
# Timeout exception below has a sensible base class on both versions.
try:
    TimeoutError
except NameError:
    TimeoutError = OSError
742
f6b5b4d7 743
9f95a23c
TL
class Timeout(TimeoutError):
    """
    Raised when the lock could not be acquired in *timeout*
    seconds.
    """

    def __init__(self, lock_file):
        """Remember which lock file could not be acquired."""
        #: The path of the file lock.
        self.lock_file = lock_file

    def __str__(self):
        return "The file lock '{}' could not be acquired.".format(
            self.lock_file)
761
762
763class _Acquire_ReturnProxy(object):
764 def __init__(self, lock):
765 self.lock = lock
766 return None
767
768 def __enter__(self):
769 return self.lock
770
771 def __exit__(self, exc_type, exc_value, traceback):
772 self.lock.release()
773 return None
774
775
class FileLock(object):
    """
    A flock(2)-based inter-process lock stored under LOCK_DIR.

    Abbreviated from https://github.com/benediktschmitt/py-filelock with
    the cross-platform compatibility dropped (Unix/Linux only).  Nested
    acquisition within one process is supported via a lock counter.
    """

    def __init__(self, name, timeout=-1):
        # Ensure the lock directory exists (mode 0700).
        if not os.path.exists(LOCK_DIR):
            os.mkdir(LOCK_DIR, 0o700)
        self._lock_file = os.path.join(LOCK_DIR, name + '.lock')

        # The file descriptor for the *_lock_file* as it is returned by the
        # os.open() function.
        # This file lock is only NOT None, if the object currently holds the
        # lock.
        self._lock_file_fd = None
        self.timeout = timeout
        # The lock counter is used for implementing the nested locking
        # mechanism. Whenever the lock is acquired, the counter is increased and
        # the lock is only released, when this value is 0 again.
        self._lock_counter = 0
        return None

    @property
    def is_locked(self):
        # True while this object holds the flock (i.e. the fd is open).
        return self._lock_file_fd is not None

    def acquire(self, timeout=None, poll_intervall=0.05):
        """
        Acquires the file lock or fails with a :exc:`Timeout` error.
        .. code-block:: python
            # You can use this method in the context manager (recommended)
            with lock.acquire():
                pass
            # Or use an equivalent try-finally construct:
            lock.acquire()
            try:
                pass
            finally:
                lock.release()
        :arg float timeout:
            The maximum time waited for the file lock.
            If ``timeout < 0``, there is no timeout and this method will
            block until the lock could be acquired.
            If ``timeout`` is None, the default :attr:`~timeout` is used.
        :arg float poll_intervall:
            We check once in *poll_intervall* seconds if we can acquire the
            file lock.
        :raises Timeout:
            if the lock could not be acquired in *timeout* seconds.
        .. versionchanged:: 2.0.0
            This method returns now a *proxy* object instead of *self*,
            so that it can be used in a with statement without side effects.
        """
        # Use the default timeout, if no timeout is provided.
        if timeout is None:
            timeout = self.timeout

        # Increment the number right at the beginning.
        # We can still undo it, if something fails.
        self._lock_counter += 1

        lock_id = id(self)
        lock_filename = self._lock_file
        start_time = time.time()
        try:
            while True:
                if not self.is_locked:
                    logger.debug('Acquiring lock %s on %s', lock_id,
                                 lock_filename)
                    self._acquire()

                if self.is_locked:
                    logger.debug('Lock %s acquired on %s', lock_id,
                                 lock_filename)
                    break
                elif timeout >= 0 and time.time() - start_time > timeout:
                    logger.warning('Timeout acquiring lock %s on %s', lock_id,
                                   lock_filename)
                    raise Timeout(self._lock_file)
                else:
                    logger.debug(
                        'Lock %s not acquired on %s, waiting %s seconds ...',
                        lock_id, lock_filename, poll_intervall
                    )
                    time.sleep(poll_intervall)
        except:  # noqa
            # Something did go wrong, so decrement the counter.
            self._lock_counter = max(0, self._lock_counter - 1)

            raise
        return _Acquire_ReturnProxy(lock = self)

    def release(self, force=False):
        """
        Releases the file lock.
        Please note, that the lock is only completly released, if the lock
        counter is 0.
        Also note, that the lock file itself is not automatically deleted.
        :arg bool force:
            If true, the lock counter is ignored and the lock is released in
            every case.
        """
        if self.is_locked:
            self._lock_counter -= 1

            if self._lock_counter == 0 or force:
                lock_id = id(self)
                lock_filename = self._lock_file

                logger.debug('Releasing lock %s on %s', lock_id, lock_filename)
                self._release()
                self._lock_counter = 0
                logger.debug('Lock %s released on %s', lock_id, lock_filename)

        return None

    def __enter__(self):
        self.acquire()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.release()
        return None

    def __del__(self):
        # Force-release on garbage collection so a dropped FileLock never
        # leaves the flock held for the process lifetime.
        self.release(force=True)
        return None

    def _acquire(self):
        # Open (creating/truncating) the lock file and try a non-blocking
        # exclusive flock; on failure close the fd and leave is_locked False.
        open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
        fd = os.open(self._lock_file, open_mode)

        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except (IOError, OSError):
            os.close(fd)
        else:
            self._lock_file_fd = fd
        return None

    def _release(self):
        # Do not remove the lockfile:
        #
        #   https://github.com/benediktschmitt/py-filelock/issues/31
        #   https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
        fd = self._lock_file_fd
        self._lock_file_fd = None
        fcntl.flock(fd, fcntl.LOCK_UN)  # type: ignore
        os.close(fd)  # type: ignore
        return None
922
923
924##################################
925# Popen wrappers, lifted from ceph-volume
926
adb31ebb
TL
class CallVerbosity(Enum):
    """How much of a subprocess's output call() forwards to the logger."""
    SILENT = 0
    # log stdout/stderr to logger.debug
    DEBUG = 1
    # On a non-zero exit status, it will forcefully set
    # logging ON for the terminal
    VERBOSE_ON_FAILURE = 2
    # log at info (instead of debug) level.
    VERBOSE = 3
936
937
def call(command: List[str],
         desc: Optional[str] = None,
         verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
         timeout: Optional[int] = DEFAULT_TIMEOUT,
         **kwargs) -> Tuple[str, str, int]:
    """
    Wrap subprocess.Popen to

    - log stdout/stderr to a logger,
    - decode utf-8
    - cleanly return out, err, returncode

    :param command: argv list to execute (no shell)
    :param desc: log-line prefix; defaults to command[0]
    :param verbosity: how aggressively output is forwarded to the logger
    :param timeout: timeout in seconds
    :return: (stdout, stderr, returncode)
    """
    if desc is None:
        desc = command[0]
    if desc:
        desc += ': '
    # fall back to the global --timeout CLI argument
    timeout = timeout or args.timeout

    logger.debug("Running command: %s" % ' '.join(command))
    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        close_fds=True,
        **kwargs
    )
    # get current p.stdout flags, add O_NONBLOCK so os.read() below never
    # blocks while we multiplex both pipes with select()
    assert process.stdout is not None
    assert process.stderr is not None
    stdout_flags = fcntl.fcntl(process.stdout, fcntl.F_GETFL)
    stderr_flags = fcntl.fcntl(process.stderr, fcntl.F_GETFL)
    fcntl.fcntl(process.stdout, fcntl.F_SETFL, stdout_flags | os.O_NONBLOCK)
    fcntl.fcntl(process.stderr, fcntl.F_SETFL, stderr_flags | os.O_NONBLOCK)

    out = ''
    err = ''
    reads = None
    stop = False
    out_buffer = ''   # partial line (no newline yet)
    err_buffer = ''   # partial line (no newline yet)
    start_time = time.time()
    end_time = None
    if timeout:
        end_time = start_time + timeout
    while not stop:
        if end_time and (time.time() >= end_time):
            # deadline reached: kill the child if it is still running
            stop = True
            if process.poll() is None:
                logger.info(desc + 'timeout after %s seconds' % timeout)
                process.kill()
        if reads and process.poll() is not None:
            # we want to stop, but first read off anything remaining
            # on stdout/stderr
            stop = True
        else:
            reads, _, _ = select.select(
                [process.stdout.fileno(), process.stderr.fileno()],
                [], [], timeout
            )
        for fd in reads:
            try:
                message_b = os.read(fd, 1024)
                # py2 os.read returns str, py3 returns bytes
                if isinstance(message_b, bytes):
                    message = message_b.decode('utf-8')
                if isinstance(message_b, str):
                    message = message_b
                if stop and message:
                    # process has terminated, but have more to read still, so not stopping yet
                    # (os.read returns '' when it encounters EOF)
                    stop = False
                if not message:
                    continue
                if fd == process.stdout.fileno():
                    out += message
                    # log complete lines only; keep the trailing partial line
                    message = out_buffer + message
                    lines = message.split('\n')
                    out_buffer = lines.pop()
                    for line in lines:
                        if verbosity == CallVerbosity.VERBOSE:
                            logger.info(desc + 'stdout ' + line)
                        elif verbosity != CallVerbosity.SILENT:
                            logger.debug(desc + 'stdout ' + line)
                elif fd == process.stderr.fileno():
                    err += message
                    message = err_buffer + message
                    lines = message.split('\n')
                    err_buffer = lines.pop()
                    for line in lines:
                        if verbosity == CallVerbosity.VERBOSE:
                            logger.info(desc + 'stderr ' + line)
                        elif verbosity != CallVerbosity.SILENT:
                            logger.debug(desc + 'stderr ' + line)
                else:
                    assert False
            except (IOError, OSError):
                # non-blocking read may raise EAGAIN; ignore and retry
                pass
        if verbosity == CallVerbosity.VERBOSE:
            logger.debug(desc + 'profile rt=%s, stop=%s, exit=%s, reads=%s'
                         % (time.time()-start_time, stop, process.poll(), reads))

    returncode = process.wait()

    # flush any partial (newline-less) trailing output to the logger
    if out_buffer != '':
        if verbosity == CallVerbosity.VERBOSE:
            logger.info(desc + 'stdout ' + out_buffer)
        elif verbosity != CallVerbosity.SILENT:
            logger.debug(desc + 'stdout ' + out_buffer)
    if err_buffer != '':
        if verbosity == CallVerbosity.VERBOSE:
            logger.info(desc + 'stderr ' + err_buffer)
        elif verbosity != CallVerbosity.SILENT:
            logger.debug(desc + 'stderr ' + err_buffer)

    if returncode != 0 and verbosity == CallVerbosity.VERBOSE_ON_FAILURE:
        # dump stdout + stderr
        logger.info('Non-zero exit code %d from %s' % (returncode, ' '.join(command)))
        for line in out.splitlines():
            logger.info(desc + 'stdout ' + line)
        for line in err.splitlines():
            logger.info(desc + 'stderr ' + line)

    return out, err, returncode
1062
1063
adb31ebb
TL
def call_throws(command: List[str],
                desc: Optional[str] = None,
                verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
                timeout: Optional[int] = DEFAULT_TIMEOUT,
                **kwargs) -> Tuple[str, str, int]:
    """Run *command* via call() and raise RuntimeError on a non-zero exit.

    Returns the same (stdout, stderr, returncode) tuple as call().
    """
    stdout, stderr, returncode = call(command, desc, verbosity, timeout, **kwargs)
    if returncode:
        raise RuntimeError('Failed command: %s' % ' '.join(command))
    return stdout, stderr, returncode
1073
1074
def call_timeout(command, timeout):
    # type: (List[str], int) -> int
    """Run *command*, raising TimeoutExpired if it runs longer than *timeout*.

    :return: the command's exit code on (timely) completion.
    :raises TimeoutExpired: if the command does not finish in time.
    """
    logger.debug('Running command (timeout=%s): %s'
                 % (timeout, ' '.join(command)))

    def raise_timeout(command, timeout):
        # type: (List[str], int) -> NoReturn
        msg = 'Command \'%s\' timed out after %s seconds' % (command, timeout)
        logger.debug(msg)
        raise TimeoutExpired(msg)

    def call_timeout_py2(command, timeout):
        # type: (List[str], int) -> int
        # py2 subprocess has no timeout support: wait in a thread and kill
        # the child if the join expires before the process exits
        proc = subprocess.Popen(command)
        thread = Thread(target=proc.wait)
        thread.start()
        thread.join(timeout)
        if thread.is_alive():
            proc.kill()
            thread.join()
            raise_timeout(command, timeout)
        return proc.returncode

    def call_timeout_py3(command, timeout):
        # type: (List[str], int) -> int
        try:
            return subprocess.call(command, timeout=timeout)
        except subprocess.TimeoutExpired as e:
            # re-raise as this file's own TimeoutExpired type
            raise_timeout(command, timeout)

    ret = 1
    if sys.version_info >= (3, 3):
        ret = call_timeout_py3(command, timeout)
    else:
        # py2 subprocess has no timeout arg
        ret = call_timeout_py2(command, timeout)
    return ret
1113
1114##################################
1115
f6b5b4d7 1116
9f95a23c
TL
def is_available(what, func):
    # type: (str, Callable[[], bool]) -> None
    """
    Wait for a service to become available

    Polls *func* once per second, up to args.retry attempts.

    :param what: the name of the service
    :param func: the callable object that determines availability
    :raises Error: if the service is still unavailable after all retries
    """
    retry = args.retry
    logger.info('Waiting for %s...' % what)
    num = 1
    while True:
        if func():
            logger.info('%s is available'
                        % what)
            break
        elif num > retry:
            raise Error('%s not available after %s tries'
                        % (what, retry))

        logger.info('%s not available, waiting (%s/%s)...'
                    % (what, num, retry))

        num += 1
        time.sleep(1)
1142
1143
def read_config(fn):
    # type: (Optional[str]) -> ConfigParser
    """Parse an ini-style config file into a ConfigParser.

    Whitespace at the start of each line is stripped first because py2's
    ConfigParser rejects indented option names (e.g. '\n  foo = bar\n').
    If *fn* is None an empty parser is returned.
    """
    parser = ConfigParser() if sys.version_info >= (3, 2) else SafeConfigParser()

    if fn:
        with open(fn, 'r') as f:
            cleaned = re.sub(r'\n(\s)+', r'\n', f.read())
        buf = StringIO(cleaned)
        if sys.version_info >= (3, 2):
            parser.read_file(buf)
        else:
            parser.readfp(buf)

    return parser
1165
f6b5b4d7 1166
9f95a23c
TL
def pathify(p):
    # type: (str) -> str
    """Return the absolute form of *p*, with a leading '~' expanded."""
    return os.path.abspath(os.path.expanduser(p))
9f95a23c 1171
f6b5b4d7 1172
def get_file_timestamp(fn):
    # type: (str) -> Optional[str]
    """Return *fn*'s mtime as a UTC string in DATEFMT, or None on any error."""
    try:
        mtime = os.path.getmtime(fn)
        return datetime.datetime.fromtimestamp(
            mtime, tz=datetime.timezone.utc).strftime(DATEFMT)
    except Exception:
        # missing file, permission error, etc.: caller treats None as "unknown"
        return None
1182
f6b5b4d7 1183
def try_convert_datetime(s):
    # type: (str) -> Optional[str]
    """Normalize a container-runtime timestamp to UTC DATEFMT, or None.

    podman and docker emit several incompatible formats, none of which
    python's strptime parses directly, e.g.:
      docker 18.09.7:  2020-03-03T09:21:43.636153304Z
      podman 1.7.0:    2020-03-03T15:52:30.136257504-06:00
                       2020-03-03 15:52:30.136257504 -0600 CST
    (podman even differs between 'inspect' and
    'inspect --format {{.Created}}'.)
    """
    # strptime only accepts 6 fractional digits; truncate 9-digit nanoseconds
    s = re.compile(r'(\.[\d]{6})[\d]*').sub(r'\1', s)

    # (on python 3.6.8) strptime won't parse a literal trailing 'Z'
    if s.endswith('Z'):
        s = s[:-1] + '-0000'

    # drop a redundant trailing timezone-name token (e.g. 'CST'), if present
    s = ' '.join(s.split(' ')[0:3])

    # try each known format; return the first that parses
    for fmt in ('%Y-%m-%dT%H:%M:%S.%f%z',
                '%Y-%m-%d %H:%M:%S.%f %z'):
        try:
            # normalize to UTC, rendered as DATEFMT
            parsed = datetime.datetime.strptime(s, fmt)
            return parsed.astimezone(tz=datetime.timezone.utc).strftime(DATEFMT)
        except ValueError:
            pass
    return None
1223
f6b5b4d7 1224
9f95a23c
TL
def get_podman_version():
    # type: () -> Tuple[int, ...]
    """Return the podman version as a tuple of ints.

    :raises ValueError: if the configured container runtime is not podman.
    """
    if 'podman' not in container_path:
        raise ValueError('not using podman')
    version_out, _, _ = call_throws([container_path, '--version'])
    return _parse_podman_version(version_out)
1231
f6b5b4d7 1232
9f95a23c
TL
1233def _parse_podman_version(out):
1234 # type: (str) -> Tuple[int, ...]
1235 _, _, version_str = out.strip().split()
1236
1237 def to_int(val, org_e=None):
1238 if not val and org_e:
1239 raise org_e
1240 try:
1241 return int(val)
1242 except ValueError as e:
1243 return to_int(val[0:-1], org_e or e)
1244
1245 return tuple(map(to_int, version_str.split('.')))
1246
1247
def get_hostname():
    # type: () -> str
    """Return this host's (short) hostname."""
    return socket.gethostname()
1251
f6b5b4d7 1252
9f95a23c
TL
def get_fqdn():
    # type: () -> str
    """Return the fully-qualified domain name, or the bare hostname if empty."""
    fqdn = socket.getfqdn()
    return fqdn if fqdn else socket.gethostname()
1256
f6b5b4d7 1257
9f95a23c
TL
def get_arch():
    # type: () -> str
    """Return the machine hardware name (e.g. 'x86_64')."""
    return platform.uname().machine
1261
f6b5b4d7 1262
9f95a23c
TL
def generate_service_id():
    # type: () -> str
    """Return '<hostname>.<6 random lowercase letters>'."""
    suffix = ''.join(random.choice(string.ascii_lowercase) for _ in range(6))
    return socket.gethostname() + '.' + suffix
1267
f6b5b4d7 1268
9f95a23c
TL
def generate_password():
    # type: () -> str
    """Return a random 10-character password of lowercase letters and digits.

    Uses the ``secrets`` module (a CSPRNG) rather than ``random``, whose
    Mersenne Twister output is predictable and unsuitable for generating
    security-sensitive values such as passwords.
    """
    import secrets  # local import: module-level import block unchanged
    alphabet = string.ascii_lowercase + string.digits
    return ''.join(secrets.choice(alphabet) for _ in range(10))
1273
f6b5b4d7 1274
9f95a23c
TL
def normalize_container_id(i):
    # type: (str) -> str
    """Strip a leading 'sha256:' prefix from a container/image id.

    docker adds the prefix while podman may not, but (as of docker
    18.09.7 and podman) both always use sha256, so drop it for
    consistency.
    """
    prefix = 'sha256:'
    return i[len(prefix):] if i.startswith(prefix) else i
1285
f6b5b4d7 1286
9f95a23c
TL
def make_fsid():
    # type: () -> str
    """Return a new random cluster fsid as a UUID string.

    Uses uuid4 (fully random) instead of uuid1, which embeds the host's
    MAC address and a timestamp in the identifier. The result is still a
    valid UUID, so is_fsid() continues to accept it.
    """
    return str(uuid.uuid4())
1290
f6b5b4d7 1291
9f95a23c
TL
def is_fsid(s):
    # type: (str) -> bool
    """Return True if *s* parses as a UUID."""
    try:
        uuid.UUID(s)
        return True
    except ValueError:
        return False
1299
f6b5b4d7 1300
9f95a23c
TL
def infer_fsid(func):
    """
    If we only find a single fsid in /var/lib/ceph/*, use that

    Decorator: when args.fsid is unset, scan the deployed daemons and, if
    exactly one valid fsid is found (optionally filtered by args.name),
    fill it into args.fsid before calling *func*.

    :raises Error: if more than one candidate fsid is found.
    """
    @wraps(func)
    def _infer_fsid():
        if args.fsid:
            logger.debug('Using specified fsid: %s' % args.fsid)
            return func()

        fsids_set = set()
        daemon_list = list_daemons(detail=False)
        for daemon in daemon_list:
            if not is_fsid(daemon['fsid']):
                # 'unknown' fsid
                continue
            elif 'name' not in args or not args.name:
                # args.name not specified
                fsids_set.add(daemon['fsid'])
            elif daemon['name'] == args.name:
                # args.name is a match
                fsids_set.add(daemon['fsid'])
        fsids = sorted(fsids_set)

        if not fsids:
            # some commands do not always require an fsid
            pass
        elif len(fsids) == 1:
            logger.info('Inferring fsid %s' % fsids[0])
            args.fsid = fsids[0]
        else:
            raise Error('Cannot infer an fsid, one must be specified: %s' % fsids)
        return func()

    return _infer_fsid
1336
f6b5b4d7 1337
e306af50
TL
def infer_config(func):
    """
    If we find a MON daemon, use the config from that container

    Decorator: when args.config is unset, prefer the config stored in a
    mon daemon's data dir for args.fsid; otherwise fall back to
    SHELL_DEFAULT_CONF if it exists.
    """
    @wraps(func)
    def _infer_config():
        if args.config:
            logger.debug('Using specified config: %s' % args.config)
            return func()
        config = None
        if args.fsid:
            name = args.name
            if not name:
                # no daemon name given: pick the first mon, if any
                daemon_list = list_daemons(detail=False)
                for daemon in daemon_list:
                    if daemon['name'].startswith('mon.'):
                        name = daemon['name']
                        break
            if name:
                config = '/var/lib/ceph/{}/{}/config'.format(args.fsid, name)
        if config:
            logger.info('Inferring config %s' % config)
            args.config = config
        elif os.path.exists(SHELL_DEFAULT_CONF):
            logger.debug('Using default config: %s' % SHELL_DEFAULT_CONF)
            args.config = SHELL_DEFAULT_CONF
        return func()

    return _infer_config
1367
f6b5b4d7 1368
1911f103
TL
def _get_default_image():
    """Return DEFAULT_IMAGE, warning first if it tracks a development branch."""
    if DEFAULT_IMAGE_IS_MASTER:
        warn = '''This is a development version of cephadm.
For information regarding the latest stable release:
  https://docs.ceph.com/docs/{}/cephadm/install
'''.format(LATEST_STABLE_RELEASE)
        for line in warn.splitlines():
            logger.warning('{}{}{}'.format(termcolor.yellow, line, termcolor.end))
    return DEFAULT_IMAGE
1378
f6b5b4d7 1379
9f95a23c
TL
def infer_image(func):
    """
    Use the most recent ceph image

    Decorator: fill args.image from (in order of precedence) the CLI,
    the CEPHADM_IMAGE environment variable, the most recently pulled
    local ceph image, or the built-in default.
    """
    @wraps(func)
    def _infer_image():
        if not args.image:
            args.image = os.environ.get('CEPHADM_IMAGE')
        if not args.image:
            args.image = get_last_local_ceph_image()
        if not args.image:
            args.image = _get_default_image()
        return func()

    return _infer_image
1395
f6b5b4d7 1396
9f95a23c
TL
def default_image(func):
    """Decorator: fill args.image when unset.

    Precedence: a monitoring component image matching args.name's type,
    then the CEPHADM_IMAGE environment variable, then the built-in
    default image.
    """
    @wraps(func)
    def _default_image():
        if not args.image:
            if 'name' in args and args.name:
                type_ = args.name.split('.', 1)[0]
                if type_ in Monitoring.components:
                    args.image = Monitoring.components[type_]['image']
            if not args.image:
                args.image = os.environ.get('CEPHADM_IMAGE')
            if not args.image:
                args.image = _get_default_image()

        return func()

    return _default_image
1413
f6b5b4d7 1414
9f95a23c
TL
def get_last_local_ceph_image():
    """
    :return: The most recent local ceph image (already pulled)
    """
    # ask the container runtime for non-dangling images labeled ceph=True,
    # rendered as repo@digest, and pick the first usable line
    out, _, _ = call_throws(
        [container_path, 'images',
         '--filter', 'label=ceph=True',
         '--filter', 'dangling=false',
         '--format', '{{.Repository}}@{{.Digest}}'])
    return _filter_last_local_ceph_image(out)
1425
1426
1427def _filter_last_local_ceph_image(out):
1428 # str -> Optional[str]
1429 for image in out.splitlines():
1430 if image and not image.endswith('@'):
1431 logger.info('Using recent ceph image %s' % image)
1432 return image
9f95a23c
TL
1433 return None
1434
f6b5b4d7 1435
def write_tmp(s, uid, gid):
    # type: (str, int, int) -> Any
    """Write *s* to a new named temp file owned by uid:gid and return it.

    The caller must keep the returned file object alive: the file is
    removed when the object is closed or garbage-collected.
    """
    tmp = tempfile.NamedTemporaryFile(mode='w',
                                      prefix='ceph-tmp')
    os.fchown(tmp.fileno(), uid, gid)
    tmp.write(s)
    tmp.flush()
    return tmp
1445
f6b5b4d7 1446
9f95a23c
TL
def makedirs(dir, uid, gid, mode):
    # type: (str, int, int, int) -> None
    """Create *dir* (if missing) with the given ownership and mode."""
    if os.path.exists(dir):
        os.chmod(dir, mode)
    else:
        os.makedirs(dir, mode=mode)
    os.chown(dir, uid, gid)
    # re-apply the mode: os.makedirs' mode argument is masked by umask
    os.chmod(dir, mode)
1455
f6b5b4d7 1456
9f95a23c
TL
def get_data_dir(fsid, t, n):
    # type: (str, str, Union[int, str]) -> str
    """Return the daemon data dir path: <args.data_dir>/<fsid>/<type>.<id>."""
    return os.path.join(args.data_dir, fsid, '%s.%s' % (t, n))
1460
f6b5b4d7 1461
9f95a23c
TL
def get_log_dir(fsid):
    # type: (str) -> str
    """Return the cluster log dir path: <args.log_dir>/<fsid>."""
    return os.path.join(args.log_dir, fsid)
1465
f6b5b4d7 1466
9f95a23c
TL
def make_data_dir_base(fsid, uid, gid):
    # type: (str, int, int) -> str
    """Create the per-cluster base data dir (plus crash dirs) and return it."""
    data_dir_base = os.path.join(args.data_dir, fsid)
    makedirs(data_dir_base, uid, gid, DATA_DIR_MODE)
    makedirs(os.path.join(data_dir_base, 'crash'), uid, gid, DATA_DIR_MODE)
    makedirs(os.path.join(data_dir_base, 'crash', 'posted'), uid, gid,
             DATA_DIR_MODE)
    return data_dir_base
1475
f6b5b4d7 1476
def make_data_dir(fsid, daemon_type, daemon_id, uid=None, gid=None):
    # type: (str, str, Union[int, str], Optional[int], Optional[int]) -> str
    """Create (and return) a daemon's data dir.

    When uid/gid are not given, they are resolved from the ceph user in
    the container image via extract_uid_gid().
    """
    if uid is None or gid is None:
        uid, gid = extract_uid_gid()
    make_data_dir_base(fsid, uid, gid)
    data_dir = get_data_dir(fsid, daemon_type, daemon_id)
    makedirs(data_dir, uid, gid, DATA_DIR_MODE)
    return data_dir
1485
f6b5b4d7 1486
def make_log_dir(fsid, uid=None, gid=None):
    # type: (str, Optional[int], Optional[int]) -> str
    """Create (and return) the cluster log dir, resolving uid/gid if needed."""
    if uid is None or gid is None:
        uid, gid = extract_uid_gid()
    log_dir = get_log_dir(fsid)
    makedirs(log_dir, uid, gid, LOG_DIR_MODE)
    return log_dir
1494
f6b5b4d7 1495
9f95a23c
TL
def make_var_run(fsid, uid, gid):
    # type: (str, int, int) -> None
    """Create /var/run/ceph/<fsid> (mode 0770) owned by uid:gid."""
    call_throws(['install', '-d', '-m0770', '-o', str(uid), '-g', str(gid),
                 '/var/run/ceph/%s' % fsid])
1500
f6b5b4d7 1501
def copy_tree(src, dst, uid=None, gid=None):
    # type: (List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy a directory tree from src to dst

    Any existing destination tree is removed first; the copied tree is
    then chowned recursively to uid:gid (resolved via extract_uid_gid()
    when not given).
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid()

    for src_dir in src:
        dst_dir = dst
        if os.path.isdir(dst):
            dst_dir = os.path.join(dst, os.path.basename(src_dir))

        logger.debug('copy directory \'%s\' -> \'%s\'' % (src_dir, dst_dir))
        # remove-then-copy because copytree requires a fresh destination
        # (dirs_exist_ok needs python 3.8)
        shutil.rmtree(dst_dir, ignore_errors=True)
        shutil.copytree(src_dir, dst_dir)  # dirs_exist_ok needs python 3.8

        for dirpath, dirnames, filenames in os.walk(dst_dir):
            logger.debug('chown %s:%s \'%s\'' % (uid, gid, dirpath))
            os.chown(dirpath, uid, gid)
            for filename in filenames:
                logger.debug('chown %s:%s \'%s\'' % (uid, gid, filename))
                os.chown(os.path.join(dirpath, filename), uid, gid)
1525
1526
def copy_files(src, dst, uid=None, gid=None):
    # type: (List[str], str, Optional[int], Optional[int]) -> None
    """Copy each file in *src* to *dst*, chowning the copies to uid:gid.

    When uid/gid are not given, they are resolved via extract_uid_gid().
    If *dst* is a directory, each file keeps its basename inside it.
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid()

    dst_is_dir = os.path.isdir(dst)
    for src_file in src:
        if dst_is_dir:
            dst_file = os.path.join(dst, os.path.basename(src_file))
        else:
            dst_file = dst

        logger.debug('copy file \'%s\' -> \'%s\'' % (src_file, dst_file))
        shutil.copyfile(src_file, dst_file)

        logger.debug('chown %s:%s \'%s\'' % (uid, gid, dst_file))
        os.chown(dst_file, uid, gid)
1545
f6b5b4d7 1546
def move_files(src, dst, uid=None, gid=None):
    # type: (List[str], str, Optional[int], Optional[int]) -> None
    """
    Move files from src to dst

    Symlinks are re-created at the destination and the originals
    unlinked; regular files are moved and chowned to uid:gid (resolved
    via extract_uid_gid() when not given).
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid()

    for src_file in src:
        dst_file = dst
        if os.path.isdir(dst):
            dst_file = os.path.join(dst, os.path.basename(src_file))

        if os.path.islink(src_file):
            # shutil.move() in py2 does not handle symlinks correctly
            src_rl = os.readlink(src_file)
            logger.debug("symlink '%s' -> '%s'" % (dst_file, src_rl))
            os.symlink(src_rl, dst_file)
            os.unlink(src_file)
        else:
            logger.debug("move file '%s' -> '%s'" % (src_file, dst_file))
            shutil.move(src_file, dst_file)
            logger.debug('chown %s:%s \'%s\'' % (uid, gid, dst_file))
            os.chown(dst_file, uid, gid)
1571
f6b5b4d7 1572
9f95a23c
TL
1573## copied from distutils ##
def find_executable(executable, path=None):
    """Tries to find 'executable' in the directories listed in 'path'.
    A string listing directories separated by 'os.pathsep'; defaults to
    os.environ['PATH']. Returns the complete filename or None if not found.
    """
    # (copied from distutils)
    _, ext = os.path.splitext(executable)
    if (sys.platform == 'win32') and (ext != '.exe'):
        executable = executable + '.exe'

    # an explicit existing path wins outright
    if os.path.isfile(executable):
        return executable

    if path is None:
        path = os.environ.get('PATH', None)
        if path is None:
            try:
                path = os.confstr("CS_PATH")
            except (AttributeError, ValueError):
                # os.confstr() or CS_PATH is not available
                path = os.defpath
        # bpo-35755: Don't use os.defpath if the PATH environment variable is
        # set to an empty string

    # PATH='' doesn't match, whereas PATH=':' looks in the current directory
    if not path:
        return None

    for directory in path.split(os.pathsep):
        candidate = os.path.join(directory, executable)
        if os.path.isfile(candidate):
            # the file exists, we have a shot at spawn working
            return candidate
    return None
1608
f6b5b4d7 1609
9f95a23c
TL
def find_program(filename):
    # type: (str) -> str
    """Return the full path of *filename*; raise ValueError if not found."""
    path = find_executable(filename)
    if path is None:
        raise ValueError('%s not found' % filename)
    return path
1616
f6b5b4d7 1617
9f95a23c
TL
def get_unit_name(fsid, daemon_type, daemon_id=None):
    # type: (str, str, Optional[Union[int, str]]) -> str
    """Return the systemd unit name for a daemon.

    With a daemon_id this is 'ceph-<fsid>@<type>.<id>'; without one it
    is the template form 'ceph-<fsid>@<type>'.
    """
    if daemon_id is None:
        return 'ceph-%s@%s' % (fsid, daemon_type)
    return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
1625
f6b5b4d7 1626
e306af50
TL
def get_unit_name_by_daemon_name(fsid, name):
    """Return the systemd unit recorded for daemon *name* in cluster *fsid*.

    :raises Error: if the daemon description has no recorded unit.
    """
    daemon = get_daemon_description(fsid, name)
    if 'systemd_unit' not in daemon:
        raise Error('Failed to get unit name for {}'.format(daemon))
    return daemon['systemd_unit']
1633
f6b5b4d7 1634
9f95a23c
TL
def check_unit(unit_name):
    # type: (str) -> Tuple[bool, str, bool]
    """Query systemd for a unit's status.

    :return: (enabled, state, installed), where state is one of
             'running', 'stopped', 'error' or 'unknown'.
    """
    # NOTE: we ignore the exit code here because systemctl outputs
    # various exit codes based on the state of the service, but the
    # string result is more explicit (and sufficient).
    enabled = False
    installed = False
    try:
        out, err, code = call(['systemctl', 'is-enabled', unit_name],
                              verbosity=CallVerbosity.DEBUG)
        if code == 0:
            enabled = True
            installed = True
        elif "disabled" in out:
            installed = True
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        enabled = False
        installed = False

    state = 'unknown'
    try:
        out, err, code = call(['systemctl', 'is-active', unit_name],
                              verbosity=CallVerbosity.DEBUG)
        out = out.strip()
        if out in ['active']:
            state = 'running'
        elif out in ['inactive']:
            state = 'stopped'
        elif out in ['failed', 'auto-restart']:
            state = 'error'
        else:
            state = 'unknown'
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        state = 'unknown'
    return (enabled, state, installed)
1672
f6b5b4d7 1673
9f95a23c
TL
def check_units(units, enabler=None):
    # type: (List[str], Optional[Packager]) -> bool
    """Return True if any unit in *units* is enabled and running.

    If an *enabler* is given, each installed (but not running) unit is
    enabled along the way before returning False.
    """
    for u in units:
        (enabled, state, installed) = check_unit(u)
        if enabled and state == 'running':
            logger.info('Unit %s is enabled and running' % u)
            return True
        if enabler is not None:
            if installed:
                logger.info('Enabling unit %s' % u)
                enabler.enable_service(u)
    return False
1686
f6b5b4d7 1687
def get_legacy_config_fsid(cluster, legacy_dir=None):
    # type: (str, Optional[str]) -> Optional[str]
    """Return the fsid from a legacy /etc/ceph/<cluster>.conf, if any.

    *legacy_dir*, when given, is prepended to the config path (used when
    inspecting a foreign root).
    """
    config_file = '/etc/ceph/%s.conf' % cluster
    if legacy_dir is not None:
        config_file = os.path.abspath(legacy_dir + config_file)

    if not os.path.exists(config_file):
        return None

    config = read_config(config_file)
    if config.has_section('global') and config.has_option('global', 'fsid'):
        return config.get('global', 'fsid')
    return None
1699
f6b5b4d7 1700
def get_legacy_daemon_fsid(cluster, daemon_type, daemon_id, legacy_dir=None):
    # type: (str, str, Union[int, str], Optional[str]) -> Optional[str]
    """Determine the fsid of a legacy (pre-cephadm) daemon.

    For OSDs, prefer the 'ceph_fsid' file in the daemon's data dir;
    otherwise (or on failure) fall back to the fsid in the legacy
    cluster config file.
    """
    fsid = None
    if daemon_type == 'osd':
        try:
            fsid_file = os.path.join(args.data_dir,
                                     daemon_type,
                                     'ceph-%s' % daemon_id,
                                     'ceph_fsid')
            if legacy_dir is not None:
                fsid_file = os.path.abspath(legacy_dir + fsid_file)
            with open(fsid_file, 'r') as f:
                fsid = f.read().strip()
        except IOError:
            # missing/unreadable file: fall through to the config lookup
            pass
    if not fsid:
        fsid = get_legacy_config_fsid(cluster, legacy_dir=legacy_dir)
    return fsid
1719
f6b5b4d7 1720
9f95a23c
TL
def get_daemon_args(fsid, daemon_type, daemon_id):
    # type: (str, str, Union[int, str]) -> List[str]
    """Build the extra command-line arguments for a containerized daemon."""
    r = list()  # type: List[str]

    if daemon_type in Ceph.daemons and daemon_type != 'crash':
        # send logs to stderr so the container runtime captures them
        r += [
            '--setuser', 'ceph',
            '--setgroup', 'ceph',
            '--default-log-to-file=false',
            '--default-log-to-stderr=true',
            '--default-log-stderr-prefix="debug "',
        ]
        if daemon_type == 'mon':
            r += [
                '--default-mon-cluster-log-to-file=false',
                '--default-mon-cluster-log-to-stderr=true',
            ]
    elif daemon_type in Monitoring.components:
        metadata = Monitoring.components[daemon_type]
        r += metadata.get('args', list())
        if daemon_type == 'alertmanager':
            config = get_parm(args.config_json)
            peers = config.get('peers', list())  # type: ignore
            for peer in peers:
                r += ["--cluster.peer={}".format(peer)]
            # some alertmanager, by default, look elsewhere for a config
            r += ["--config.file=/etc/alertmanager/alertmanager.yml"]
    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
        r += nfs_ganesha.get_daemon_args()
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(fsid, daemon_id)
        r.extend(cc.get_daemon_args())

    return r
1756
f6b5b4d7 1757
def create_daemon_dirs(fsid, daemon_type, daemon_id, uid, gid,
                       config=None, keyring=None):
    # type: (str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None
    """Create the data/log dirs and config files for a new daemon.

    Writes the given config/keyring (mode 0600, owned by uid:gid) into
    the daemon's data dir, then performs daemon-type-specific setup:
    monitoring components get their config tree populated from
    args.config_json; NFS-Ganesha, iSCSI and custom containers delegate
    to their own helpers.
    """
    data_dir = make_data_dir(fsid, daemon_type, daemon_id, uid=uid, gid=gid)
    make_log_dir(fsid, uid=uid, gid=gid)

    if config:
        config_path = os.path.join(data_dir, 'config')
        with open(config_path, 'w') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config)

    if keyring:
        keyring_path = os.path.join(data_dir, 'keyring')
        with open(keyring_path, 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write(keyring)

    if daemon_type in Monitoring.components.keys():
        config_json: Dict[str, Any] = get_parm(args.config_json)
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())

        # Set up directories specific to the monitoring component
        config_dir = ''
        if daemon_type == 'prometheus':
            data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
            config_dir = 'etc/prometheus'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'alerting'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
        elif daemon_type == 'grafana':
            data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
            config_dir = 'etc/grafana'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
        elif daemon_type == 'alertmanager':
            data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
            config_dir = 'etc/alertmanager'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755)

        # populate the config directory for the component from the config-json
        for fname in required_files:
            if 'files' in config_json:  # type: ignore
                content = dict_get_join(config_json['files'], fname)
                with open(os.path.join(data_dir_root, config_dir, fname), 'w') as f:
                    os.fchown(f.fileno(), uid, gid)
                    os.fchmod(f.fileno(), 0o600)
                    f.write(content)

    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
        nfs_ganesha.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CephIscsi.daemon_type:
        ceph_iscsi = CephIscsi.init(fsid, daemon_id)
        ceph_iscsi.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(fsid, daemon_id)
        cc.create_daemon_dirs(data_dir, uid, gid)
1823
f6b5b4d7 1824
9f95a23c
TL
def get_parm(option):
    # type: (str) -> Dict[str, Any]
    """Parse a --config-json style option into a dict.

    *option* may be '-' (read JSON from stdin, cached in the module
    global ``cached_stdin`` across calls), an inline '{...}' JSON
    string, or a path to a JSON file.

    :raises Error: if the file does not exist or the JSON is invalid.
    """
    # NOTE(review): the return is whatever json.loads produces (the type
    # comment above was Dict[str, str], but values are arbitrary JSON)
    if not option:
        return dict()

    global cached_stdin
    if option == '-':
        if cached_stdin is not None:
            j = cached_stdin
        else:
            try:
                j = injected_stdin  # type: ignore
            except NameError:
                j = sys.stdin.read()
                cached_stdin = j
    else:
        # inline json string
        if option[0] == '{' and option[-1] == '}':
            j = option
        # json file
        elif os.path.exists(option):
            with open(option, 'r') as f:
                j = f.read()
        else:
            raise Error("Config file {} not found".format(option))

    try:
        js = json.loads(j)
    except ValueError as e:
        raise Error("Invalid JSON in {}: {}".format(option, e))
    else:
        return js
1858
f6b5b4d7 1859
def get_config_and_keyring():
    # type: () -> Tuple[Optional[str], Optional[str]]
    """Resolve config and keyring text from the parsed CLI arguments.

    Values from --config-json are the baseline; an explicit --config
    file overrides the config, and --key (rendered as a keyring stanza
    for args.name) or a --keyring file overrides the keyring.
    """
    config = None
    keyring = None

    if 'config_json' in args and args.config_json:
        d = get_parm(args.config_json)
        config = d.get('config')
        keyring = d.get('keyring')

    if 'config' in args and args.config:
        with open(args.config, 'r') as f:
            config = f.read()

    if 'key' in args and args.key:
        keyring = '[%s]\n\tkey = %s\n' % (args.name, args.key)
    elif 'keyring' in args and args.keyring:
        with open(args.keyring, 'r') as f:
            keyring = f.read()

    return config, keyring
1881
1882
def get_container_binds(fsid, daemon_type, daemon_id):
    # type: (str, str, Union[int, str, None]) -> List[List[str]]
    """Return extra bind-mount specs for the given daemon type.

    Only iSCSI and custom containers contribute binds; all other daemon
    types return an empty list.
    """
    binds = list()

    if daemon_type == CephIscsi.daemon_type:
        binds.extend(CephIscsi.get_container_binds())
    elif daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(fsid, daemon_id)
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        binds.extend(cc.get_container_binds(data_dir))

    return binds
1896
9f95a23c
TL
1897
def get_container_mounts(fsid, daemon_type, daemon_id,
                         no_config=False):
    # type: (str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str]
    """Build the host-path -> container-path volume map for a daemon.

    :param no_config: skip mounting the daemon's config over
                      /etc/ceph/ceph.conf.
    """
    mounts = dict()

    if daemon_type in Ceph.daemons:
        if fsid:
            run_path = os.path.join('/var/run/ceph', fsid);
            if os.path.exists(run_path):
                mounts[run_path] = '/var/run/ceph:z'
            log_dir = get_log_dir(fsid)
            mounts[log_dir] = '/var/log/ceph:z'
            crash_dir = '/var/lib/ceph/%s/crash' % fsid
            if os.path.exists(crash_dir):
                mounts[crash_dir] = '/var/lib/ceph/crash:z'

    if daemon_type in Ceph.daemons and daemon_id:
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        if daemon_type == 'rgw':
            cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id)
        else:
            cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id)
        if daemon_type != 'crash':
            mounts[data_dir] = cdata_dir + ':z'
        if not no_config:
            mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
        if daemon_type == 'rbd-mirror' or daemon_type == 'crash':
            # these do not search for their keyrings in a data directory
            mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id)

    if daemon_type in ['mon', 'osd']:
        mounts['/dev'] = '/dev'  # FIXME: narrow this down?
        mounts['/run/udev'] = '/run/udev'
    if daemon_type == 'osd':
        mounts['/sys'] = '/sys'  # for numa.cc, pick_address, cgroups, ...
        mounts['/run/lvm'] = '/run/lvm'
        mounts['/run/lock/lvm'] = '/run/lock/lvm'

    try:
        if args.shared_ceph_folder:  # make easy manager modules/ceph-volume development
            ceph_folder = pathify(args.shared_ceph_folder)
            if os.path.exists(ceph_folder):
                mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
                mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
                mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
                mounts[ceph_folder + '/monitoring/grafana/dashboards'] = '/etc/grafana/dashboards/ceph-dashboard'
                mounts[ceph_folder + '/monitoring/prometheus/alerts'] = '/etc/prometheus/ceph'
            else:
                logger.error('{}{}{}'.format(termcolor.red,
                             'Ceph shared source folder does not exist.',
                             termcolor.end))
    except AttributeError:
        # args.shared_ceph_folder not defined for this subcommand
        pass

    if daemon_type in Monitoring.components and daemon_id:
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        if daemon_type == 'prometheus':
            mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z'
            mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z'
        elif daemon_type == 'node-exporter':
            mounts['/proc'] = '/host/proc:ro'
            mounts['/sys'] = '/host/sys:ro'
            mounts['/'] = '/rootfs:ro'
        elif daemon_type == "grafana":
            mounts[os.path.join(data_dir, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
        elif daemon_type == 'alertmanager':
            mounts[os.path.join(data_dir, 'etc/alertmanager')] = '/etc/alertmanager:Z'

    if daemon_type == NFSGanesha.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
        mounts.update(nfs_ganesha.get_container_mounts(data_dir))

    if daemon_type == CephIscsi.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        log_dir = get_log_dir(fsid)
        mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir))

    if daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(fsid, daemon_id)
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        mounts.update(cc.get_container_mounts(data_dir))

    return mounts
1987
f6b5b4d7 1988
f91f0fd5
TL
def get_container(fsid: str, daemon_type: str, daemon_id: Union[int, str],
                  privileged: bool = False,
                  ptrace: bool = False,
                  container_args: Optional[List[str]] = None) -> 'CephContainer':
    """Build the CephContainer object used to run one daemon.

    Maps the daemon type onto the container entrypoint, auth entity name,
    environment variables and extra container-runtime arguments, then bundles
    those together with the daemon's volume/bind mounts into a CephContainer
    (the container is not started here).

    :param fsid: cluster fsid; used in the container name and pid/cid paths
    :param daemon_type: e.g. 'mon', 'osd', 'rgw', a monitoring component, ...
    :param daemon_id: daemon identifier within its type
    :param privileged: request a privileged container (also forced on below
        for mon/osd and iscsi)
    :param ptrace: pass through to CephContainer to allow SYS_PTRACE
    :param container_args: extra args for the container runtime; NOTE: the
        list passed in is extended in place for some daemon types
    """
    entrypoint: str = ''
    name: str = ''
    ceph_args: List[str] = []
    envs: List[str] = []
    host_network: bool = True

    if container_args is None:
        container_args = []
    if daemon_type in ['mon', 'osd']:
        # mon and osd need privileged in order for libudev to query devices
        privileged = True
    if daemon_type == 'rgw':
        entrypoint = '/usr/bin/radosgw'
        name = 'client.rgw.%s' % daemon_id
    elif daemon_type == 'rbd-mirror':
        entrypoint = '/usr/bin/rbd-mirror'
        name = 'client.rbd-mirror.%s' % daemon_id
    elif daemon_type == 'crash':
        entrypoint = '/usr/bin/ceph-crash'
        name = 'client.crash.%s' % daemon_id
    elif daemon_type in ['mon', 'mgr', 'mds', 'osd']:
        # core ceph daemons all follow the /usr/bin/ceph-<type> convention
        entrypoint = '/usr/bin/ceph-' + daemon_type
        name = '%s.%s' % (daemon_type, daemon_id)
    elif daemon_type in Monitoring.components:
        # monitoring images (prometheus, grafana, ...) use their own default
        # entrypoint
        entrypoint = ''
    elif daemon_type == NFSGanesha.daemon_type:
        entrypoint = NFSGanesha.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        envs.extend(NFSGanesha.get_container_envs())
    elif daemon_type == CephIscsi.daemon_type:
        entrypoint = CephIscsi.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        # So the container can modprobe iscsi_target_mod and have write perms
        # to configfs we need to make this a privileged container.
        privileged = True
    elif daemon_type == CustomContainer.daemon_type:
        # custom containers carry their own entrypoint/env/args in their
        # config; they do not get host networking by default
        cc = CustomContainer.init(fsid, daemon_id)
        entrypoint = cc.entrypoint
        host_network = False
        envs.extend(cc.get_container_envs())
        container_args.extend(cc.get_container_args())

    if daemon_type in Monitoring.components:
        # monitoring containers run as the uid/gid baked into the image
        uid, gid = extract_uid_gid_monitoring(daemon_type)
        monitoring_args = [
            '--user',
            str(uid),
            # FIXME: disable cpu/memory limits for the time being (not supported
            # by ubuntu 18.04 kernel!)
        ]
        container_args.extend(monitoring_args)
    elif daemon_type == 'crash':
        ceph_args = ['-n', name]
    elif daemon_type in Ceph.daemons:
        ceph_args = ['-n', name, '-f']

    # if using podman, set -d, --conmon-pidfile & --cidfile flags
    # so service can have Type=Forking
    if 'podman' in container_path:
        runtime_dir = '/run'
        container_args.extend(['-d',
                               '--conmon-pidfile',
                               runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, daemon_id),
                               '--cidfile',
                               runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, daemon_id)])

    return CephContainer(
        image=args.image,
        entrypoint=entrypoint,
        args=ceph_args + get_daemon_args(fsid, daemon_type, daemon_id),
        container_args=container_args,
        volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
        bind_mounts=get_container_binds(fsid, daemon_type, daemon_id),
        cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id),
        envs=envs,
        privileged=privileged,
        ptrace=ptrace,
        init=args.container_init,
        host_network=host_network,
    )
2073
f6b5b4d7 2074
def extract_uid_gid(img='', file_path='/var/lib/ceph'):
    # type: (str, Union[str, List[str]]) -> Tuple[int, int]
    """Return the (uid, gid) owning the first stat-able path inside the image.

    :param img: container image to probe; defaults to args.image
    :param file_path: a path, or list of candidate paths, to stat in-container
    :raises RuntimeError: when none of the candidate paths can be statted
    """
    if not img:
        img = args.image

    # accept either a single path or a list of candidate paths
    candidates = [file_path] if isinstance(file_path, str) else file_path

    for candidate in candidates:
        try:
            stat_out = CephContainer(
                image=img,
                entrypoint='stat',
                args=['-c', '%u %g', candidate]
            ).run()
        except RuntimeError:
            # this candidate path is missing in the image; try the next one
            continue
        owner, group = stat_out.split(' ')
        return int(owner), int(group)
    raise RuntimeError('uid/gid not found')
2098
9f95a23c
TL
2099
def deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid,
                  config=None, keyring=None,
                  osd_fsid=None,
                  reconfig=False,
                  ports=None):
    # type: (str, str, Union[int, str], CephContainer, int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None
    """Deploy (or reconfigure) a single daemon on this host.

    Creates the daemon's data dir, config and keyring, writes/refreshes its
    systemd units (unless reconfig), opens any explicitly required firewall
    ports, and leaves stamp files (unit.created / unit.configured) in the
    data dir.

    :param c: the CephContainer that will run the daemon
    :param config: ceph.conf contents (required for a brand-new mon)
    :param keyring: keyring contents (required for a brand-new mon)
    :param osd_fsid: OSD uuid (OSDs only); forwarded to deploy_daemon_units
    :param reconfig: only rewrite config; the data dir must already exist
    :param ports: TCP ports the daemon needs; deployment fails if any is busy
    :raises Error: when a required port is in use, or reconfig is requested
        for a daemon whose data dir does not exist
    """
    ports = ports or []
    if any([port_in_use(port) for port in ports]):
        raise Error("TCP Port(s) '{}' required for {} already in use".format(",".join(map(str, ports)), daemon_type))

    data_dir = get_data_dir(fsid, daemon_type, daemon_id)
    if reconfig and not os.path.exists(data_dir):
        raise Error('cannot reconfig, data path %s does not exist' % data_dir)
    if daemon_type == 'mon' and not os.path.exists(data_dir):
        # brand-new mon: run `ceph-mon --mkfs` inside the container, feeding
        # it the config/keyring via temp files bind-mounted at /tmp
        assert config
        assert keyring
        # tmp keyring file
        tmp_keyring = write_tmp(keyring, uid, gid)

        # tmp config file
        tmp_config = write_tmp(config, uid, gid)

        # --mkfs
        create_daemon_dirs(fsid, daemon_type, daemon_id, uid, gid)
        mon_dir = get_data_dir(fsid, 'mon', daemon_id)
        log_dir = get_log_dir(fsid)
        out = CephContainer(
            image=args.image,
            entrypoint='/usr/bin/ceph-mon',
            args=['--mkfs',
                  '-i', str(daemon_id),
                  '--fsid', fsid,
                  '-c', '/tmp/config',
                  '--keyring', '/tmp/keyring',
            ] + get_daemon_args(fsid, 'mon', daemon_id),
            volume_mounts={
                log_dir: '/var/log/ceph:z',
                mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id),
                tmp_keyring.name: '/tmp/keyring:z',
                tmp_config.name: '/tmp/config:z',
            },
        ).run()

        # write conf
        with open(mon_dir + '/config', 'w') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config)
    else:
        # dirs, conf, keyring
        create_daemon_dirs(
            fsid, daemon_type, daemon_id,
            uid, gid,
            config, keyring)

    if not reconfig:
        # (re)generate unit.run/unit.poststop and the systemd units, then
        # enable+start the service
        deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
                            osd_fsid=osd_fsid)

    # stamp files: mtimes record when the daemon was created / last configured
    if not os.path.exists(data_dir + '/unit.created'):
        with open(data_dir + '/unit.created', 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write('mtime is time the daemon deployment was created\n')

    with open(data_dir + '/unit.configured', 'w') as f:
        f.write('mtime is time we were last configured\n')
        os.fchmod(f.fileno(), 0o600)
        os.fchown(f.fileno(), uid, gid)

    update_firewalld(daemon_type)

    # Open ports explicitly required for the daemon
    if ports:
        fw = Firewalld()
        fw.open_ports(ports)
        fw.apply_rules()

    if reconfig and daemon_type not in Ceph.daemons:
        # ceph daemons do not need a restart; others (presumably) do to pick
        # up the new config
        call_throws(['systemctl', 'reset-failed',
                     get_unit_name(fsid, daemon_type, daemon_id)])
        call_throws(['systemctl', 'restart',
                     get_unit_name(fsid, daemon_type, daemon_id)])
2186
f6b5b4d7
TL
def _write_container_cmd_to_bash(file_obj, container, comment=None, background=False):
    # type: (IO[str], CephContainer, Optional[str], Optional[bool]) -> None
    """Append the shell lines that (re)start *container* to an open unit.run file.

    Writes an optional comment, best-effort `rm` cleanup line(s), and finally
    the run command (with ` &` appended when *background* is true).
    """
    write = file_obj.write
    if comment:
        # Sometimes adding a comment, especially if there are multiple containers in one
        # unit file, makes it easier to read and grok.
        write(f'# {comment}\n')
    # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
    write(f"! {' '.join(container.rm_cmd())} 2> /dev/null\n")
    # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
    if 'podman' in container_path:
        write(f"! {' '.join(container.rm_cmd(storage=True))} 2> /dev/null\n")

    # container run command
    suffix = ' &' if background else ''
    write(' '.join(container.run_cmd()) + suffix + '\n')
2201
f91f0fd5 2202
9f95a23c
TL
def deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True, start=True,
                        osd_fsid=None):
    # type: (str, int, int, str, Union[int, str], CephContainer, bool, bool, Optional[str]) -> None
    """Write the daemon's unit.run / unit.poststop / unit.image files and the
    systemd units, then enable/start the service.

    unit.run is the bash script systemd executes to start the daemon; it may
    be preceded by daemon-specific pre-start steps (osd: ceph-volume
    activate or chown of adopted simple OSDs; nfs: rados grace 'add';
    iscsi: configfs mount plus a backgrounded tcmu-runner container).
    unit.poststop undoes those steps on stop. Each file is written to
    '<name>.new' and renamed into place to keep the switch atomic.
    """
    # cmd
    data_dir = get_data_dir(fsid, daemon_type, daemon_id)
    with open(data_dir + '/unit.run.new', 'w') as f:
        # abort the script on the first failing command
        f.write('set -e\n')

        if daemon_type in Ceph.daemons:
            # make sure the run dir exists with the right ownership/mode
            install_path = find_program('install')
            f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))

        # pre-start cmd(s)
        if daemon_type == 'osd':
            # osds have a pre-start step
            assert osd_fsid
            simple_fn = os.path.join('/etc/ceph/osd',
                                     '%s-%s.json.adopted-by-cephadm' % (daemon_id, osd_fsid))
            if os.path.exists(simple_fn):
                f.write('# Simple OSDs need chown on startup:\n')
                for n in ['block', 'block.db', 'block.wal']:
                    p = os.path.join(data_dir, n)
                    f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
            else:
                prestart = CephContainer(
                    image=args.image,
                    entrypoint='/usr/sbin/ceph-volume',
                    args=[
                        'lvm', 'activate',
                        str(daemon_id), osd_fsid,
                        '--no-systemd'
                    ],
                    privileged=True,
                    volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
                    bind_mounts=get_container_binds(fsid, daemon_type, daemon_id),
                    cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
                )
                _write_container_cmd_to_bash(f, prestart, 'LVM OSDs use ceph-volume lvm activate')
        elif daemon_type == NFSGanesha.daemon_type:
            # add nfs to the rados grace db
            nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
            prestart = nfs_ganesha.get_rados_grace_container('add')
            _write_container_cmd_to_bash(f, prestart, 'add daemon to rados grace')
        elif daemon_type == CephIscsi.daemon_type:
            # mount configfs, then start tcmu-runner in the background before
            # the main iscsi container
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
            ceph_iscsi = CephIscsi.init(fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            _write_container_cmd_to_bash(f, tcmu_container, 'iscsi tcmu-runnter container', background=True)

        # the daemon container itself
        _write_container_cmd_to_bash(f, c, '%s.%s' % (daemon_type, str(daemon_id)))
        os.fchmod(f.fileno(), 0o600)
    os.rename(data_dir + '/unit.run.new',
              data_dir + '/unit.run')

    # post-stop command(s)
    with open(data_dir + '/unit.poststop.new', 'w') as f:
        if daemon_type == 'osd':
            assert osd_fsid
            poststop = CephContainer(
                image=args.image,
                entrypoint='/usr/sbin/ceph-volume',
                args=[
                    'lvm', 'deactivate',
                    str(daemon_id), osd_fsid,
                ],
                privileged=True,
                volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
                bind_mounts=get_container_binds(fsid, daemon_type, daemon_id),
                cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type,
                                                    daemon_id),
            )
            _write_container_cmd_to_bash(f, poststop, 'deactivate osd')
        elif daemon_type == NFSGanesha.daemon_type:
            # remove nfs from the rados grace db
            nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
            poststop = nfs_ganesha.get_rados_grace_container('remove')
            _write_container_cmd_to_bash(f, poststop, 'remove daemon from rados grace')
        elif daemon_type == CephIscsi.daemon_type:
            # make sure we also stop the tcmu container
            ceph_iscsi = CephIscsi.init(fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            f.write('! '+ ' '.join(tcmu_container.stop_cmd()) + '\n')
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
        os.fchmod(f.fileno(), 0o600)
    os.rename(data_dir + '/unit.poststop.new',
              data_dir + '/unit.poststop')

    # record the image this daemon was deployed with
    with open(data_dir + '/unit.image.new', 'w') as f:
        f.write(c.image + '\n')
        os.fchmod(f.fileno(), 0o600)
    os.rename(data_dir + '/unit.image.new',
              data_dir + '/unit.image')

    # systemd
    install_base_units(fsid)
    unit = get_unit_file(fsid)
    unit_file = 'ceph-%s@.service' % (fsid)
    with open(args.unit_dir + '/' + unit_file + '.new', 'w') as f:
        f.write(unit)
    os.rename(args.unit_dir + '/' + unit_file + '.new',
              args.unit_dir + '/' + unit_file)
    call_throws(['systemctl', 'daemon-reload'])

    unit_name = get_unit_name(fsid, daemon_type, daemon_id)
    # stop any pre-existing instance and clear its failure state before
    # (re)enabling; failures here are non-fatal
    call(['systemctl', 'stop', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(['systemctl', 'reset-failed', unit_name],
         verbosity=CallVerbosity.DEBUG)
    if enable:
        call_throws(['systemctl', 'enable', unit_name])
    if start:
        call_throws(['systemctl', 'start', unit_name])
2316
9f95a23c 2317
f6b5b4d7
TL
2318
class Firewalld(object):
    """Thin wrapper around firewall-cmd.

    Availability is probed once at construction time; every mutating method
    silently becomes a no-op when firewalld is not usable on this host.
    """

    def __init__(self):
        # type: () -> None
        self.available = self.check()

    def check(self):
        # type: () -> bool
        """Return True when firewall-cmd exists and firewalld.service is
        enabled and running."""
        self.cmd = find_executable('firewall-cmd')
        if not self.cmd:
            logger.debug('firewalld does not appear to be present')
            return False
        enabled, state, _ = check_unit('firewalld.service')
        if not enabled:
            logger.debug('firewalld.service is not enabled')
            return False
        if state != "running":
            logger.debug('firewalld.service is not running')
            return False
        logger.info("firewalld ready")
        return True

    def enable_service_for(self, daemon_type):
        # type: (str) -> None
        """Permanently enable the firewalld service matching *daemon_type*
        (if any) in the current zone."""
        if not self.available:
            logger.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type)
            return

        # map the daemon type onto a firewalld service name
        if daemon_type == 'mon':
            svc = 'ceph-mon'
        elif daemon_type in ['mgr', 'mds', 'osd']:
            svc = 'ceph'
        elif daemon_type == NFSGanesha.daemon_type:
            svc = 'nfs'
        else:
            # no firewalld service is associated with this daemon type
            return

        out, err, ret = call([self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG)
        if not ret:
            logger.debug('firewalld service %s is enabled in current zone' % svc)
            return
        logger.info('Enabling firewalld service %s in current zone...' % svc)
        out, err, ret = call([self.cmd, '--permanent', '--add-service', svc])
        if ret:
            raise RuntimeError(
                'unable to add service %s to current zone: %s' % (svc, err))

    def open_ports(self, fw_ports):
        # type: (List[int]) -> None
        """Permanently open the given TCP ports in the current zone."""
        if not self.available:
            logger.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports)
            return

        for port in fw_ports:
            tcp_port = str(port) + '/tcp'
            out, err, ret = call([self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
            if not ret:
                logger.debug('firewalld port %s is enabled in current zone' % tcp_port)
                continue
            logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
            out, err, ret = call([self.cmd, '--permanent', '--add-port', tcp_port])
            if ret:
                raise RuntimeError('unable to add port %s to current zone: %s' %
                                   (tcp_port, err))

    def apply_rules(self):
        # type: () -> None
        """Reload firewalld so the permanent changes take effect."""
        if not self.available:
            return
        call_throws([self.cmd, '--reload'])
2390
2391
def update_firewalld(daemon_type):
    # type: (str) -> None
    """Enable the firewalld service and monitoring ports for *daemon_type*,
    then reload firewalld (all no-ops when firewalld is unavailable)."""
    fw = Firewalld()
    fw.enable_service_for(daemon_type)
    # prometheus etc
    ports = list(Monitoring.port_map.get(daemon_type, []))
    fw.open_ports(ports)
    fw.apply_rules()
9f95a23c
TL
2405
def install_base_units(fsid):
    # type: (str) -> None
    """
    Set up ceph.target and ceph-$fsid.target units.

    Also installs the per-cluster logrotate configuration. Unit files are
    written to '<name>.new' and renamed into place. Targets are only
    (dis/en)abled and started when they did not exist before, so repeated
    calls are cheap.
    """
    # global unit
    existed = os.path.exists(args.unit_dir + '/ceph.target')
    with open(args.unit_dir + '/ceph.target.new', 'w') as f:
        f.write('[Unit]\n'
                'Description=All Ceph clusters and services\n'
                '\n'
                '[Install]\n'
                'WantedBy=multi-user.target\n')
    os.rename(args.unit_dir + '/ceph.target.new',
              args.unit_dir + '/ceph.target')
    if not existed:
        # we disable before enable in case a different ceph.target
        # (from the traditional package) is present; while newer
        # systemd is smart enough to disable the old
        # (/lib/systemd/...) and enable the new (/etc/systemd/...),
        # some older versions of systemd error out with EEXIST.
        call_throws(['systemctl', 'disable', 'ceph.target'])
        call_throws(['systemctl', 'enable', 'ceph.target'])
        call_throws(['systemctl', 'start', 'ceph.target'])

    # cluster unit
    existed = os.path.exists(args.unit_dir + '/ceph-%s.target' % fsid)
    with open(args.unit_dir + '/ceph-%s.target.new' % fsid, 'w') as f:
        f.write('[Unit]\n'
                'Description=Ceph cluster {fsid}\n'
                'PartOf=ceph.target\n'
                'Before=ceph.target\n'
                '\n'
                '[Install]\n'
                'WantedBy=multi-user.target ceph.target\n'.format(
                    fsid=fsid)
                )
    os.rename(args.unit_dir + '/ceph-%s.target.new' % fsid,
              args.unit_dir + '/ceph-%s.target' % fsid)
    if not existed:
        call_throws(['systemctl', 'enable', 'ceph-%s.target' % fsid])
        call_throws(['systemctl', 'start', 'ceph-%s.target' % fsid])

    # logrotate for the cluster
    with open(args.logrotate_dir + '/ceph-%s' % fsid, 'w') as f:
        """
        This is a bit sloppy in that the killall/pkill will touch all ceph daemons
        in all containers, but I don't see an elegant way to send SIGHUP *just* to
        the daemons for this cluster.  (1) systemd kill -s will get the signal to
        podman, but podman will exit.  (2) podman kill will get the signal to the
        first child (bash), but that isn't the ceph daemon.  This is simpler and
        should be harmless.
        """
        f.write("""# created by cephadm
/var/log/ceph/%s/*.log {
    rotate 7
    daily
    compress
    sharedscripts
    postrotate
        killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror || pkill -1 -x "ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror" || true
    endscript
    missingok
    notifempty
    su root root
}
""" % fsid)
2473
f6b5b4d7 2474
1911f103
TL
def get_unit_file(fsid):
    # type: (str) -> str
    """Return the contents of the per-cluster systemd template unit
    (ceph-<fsid>@.service).

    The '%i' instance specifier is '<daemon_type>.<daemon_id>'. When podman
    is the container runtime the unit is Type=forking with a conmon PID file
    (matching the -d/--conmon-pidfile/--cidfile flags added to the run
    command elsewhere in this file).
    """
    extra_args = ''
    if 'podman' in container_path:
        extra_args = ('ExecStartPre=-/bin/rm -f /%t/%n-pid /%t/%n-cid\n'
                      'ExecStopPost=-/bin/rm -f /%t/%n-pid /%t/%n-cid\n'
                      'Type=forking\n'
                      'PIDFile=/%t/%n-pid\n')

    u = """# generated by cephadm
[Unit]
Description=Ceph %i for {fsid}

# According to:
#   http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
# these can be removed once ceph-mon will dynamically change network
# configuration.
After=network-online.target local-fs.target time-sync.target
Wants=network-online.target local-fs.target time-sync.target

PartOf=ceph-{fsid}.target
Before=ceph-{fsid}.target

[Service]
LimitNOFILE=1048576
LimitNPROC=1048576
EnvironmentFile=-/etc/environment
ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
ExecStop=-{container_path} stop ceph-{fsid}-%i
ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
KillMode=none
Restart=on-failure
RestartSec=10s
TimeoutStartSec=120
TimeoutStopSec=120
StartLimitInterval=30min
StartLimitBurst=5
{extra_args}
[Install]
WantedBy=ceph-{fsid}.target
""".format(
        container_path=container_path,
        fsid=fsid,
        data_dir=args.data_dir,
        extra_args=extra_args)

    return u
2522
2523##################################
2524
f6b5b4d7 2525
9f95a23c
TL
class CephContainer:
    """Builds the podman/docker command lines used to run, shell into, exec
    into, stop and remove a single container, and can run it to completion.
    """

    def __init__(self,
                 image: str,
                 entrypoint: str,
                 args: Optional[List[str]] = None,
                 volume_mounts: Optional[Dict[str, str]] = None,
                 cname: str = '',
                 container_args: Optional[List[str]] = None,
                 envs: Optional[List[str]] = None,
                 privileged: bool = False,
                 ptrace: bool = False,
                 bind_mounts: Optional[List[List[str]]] = None,
                 init: bool = False,
                 host_network: bool = True,
                 ) -> None:
        """
        :param image: container image to run
        :param entrypoint: program to run inside the container ('' to keep
            the image default)
        :param args: arguments for the entrypoint
        :param volume_mounts: host_dir -> container_dir '-v' mounts
        :param cname: container name (--name)
        :param container_args: extra args for the container runtime itself
        :param envs: 'KEY=value' environment entries
        :param privileged: run with --privileged --group-add=disk
        :param ptrace: add SYS_PTRACE capability (for debugging)
        :param bind_mounts: per-mount option lists joined into '--mount' args
        :param init: pass --init to the runtime
        :param host_network: pass --net=host
        """
        self.image = image
        self.entrypoint = entrypoint
        # Mutable parameters default to None and are replaced with fresh
        # containers here: using `= []` / `= {}` defaults would share one
        # list/dict across every instance constructed without that argument.
        self.args = args if args is not None else []
        self.volume_mounts = volume_mounts if volume_mounts is not None else {}
        self.cname = cname
        self.container_args = container_args if container_args is not None else []
        self.envs = envs
        self.privileged = privileged
        self.ptrace = ptrace
        self.bind_mounts = bind_mounts if bind_mounts else []
        self.init = init
        self.host_network = host_network

    def run_cmd(self) -> List[str]:
        """Return the full `<runtime> run ...` argv for this container."""
        cmd_args: List[str] = [
            str(container_path),
            'run',
            '--rm',
            '--ipc=host',
        ]
        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        vols: List[str] = []
        binds: List[str] = []

        if self.host_network:
            cmd_args.append('--net=host')
        if self.entrypoint:
            cmd_args.extend(['--entrypoint', self.entrypoint])
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk'])
        if self.ptrace and not self.privileged:
            # if privileged, the SYS_PTRACE cap is already added
            # in addition, --cap-add and --privileged are mutually
            # exclusive since podman >= 2.0
            cmd_args.append('--cap-add=SYS_PTRACE')
        if self.init:
            cmd_args.append('--init')
        if self.cname:
            cmd_args.extend(['--name', self.cname])
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        binds = sum([['--mount', '{}'.format(','.join(bind))]
                     for bind in self.bind_mounts], [])

        return cmd_args + self.container_args + envs + vols + binds + [
            self.image,
        ] + self.args  # type: ignore

    def shell_cmd(self, cmd: List[str]) -> List[str]:
        """Return a `<runtime> run ...` argv that runs *cmd* (instead of the
        configured entrypoint/args) with the same mounts and privileges."""
        cmd_args: List[str] = [
            str(container_path),
            'run',
            '--rm',
            '--ipc=host',
        ]
        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        vols: List[str] = []
        binds: List[str] = []

        if self.host_network:
            cmd_args.append('--net=host')
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk',
            ])
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        binds = sum([['--mount', '{}'.format(','.join(bind))]
                     for bind in self.bind_mounts], [])

        return cmd_args + self.container_args + envs + vols + binds + [
            '--entrypoint', cmd[0],
            self.image,
        ] + cmd[1:]

    def exec_cmd(self, cmd):
        # type: (List[str]) -> List[str]
        """Return a `<runtime> exec ...` argv running *cmd* in the (already
        running) named container."""
        return [
            str(container_path),
            'exec',
        ] + self.container_args + [
            self.cname,
        ] + cmd

    def rm_cmd(self, storage=False):
        # type: (bool) -> List[str]
        """Return a `<runtime> rm -f ...` argv; with storage=True also pass
        --storage (podman) to remove containers unknown to the runtime."""
        ret = [
            str(container_path),
            'rm', '-f',
        ]
        if storage:
            ret.append('--storage')
        ret.append(self.cname)
        return ret

    def stop_cmd(self):
        # type: () -> List[str]
        """Return a `<runtime> stop ...` argv for the named container."""
        ret = [
            str(container_path),
            'stop', self.cname,
        ]
        return ret

    def run(self, timeout=DEFAULT_TIMEOUT):
        # type: (Optional[int]) -> str
        """Run the container to completion and return its stdout; raises via
        call_throws on failure."""
        out, _, _ = call_throws(
            self.run_cmd(), desc=self.entrypoint, timeout=timeout)
        return out
2670
2671##################################
2672
f6b5b4d7 2673
9f95a23c
TL
@infer_image
def command_version():
    # type: () -> int
    """Run `ceph --version` inside args.image and print the result."""
    version_out = CephContainer(args.image, 'ceph', ['--version']).run()
    print(version_out.strip())
    return 0
2680
2681##################################
2682
f6b5b4d7 2683
9f95a23c
TL
@infer_image
def command_pull():
    # type: () -> int
    """Pull args.image (with transient-error retries), then print its
    inspect report."""
    _pull_image(args.image)
    return command_inspect_image()
2690
f6b5b4d7
TL
2691
def _pull_image(image):
    # type: (str) -> None
    """Pull a container image, retrying with backoff on known-transient errors.

    :raises RuntimeError: on a non-transient failure, or once the retries
        (1s, 4s, 25s backoff) are exhausted.
    """
    logger.info('Pulling container image %s...' % image)

    # stderr substrings that indicate a transient, retry-worthy failure
    ignorelist = [
        "error creating read-write layer with ID",
        "net/http: TLS handshake timeout",
        "Digest did not match, expected",
    ]

    cmd = [container_path, 'pull', image]
    cmd_str = ' '.join(cmd)

    for sleep_secs in [1, 4, 25]:
        out, err, ret = call(cmd)
        if not ret:
            return

        if not any(pattern in err for pattern in ignorelist):
            raise RuntimeError('Failed command: %s' % cmd_str)

        # (fixed: message previously began with an unbalanced '"')
        logger.info('%s failed transiently. Retrying. waiting %s seconds...' % (cmd_str, sleep_secs))
        time.sleep(sleep_secs)

    raise RuntimeError('Failed command: %s: maximum retries reached' % cmd_str)
9f95a23c
TL
2717##################################
2718
f6b5b4d7 2719
9f95a23c
TL
@infer_image
def command_inspect_image():
    # type: () -> int
    """Inspect args.image and print its id/digest plus ceph version as JSON.

    Returns errno.ENOENT when the image cannot be inspected, else 0.
    """
    inspect_out, err, ret = call_throws([
        container_path, 'inspect',
        '--format', '{{.ID}},{{.RepoDigests}}',
        args.image])
    if ret:
        return errno.ENOENT

    image_info = get_image_info_from_inspect(inspect_out.strip(), args.image)
    image_info['ceph_version'] = CephContainer(
        args.image, 'ceph', ['--version']).run().strip()

    print(json.dumps(image_info, indent=4, sort_keys=True))
    return 0
2736
2737
def get_image_info_from_inspect(out, image):
    # type: (str, str) -> Dict[str, str]
    """Parse `<runtime> inspect --format '{{.ID}},{{.RepoDigests}}'` output.

    :param out: the raw 'id,[digest ...]' line from inspect
    :param image: image name, used only for the error message
    :return: dict with 'image_id' and, when available, 'repo_digest'
    :raises Error: when *out* is empty
    """
    # validate BEFORE splitting: previously the empty-check came after the
    # unpacking, so an empty result raised ValueError instead of Error
    if not out:
        raise Error('inspect {}: empty result'.format(image))
    image_id, digests = out.split(',', 1)
    r = {
        'image_id': normalize_container_id(image_id)
    }
    if digests:
        # RepoDigests renders as '[digest1 digest2 ...]'; strip the brackets
        # and take the first entry
        json_digests = digests[1:-1].split(' ')
        if json_digests:
            r['repo_digest'] = json_digests[0]
    return r
2751
9f95a23c
TL
2752
2753##################################
2754
f91f0fd5 2755
f6b5b4d7
TL
def unwrap_ipv6(address):
    # type: (str) -> str
    """Strip the surrounding square brackets from a bracketed IPv6 address;
    any other string is returned unchanged."""
    bracketed = address.startswith('[') and address.endswith(']')
    return address[1:-1] if bracketed else address
2761
2762
f91f0fd5
TL
def wrap_ipv6(address):
    # type: (str) -> str
    """Return *address* wrapped in square brackets when it is an IPv6
    address; hostnames, IPv4 and already-bracketed strings pass through."""

    # We cannot assume it's already wrapped or even an IPv6 address if
    # it's already wrapped it'll not pass (like if it's a hostname) and trigger
    # the ValueError
    try:
        # fixed: dropped the py2-era unicode() wrapper -- undefined on
        # Python 3, where ip_address() accepts str directly
        if ipaddress.ip_address(address).version == 6:
            return f"[{address}]"
    except ValueError:
        pass

    return address
2776
2777
f6b5b4d7
TL
def is_ipv6(address):
    # type: (str) -> bool
    """Return True when *address* (possibly wrapped in '[...]') is a valid
    IPv6 address; logs a warning and returns False for non-IP strings."""
    address = unwrap_ipv6(address)
    try:
        # fixed: dropped the py2-era unicode() wrapper -- undefined on
        # Python 3, where ip_address() accepts str directly
        return ipaddress.ip_address(address).version == 6
    except ValueError:
        logger.warning("Address: {} isn't a valid IP address".format(address))
        return False
2786
2787
9f95a23c
TL
2788@default_image
2789def command_bootstrap():
2790 # type: () -> int
2791
2792 if not args.output_config:
2793 args.output_config = os.path.join(args.output_dir, 'ceph.conf')
2794 if not args.output_keyring:
2795 args.output_keyring = os.path.join(args.output_dir,
2796 'ceph.client.admin.keyring')
2797 if not args.output_pub_ssh_key:
2798 args.output_pub_ssh_key = os.path.join(args.output_dir, 'ceph.pub')
2799
2800 # verify output files
2801 for f in [args.output_config, args.output_keyring, args.output_pub_ssh_key]:
2802 if not args.allow_overwrite:
2803 if os.path.exists(f):
2804 raise Error('%s already exists; delete or pass '
2805 '--allow-overwrite to overwrite' % f)
2806 dirname = os.path.dirname(f)
2807 if dirname and not os.path.exists(dirname):
adb31ebb
TL
2808 fname = os.path.basename(f)
2809 logger.info(f"Creating directory {dirname} for {fname}")
2810 try:
2811 # use makedirs to create intermediate missing dirs
2812 os.makedirs(dirname, 0o755)
2813 except PermissionError:
2814 raise Error(f"Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.")
2815
9f95a23c
TL
2816
2817 if not args.skip_prepare_host:
2818 command_prepare_host()
2819 else:
2820 logger.info('Skip prepare_host')
2821
2822 # initial vars
2823 fsid = args.fsid or make_fsid()
2824 hostname = get_hostname()
2825 if '.' in hostname and not args.allow_fqdn_hostname:
2826 raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0]))
2827 mon_id = args.mon_id or hostname
2828 mgr_id = args.mgr_id or generate_service_id()
f91f0fd5 2829 logger.info('Cluster fsid: %s' % fsid)
f6b5b4d7 2830 ipv6 = False
9f95a23c
TL
2831
2832 l = FileLock(fsid)
2833 l.acquire()
2834
2835 # ip
2836 r = re.compile(r':(\d+)$')
f6b5b4d7 2837 base_ip = ''
9f95a23c 2838 if args.mon_ip:
f6b5b4d7 2839 ipv6 = is_ipv6(args.mon_ip)
f91f0fd5
TL
2840 if ipv6:
2841 args.mon_ip = wrap_ipv6(args.mon_ip)
9f95a23c
TL
2842 hasport = r.findall(args.mon_ip)
2843 if hasport:
2844 port = int(hasport[0])
2845 if port == 6789:
2846 addr_arg = '[v1:%s]' % args.mon_ip
2847 elif port == 3300:
2848 addr_arg = '[v2:%s]' % args.mon_ip
2849 else:
2850 logger.warning('Using msgr2 protocol for unrecognized port %d' %
2851 port)
2852 addr_arg = '[v2:%s]' % args.mon_ip
2853 base_ip = args.mon_ip[0:-(len(str(port)))-1]
2854 check_ip_port(base_ip, port)
2855 else:
2856 base_ip = args.mon_ip
2857 addr_arg = '[v2:%s:3300,v1:%s:6789]' % (args.mon_ip, args.mon_ip)
2858 check_ip_port(args.mon_ip, 3300)
2859 check_ip_port(args.mon_ip, 6789)
2860 elif args.mon_addrv:
2861 addr_arg = args.mon_addrv
2862 if addr_arg[0] != '[' or addr_arg[-1] != ']':
2863 raise Error('--mon-addrv value %s must use square backets' %
2864 addr_arg)
f6b5b4d7 2865 ipv6 = addr_arg.count('[') > 1
9f95a23c
TL
2866 for addr in addr_arg[1:-1].split(','):
2867 hasport = r.findall(addr)
2868 if not hasport:
2869 raise Error('--mon-addrv value %s must include port number' %
2870 addr_arg)
2871 port = int(hasport[0])
2872 # strip off v1: or v2: prefix
2873 addr = re.sub(r'^\w+:', '', addr)
2874 base_ip = addr[0:-(len(str(port)))-1]
2875 check_ip_port(base_ip, port)
2876 else:
2877 raise Error('must specify --mon-ip or --mon-addrv')
2878 logger.debug('Base mon IP is %s, final addrv is %s' % (base_ip, addr_arg))
2879
2880 mon_network = None
2881 if not args.skip_mon_network:
2882 # make sure IP is configured locally, and then figure out the
2883 # CIDR network
2884 for net, ips in list_networks().items():
f6b5b4d7
TL
2885 if ipaddress.ip_address(unicode(unwrap_ipv6(base_ip))) in \
2886 [ipaddress.ip_address(unicode(ip)) for ip in ips]:
9f95a23c
TL
2887 mon_network = net
2888 logger.info('Mon IP %s is in CIDR network %s' % (base_ip,
2889 mon_network))
2890 break
2891 if not mon_network:
2892 raise Error('Failed to infer CIDR network for mon ip %s; pass '
2893 '--skip-mon-network to configure it later' % base_ip)
2894
2895 # config
2896 cp = read_config(args.config)
2897 if not cp.has_section('global'):
2898 cp.add_section('global')
2899 cp.set('global', 'fsid', fsid);
2900 cp.set('global', 'mon host', addr_arg)
2901 cp.set('global', 'container_image', args.image)
2902 cpf = StringIO()
2903 cp.write(cpf)
2904 config = cpf.getvalue()
2905
f6b5b4d7
TL
2906 if args.registry_json or args.registry_url:
2907 command_registry_login()
2908
9f95a23c 2909 if not args.skip_pull:
f6b5b4d7 2910 _pull_image(args.image)
9f95a23c
TL
2911
2912 logger.info('Extracting ceph user uid/gid from container image...')
2913 (uid, gid) = extract_uid_gid()
2914
2915 # create some initial keys
2916 logger.info('Creating initial keys...')
2917 mon_key = CephContainer(
2918 image=args.image,
2919 entrypoint='/usr/bin/ceph-authtool',
2920 args=['--gen-print-key'],
2921 ).run().strip()
2922 admin_key = CephContainer(
2923 image=args.image,
2924 entrypoint='/usr/bin/ceph-authtool',
2925 args=['--gen-print-key'],
2926 ).run().strip()
2927 mgr_key = CephContainer(
2928 image=args.image,
2929 entrypoint='/usr/bin/ceph-authtool',
2930 args=['--gen-print-key'],
2931 ).run().strip()
2932
2933 keyring = ('[mon.]\n'
2934 '\tkey = %s\n'
2935 '\tcaps mon = allow *\n'
2936 '[client.admin]\n'
2937 '\tkey = %s\n'
2938 '\tcaps mon = allow *\n'
2939 '\tcaps mds = allow *\n'
2940 '\tcaps mgr = allow *\n'
2941 '\tcaps osd = allow *\n'
2942 '[mgr.%s]\n'
2943 '\tkey = %s\n'
2944 '\tcaps mon = profile mgr\n'
2945 '\tcaps mds = allow *\n'
2946 '\tcaps osd = allow *\n'
2947 % (mon_key, admin_key, mgr_id, mgr_key))
2948
2949 # tmp keyring file
2950 tmp_bootstrap_keyring = write_tmp(keyring, uid, gid)
2951
2952 # create initial monmap, tmp monmap file
2953 logger.info('Creating initial monmap...')
2954 tmp_monmap = write_tmp('', 0, 0)
2955 out = CephContainer(
2956 image=args.image,
2957 entrypoint='/usr/bin/monmaptool',
2958 args=['--create',
2959 '--clobber',
2960 '--fsid', fsid,
2961 '--addv', mon_id, addr_arg,
2962 '/tmp/monmap'
2963 ],
2964 volume_mounts={
2965 tmp_monmap.name: '/tmp/monmap:z',
2966 },
2967 ).run()
2968
2969 # pass monmap file to ceph user for use by ceph-mon --mkfs below
2970 os.fchown(tmp_monmap.fileno(), uid, gid)
2971
2972 # create mon
2973 logger.info('Creating mon...')
2974 create_daemon_dirs(fsid, 'mon', mon_id, uid, gid)
2975 mon_dir = get_data_dir(fsid, 'mon', mon_id)
2976 log_dir = get_log_dir(fsid)
2977 out = CephContainer(
2978 image=args.image,
2979 entrypoint='/usr/bin/ceph-mon',
2980 args=['--mkfs',
2981 '-i', mon_id,
2982 '--fsid', fsid,
2983 '-c', '/dev/null',
2984 '--monmap', '/tmp/monmap',
2985 '--keyring', '/tmp/keyring',
2986 ] + get_daemon_args(fsid, 'mon', mon_id),
2987 volume_mounts={
2988 log_dir: '/var/log/ceph:z',
2989 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
2990 tmp_bootstrap_keyring.name: '/tmp/keyring:z',
2991 tmp_monmap.name: '/tmp/monmap:z',
2992 },
2993 ).run()
2994
2995 with open(mon_dir + '/config', 'w') as f:
2996 os.fchown(f.fileno(), uid, gid)
2997 os.fchmod(f.fileno(), 0o600)
2998 f.write(config)
2999
3000 make_var_run(fsid, uid, gid)
3001 mon_c = get_container(fsid, 'mon', mon_id)
3002 deploy_daemon(fsid, 'mon', mon_id, mon_c, uid, gid,
3003 config=None, keyring=None)
3004
3005 # client.admin key + config to issue various CLI commands
3006 tmp_admin_keyring = write_tmp('[client.admin]\n'
3007 '\tkey = ' + admin_key + '\n',
3008 uid, gid)
3009 tmp_config = write_tmp(config, uid, gid)
3010
3011 # a CLI helper to reduce our typing
3012 def cli(cmd, extra_mounts={}, timeout=DEFAULT_TIMEOUT):
3013 # type: (List[str], Dict[str, str], Optional[int]) -> str
3014 mounts = {
3015 log_dir: '/var/log/ceph:z',
3016 tmp_admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
3017 tmp_config.name: '/etc/ceph/ceph.conf:z',
3018 }
3019 for k, v in extra_mounts.items():
3020 mounts[k] = v
3021 timeout = timeout or args.timeout
3022 return CephContainer(
3023 image=args.image,
3024 entrypoint='/usr/bin/ceph',
3025 args=cmd,
3026 volume_mounts=mounts,
3027 ).run(timeout=timeout)
3028
3029 logger.info('Waiting for mon to start...')
3030 c = CephContainer(
3031 image=args.image,
3032 entrypoint='/usr/bin/ceph',
3033 args=[
3034 'status'],
3035 volume_mounts={
3036 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
3037 tmp_admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
3038 tmp_config.name: '/etc/ceph/ceph.conf:z',
3039 },
3040 )
3041
3042 # wait for the service to become available
3043 def is_mon_available():
3044 # type: () -> bool
f6b5b4d7 3045 timeout=args.timeout if args.timeout else 60 # seconds
9f95a23c
TL
3046 out, err, ret = call(c.run_cmd(),
3047 desc=c.entrypoint,
3048 timeout=timeout)
3049 return ret == 0
3050 is_available('mon', is_mon_available)
3051
3052 # assimilate and minimize config
3053 if not args.no_minimize_config:
3054 logger.info('Assimilating anything we can from ceph.conf...')
3055 cli([
3056 'config', 'assimilate-conf',
3057 '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
3058 ], {
3059 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
3060 })
3061 logger.info('Generating new minimal ceph.conf...')
3062 cli([
3063 'config', 'generate-minimal-conf',
3064 '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
3065 ], {
3066 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
3067 })
3068 # re-read our minimized config
3069 with open(mon_dir + '/config', 'r') as f:
3070 config = f.read()
3071 logger.info('Restarting the monitor...')
3072 call_throws([
3073 'systemctl',
3074 'restart',
3075 get_unit_name(fsid, 'mon', mon_id)
3076 ])
3077
3078 if mon_network:
3079 logger.info('Setting mon public_network...')
3080 cli(['config', 'set', 'mon', 'public_network', mon_network])
3081
f6b5b4d7
TL
3082 if ipv6:
3083 logger.info('Enabling IPv6 (ms_bind_ipv6)')
3084 cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])
3085
9f95a23c
TL
3086 # create mgr
3087 logger.info('Creating mgr...')
3088 mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key)
3089 mgr_c = get_container(fsid, 'mgr', mgr_id)
f6b5b4d7 3090 # Note:the default port used by the Prometheus node exporter is opened in fw
9f95a23c 3091 deploy_daemon(fsid, 'mgr', mgr_id, mgr_c, uid, gid,
f6b5b4d7 3092 config=config, keyring=mgr_keyring, ports=[9283])
9f95a23c
TL
3093
3094 # output files
3095 with open(args.output_keyring, 'w') as f:
3096 os.fchmod(f.fileno(), 0o600)
3097 f.write('[client.admin]\n'
3098 '\tkey = ' + admin_key + '\n')
3099 logger.info('Wrote keyring to %s' % args.output_keyring)
3100
3101 with open(args.output_config, 'w') as f:
3102 f.write(config)
3103 logger.info('Wrote config to %s' % args.output_config)
3104
3105 # wait for the service to become available
3106 logger.info('Waiting for mgr to start...')
3107 def is_mgr_available():
3108 # type: () -> bool
f6b5b4d7 3109 timeout=args.timeout if args.timeout else 60 # seconds
e306af50
TL
3110 try:
3111 out = cli(['status', '-f', 'json-pretty'], timeout=timeout)
3112 j = json.loads(out)
3113 return j.get('mgrmap', {}).get('available', False)
3114 except Exception as e:
3115 logger.debug('status failed: %s' % e)
3116 return False
9f95a23c
TL
3117 is_available('mgr', is_mgr_available)
3118
3119 # wait for mgr to restart (after enabling a module)
3120 def wait_for_mgr_restart():
3121 # first get latest mgrmap epoch from the mon
3122 out = cli(['mgr', 'dump'])
3123 j = json.loads(out)
3124 epoch = j['epoch']
3125 # wait for mgr to have it
3126 logger.info('Waiting for the mgr to restart...')
3127 def mgr_has_latest_epoch():
3128 # type: () -> bool
3129 try:
3130 out = cli(['tell', 'mgr', 'mgr_status'])
3131 j = json.loads(out)
3132 return j['mgrmap_epoch'] >= epoch
3133 except Exception as e:
3134 logger.debug('tell mgr mgr_status failed: %s' % e)
3135 return False
3136 is_available('Mgr epoch %d' % epoch, mgr_has_latest_epoch)
3137
3138 # ssh
3139 if not args.skip_ssh:
f6b5b4d7
TL
3140 cli(['config-key', 'set', 'mgr/cephadm/ssh_user', args.ssh_user])
3141
9f95a23c
TL
3142 logger.info('Enabling cephadm module...')
3143 cli(['mgr', 'module', 'enable', 'cephadm'])
3144 wait_for_mgr_restart()
3145
3146 logger.info('Setting orchestrator backend to cephadm...')
3147 cli(['orch', 'set', 'backend', 'cephadm'])
3148
e306af50
TL
3149 if args.ssh_config:
3150 logger.info('Using provided ssh config...')
3151 mounts = {
3152 pathify(args.ssh_config.name): '/tmp/cephadm-ssh-config:z',
3153 }
3154 cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts=mounts)
3155
3156 if args.ssh_private_key and args.ssh_public_key:
3157 logger.info('Using provided ssh keys...')
3158 mounts = {
3159 pathify(args.ssh_private_key.name): '/tmp/cephadm-ssh-key:z',
3160 pathify(args.ssh_public_key.name): '/tmp/cephadm-ssh-key.pub:z'
3161 }
3162 cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts)
3163 cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts=mounts)
3164 else:
3165 logger.info('Generating ssh key...')
3166 cli(['cephadm', 'generate-key'])
3167 ssh_pub = cli(['cephadm', 'get-pub-key'])
3168
3169 with open(args.output_pub_ssh_key, 'w') as f:
3170 f.write(ssh_pub)
3171 logger.info('Wrote public SSH key to to %s' % args.output_pub_ssh_key)
3172
f6b5b4d7
TL
3173 logger.info('Adding key to %s@localhost\'s authorized_keys...' % args.ssh_user)
3174 try:
3175 s_pwd = pwd.getpwnam(args.ssh_user)
3176 except KeyError as e:
3177 raise Error('Cannot find uid/gid for ssh-user: %s' % (args.ssh_user))
3178 ssh_uid = s_pwd.pw_uid
3179 ssh_gid = s_pwd.pw_gid
3180 ssh_dir = os.path.join(s_pwd.pw_dir, '.ssh')
3181
3182 if not os.path.exists(ssh_dir):
3183 makedirs(ssh_dir, ssh_uid, ssh_gid, 0o700)
3184
3185 auth_keys_file = '%s/authorized_keys' % ssh_dir
e306af50 3186 add_newline = False
f6b5b4d7 3187
e306af50
TL
3188 if os.path.exists(auth_keys_file):
3189 with open(auth_keys_file, 'r') as f:
3190 f.seek(0, os.SEEK_END)
3191 if f.tell() > 0:
3192 f.seek(f.tell()-1, os.SEEK_SET) # go to last char
3193 if f.read() != '\n':
3194 add_newline = True
f6b5b4d7 3195
e306af50 3196 with open(auth_keys_file, 'a') as f:
f6b5b4d7 3197 os.fchown(f.fileno(), ssh_uid, ssh_gid) # just in case we created it
e306af50
TL
3198 os.fchmod(f.fileno(), 0o600) # just in case we created it
3199 if add_newline:
3200 f.write('\n')
3201 f.write(ssh_pub.strip() + '\n')
9f95a23c
TL
3202
3203 host = get_hostname()
3204 logger.info('Adding host %s...' % host)
f6b5b4d7
TL
3205 try:
3206 cli(['orch', 'host', 'add', host])
3207 except RuntimeError as e:
3208 raise Error('Failed to add host <%s>: %s' % (host, e))
9f95a23c
TL
3209
3210 if not args.orphan_initial_daemons:
3211 for t in ['mon', 'mgr', 'crash']:
3212 logger.info('Deploying %s service with default placement...' % t)
3213 cli(['orch', 'apply', t])
3214
3215 if not args.skip_monitoring_stack:
3216 logger.info('Enabling mgr prometheus module...')
3217 cli(['mgr', 'module', 'enable', 'prometheus'])
3218 for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
3219 logger.info('Deploying %s service with default placement...' % t)
3220 cli(['orch', 'apply', t])
3221
f6b5b4d7
TL
3222 if args.registry_url and args.registry_username and args.registry_password:
3223 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_url', args.registry_url, '--force'])
3224 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_username', args.registry_username, '--force'])
3225 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_password', args.registry_password, '--force'])
3226
f91f0fd5
TL
3227 if args.container_init:
3228 cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(args.container_init), '--force'])
3229
9f95a23c 3230 if not args.skip_dashboard:
f6b5b4d7
TL
3231 # Configure SSL port (cephadm only allows to configure dashboard SSL port)
3232 # if the user does not want to use SSL he can change this setting once the cluster is up
3233 cli(["config", "set", "mgr", "mgr/dashboard/ssl_server_port" , str(args.ssl_dashboard_port)])
3234
3235 # configuring dashboard parameters
9f95a23c
TL
3236 logger.info('Enabling the dashboard module...')
3237 cli(['mgr', 'module', 'enable', 'dashboard'])
3238 wait_for_mgr_restart()
3239
3240 # dashboard crt and key
3241 if args.dashboard_key and args.dashboard_crt:
3242 logger.info('Using provided dashboard certificate...')
e306af50
TL
3243 mounts = {
3244 pathify(args.dashboard_crt.name): '/tmp/dashboard.crt:z',
3245 pathify(args.dashboard_key.name): '/tmp/dashboard.key:z'
3246 }
9f95a23c
TL
3247 cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts)
3248 cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts)
3249 else:
3250 logger.info('Generating a dashboard self-signed certificate...')
3251 cli(['dashboard', 'create-self-signed-cert'])
3252
3253 logger.info('Creating initial admin user...')
3254 password = args.initial_dashboard_password or generate_password()
cd265ab1
TL
3255 tmp_password_file = write_tmp(password, uid, gid)
3256 cmd = ['dashboard', 'ac-user-create', args.initial_dashboard_user, '-i', '/tmp/dashboard.pw', 'administrator', '--force-password']
9f95a23c
TL
3257 if not args.dashboard_password_noupdate:
3258 cmd.append('--pwd-update-required')
cd265ab1 3259 cli(cmd, extra_mounts={pathify(tmp_password_file.name): '/tmp/dashboard.pw:z'})
9f95a23c
TL
3260 logger.info('Fetching dashboard port number...')
3261 out = cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
3262 port = int(out)
3263
f6b5b4d7
TL
3264 # Open dashboard port
3265 fw = Firewalld()
3266 fw.open_ports([port])
3267 fw.apply_rules()
3268
9f95a23c
TL
3269 logger.info('Ceph Dashboard is now available at:\n\n'
3270 '\t URL: https://%s:%s/\n'
3271 '\t User: %s\n'
3272 '\tPassword: %s\n' % (
3273 get_fqdn(), port,
3274 args.initial_dashboard_user,
3275 password))
f6b5b4d7 3276
e306af50
TL
3277 if args.apply_spec:
3278 logger.info('Applying %s to cluster' % args.apply_spec)
3279
3280 with open(args.apply_spec) as f:
3281 for line in f:
3282 if 'hostname:' in line:
3283 line = line.replace('\n', '')
3284 split = line.split(': ')
3285 if split[1] != host:
3286 logger.info('Adding ssh key to %s' % split[1])
3287
3288 ssh_key = '/etc/ceph/ceph.pub'
3289 if args.ssh_public_key:
3290 ssh_key = args.ssh_public_key.name
f6b5b4d7 3291 out, err, code = call_throws(['ssh-copy-id', '-f', '-i', ssh_key, '%s@%s' % (args.ssh_user, split[1])])
e306af50
TL
3292
3293 mounts = {}
3294 mounts[pathify(args.apply_spec)] = '/tmp/spec.yml:z'
3295
3296 out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
3297 logger.info(out)
9f95a23c
TL
3298
3299 logger.info('You can access the Ceph CLI with:\n\n'
3300 '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
3301 sys.argv[0],
3302 fsid,
3303 args.output_config,
3304 args.output_keyring))
3305 logger.info('Please consider enabling telemetry to help improve Ceph:\n\n'
3306 '\tceph telemetry on\n\n'
3307 'For more information see:\n\n'
3308 '\thttps://docs.ceph.com/docs/master/mgr/telemetry/\n')
3309 logger.info('Bootstrap complete.')
3310 return 0
3311
3312##################################
3313
f6b5b4d7
TL
def command_registry_login():
    """Authenticate the local container engine against a custom registry.

    Credentials come either from --registry-json (url/username/password keys)
    or from the individual --registry-url / --registry-username /
    --registry-password options; anything else is an error.
    """
    if args.registry_json:
        logger.info("Pulling custom registry login info from %s." % args.registry_json)
        d = get_parm(args.registry_json)
        if not (d.get('url') and d.get('username') and d.get('password')):
            raise Error("json provided for custom registry login did not include all necessary fields. "
                        "Please setup json file as\n"
                        "{\n"
                        " \"url\": \"REGISTRY_URL\",\n"
                        " \"username\": \"REGISTRY_USERNAME\",\n"
                        " \"password\": \"REGISTRY_PASSWORD\"\n"
                        "}\n")
        # stash the values on args so later bootstrap steps can reuse them
        args.registry_url = d.get('url')
        args.registry_username = d.get('username')
        args.registry_password = d.get('password')
        registry_login(args.registry_url, args.registry_username, args.registry_password)
    elif args.registry_url and args.registry_username and args.registry_password:
        registry_login(args.registry_url, args.registry_username, args.registry_password)
    else:
        raise Error("Invalid custom registry arguments received. To login to a custom registry include "
                    "--registry-url, --registry-username and --registry-password "
                    "options or --registry-json option")
    return 0
3338
def registry_login(url, username, password):
    # type: (str, str, str) -> None
    """Log the container engine (podman/docker) into a registry.

    :param url: registry URL to log in to
    :param username: registry account name
    :param password: registry account password
    :raises Error: if the engine's ``login`` command fails
    """
    logger.info("Logging into custom registry.")
    try:
        # 'login' takes the same flags under both podman and docker
        call_throws([container_path, 'login',
                     '-u', username,
                     '-p', password,
                     url])
    except Exception:
        # Report the values actually passed in, not the (possibly unset)
        # global args — this function may be called with registry-json
        # derived credentials.
        raise Error("Failed to login to custom registry @ %s as %s with given password" % (url, username))
3348
3349##################################
3350
3351
9f95a23c
TL
def extract_uid_gid_monitoring(daemon_type):
    # type: (str) -> Tuple[int, int]
    """Return the (uid, gid) a monitoring-stack daemon should run as.

    node-exporter uses the well-known nobody ids; the others are probed
    from the container image via a type-specific config path.
    """
    if daemon_type == 'node-exporter':
        # nobody/nogroup — no need to inspect the image
        return 65534, 65534

    probe_path = {
        'prometheus': '/etc/prometheus',
        'grafana': '/var/lib/grafana',
        'alertmanager': ['/etc/alertmanager', '/etc/prometheus'],
    }
    if daemon_type not in probe_path:
        raise Error("{} not implemented yet".format(daemon_type))
    uid, gid = extract_uid_gid(file_path=probe_path[daemon_type])
    return uid, gid
3366
3367
@default_image
def command_deploy():
    # type: () -> None
    """Deploy (or reconfigure/redeploy) one daemon on this host.

    The daemon identity comes from ``args.name`` ('<type>.<id>'); after
    validating the type, dispatch to the matching deployment path:
    core Ceph daemons, monitoring-stack components, NFS Ganesha, iSCSI,
    or a custom container.
    """
    daemon_type, daemon_id = args.name.split('.', 1)

    # serialize cephadm operations on this cluster
    l = FileLock(args.fsid)
    l.acquire()

    if daemon_type not in get_supported_daemons():
        raise Error('daemon type %s not recognized' % daemon_type)

    # a running systemd unit means this is a redeploy (restart of an
    # existing daemon) rather than a first-time deploy
    redeploy = False
    unit_name = get_unit_name(args.fsid, daemon_type, daemon_id)
    (_, state, _) = check_unit(unit_name)
    if state == 'running':
        redeploy = True

    if args.reconfig:
        logger.info('%s daemon %s ...' % ('Reconfig', args.name))
    elif redeploy:
        logger.info('%s daemon %s ...' % ('Redeploy', args.name))
    else:
        logger.info('%s daemon %s ...' % ('Deploy', args.name))

    # Get and check ports explicitly required to be opened
    daemon_ports = []  # type: List[int]
    if args.tcp_ports:
        daemon_ports = list(map(int, args.tcp_ports.split()))

    if daemon_type in Ceph.daemons:
        # core ceph daemon: needs config + keyring and the ceph uid/gid
        config, keyring = get_config_and_keyring()
        uid, gid = extract_uid_gid()
        make_var_run(args.fsid, uid, gid)

        c = get_container(args.fsid, daemon_type, daemon_id,
                          ptrace=args.allow_ptrace)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      osd_fsid=args.osd_fsid,
                      reconfig=args.reconfig,
                      ports=daemon_ports)

    elif daemon_type in Monitoring.components:
        # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
        # Default Checks
        # only open default firewall ports on a fresh deploy, not on
        # reconfig/redeploy
        if not args.reconfig and not redeploy:
            daemon_ports.extend(Monitoring.port_map[daemon_type])

        # make sure provided config-json is sufficient
        config = get_parm(args.config_json)  # type: ignore
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())
        required_args = Monitoring.components[daemon_type].get('config-json-args', list())
        if required_files:
            if not config or not all(c in config.get('files', {}).keys() for c in required_files):  # type: ignore
                raise Error("{} deployment requires config-json which must "
                            "contain file content for {}".format(daemon_type.capitalize(), ', '.join(required_files)))
        if required_args:
            if not config or not all(c in config.keys() for c in required_args):  # type: ignore
                raise Error("{} deployment requires config-json which must "
                            "contain arg for {}".format(daemon_type.capitalize(), ', '.join(required_args)))

        uid, gid = extract_uid_gid_monitoring(daemon_type)
        c = get_container(args.fsid, daemon_type, daemon_id)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=args.reconfig,
                      ports=daemon_ports)

    elif daemon_type == NFSGanesha.daemon_type:
        if not args.reconfig and not redeploy:
            daemon_ports.extend(NFSGanesha.port_map.values())

        config, keyring = get_config_and_keyring()
        # TODO: extract ganesha uid/gid (997, 994) ?
        uid, gid = extract_uid_gid()
        c = get_container(args.fsid, daemon_type, daemon_id)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=args.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CephIscsi.daemon_type:
        # NOTE: unlike the branches above, no default ports are appended
        # here; only explicitly requested --tcp-ports are opened
        config, keyring = get_config_and_keyring()
        uid, gid = extract_uid_gid()
        c = get_container(args.fsid, daemon_type, daemon_id)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=args.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CustomContainer.daemon_type:
        # arbitrary user-defined container; uid/gid/ports come from its spec
        cc = CustomContainer.init(args.fsid, daemon_id)
        if not args.reconfig and not redeploy:
            daemon_ports.extend(cc.ports)
        c = get_container(args.fsid, daemon_type, daemon_id,
                          privileged=cc.privileged,
                          ptrace=args.allow_ptrace)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c,
                      uid=cc.uid, gid=cc.gid, config=None,
                      keyring=None, reconfig=args.reconfig,
                      ports=daemon_ports)

    else:
        raise Error('daemon type {} not implemented in command_deploy function'
                    .format(daemon_type))
9f95a23c
TL
3472
3473##################################
3474
f6b5b4d7 3475
9f95a23c
TL
@infer_image
def command_run():
    # type: () -> int
    """Run a daemon's container in the foreground; return its exit code."""
    daemon_type, daemon_id = args.name.split('.', 1)
    ctr = get_container(args.fsid, daemon_type, daemon_id)
    return call_timeout(ctr.run_cmd(), args.timeout)
3483
3484##################################
3485
f6b5b4d7 3486
@infer_fsid
@infer_config
@infer_image
def command_shell():
    # type: () -> int
    """Launch an interactive shell (or run args.command) in a ceph container.

    Mounts are assembled as if for a daemon of the requested type (or 'osd'
    by default, which gets the most mounts), plus any explicit --config,
    --keyring, and --mount arguments.
    """
    if args.fsid:
        make_log_dir(args.fsid)
    if args.name:
        if '.' in args.name:
            (daemon_type, daemon_id) = args.name.split('.', 1)
        else:
            daemon_type = args.name
            daemon_id = None
    else:
        daemon_type = 'osd'  # get the most mounts
        daemon_id = None

    if daemon_id and not args.fsid:
        raise Error('must pass --fsid to specify cluster')

    # use /etc/ceph files by default, if present. we do this instead of
    # making these defaults in the arg parser because we don't want an error
    # if they don't exist.
    if not args.keyring and os.path.exists(SHELL_DEFAULT_KEYRING):
        args.keyring = SHELL_DEFAULT_KEYRING

    container_args = []  # type: List[str]
    # when an explicit --config is given, suppress the default config mount
    mounts = get_container_mounts(args.fsid, daemon_type, daemon_id,
                                  no_config=True if args.config else False)
    binds = get_container_binds(args.fsid, daemon_type, daemon_id)
    if args.config:
        mounts[pathify(args.config)] = '/etc/ceph/ceph.conf:z'
    if args.keyring:
        mounts[pathify(args.keyring)] = '/etc/ceph/ceph.keyring:z'
    if args.mount:
        # --mount is 'src[:dst[:z]]'; default dst is /mnt/<basename>
        for _mount in args.mount:
            split_src_dst = _mount.split(':')
            mount = pathify(split_src_dst[0])
            filename = os.path.basename(split_src_dst[0])
            if len(split_src_dst) > 1:
                dst = split_src_dst[1] + ':z' if len(split_src_dst) == 3 else split_src_dst[1]
                mounts[mount] = dst
            else:
                mounts[mount] = '/mnt/{}:z'.format(filename)
    if args.command:
        command = args.command
    else:
        # interactive shell with a recognizable prompt
        command = ['bash']
        container_args += [
            '-it',
            '-e', 'LANG=C',
            '-e', "PS1=%s" % CUSTOM_PS1,
        ]
    if args.fsid:
        # persistent per-cluster root home, seeded from /etc/skel dotfiles
        home = os.path.join(args.data_dir, args.fsid, 'home')
        if not os.path.exists(home):
            logger.debug('Creating root home at %s' % home)
            makedirs(home, 0, 0, 0o660)
        if os.path.exists('/etc/skel'):
            for f in os.listdir('/etc/skel'):
                if f.startswith('.bash'):
                    shutil.copyfile(os.path.join('/etc/skel', f),
                                    os.path.join(home, f))
        mounts[home] = '/root'

    c = CephContainer(
        image=args.image,
        entrypoint='doesnotmatter',
        args=[],
        container_args=container_args,
        volume_mounts=mounts,
        bind_mounts=binds,
        envs=args.env,
        privileged=True)
    command = c.shell_cmd(command)

    return call_timeout(command, args.timeout)
3564
3565##################################
3566
f6b5b4d7 3567
9f95a23c
TL
@infer_fsid
def command_enter():
    # type: () -> int
    """Exec into a running daemon's container (interactive 'sh' by default)."""
    if not args.fsid:
        raise Error('must pass --fsid to specify cluster')
    daemon_type, daemon_id = args.name.split('.', 1)
    extra_args = []  # type: List[str]
    if args.command:
        cmd = args.command
    else:
        # no explicit command: start an interactive shell with a custom prompt
        cmd = ['sh']
        extra_args.extend([
            '-it',
            '-e', 'LANG=C',
            '-e', "PS1=%s" % CUSTOM_PS1,
        ])
    container = CephContainer(
        image=args.image,
        entrypoint='doesnotmatter',
        container_args=extra_args,
        cname='ceph-%s-%s.%s' % (args.fsid, daemon_type, daemon_id),
    )
    return call_timeout(container.exec_cmd(cmd), args.timeout)
3592
3593##################################
3594
f6b5b4d7 3595
9f95a23c
TL
@infer_fsid
@infer_image
def command_ceph_volume():
    # type: () -> None
    """Run ceph-volume inside a container with args.command as its arguments.

    If a config/keyring can be found, they are written to temp files and
    mounted where ceph-volume expects them; output is printed on success.
    """
    if args.fsid:
        make_log_dir(args.fsid)

        # serialize against other cephadm operations on this cluster
        l = FileLock(args.fsid)
        l.acquire()

    (uid, gid) = (0, 0)  # ceph-volume runs as root
    mounts = get_container_mounts(args.fsid, 'osd', None)

    # keep references so the NamedTemporaryFiles live until the container
    # run below completes
    tmp_config = None
    tmp_keyring = None

    (config, keyring) = get_config_and_keyring()

    if config:
        # tmp config file
        tmp_config = write_tmp(config, uid, gid)
        mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'

    if keyring:
        # tmp keyring file
        tmp_keyring = write_tmp(keyring, uid, gid)
        mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'

    c = CephContainer(
        image=args.image,
        entrypoint='/usr/sbin/ceph-volume',
        envs=args.env,
        args=args.command,
        privileged=True,
        volume_mounts=mounts,
    )
    out, err, code = call_throws(c.run_cmd(), verbosity=CallVerbosity.VERBOSE)
    if not code:
        print(out)
3635
3636##################################
3637
f6b5b4d7 3638
9f95a23c
TL
@infer_fsid
def command_unit():
    # type: () -> None
    """Run a systemctl subcommand (start/stop/...) on a daemon's unit."""
    if not args.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit = get_unit_name_by_daemon_name(args.fsid, args.name)

    call_throws(
        ['systemctl', args.command, unit],
        verbosity=CallVerbosity.VERBOSE,
        desc='')
9f95a23c
TL
3654
3655##################################
3656
f6b5b4d7 3657
9f95a23c
TL
@infer_fsid
def command_logs():
    # type: () -> None
    """Show a daemon's journald logs; extra args pass through to journalctl."""
    if not args.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit = get_unit_name_by_daemon_name(args.fsid, args.name)

    cmd = [find_program('journalctl'), '-u', unit]
    if args.command:
        cmd += args.command

    # call this directly, without our wrapper, so that we get an unmolested
    # stdout with logger prefixing.
    logger.debug("Running command: %s" % ' '.join(cmd))
    subprocess.call(cmd)  # type: ignore
3675
3676##################################
3677
f6b5b4d7 3678
9f95a23c
TL
def list_networks():
    # type: () -> Dict[str,List[str]]
    """Map each locally configured CIDR network to the local IPs within it."""
    # iproute2 on Ubuntu 18.04 (4.15.0) lacks the -j (JSON) flag, so the
    # helpers below parse the plain-text 'ip' output with regexes instead.
    networks = _list_ipv4_networks()
    networks.update(_list_ipv6_networks())
    return networks
3691
3692
def _list_ipv4_networks():
    """Return IPv4 {cidr: [local ips]} parsed from `ip route ls`."""
    route_output = call_throws([find_executable('ip'), 'route', 'ls'])[0]
    return _parse_ipv4_route(route_output)
3696
9f95a23c 3697
f6b5b4d7 3698def _parse_ipv4_route(out):
9f95a23c
TL
3699 r = {} # type: Dict[str,List[str]]
3700 p = re.compile(r'^(\S+) (.*)scope link (.*)src (\S+)')
3701 for line in out.splitlines():
3702 m = p.findall(line)
3703 if not m:
3704 continue
3705 net = m[0][0]
3706 ip = m[0][3]
3707 if net not in r:
3708 r[net] = []
3709 r[net].append(ip)
3710 return r
3711
f6b5b4d7
TL
3712
def _list_ipv6_networks():
    """Return IPv6 {cidr: [local ips]} from `ip -6 route ls` + `ip -6 addr ls`."""
    ip_bin = find_executable('ip')
    route_output = call_throws([ip_bin, '-6', 'route', 'ls'])[0]
    addr_output = call_throws([ip_bin, '-6', 'addr', 'ls'])[0]
    return _parse_ipv6_route(route_output, addr_output)
3717
3718
def _parse_ipv6_route(routes, ips):
    """Combine `ip -6 route ls` and `ip -6 addr ls` output into
    {cidr_network: [local ips]}.

    Route lines provide the candidate networks (the default route is
    skipped); each inet6 address is then placed into the first network
    that contains it.
    """
    networks = {}  # type: Dict[str,List[str]]
    route_re = re.compile(r'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$')
    addr_re = re.compile(r'^\s+inet6 (\S+)/(.*)scope (.*)$')

    # first pass: collect the known networks
    for line in routes.splitlines():
        found = route_re.findall(line)
        if found and found[0][0].lower() != 'default':
            networks.setdefault(found[0][0], [])

    # second pass: assign each local address to a containing network
    # NOTE: `unicode` is assumed to be aliased elsewhere in this file for
    # py2/py3 compatibility (the bootstrap code uses it the same way)
    for line in ips.splitlines():
        found = addr_re.findall(line)
        if not found:
            continue
        addr = found[0][0]
        matching = [n for n in networks.keys()
                    if ipaddress.ip_address(unicode(addr)) in ipaddress.ip_network(unicode(n))]
        if matching:
            networks[matching[0]].append(addr)

    return networks
3743
3744
9f95a23c
TL
def command_list_networks():
    # type: () -> None
    """Print this host's network-to-IPs mapping as indented JSON."""
    print(json.dumps(list_networks(), indent=4))
3749
3750##################################
3751
f6b5b4d7 3752
9f95a23c
TL
def command_ls():
    # type: () -> None
    """Print all daemons detected on this host as indented JSON."""
    daemons = list_daemons(detail=not args.no_detail,
                           legacy_dir=args.legacy_dir)
    print(json.dumps(daemons, indent=4))
3759
f6b5b4d7 3760
9f95a23c
TL
def list_daemons(detail=True, legacy_dir=None):
    # type: (bool, Optional[str]) -> List[Dict[str, str]]
    """Scan the data dir and return one record per daemon found on this host.

    Both 'legacy' daemons (/var/lib/ceph/<type>/<cluster>-<id>) and
    cephadm-managed daemons (/var/lib/ceph/<fsid>/<type>.<id>) are reported.
    When ``detail`` is True, each record is augmented with systemd unit
    state and (via the container runtime) container/image ids and the
    daemon's version.  ``legacy_dir`` prefixes all host paths (for tests).
    """
    host_version = None
    ls = []

    data_dir = args.data_dir
    if legacy_dir is not None:
        data_dir = os.path.abspath(legacy_dir + data_dir)

    # keep track of ceph versions we see
    seen_versions = {}  # type: Dict[str, Optional[str]]

    # /var/lib/ceph
    if os.path.exists(data_dir):
        for i in os.listdir(data_dir):
            if i in ['mon', 'osd', 'mds', 'mgr']:
                # legacy layout: /var/lib/ceph/<type>/<cluster>-<id>
                daemon_type = i
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '-' not in j:
                        continue
                    (cluster, daemon_id) = j.split('-', 1)
                    fsid = get_legacy_daemon_fsid(
                        cluster, daemon_type, daemon_id,
                        legacy_dir=legacy_dir)
                    legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
                    # NOTE: the loop variable `i` is rebound to the record dict here
                    i = {
                        'style': 'legacy',
                        'name': '%s.%s' % (daemon_type, daemon_id),
                        'fsid': fsid if fsid is not None else 'unknown',
                        'systemd_unit': legacy_unit_name,
                    }
                    if detail:
                        (i['enabled'], i['state'], _) = check_unit(legacy_unit_name)
                        # host-wide ceph version is probed once and cached
                        if not host_version:
                            try:
                                out, err, code = call(['ceph', '-v'])
                                if not code and out.startswith('ceph version '):
                                    host_version = out.split(' ')[2]
                            except Exception:
                                pass
                        i['host_version'] = host_version
                    ls.append(i)
            elif is_fsid(i):
                # cephadm layout: /var/lib/ceph/<fsid>/<type>.<id>
                fsid = str(i)  # convince mypy that fsid is a str here
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '.' in j:
                        name = j
                        (daemon_type, daemon_id) = j.split('.', 1)
                        unit_name = get_unit_name(fsid,
                                                  daemon_type,
                                                  daemon_id)
                    else:
                        continue
                    i = {
                        'style': 'cephadm:v1',
                        'name': name,
                        'fsid': fsid,
                        'systemd_unit': unit_name,
                    }
                    if detail:
                        # get container id
                        (i['enabled'], i['state'], _) = check_unit(unit_name)
                        container_id = None
                        image_name = None
                        image_id = None
                        version = None
                        start_stamp = None

                        # old podman reports .Image as the tag, not the id
                        if 'podman' in container_path and get_podman_version() < (1, 6, 2):
                            image_field = '.ImageID'
                        else:
                            image_field = '.Image'

                        out, err, code = call(
                            [
                                container_path, 'inspect',
                                '--format', '{{.Id}},{{.Config.Image}},{{%s}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}' % image_field,
                                'ceph-%s-%s' % (fsid, j)
                            ],
                            verbosity=CallVerbosity.DEBUG)
                        if not code:
                            # container exists: parse ids, then determine the
                            # daemon version (label, cache, or exec'd probe)
                            (container_id, image_name, image_id, start,
                             version) = out.strip().split(',')
                            image_id = normalize_container_id(image_id)
                            daemon_type = name.split('.', 1)[0]
                            start_stamp = try_convert_datetime(start)
                            if not version or '.' not in version:
                                version = seen_versions.get(image_id, None)
                            if daemon_type == NFSGanesha.daemon_type:
                                version = NFSGanesha.get_version(container_id)
                            if daemon_type == CephIscsi.daemon_type:
                                version = CephIscsi.get_version(container_id)
                            elif not version:
                                # fall back to exec'ing a version command
                                # inside the running container
                                if daemon_type in Ceph.daemons:
                                    out, err, code = call(
                                        [container_path, 'exec', container_id,
                                         'ceph', '-v'])
                                    if not code and \
                                       out.startswith('ceph version '):
                                        version = out.split(' ')[2]
                                        seen_versions[image_id] = version
                                elif daemon_type == 'grafana':
                                    out, err, code = call(
                                        [container_path, 'exec', container_id,
                                         'grafana-server', '-v'])
                                    if not code and \
                                       out.startswith('Version '):
                                        version = out.split(' ')[1]
                                        seen_versions[image_id] = version
                                elif daemon_type in ['prometheus',
                                                     'alertmanager',
                                                     'node-exporter']:
                                    cmd = daemon_type.replace('-', '_')
                                    out, err, code = call(
                                        [container_path, 'exec', container_id,
                                         cmd, '--version'])
                                    # these tools print their version on stderr
                                    if not code and \
                                       err.startswith('%s, version ' % cmd):
                                        version = err.split(' ')[2]
                                        seen_versions[image_id] = version
                                elif daemon_type == CustomContainer.daemon_type:
                                    # Because a custom container can contain
                                    # everything, we do not know which command
                                    # to execute to get the version.
                                    pass
                                else:
                                    logger.warning('version for unknown daemon type %s' % daemon_type)
                        else:
                            # container not running: at least report the
                            # image recorded at deploy time
                            vfile = os.path.join(data_dir, fsid, j, 'unit.image')  # type: ignore
                            try:
                                with open(vfile, 'r') as f:
                                    image_name = f.read().strip() or None
                            except IOError:
                                pass
                        i['container_id'] = container_id
                        i['container_image_name'] = image_name
                        i['container_image_id'] = image_id
                        i['version'] = version
                        i['started'] = start_stamp
                        i['created'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.created')
                        )
                        i['deployed'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.image'))
                        i['configured'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.configured'))

                    ls.append(i)

    return ls
3911
3912
e306af50
TL
def get_daemon_description(fsid, name, detail=False, legacy_dir=None):
    # type: (str, str, bool, Optional[str]) -> Dict[str, str]
    """Return the `cephadm ls` record for one daemon, or raise Error."""
    for d in list_daemons(detail=detail, legacy_dir=legacy_dir):
        if d['fsid'] == fsid and d['name'] == name:
            return d
    raise Error('Daemon not found: {}. See `cephadm ls`'.format(name))
3923
3924
9f95a23c
TL
3925##################################
3926
@default_image
def command_adopt():
    # type: () -> None
    """Adopt the legacy daemon named by args.name into cephadm management."""

    if not args.skip_pull:
        _pull_image(args.image)

    daemon_type, daemon_id = args.name.split('.', 1)

    # legacy check
    if args.style != 'legacy':
        raise Error('adoption of style %s not implemented' % args.style)

    # lock on the cluster fsid so concurrent invocations cannot race
    fsid = get_legacy_daemon_fsid(args.cluster,
                                  daemon_type,
                                  daemon_id,
                                  legacy_dir=args.legacy_dir)
    if not fsid:
        raise Error('could not detect legacy fsid; set fsid in ceph.conf')
    lock = FileLock(fsid)
    lock.acquire()

    # dispatch to the adoption routine for this daemon type
    if daemon_type in Ceph.daemons:
        command_adopt_ceph(daemon_type, daemon_id, fsid)
    elif daemon_type == 'prometheus':
        command_adopt_prometheus(daemon_id, fsid)
    elif daemon_type == 'grafana':
        command_adopt_grafana(daemon_id, fsid)
    elif daemon_type == 'node-exporter':
        raise Error('adoption of node-exporter not implemented')
    elif daemon_type == 'alertmanager':
        command_adopt_alertmanager(daemon_id, fsid)
    else:
        raise Error('daemon type %s not recognized' % daemon_type)
3963
3964
1911f103
TL
class AdoptOsd(object):
    """Probe a legacy OSD data dir to discover its fsid and objectstore type."""

    def __init__(self, osd_data_dir, osd_id):
        # type: (str, str) -> None
        self.osd_data_dir = osd_data_dir
        self.osd_id = osd_id

    def check_online_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Read fsid/type straight from a mounted (online) OSD data dir."""
        osd_fsid, osd_type = None, None

        path = os.path.join(self.osd_data_dir, 'fsid')
        try:
            with open(path, 'r') as f:
                osd_fsid = f.read().strip()
                logger.info("Found online OSD at %s" % path)
        except IOError:
            logger.info('Unable to read OSD fsid from %s' % path)

        type_path = os.path.join(self.osd_data_dir, 'type')
        if os.path.exists(type_path):
            with open(type_path) as f:
                osd_type = f.read().strip()
        else:
            logger.info('"type" file missing for OSD data dir')

        return osd_fsid, osd_type

    def check_offline_lvm_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Ask containerized ceph-volume about LVM-backed offline OSDs."""
        osd_fsid, osd_type = None, None

        c = CephContainer(
            image=args.image,
            entrypoint='/usr/sbin/ceph-volume',
            args=['lvm', 'list', '--format=json'],
            privileged=True
        )
        out, err, code = call_throws(c.run_cmd())
        if not code:
            try:
                js = json.loads(out)
                if self.osd_id in js:
                    logger.info("Found offline LVM OSD {}".format(self.osd_id))
                    osd_fsid = js[self.osd_id][0]['tags']['ceph.osd_fsid']
                    # the device type tells us which objectstore this is
                    type_map = {'block': 'bluestore', 'data': 'filestore'}
                    for device in js[self.osd_id]:
                        dev_type = device['tags']['ceph.type']
                        if dev_type in type_map:
                            osd_type = type_map[dev_type]
                            break
            except ValueError as e:
                logger.info("Invalid JSON in ceph-volume lvm list: {}".format(e))

        return osd_fsid, osd_type

    def check_offline_simple_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Look for a ceph-volume 'simple' scan JSON describing this OSD."""
        osd_fsid, osd_type = None, None

        osd_file = glob("/etc/ceph/osd/{}-[a-f0-9-]*.json".format(self.osd_id))
        if len(osd_file) == 1:
            with open(osd_file[0], 'r') as f:
                try:
                    js = json.loads(f.read())
                    logger.info("Found offline simple OSD {}".format(self.osd_id))
                    osd_fsid = js["fsid"]
                    osd_type = js["type"]
                    if osd_type != "filestore":
                        # need this to be mounted for the adopt to work, as it
                        # needs to move files from this directory
                        call_throws(['mount', js["data"]["path"], self.osd_data_dir])
                except ValueError as e:
                    logger.info("Invalid JSON in {}: {}".format(osd_file, e))

        return osd_fsid, osd_type
4042
9f95a23c
TL
4043
def command_adopt_ceph(daemon_type, daemon_id, fsid):
    # type: (str, str, str) -> None
    """Adopt a legacy ceph daemon (mon/mgr/mds/osd) into cephadm.

    Stops and disables the legacy systemd unit, moves its data and logs
    into the cephadm layout under the cluster fsid, and deploys a new
    containerized systemd unit in its place.  Raises Error if the legacy
    data dir is missing, the OSD cannot be located, or it is a filestore
    OSD (unsupported).
    """

    (uid, gid) = extract_uid_gid()

    data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
                    (daemon_type, args.cluster, daemon_id))
    data_dir_src = os.path.abspath(args.legacy_dir + data_dir_src)

    if not os.path.exists(data_dir_src):
        # fix: error message typo ("alrady" -> "already")
        raise Error("{}.{} data directory '{}' does not exist. "
                    "Incorrect ID specified, or daemon already adopted?".format(
                        daemon_type, daemon_id, data_dir_src))

    osd_fsid = None
    if daemon_type == 'osd':
        # probe the data dir: online, then offline-lvm, then offline-simple
        adopt_osd = AdoptOsd(data_dir_src, daemon_id)
        osd_fsid, osd_type = adopt_osd.check_online_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
        if not osd_fsid:
            raise Error('Unable to find OSD {}'.format(daemon_id))
        logger.info('objectstore_type is %s' % osd_type)
        assert osd_type
        if osd_type == 'filestore':
            raise Error('FileStore is not supported by cephadm')

    # NOTE: implicit assumption here that the units correspond to the
    # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
    # CLUSTER field.
    unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
    (enabled, state, _) = check_unit(unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(['systemctl', 'disable', unit_name])

    # data
    logger.info('Moving data...')
    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)
    move_files(glob(os.path.join(data_dir_src, '*')),
               data_dir_dst,
               uid=uid, gid=gid)
    logger.debug('Remove dir \'%s\'' % (data_dir_src))
    if os.path.ismount(data_dir_src):
        call_throws(['umount', data_dir_src])
    os.rmdir(data_dir_src)

    logger.info('Chowning content...')
    call_throws(['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])

    if daemon_type == 'mon':
        # rename *.ldb -> *.sst, in case they are coming from ubuntu
        store = os.path.join(data_dir_dst, 'store.db')
        num_renamed = 0
        if os.path.exists(store):
            for oldf in os.listdir(store):
                if oldf.endswith('.ldb'):
                    newf = oldf.replace('.ldb', '.sst')
                    oldp = os.path.join(store, oldf)
                    newp = os.path.join(store, newf)
                    logger.debug('Renaming %s -> %s' % (oldp, newp))
                    os.rename(oldp, newp)
                    # fix: counter was never incremented, so the summary
                    # log line below could never fire
                    num_renamed += 1
            if num_renamed:
                logger.info('Renamed %d leveldb *.ldb files to *.sst',
                            num_renamed)
    if daemon_type == 'osd':
        for n in ['block', 'block.db', 'block.wal']:
            p = os.path.join(data_dir_dst, n)
            if os.path.exists(p):
                logger.info('Chowning %s...' % p)
                os.chown(p, uid, gid)
        # disable the ceph-volume 'simple' mode files on the host
        simple_fn = os.path.join('/etc/ceph/osd',
                                 '%s-%s.json' % (daemon_id, osd_fsid))
        if os.path.exists(simple_fn):
            new_fn = simple_fn + '.adopted-by-cephadm'
            logger.info('Renaming %s -> %s', simple_fn, new_fn)
            os.rename(simple_fn, new_fn)
            logger.info('Disabling host unit ceph-volume@ simple unit...')
            call(['systemctl', 'disable',
                  'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
        else:
            # assume this is an 'lvm' c-v for now, but don't error
            # out if it's not.
            logger.info('Disabling host unit ceph-volume@ lvm unit...')
            call(['systemctl', 'disable',
                  'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])

    # config
    config_src = '/etc/ceph/%s.conf' % (args.cluster)
    config_src = os.path.abspath(args.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'config')
    copy_files([config_src], config_dst, uid=uid, gid=gid)

    # logs
    logger.info('Moving logs...')
    log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
                   (args.cluster, daemon_type, daemon_id))
    log_dir_src = os.path.abspath(args.legacy_dir + log_dir_src)
    log_dir_dst = make_log_dir(fsid, uid=uid, gid=gid)
    move_files(glob(log_dir_src),
               log_dir_dst,
               uid=uid, gid=gid)

    logger.info('Creating new units...')
    make_var_run(fsid, uid, gid)
    c = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True,  # unconditionally enable the new unit
                        start=(state == 'running' or args.force_start),
                        osd_fsid=osd_fsid)
    update_firewalld(daemon_type)
4162
4163
def command_adopt_prometheus(daemon_id, fsid):
    # type: (str, str) -> None
    """Adopt a legacy prometheus daemon: stop the host unit, copy its
    config and metrics into the cephadm layout, and deploy a container."""

    daemon_type = 'prometheus'
    uid, gid = extract_uid_gid_monitoring(daemon_type)

    _stop_and_disable('prometheus')

    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = os.path.abspath(args.legacy_dir + '/etc/prometheus/prometheus.yml')
    config_dst = os.path.join(data_dir_dst, 'etc/prometheus')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files([config_src], config_dst, uid=uid, gid=gid)

    # data
    data_src = os.path.abspath(args.legacy_dir + '/var/lib/prometheus/metrics/')
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree([data_src], data_dst, uid=uid, gid=gid)

    make_var_run(fsid, uid, gid)
    c = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(daemon_type)
4192
f6b5b4d7 4193
9f95a23c
TL
def command_adopt_grafana(daemon_id, fsid):
    # type: (str, str) -> None
    """Adopt a legacy grafana daemon: stop the host unit, copy its config,
    certs, provisioning and data into the cephadm layout, and deploy a
    containerized unit."""

    daemon_type = 'grafana'
    uid, gid = extract_uid_gid_monitoring(daemon_type)

    _stop_and_disable('grafana-server')

    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # main config file
    config_src = os.path.abspath(args.legacy_dir + '/etc/grafana/grafana.ini')
    config_dst = os.path.join(data_dir_dst, 'etc/grafana')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files([config_src], config_dst, uid=uid, gid=gid)

    # provisioning tree lands next to the config
    prov_src = os.path.abspath(args.legacy_dir + '/etc/grafana/provisioning/')
    copy_tree([prov_src], config_dst, uid=uid, gid=gid)

    # ssl cert + key, only if both are present on the host
    cert = '/etc/grafana/grafana.crt'
    key = '/etc/grafana/grafana.key'
    if os.path.exists(cert) and os.path.exists(key):
        certs_dir = os.path.join(data_dir_dst, 'etc/grafana/certs')
        makedirs(certs_dir, uid, gid, 0o755)

        cert_src = os.path.abspath(args.legacy_dir + cert)
        copy_files([cert_src], os.path.join(certs_dir, 'cert_file'),
                   uid=uid, gid=gid)

        key_src = os.path.abspath(args.legacy_dir + key)
        copy_files([key_src], os.path.join(certs_dir, 'cert_key'),
                   uid=uid, gid=gid)

        # rewrite grafana.ini so it points at the copied cert/key
        _adjust_grafana_ini(os.path.join(config_dst, 'grafana.ini'))
    else:
        logger.debug("Skipping ssl, missing cert {} or key {}".format(cert, key))

    # data - possible custom dashboards/plugins
    data_src = os.path.abspath(args.legacy_dir + '/var/lib/grafana/')
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree([data_src], data_dst, uid=uid, gid=gid)

    make_var_run(fsid, uid, gid)
    c = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(daemon_type)
4246
f6b5b4d7 4247
801d1391
TL
def command_adopt_alertmanager(daemon_id, fsid):
    # type: (str, str) -> None
    """Adopt a legacy alertmanager daemon into cephadm management."""

    daemon_type = 'alertmanager'
    uid, gid = extract_uid_gid_monitoring(daemon_type)

    _stop_and_disable('prometheus-alertmanager')

    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = os.path.abspath(args.legacy_dir + '/etc/prometheus/alertmanager.yml')
    config_dst = os.path.join(data_dir_dst, 'etc/alertmanager')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files([config_src], config_dst, uid=uid, gid=gid)

    # data
    data_src = os.path.abspath(args.legacy_dir + '/var/lib/prometheus/alertmanager/')
    data_dst = os.path.join(data_dir_dst, 'etc/alertmanager/data')
    copy_tree([data_src], data_dst, uid=uid, gid=gid)

    make_var_run(fsid, uid, gid)
    c = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(daemon_type)
4276
f6b5b4d7 4277
9f95a23c
TL
def _adjust_grafana_ini(filename):
    # type: (str) -> None
    """Rewrite cert_file/cert_key in the [server] section of a grafana.ini.

    ConfigParser does not preserve comments, so the file is edited
    line by line via a temporary '<filename>.new' and then renamed over
    the original.
    """
    try:
        with open(filename, "r") as src:
            lines = src.readlines()
        tmp_name = "{}.new".format(filename)
        with open(tmp_name, "w") as dst:
            in_server = False
            for line in lines:
                # track which ini section we are in
                if line.startswith('['):
                    in_server = line.startswith('[server]')
                if in_server:
                    line = re.sub(r'^cert_file.*',
                                  'cert_file = /etc/grafana/certs/cert_file', line)
                    line = re.sub(r'^cert_key.*',
                                  'cert_key = /etc/grafana/certs/cert_key', line)
                dst.write(line)
        os.rename(tmp_name, filename)
    except OSError as err:
        raise Error("Cannot update {}: {}".format(filename, err))
4302
4303
def _stop_and_disable(unit_name):
    # type: (str) -> None
    """Stop unit_name if it is running, then disable it if it is enabled."""
    enabled, state, _ = check_unit(unit_name)
    actions = []
    if state == 'running':
        actions.append(('stop', 'Stopping old systemd unit %s...'))
    if enabled:
        actions.append(('disable', 'Disabling old systemd unit %s...'))
    for verb, msg in actions:
        logger.info(msg % unit_name)
        call_throws(['systemctl', verb, unit_name])
4314
4315
4316##################################
4317
def command_rm_daemon():
    # type: () -> None
    """Remove one daemon (args.name) from this host.

    mon/osd require --force; precious data dirs are renamed into a
    'removed' backup dir unless --force-delete-data is given.
    """
    lock = FileLock(args.fsid)
    lock.acquire()

    unit_name = get_unit_name_by_daemon_name(args.fsid, args.name)

    daemon_type, daemon_id = args.name.split('.', 1)
    if daemon_type in ['mon', 'osd'] and not args.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    # best-effort teardown of the systemd unit
    for action in ['stop', 'reset-failed', 'disable']:
        call(['systemctl', action, unit_name],
             verbosity=CallVerbosity.DEBUG)

    data_dir = get_data_dir(args.fsid, daemon_type, daemon_id)
    if daemon_type in ['mon', 'osd', 'prometheus'] and \
       not args.force_delete_data:
        # rename it out of the way -- do not delete
        backup_dir = os.path.join(args.data_dir, args.fsid, 'removed')
        if not os.path.exists(backup_dir):
            makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
        dirname = '%s.%s_%s' % (daemon_type, daemon_id,
                                datetime.datetime.utcnow().strftime(DATEFMT))
        os.rename(data_dir, os.path.join(backup_dir, dirname))
    else:
        call_throws(['rm', '-rf', data_dir])
4350
4351##################################
4352
f6b5b4d7 4353
9f95a23c
TL
def command_rm_cluster():
    # type: () -> None
    """Remove all traces of the cluster args.fsid from this host:
    systemd units, data dirs, logs, logrotate config, and (if they
    reference this fsid) /etc/ceph config/keyring/pub files."""
    if not args.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    lock = FileLock(args.fsid)
    lock.acquire()

    def _tear_down_unit(unit):
        # type: (str) -> None
        # best-effort: units may not exist, so plain call() (no throw)
        for action in ['stop', 'reset-failed', 'disable']:
            call(['systemctl', action, unit],
                 verbosity=CallVerbosity.DEBUG)

    # stop + disable individual daemon units
    for d in list_daemons(detail=False):
        if d['fsid'] != args.fsid:
            continue
        if d['style'] != 'cephadm:v1':
            continue
        _tear_down_unit(get_unit_name(args.fsid, d['name']))

    # cluster units
    _tear_down_unit('ceph-%s.target' % args.fsid)

    slice_name = 'system-%s.slice' % (('ceph-%s' % args.fsid).replace('-',
                                                                      '\\x2d'))
    call(['systemctl', 'stop', slice_name],
         verbosity=CallVerbosity.DEBUG)

    # rm units
    call_throws(['rm', '-f', args.unit_dir +
                 '/ceph-%s@.service' % args.fsid])
    call_throws(['rm', '-f', args.unit_dir +
                 '/ceph-%s.target' % args.fsid])
    call_throws(['rm', '-rf',
                 args.unit_dir + '/ceph-%s.target.wants' % args.fsid])
    # rm data
    call_throws(['rm', '-rf', args.data_dir + '/' + args.fsid])
    # rm logs
    call_throws(['rm', '-rf', args.log_dir + '/' + args.fsid])
    call_throws(['rm', '-rf', args.log_dir +
                 '/*.wants/ceph-%s@*' % args.fsid])
    # rm logrotate config
    call_throws(['rm', '-f', args.logrotate_dir + '/ceph-%s' % args.fsid])

    # clean up config, keyring, and pub key files -- but only when the
    # conf actually references this fsid
    files = ['/etc/ceph/ceph.conf', '/etc/ceph/ceph.pub', '/etc/ceph/ceph.client.admin.keyring']
    if os.path.exists(files[0]):
        with open(files[0]) as f:
            valid_fsid = args.fsid in f.read()
        if valid_fsid:
            for fn in files:
                if os.path.exists(fn):
                    os.remove(fn)
4419
9f95a23c
TL
4420
4421##################################
4422
def check_time_sync(enabler=None):
    # type: (Optional[Packager]) -> bool
    """Return True if a known time-sync service is active.

    When `enabler` is given, check_units may also try to enable one.
    """
    units = [
        'chrony.service',              # 18.04 (at least)
        'chronyd.service',             # el / opensuse
        'systemd-timesyncd.service',
        'ntpd.service',                # el7 (at least)
        'ntp.service',                 # 18.04 (at least)
        'ntpsec.service',              # 20.04 (at least) / buster
    ]
    if check_units(units, enabler):
        return True
    logger.warning('No time sync service is running; checked for %s' % units)
    return False
4437
f6b5b4d7 4438
9f95a23c
TL
def command_check_host():
    # type: () -> None
    """Check that this host meets cephadm's runtime requirements.

    Verifies a container runtime (podman/docker), required binaries
    (systemctl, lvcreate), an active time-sync service, and optionally
    the expected hostname.  Collects all problems and raises a single
    Error listing them; logs 'Host looks OK' on success.
    """
    global container_path

    errors = []
    commands = ['systemctl', 'lvcreate']

    if args.docker:
        container_path = find_program('docker')
    else:
        for i in CONTAINER_PREFERENCE:
            try:
                container_path = find_program(i)
                break
            except Exception as e:
                logger.debug('Could not locate %s: %s' % (i, e))
        if not container_path:
            errors.append('Unable to locate any of %s' % CONTAINER_PREFERENCE)
        else:
            logger.info('podman|docker (%s) is present' % container_path)

    for command in commands:
        try:
            find_program(command)
            logger.info('%s is present' % command)
        except ValueError:
            errors.append('%s binary does not appear to be installed' % command)

    # check for configured+running chronyd or ntp
    if not check_time_sync():
        errors.append('No time synchronization is active')

    if 'expect_hostname' in args and args.expect_hostname:
        if get_hostname().lower() != args.expect_hostname.lower():
            errors.append('hostname "%s" does not match expected hostname "%s"' % (
                get_hostname(), args.expect_hostname))
        else:
            # fix: this success message used to be logged unconditionally,
            # even when the hostname did NOT match
            logger.info('Hostname "%s" matches what is expected.',
                        args.expect_hostname)

    if errors:
        raise Error('\n'.join(errors))

    logger.info('Host looks OK')
4482
4483##################################
4484
f6b5b4d7 4485
9f95a23c
TL
def command_prepare_host():
    # type: () -> None
    """Install anything the host is missing (container runtime, lvm2,
    time sync), optionally fix the hostname, then re-run the host check."""
    logger.info('Verifying podman|docker is present...')
    pkg = None
    if not container_path:
        pkg = create_packager()
        pkg.install_podman()

    logger.info('Verifying lvm2 is present...')
    if not find_executable('lvcreate'):
        if not pkg:
            pkg = create_packager()
        pkg.install(['lvm2'])

    logger.info('Verifying time synchronization is in place...')
    if not check_time_sync():
        if not pkg:
            pkg = create_packager()
        pkg.install(['chrony'])
        # check again, and this time try to enable the service
        check_time_sync(enabler=pkg)

    if 'expect_hostname' in args and args.expect_hostname and args.expect_hostname != get_hostname():
        logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), args.expect_hostname))
        call_throws(['hostname', args.expect_hostname])
        with open('/etc/hostname', 'w') as f:
            f.write(args.expect_hostname + '\n')

    logger.info('Repeating the final host check...')
    command_check_host()
4518
4519##################################
4520
f6b5b4d7 4521
9f95a23c
TL
class CustomValidation(argparse.Action):
    """argparse action that validates daemon names of the form <type>.<id>."""

    def _check_name(self, values):
        # must split into exactly <type>.<id>
        try:
            dtype, _ = values.split('.', 1)
        except ValueError:
            raise argparse.ArgumentError(self,
                "must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com")

        # the type component must be a daemon cephadm knows about
        known = get_supported_daemons()
        if dtype not in known:
            raise argparse.ArgumentError(self,
                "name must declare the type of daemon e.g. "
                "{}".format(', '.join(known)))

    def __call__(self, parser, namespace, values, option_string=None):
        if self.dest == "name":
            self._check_name(values)
        setattr(namespace, self.dest, values)
4541
4542##################################
4543
f6b5b4d7 4544
9f95a23c 4545def get_distro():
e306af50 4546 # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
9f95a23c
TL
4547 distro = None
4548 distro_version = None
4549 distro_codename = None
4550 with open('/etc/os-release', 'r') as f:
4551 for line in f.readlines():
4552 line = line.strip()
4553 if '=' not in line or line.startswith('#'):
4554 continue
4555 (var, val) = line.split('=', 1)
4556 if val[0] == '"' and val[-1] == '"':
4557 val = val[1:-1]
4558 if var == 'ID':
4559 distro = val.lower()
4560 elif var == 'VERSION_ID':
4561 distro_version = val.lower()
4562 elif var == 'VERSION_CODENAME':
4563 distro_codename = val.lower()
4564 return distro, distro_version, distro_codename
4565
f6b5b4d7 4566
9f95a23c
TL
class Packager(object):
    """Base class for distro-specific package/repo helpers."""

    def __init__(self, stable=None, version=None, branch=None, commit=None):
        # at most one "source" may be selected: a stable release, a
        # specific version, or a branch (optionally pinned to a commit)
        assert \
            (stable and not version and not branch and not commit) or \
            (not stable and version and not branch and not commit) or \
            (not stable and not version and branch) or \
            (not stable and not version and not branch and not commit)
        self.stable = stable
        self.version = version
        self.branch = branch
        self.commit = commit

    def add_repo(self):
        raise NotImplementedError

    def rm_repo(self):
        raise NotImplementedError

    def query_shaman(self, distro, distro_version, branch, commit):
        """Resolve a dev-build repo file via shaman, then fetch it from chacra."""
        logger.info('Fetching repo metadata from shaman and chacra...')
        shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
            distro=distro,
            distro_version=distro_version,
            branch=branch,
            sha1=commit or 'latest',
            arch=get_arch()
        )
        try:
            shaman_response = urlopen(shaman_url)
        except HTTPError as err:
            logger.error('repository not found in shaman (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, shaman_url))
        try:
            chacra_url = shaman_response.geturl()
            chacra_response = urlopen(chacra_url)
        except HTTPError as err:
            logger.error('repository not found in chacra (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, chacra_url))
        return chacra_response.read().decode('utf-8')

    def repo_gpgkey(self):
        """Return (key URL, key name); release key for stable/version builds."""
        if args.gpg_url:
            return args.gpg_url
        if self.stable or self.version:
            return 'https://download.ceph.com/keys/release.asc', 'release'
        return 'https://download.ceph.com/keys/autobuild.asc', 'autobuild'

    def enable_service(self, service):
        """
        Start and enable the service (typically using systemd).
        """
        call_throws(['systemctl', 'enable', '--now', service])
4621
4622
class Apt(Packager):
    """Packager implementation for Debian/Ubuntu hosts (apt)."""

    DISTRO_NAMES = {
        'ubuntu': 'ubuntu',
        'debian': 'debian',
    }

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version, distro_codename):
        super(Apt, self).__init__(stable=stable, version=version,
                                  branch=branch, commit=commit)
        self.distro = self.DISTRO_NAMES[distro]
        self.distro_codename = distro_codename
        self.distro_version = distro_version

    def repo_path(self):
        return '/etc/apt/sources.list.d/ceph.list'

    def add_repo(self):
        # install the GPG key first
        url, name = self.repo_gpgkey()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'w') as f:
            f.write(key)

        # then the sources.list entry (version takes precedence over stable;
        # otherwise ask shaman for a dev-build repo)
        release = self.version or self.stable
        if release:
            content = 'deb %s/debian-%s/ %s main\n' % (
                args.repo_url, release, self.distro_codename)
        else:
            content = self.query_shaman(self.distro, self.distro_codename,
                                        self.branch, self.commit)

        logger.info('Installing repo file at %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        for name in ['autobuild', 'release']:
            p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
            if os.path.exists(p):
                logger.info('Removing repo GPG key %s...' % p)
                os.unlink(p)
        if os.path.exists(self.repo_path()):
            logger.info('Removing repo at %s...' % self.repo_path())
            os.unlink(self.repo_path())

        if self.distro == 'ubuntu':
            self.rm_kubic_repo()

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws(['apt-get', 'install', '-y'] + ls)

    def install_podman(self):
        if self.distro == 'ubuntu':
            logger.info('Setting up repo for podman...')
            self.add_kubic_repo()
            call_throws(['apt-get', 'update'])

        logger.info('Attempting podman install...')
        try:
            self.install(['podman'])
        except Error:
            logger.info('Podman did not work. Falling back to docker...')
            self.install(['docker.io'])

    def kubic_repo_url(self):
        return 'https://download.opensuse.org/repositories/devel:/kubic:/' \
               'libcontainers:/stable/xUbuntu_%s/' % self.distro_version

    def kubic_repo_path(self):
        return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list'

    def kubric_repo_gpgkey_url(self):
        # NOTE: the 'kubric' misspelling is kept for interface compatibility
        return '%s/Release.key' % self.kubic_repo_url()

    def kubric_repo_gpgkey_path(self):
        return '/etc/apt/trusted.gpg.d/kubic.release.gpg'

    def add_kubic_repo(self):
        url = self.kubric_repo_gpgkey_url()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        tmp_key = write_tmp(key, 0, 0)
        keyring = self.kubric_repo_gpgkey_path()
        call_throws(['apt-key', '--keyring', keyring, 'add', tmp_key.name])

        logger.info('Installing repo file at %s...' % self.kubic_repo_path())
        content = 'deb %s /\n' % self.kubic_repo_url()
        with open(self.kubic_repo_path(), 'w') as f:
            f.write(content)

    def rm_kubic_repo(self):
        keyring = self.kubric_repo_gpgkey_path()
        if os.path.exists(keyring):
            logger.info('Removing repo GPG key %s...' % keyring)
            os.unlink(keyring)

        p = self.kubic_repo_path()
        if os.path.exists(p):
            logger.info('Removing repo at %s...' % p)
            os.unlink(p)
4739
f6b5b4d7 4740
9f95a23c
TL
class YumDnf(Packager):
    """Packager implementation for yum/dnf-based distros (CentOS/RHEL/Fedora)."""

    # distro id -> (shaman distro name, repo code prefix)
    DISTRO_NAMES = {
        'centos': ('centos', 'el'),
        'rhel': ('centos', 'el'),
        'scientific': ('centos', 'el'),
        'fedora': ('fedora', 'fc'),
    }

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version):
        super(YumDnf, self).__init__(stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.major = int(distro_version.split('.')[0])
        self.distro_normalized = self.DISTRO_NAMES[distro][0]
        self.distro_code = self.DISTRO_NAMES[distro][1] + str(self.major)
        # BUGFIX: this used to compare self.distro_code (e.g. 'fc31', 'el8')
        # against the bare prefixes 'fc'/'el', which could never match, so
        # 'dnf' was never selected. Compare the prefix itself instead.
        prefix = self.DISTRO_NAMES[distro][1]
        if (prefix == 'fc' and self.major >= 30) or \
           (prefix == 'el' and self.major >= 8):
            self.tool = 'dnf'
        else:
            self.tool = 'yum'

    def custom_repo(self, **kw):
        """
        Repo files need special care in that a whole line should not be present
        if there is no value for it. Because we were using `format()` we could
        not conditionally add a line for a repo file. So the end result would
        contain a key with a missing value (say if we were passing `None`).

        For example, it could look like::

        [ceph repo]
        name= ceph repo
        proxy=
        gpgcheck=

        Which breaks. This function allows us to conditionally add lines,
        preserving an order and be more careful.

        Previously, and for historical purposes, this is how the template used
        to look::

        custom_repo =
        [{repo_name}]
        name={name}
        baseurl={baseurl}
        enabled={enabled}
        gpgcheck={gpgcheck}
        type={_type}
        gpgkey={gpgkey}
        proxy={proxy}

        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for tmpl_key, tmpl_value in tmpl:
            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self):
        # type: () -> str
        """Path of the yum/dnf repo file we manage."""
        return '/etc/yum.repos.d/ceph.repo'

    def repo_baseurl(self):
        # type: () -> str
        """Base URL of the rpm repo for the requested version or release."""
        assert self.stable or self.version
        if self.version:
            return '%s/rpm-%s/%s' % (args.repo_url, self.version,
                                     self.distro_code)
        else:
            return '%s/rpm-%s/%s' % (args.repo_url, self.stable,
                                     self.distro_code)

    def add_repo(self):
        # type: () -> None
        """Write the ceph repo file (or a shaman repo for dev builds)."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro_normalized, self.major,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        if self.distro_code.startswith('el'):
            logger.info('Enabling EPEL...')
            call_throws([self.tool, 'install', '-y', 'epel-release'])

    def rm_repo(self):
        # type: () -> None
        """Remove the ceph repo file, if present."""
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls):
        # type: (List[str]) -> None
        """Install the given packages with yum/dnf."""
        logger.info('Installing packages %s...' % ls)
        call_throws([self.tool, 'install', '-y'] + ls)

    def install_podman(self):
        # type: () -> None
        """Install podman from the distro repos."""
        self.install(['podman'])
4869
4870
class Zypper(Packager):
    """Packager implementation for SUSE-based distros."""

    DISTRO_NAMES = [
        'sles',
        'opensuse-tumbleweed',
        'opensuse-leap'
    ]

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version):
        super(Zypper, self).__init__(stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.tool = 'zypper'
        self.distro = 'opensuse'
        # default to the Leap 15.1 repo layout unless a concrete
        # (non-tumbleweed) version was detected
        self.distro_version = '15.1'
        if 'tumbleweed' not in distro and distro_version is not None:
            self.distro_version = distro_version

    def custom_repo(self, **kw):
        """
        See YumDnf for format explanation.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for tmpl_key, tmpl_value in tmpl:
            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self):
        # type: () -> str
        """Path of the zypper repo file we manage."""
        return '/etc/zypp/repos.d/ceph.repo'

    def repo_baseurl(self):
        # type: () -> str
        """Base URL of the rpm repo for the requested version or release."""
        assert self.stable or self.version
        if self.version:
            # BUGFIX: both branches previously used self.stable, so a
            # --version request built a wrong (or 'rpm-None') repo URL.
            return '%s/rpm-%s/%s' % (args.repo_url, self.version, self.distro)
        else:
            return '%s/rpm-%s/%s' % (args.repo_url, self.stable, self.distro)

    def add_repo(self):
        # type: () -> None
        """Write the ceph repo file (or a shaman repo for dev builds)."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro, self.distro_version,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        # type: () -> None
        """Remove the ceph repo file, if present."""
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls):
        # type: (List[str]) -> None
        """Install the given packages with zypper."""
        logger.info('Installing packages %s...' % ls)
        call_throws([self.tool, 'in', '-y'] + ls)

    def install_podman(self):
        # type: () -> None
        """Install podman from the distro repos."""
        self.install(['podman'])
4962
4963
def create_packager(stable=None, version=None, branch=None, commit=None):
    """Instantiate the Packager subclass matching the local distro.

    :raises Error: when the detected distro is not supported
    """
    distro, distro_version, distro_codename = get_distro()
    common = dict(stable=stable, version=version, branch=branch, commit=commit)
    if distro in YumDnf.DISTRO_NAMES:
        return YumDnf(distro=distro, distro_version=distro_version, **common)
    if distro in Apt.DISTRO_NAMES:
        return Apt(distro=distro, distro_version=distro_version,
                   distro_codename=distro_codename, **common)
    if distro in Zypper.DISTRO_NAMES:
        return Zypper(distro=distro, distro_version=distro_version, **common)
    raise Error('Distro %s version %s not supported' % (distro, distro_version))
4980
4981
def command_add_repo():
    # type: () -> None
    """Configure a ceph package repo on this host.

    Exactly one of --release, --version, --dev or --dev-commit must be
    supplied; --release and --version are mutually exclusive.

    :raises Error: on invalid/missing argument combinations
    """
    if args.version and args.release:
        raise Error('you can specify either --release or --version but not both')
    if not args.version and not args.release and not args.dev and not args.dev_commit:
        raise Error('please supply a --release, --version, --dev or --dev-commit argument')
    if args.version:
        # require a full x.y.z version string; an explicit length check
        # replaces the old broad `except Exception` around tuple unpacking
        if len(args.version.split('.')) != 3:
            raise Error('version must be in the form x.y.z (e.g., 15.2.0)')

    pkg = create_packager(stable=args.release,
                          version=args.version,
                          branch=args.dev,
                          commit=args.dev_commit)
    pkg.add_repo()
4998
f6b5b4d7 4999
9f95a23c
TL
def command_rm_repo():
    # type: () -> None
    """Remove any ceph package repo configuration from this host."""
    create_packager().rm_repo()
5003
f6b5b4d7 5004
9f95a23c
TL
def command_install():
    # type: () -> None
    """Install the packages requested on the command line."""
    create_packager().install(args.packages)
5008
5009##################################
5010
f91f0fd5
TL
def get_ipv4_address(ifname):
    # type: (str) -> str
    """Return 'addr/prefixlen' for the given interface, or '' without IPv4.

    Uses SIOCGIFADDR/SIOCGIFNETMASK ioctls on a throwaway UDP socket.

    :param ifname: interface name (truncated to 15 chars, the kernel limit)
    """
    def _extract(sock, offset):
        return socket.inet_ntop(
            socket.AF_INET,
            fcntl.ioctl(
                sock.fileno(),
                offset,
                struct.pack('256s', bytes(ifname[:15], 'utf-8'))
            )[20:24])

    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        try:
            addr = _extract(s, 35093)  # '0x8915' = SIOCGIFADDR
            dq_mask = _extract(s, 35099)  # 0x891b = SIOCGIFNETMASK
        except OSError:
            # interface does not have an ipv4 address
            return ''
    finally:
        # BUGFIX: the socket was previously leaked on every call
        s.close()

    # convert the dotted-quad netmask to a CIDR prefix length
    dec_mask = sum([bin(int(i)).count('1')
                    for i in dq_mask.split('.')])
    return '{}/{}'.format(addr, dec_mask)
5033
5034
def get_ipv6_address(ifname):
    # type: (str) -> str
    """Return 'addr/scope' for the interface from /proc/net/if_inet6, or ''."""
    if not os.path.exists('/proc/net/if_inet6'):
        return ''

    # based on docs @ https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/ch11s04.html
    # field 0 is ipv6, field 2 is scope
    for entry in read_file(['/proc/net/if_inet6']).splitlines():
        fields = entry.split()
        if fields[-1] != ifname:
            continue
        raw_hex = fields[0]
        grouped = ":".join([raw_hex[_p:_p+4] for _p in range(0, len(raw_hex), 4)])
        # apply naming rules using ipaddress module
        addr = ipaddress.ip_address(grouped)
        return "{}/{}".format(str(addr), int('0x{}'.format(fields[2]), 16))
    return ''
5053
5054
def bytes_to_human(num, mode='decimal'):
    # type: (float, str) -> str
    """Convert a bytes value into it's human-readable form.

    :param num: number, in bytes, to convert
    :param mode: Either decimal (default) or binary to determine divisor
    :returns: string representing the bytes value in a more readable format
    """
    if mode == 'binary':
        unit_list = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
        divisor = 1024.0
        yotta = "YiB"
    else:
        unit_list = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
        divisor = 1000.0
        yotta = "YB"

    for unit in unit_list:
        if abs(num) < divisor:
            return "%3.1f%s" % (num, unit)
        num /= divisor
    # fell through every unit: the value is in the yotta range
    return "%.1f%s" % (num, yotta)
5077
5078
def read_file(path_list, file_name=''):
    # type: (List[str], str) -> str
    """Returns the content of the first file found within the `path_list`

    :param path_list: list of file paths to search
    :param file_name: optional file_name to be applied to a file path
    :returns: content of the file or 'Unknown'
    """
    for base in path_list:
        file_path = os.path.join(base, file_name) if file_name else base
        if not os.path.exists(file_path):
            continue
        with open(file_path, 'r') as f:
            try:
                return f.read().strip()
            except OSError:
                # sysfs may populate the file, but for devices like
                # virtio reads can fail
                return "Unknown"
    return "Unknown"
5103
5104
5105##################################
class HostFacts():
    """Gather host-level metadata: cpu, memory, disks, NICs, OS and security.

    All public attributes/properties are plain data types so that ``dump()``
    can serialize them to JSON.
    """

    _dmi_path_list = ['/sys/class/dmi/id']
    _nic_path_list = ['/sys/class/net']
    _selinux_path_list = ['/etc/selinux/config']
    _apparmor_path_list = ['/etc/apparmor']
    # some devices report an opaque vendor id; map the known ones to a name
    _disk_vendor_workarounds = {
        "0x1af4": "Virtio Block Device"
    }

    def __init__(self):
        # type: () -> None
        self.cpu_model = 'Unknown'
        self.cpu_count = 0
        self.cpu_cores = 0
        self.cpu_threads = 0
        self.interfaces = {}  # type: Dict[str, Any]

        self._meminfo = read_file(['/proc/meminfo']).splitlines()
        self._get_cpuinfo()
        self._process_nics()
        self.arch = platform.processor()
        self.kernel = platform.release()

    def _get_cpuinfo(self):
        # type: () -> None
        """Determine cpu information via /proc/cpuinfo"""
        raw = read_file(['/proc/cpuinfo'])
        output = raw.splitlines()
        cpu_set = set()

        for line in output:
            field = [l.strip() for l in line.split(':')]
            if "model name" in line:
                self.cpu_model = field[1]
            if "physical id" in line:
                cpu_set.add(field[1])
            if "siblings" in line:
                self.cpu_threads = int(field[1].strip())
            if "cpu cores" in line:
                self.cpu_cores = int(field[1].strip())
        self.cpu_count = len(cpu_set)

    def _get_block_devs(self):
        # type: () -> List[str]
        """Determine the list of block devices by looking at /sys/block"""
        return [dev for dev in os.listdir('/sys/block')
                if not dev.startswith('dm')]

    def _get_devs_by_type(self, rota='0'):
        # type: (str) -> List[str]
        """Filter block devices by a given rotational attribute (0=flash, 1=spinner)"""
        devs = list()
        for blk_dev in self._get_block_devs():
            rot_path = '/sys/block/{}/queue/rotational'.format(blk_dev)
            rot_value = read_file([rot_path])
            if rot_value == rota:
                devs.append(blk_dev)
        return devs

    @property
    def operating_system(self):
        # type: () -> str
        """Determine OS version"""
        raw_info = read_file(['/etc/os-release'])
        os_release = raw_info.splitlines()
        rel_str = 'Unknown'
        rel_dict = dict()

        for line in os_release:
            if "=" in line:
                # BUGFIX: split only on the first '=' so quoted values that
                # themselves contain '=' do not raise ValueError
                var_name, var_value = line.split('=', 1)
                rel_dict[var_name] = var_value.strip('"')

        # Would normally use PRETTY_NAME, but NAME and VERSION are more
        # consistent
        if all(_v in rel_dict for _v in ["NAME", "VERSION"]):
            rel_str = "{} {}".format(rel_dict['NAME'], rel_dict['VERSION'])
        return rel_str

    @property
    def hostname(self):
        # type: () -> str
        """Return the hostname"""
        return platform.node()

    @property
    def subscribed(self):
        # type: () -> str
        """Highlevel check to see if the host is subscribed to receive updates/support"""
        def _red_hat():
            # type: () -> str
            # RHEL 7 and RHEL 8
            entitlements_dir = '/etc/pki/entitlement'
            if os.path.exists(entitlements_dir):
                pems = glob('{}/*.pem'.format(entitlements_dir))
                if len(pems) >= 2:
                    return "Yes"

            return "No"

        os_name = self.operating_system
        if os_name.upper().startswith("RED HAT"):
            return _red_hat()

        return "Unknown"

    @property
    def hdd_count(self):
        # type: () -> int
        """Return a count of HDDs (spinners)"""
        return len(self._get_devs_by_type(rota='1'))

    def _get_capacity(self, dev):
        # type: (str) -> int
        """Determine the size of a given device"""
        size_path = os.path.join('/sys/block', dev, 'size')
        size_blocks = int(read_file([size_path]))
        blk_path = os.path.join('/sys/block', dev, 'queue', 'logical_block_size')
        blk_count = int(read_file([blk_path]))
        return size_blocks * blk_count

    def _get_capacity_by_type(self, rota='0'):
        # type: (str) -> int
        """Return the total capacity of a category of device (flash or hdd)"""
        devs = self._get_devs_by_type(rota=rota)
        capacity = 0
        for dev in devs:
            capacity += self._get_capacity(dev)
        return capacity

    def _dev_list(self, dev_list):
        # type: (List[str]) -> List[Dict[str, object]]
        """Return a 'pretty' name list for each device in the `dev_list`"""
        disk_list = list()

        for dev in dev_list:
            disk_model = read_file(['/sys/block/{}/device/model'.format(dev)]).strip()
            disk_rev = read_file(['/sys/block/{}/device/rev'.format(dev)]).strip()
            disk_wwid = read_file(['/sys/block/{}/device/wwid'.format(dev)]).strip()
            vendor = read_file(['/sys/block/{}/device/vendor'.format(dev)]).strip()
            disk_vendor = HostFacts._disk_vendor_workarounds.get(vendor, vendor)
            disk_size_bytes = self._get_capacity(dev)
            disk_list.append({
                "description": "{} {} ({})".format(disk_vendor, disk_model, bytes_to_human(disk_size_bytes)),
                "vendor": disk_vendor,
                "model": disk_model,
                "rev": disk_rev,
                "wwid": disk_wwid,
                "dev_name": dev,
                "disk_size_bytes": disk_size_bytes,
                }
            )
        return disk_list

    @property
    def hdd_list(self):
        # type: () -> List[Dict[str, object]]
        """Return a list of devices that are HDDs (spinners)"""
        devs = self._get_devs_by_type(rota='1')
        return self._dev_list(devs)

    @property
    def flash_list(self):
        # type: () -> List[Dict[str, object]]
        """Return a list of devices that are flash based (SSD, NVMe)"""
        devs = self._get_devs_by_type(rota='0')
        return self._dev_list(devs)

    @property
    def hdd_capacity_bytes(self):
        # type: () -> int
        """Return the total capacity for all HDD devices (bytes)"""
        return self._get_capacity_by_type(rota='1')

    @property
    def hdd_capacity(self):
        # type: () -> str
        """Return the total capacity for all HDD devices (human readable format)"""
        return bytes_to_human(self.hdd_capacity_bytes)

    @property
    def cpu_load(self):
        # type: () -> Dict[str, float]
        """Return the cpu load average data for the host"""
        raw = read_file(['/proc/loadavg']).strip()
        data = raw.split()
        return {
            "1min": float(data[0]),
            "5min": float(data[1]),
            "15min": float(data[2]),
        }

    @property
    def flash_count(self):
        # type: () -> int
        """Return the number of flash devices in the system (SSD, NVMe)"""
        return len(self._get_devs_by_type(rota='0'))

    @property
    def flash_capacity_bytes(self):
        # type: () -> int
        """Return the total capacity for all flash devices (bytes)"""
        return self._get_capacity_by_type(rota='0')

    @property
    def flash_capacity(self):
        # type: () -> str
        """Return the total capacity for all Flash devices (human readable format)"""
        return bytes_to_human(self.flash_capacity_bytes)

    def _process_nics(self):
        # type: () -> None
        """Look at the NIC devices and extract network related metadata"""
        # from https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
        hw_lookup = {
            "1": "ethernet",
            "32": "infiniband",
            "772": "loopback",
        }

        for nic_path in HostFacts._nic_path_list:
            if not os.path.exists(nic_path):
                continue
            for iface in os.listdir(nic_path):

                lower_devs_list = [os.path.basename(link.replace("lower_", "")) for link in glob(os.path.join(nic_path, iface, "lower_*"))]
                upper_devs_list = [os.path.basename(link.replace("upper_", "")) for link in glob(os.path.join(nic_path, iface, "upper_*"))]

                try:
                    mtu = int(read_file([os.path.join(nic_path, iface, 'mtu')]))
                except ValueError:
                    mtu = 0

                operstate = read_file([os.path.join(nic_path, iface, 'operstate')])
                try:
                    speed = int(read_file([os.path.join(nic_path, iface, 'speed')]))
                except (OSError, ValueError):
                    # OSError : device doesn't support the ethtool get_link_ksettings
                    # ValueError : raised when the read fails, and returns Unknown
                    #
                    # Either way, we show a -1 when speed isn't available
                    speed = -1

                if os.path.exists(os.path.join(nic_path, iface, 'bridge')):
                    nic_type = "bridge"
                elif os.path.exists(os.path.join(nic_path, iface, 'bonding')):
                    nic_type = "bonding"
                else:
                    nic_type = hw_lookup.get(read_file([os.path.join(nic_path, iface, 'type')]), "Unknown")

                dev_link = os.path.join(nic_path, iface, 'device')
                if os.path.exists(dev_link):
                    iftype = 'physical'
                    driver_path = os.path.join(dev_link, 'driver')
                    if os.path.exists(driver_path):
                        driver = os.path.basename(
                            os.path.realpath(driver_path))
                    else:
                        driver = 'Unknown'

                else:
                    iftype = 'logical'
                    driver = ''

                self.interfaces[iface] = {
                    "mtu": mtu,
                    "upper_devs_list": upper_devs_list,
                    "lower_devs_list": lower_devs_list,
                    "operstate": operstate,
                    "iftype": iftype,
                    "nic_type": nic_type,
                    "driver": driver,
                    "speed": speed,
                    "ipv4_address": get_ipv4_address(iface),
                    "ipv6_address": get_ipv6_address(iface),
                }

    @property
    def nic_count(self):
        # type: () -> int
        """Return a total count of all physical NICs detected in the host"""
        return len([iface for iface in self.interfaces
                    if self.interfaces[iface]["iftype"] == 'physical'])

    def _get_mem_data(self, field_name):
        # type: (str) -> int
        """Return the named /proc/meminfo value (in kB), or 0 if absent."""
        for line in self._meminfo:
            if line.startswith(field_name):
                _d = line.split()
                return int(_d[1])
        return 0

    @property
    def memory_total_kb(self):
        # type: () -> int
        """Determine the memory installed (kb)"""
        return self._get_mem_data('MemTotal')

    @property
    def memory_free_kb(self):
        # type: () -> int
        """Determine the memory free (not cache, immediately usable)"""
        return self._get_mem_data('MemFree')

    @property
    def memory_available_kb(self):
        # type: () -> int
        """Determine the memory available to new applications without swapping"""
        return self._get_mem_data('MemAvailable')

    @property
    def vendor(self):
        # type: () -> str
        """Determine server vendor from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, "sys_vendor")

    @property
    def model(self):
        # type: () -> str
        """Determine server model information from DMI data in sysfs"""
        family = read_file(HostFacts._dmi_path_list, "product_family")
        product = read_file(HostFacts._dmi_path_list, "product_name")
        if family == 'Unknown' and product:
            return "{}".format(product)

        return "{} ({})".format(family, product)

    @property
    def bios_version(self):
        # type: () -> str
        """Determine server BIOS version from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, "bios_version")

    @property
    def bios_date(self):
        # type: () -> str
        """Determine server BIOS date from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, "bios_date")

    @property
    def timestamp(self):
        # type: () -> float
        """Return the current time as Epoch seconds"""
        return time.time()

    @property
    def system_uptime(self):
        # type: () -> float
        """Return the system uptime (in secs)"""
        raw_time = read_file(['/proc/uptime'])
        up_secs, _ = raw_time.split()
        return float(up_secs)

    def kernel_security(self):
        # type: () -> Dict[str, str]
        """Determine the security features enabled in the kernel - SELinux, AppArmor"""
        def _fetch_selinux():
            # type: () -> Dict[str, str]
            """Read the selinux config file to determine state"""
            security = {}
            for selinux_path in HostFacts._selinux_path_list:
                if os.path.exists(selinux_path):
                    selinux_config = read_file([selinux_path]).splitlines()
                    security['type'] = 'SELinux'
                    for line in selinux_config:
                        line = line.strip()
                        # BUGFIX: skip blank lines and lines without '='
                        # (these previously crashed the tuple unpacking)
                        if not line or line.startswith('#') or '=' not in line:
                            continue
                        k, v = line.split('=', 1)
                        security[k] = v
                    if security['SELINUX'].lower() == "disabled":
                        security['description'] = "SELinux: Disabled"
                    else:
                        security['description'] = "SELinux: Enabled({}, {})".format(security['SELINUX'], security['SELINUXTYPE'])
            return security

        def _fetch_apparmor():
            # type: () -> Dict[str, str]
            """Read the apparmor profiles directly, returning an overview of AppArmor status"""
            security = {}
            for apparmor_path in HostFacts._apparmor_path_list:
                if os.path.exists(apparmor_path):
                    security['type'] = "AppArmor"
                    security['description'] = "AppArmor: Enabled"
                    try:
                        profiles = read_file(['/sys/kernel/security/apparmor/profiles'])
                    except OSError:
                        pass
                    else:
                        summary = {}  # type: Dict[str, int]
                        for line in profiles.split('\n'):
                            # BUGFIX: a trailing blank line used to crash the
                            # unpacking below
                            if not line.strip():
                                continue
                            # profile names may contain spaces; the "(mode)"
                            # token is always last on the line
                            item, mode = line.rsplit(' ', 1)
                            mode = mode.strip('()')
                            # BUGFIX: count from 1 — the first profile seen
                            # per mode was previously initialized to 0 and
                            # therefore never counted
                            summary[mode] = summary.get(mode, 0) + 1
                        summary_str = ",".join(["{} {}".format(v, k) for k, v in summary.items()])
                        security = {**security, **summary}  # type: ignore
                        security['description'] += "({})".format(summary_str)

            return security

        if os.path.exists('/sys/kernel/security/lsm'):
            lsm = read_file(['/sys/kernel/security/lsm']).strip()
            if 'selinux' in lsm:
                return _fetch_selinux()
            elif 'apparmor' in lsm:
                return _fetch_apparmor()
            else:
                return {
                    "type": "Unknown",
                    "description": "Linux Security Module framework is active, but is not using SELinux or AppArmor"
                }

        return {
            "type": "None",
            "description": "Linux Security Module framework is not available"
        }

    @property
    def kernel_parameters(self):
        # type: () -> Dict[str, str]
        """Get kernel parameters required/used in Ceph clusters"""

        k_param = {}
        out, _, _ = call_throws(['sysctl', '-a'], verbosity=CallVerbosity.SILENT)
        if out:
            param_list = out.split('\n')
            param_dict = { param.split(" = ")[0]:param.split(" = ")[-1] for param in param_list}

            # return only desired parameters
            if 'net.ipv4.ip_nonlocal_bind' in param_dict:
                k_param['net.ipv4.ip_nonlocal_bind'] = param_dict['net.ipv4.ip_nonlocal_bind']

        return k_param

    def dump(self):
        # type: () -> str
        """Return the attributes of this HostFacts object as json"""
        data = {k: getattr(self, k) for k in dir(self)
                if not k.startswith('_') and
                isinstance(getattr(self, k),
                           (float, int, str, list, dict, tuple))
                }
        return json.dumps(data, indent=2, sort_keys=True)
5553
5554##################################
5555
def command_gather_facts():
    # type: () -> None
    """gather_facts is intended to provide host related metadata to the caller"""
    facts = HostFacts()
    print(facts.dump())
5560
5561
5562##################################
5563
f6b5b4d7 5564
9f95a23c
TL
5565def _get_parser():
5566 # type: () -> argparse.ArgumentParser
5567 parser = argparse.ArgumentParser(
5568 description='Bootstrap Ceph daemons with systemd and containers.',
5569 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
5570 parser.add_argument(
5571 '--image',
5572 help='container image. Can also be set via the "CEPHADM_IMAGE" '
5573 'env var')
5574 parser.add_argument(
5575 '--docker',
5576 action='store_true',
5577 help='use docker instead of podman')
5578 parser.add_argument(
5579 '--data-dir',
5580 default=DATA_DIR,
5581 help='base directory for daemon data')
5582 parser.add_argument(
5583 '--log-dir',
5584 default=LOG_DIR,
5585 help='base directory for daemon logs')
5586 parser.add_argument(
5587 '--logrotate-dir',
5588 default=LOGROTATE_DIR,
5589 help='location of logrotate configuration files')
5590 parser.add_argument(
5591 '--unit-dir',
5592 default=UNIT_DIR,
5593 help='base directory for systemd units')
5594 parser.add_argument(
5595 '--verbose', '-v',
5596 action='store_true',
5597 help='Show debug-level log messages')
5598 parser.add_argument(
5599 '--timeout',
5600 type=int,
5601 default=DEFAULT_TIMEOUT,
5602 help='timeout in seconds')
5603 parser.add_argument(
5604 '--retry',
5605 type=int,
5606 default=DEFAULT_RETRY,
5607 help='max number of retries')
e306af50
TL
5608 parser.add_argument(
5609 '--env', '-e',
5610 action='append',
5611 default=[],
5612 help='set environment variable')
9f95a23c
TL
5613
5614 subparsers = parser.add_subparsers(help='sub-command')
5615
5616 parser_version = subparsers.add_parser(
5617 'version', help='get ceph version from container')
5618 parser_version.set_defaults(func=command_version)
5619
5620 parser_pull = subparsers.add_parser(
5621 'pull', help='pull latest image version')
5622 parser_pull.set_defaults(func=command_pull)
5623
5624 parser_inspect_image = subparsers.add_parser(
5625 'inspect-image', help='inspect local container image')
5626 parser_inspect_image.set_defaults(func=command_inspect_image)
5627
5628 parser_ls = subparsers.add_parser(
5629 'ls', help='list daemon instances on this host')
5630 parser_ls.set_defaults(func=command_ls)
5631 parser_ls.add_argument(
5632 '--no-detail',
5633 action='store_true',
5634 help='Do not include daemon status')
5635 parser_ls.add_argument(
5636 '--legacy-dir',
5637 default='/',
5638 help='base directory for legacy daemon data')
5639
5640 parser_list_networks = subparsers.add_parser(
5641 'list-networks', help='list IP networks')
5642 parser_list_networks.set_defaults(func=command_list_networks)
5643
5644 parser_adopt = subparsers.add_parser(
5645 'adopt', help='adopt daemon deployed with a different tool')
5646 parser_adopt.set_defaults(func=command_adopt)
5647 parser_adopt.add_argument(
5648 '--name', '-n',
5649 required=True,
5650 help='daemon name (type.id)')
5651 parser_adopt.add_argument(
5652 '--style',
5653 required=True,
5654 help='deployment style (legacy, ...)')
5655 parser_adopt.add_argument(
5656 '--cluster',
5657 default='ceph',
5658 help='cluster name')
5659 parser_adopt.add_argument(
5660 '--legacy-dir',
5661 default='/',
5662 help='base directory for legacy daemon data')
5663 parser_adopt.add_argument(
5664 '--config-json',
5665 help='Additional configuration information in JSON format')
5666 parser_adopt.add_argument(
5667 '--skip-firewalld',
5668 action='store_true',
5669 help='Do not configure firewalld')
5670 parser_adopt.add_argument(
5671 '--skip-pull',
5672 action='store_true',
5673 help='do not pull the latest image before adopting')
1911f103
TL
5674 parser_adopt.add_argument(
5675 '--force-start',
5676 action='store_true',
5677 help="start newly adoped daemon, even if it wasn't running previously")
f91f0fd5
TL
5678 parser_adopt.add_argument(
5679 '--container-init',
5680 action='store_true',
5681 help='Run podman/docker with `--init`')
9f95a23c
TL
5682
5683 parser_rm_daemon = subparsers.add_parser(
5684 'rm-daemon', help='remove daemon instance')
5685 parser_rm_daemon.set_defaults(func=command_rm_daemon)
5686 parser_rm_daemon.add_argument(
5687 '--name', '-n',
5688 required=True,
5689 action=CustomValidation,
5690 help='daemon name (type.id)')
5691 parser_rm_daemon.add_argument(
5692 '--fsid',
5693 required=True,
5694 help='cluster FSID')
5695 parser_rm_daemon.add_argument(
5696 '--force',
5697 action='store_true',
5698 help='proceed, even though this may destroy valuable data')
5699 parser_rm_daemon.add_argument(
5700 '--force-delete-data',
5701 action='store_true',
5702 help='delete valuable daemon data instead of making a backup')
5703
5704 parser_rm_cluster = subparsers.add_parser(
5705 'rm-cluster', help='remove all daemons for a cluster')
5706 parser_rm_cluster.set_defaults(func=command_rm_cluster)
5707 parser_rm_cluster.add_argument(
5708 '--fsid',
5709 required=True,
5710 help='cluster FSID')
5711 parser_rm_cluster.add_argument(
5712 '--force',
5713 action='store_true',
5714 help='proceed, even though this may destroy valuable data')
5715
5716 parser_run = subparsers.add_parser(
5717 'run', help='run a ceph daemon, in a container, in the foreground')
5718 parser_run.set_defaults(func=command_run)
5719 parser_run.add_argument(
5720 '--name', '-n',
5721 required=True,
5722 help='daemon name (type.id)')
5723 parser_run.add_argument(
5724 '--fsid',
5725 required=True,
5726 help='cluster FSID')
5727
5728 parser_shell = subparsers.add_parser(
5729 'shell', help='run an interactive shell inside a daemon container')
5730 parser_shell.set_defaults(func=command_shell)
5731 parser_shell.add_argument(
5732 '--fsid',
5733 help='cluster FSID')
5734 parser_shell.add_argument(
5735 '--name', '-n',
5736 help='daemon name (type.id)')
5737 parser_shell.add_argument(
5738 '--config', '-c',
5739 help='ceph.conf to pass through to the container')
5740 parser_shell.add_argument(
5741 '--keyring', '-k',
5742 help='ceph.keyring to pass through to the container')
e306af50
TL
5743 parser_shell.add_argument(
5744 '--mount', '-m',
f91f0fd5
TL
5745 help=("mount a file or directory in the container. "
5746 "Support multiple mounts. "
5747 "ie: `--mount /foo /bar:/bar`. "
5748 "When no destination is passed, default is /mnt"),
5749 nargs='+')
9f95a23c
TL
5750 parser_shell.add_argument(
5751 '--env', '-e',
5752 action='append',
5753 default=[],
5754 help='set environment variable')
5755 parser_shell.add_argument(
e306af50 5756 'command', nargs=argparse.REMAINDER,
9f95a23c
TL
5757 help='command (optional)')
5758
5759 parser_enter = subparsers.add_parser(
5760 'enter', help='run an interactive shell inside a running daemon container')
5761 parser_enter.set_defaults(func=command_enter)
5762 parser_enter.add_argument(
5763 '--fsid',
5764 help='cluster FSID')
5765 parser_enter.add_argument(
5766 '--name', '-n',
5767 required=True,
5768 help='daemon name (type.id)')
5769 parser_enter.add_argument(
e306af50 5770 'command', nargs=argparse.REMAINDER,
9f95a23c
TL
5771 help='command')
5772
5773 parser_ceph_volume = subparsers.add_parser(
5774 'ceph-volume', help='run ceph-volume inside a container')
5775 parser_ceph_volume.set_defaults(func=command_ceph_volume)
5776 parser_ceph_volume.add_argument(
5777 '--fsid',
5778 help='cluster FSID')
5779 parser_ceph_volume.add_argument(
5780 '--config-json',
5781 help='JSON file with config and (client.bootrap-osd) key')
801d1391
TL
5782 parser_ceph_volume.add_argument(
5783 '--config', '-c',
5784 help='ceph conf file')
5785 parser_ceph_volume.add_argument(
5786 '--keyring', '-k',
5787 help='ceph.keyring to pass through to the container')
9f95a23c 5788 parser_ceph_volume.add_argument(
e306af50 5789 'command', nargs=argparse.REMAINDER,
9f95a23c
TL
5790 help='command')
5791
5792 parser_unit = subparsers.add_parser(
5793 'unit', help='operate on the daemon\'s systemd unit')
5794 parser_unit.set_defaults(func=command_unit)
5795 parser_unit.add_argument(
5796 'command',
5797 help='systemd command (start, stop, restart, enable, disable, ...)')
5798 parser_unit.add_argument(
5799 '--fsid',
5800 help='cluster FSID')
5801 parser_unit.add_argument(
5802 '--name', '-n',
5803 required=True,
5804 help='daemon name (type.id)')
5805
5806 parser_logs = subparsers.add_parser(
5807 'logs', help='print journald logs for a daemon container')
5808 parser_logs.set_defaults(func=command_logs)
5809 parser_logs.add_argument(
5810 '--fsid',
5811 help='cluster FSID')
5812 parser_logs.add_argument(
5813 '--name', '-n',
5814 required=True,
5815 help='daemon name (type.id)')
5816 parser_logs.add_argument(
5817 'command', nargs='*',
5818 help='additional journalctl args')
5819
5820 parser_bootstrap = subparsers.add_parser(
5821 'bootstrap', help='bootstrap a cluster (mon + mgr daemons)')
5822 parser_bootstrap.set_defaults(func=command_bootstrap)
5823 parser_bootstrap.add_argument(
5824 '--config', '-c',
5825 help='ceph conf file to incorporate')
5826 parser_bootstrap.add_argument(
5827 '--mon-id',
5828 required=False,
5829 help='mon id (default: local hostname)')
5830 parser_bootstrap.add_argument(
5831 '--mon-addrv',
5832 help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])')
5833 parser_bootstrap.add_argument(
5834 '--mon-ip',
5835 help='mon IP')
5836 parser_bootstrap.add_argument(
5837 '--mgr-id',
5838 required=False,
5839 help='mgr id (default: randomly generated)')
5840 parser_bootstrap.add_argument(
5841 '--fsid',
5842 help='cluster FSID')
5843 parser_bootstrap.add_argument(
5844 '--output-dir',
5845 default='/etc/ceph',
5846 help='directory to write config, keyring, and pub key files')
5847 parser_bootstrap.add_argument(
5848 '--output-keyring',
5849 help='location to write keyring file with new cluster admin and mon keys')
5850 parser_bootstrap.add_argument(
5851 '--output-config',
5852 help='location to write conf file to connect to new cluster')
5853 parser_bootstrap.add_argument(
5854 '--output-pub-ssh-key',
5855 help='location to write the cluster\'s public SSH key')
5856 parser_bootstrap.add_argument(
5857 '--skip-ssh',
5858 action='store_true',
5859 help='skip setup of ssh key on local host')
5860 parser_bootstrap.add_argument(
5861 '--initial-dashboard-user',
5862 default='admin',
5863 help='Initial user for the dashboard')
5864 parser_bootstrap.add_argument(
5865 '--initial-dashboard-password',
5866 help='Initial password for the initial dashboard user')
f6b5b4d7
TL
5867 parser_bootstrap.add_argument(
5868 '--ssl-dashboard-port',
5869 type=int,
5870 default = 8443,
5871 help='Port number used to connect with dashboard using SSL')
9f95a23c
TL
5872 parser_bootstrap.add_argument(
5873 '--dashboard-key',
e306af50 5874 type=argparse.FileType('r'),
9f95a23c
TL
5875 help='Dashboard key')
5876 parser_bootstrap.add_argument(
5877 '--dashboard-crt',
e306af50 5878 type=argparse.FileType('r'),
9f95a23c
TL
5879 help='Dashboard certificate')
5880
e306af50
TL
5881 parser_bootstrap.add_argument(
5882 '--ssh-config',
5883 type=argparse.FileType('r'),
5884 help='SSH config')
5885 parser_bootstrap.add_argument(
5886 '--ssh-private-key',
5887 type=argparse.FileType('r'),
5888 help='SSH private key')
5889 parser_bootstrap.add_argument(
5890 '--ssh-public-key',
5891 type=argparse.FileType('r'),
5892 help='SSH public key')
f6b5b4d7
TL
5893 parser_bootstrap.add_argument(
5894 '--ssh-user',
5895 default='root',
5896 help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users')
e306af50 5897
9f95a23c
TL
5898 parser_bootstrap.add_argument(
5899 '--skip-mon-network',
5900 action='store_true',
5901 help='set mon public_network based on bootstrap mon ip')
5902 parser_bootstrap.add_argument(
5903 '--skip-dashboard',
5904 action='store_true',
5905 help='do not enable the Ceph Dashboard')
5906 parser_bootstrap.add_argument(
5907 '--dashboard-password-noupdate',
5908 action='store_true',
5909 help='stop forced dashboard password change')
5910 parser_bootstrap.add_argument(
5911 '--no-minimize-config',
5912 action='store_true',
5913 help='do not assimilate and minimize the config file')
5914 parser_bootstrap.add_argument(
5915 '--skip-ping-check',
5916 action='store_true',
5917 help='do not verify that mon IP is pingable')
5918 parser_bootstrap.add_argument(
5919 '--skip-pull',
5920 action='store_true',
5921 help='do not pull the latest image before bootstrapping')
5922 parser_bootstrap.add_argument(
5923 '--skip-firewalld',
5924 action='store_true',
5925 help='Do not configure firewalld')
5926 parser_bootstrap.add_argument(
5927 '--allow-overwrite',
5928 action='store_true',
5929 help='allow overwrite of existing --output-* config/keyring/ssh files')
5930 parser_bootstrap.add_argument(
5931 '--allow-fqdn-hostname',
5932 action='store_true',
5933 help='allow hostname that is fully-qualified (contains ".")')
5934 parser_bootstrap.add_argument(
5935 '--skip-prepare-host',
5936 action='store_true',
5937 help='Do not prepare host')
5938 parser_bootstrap.add_argument(
5939 '--orphan-initial-daemons',
5940 action='store_true',
5941 help='Do not create initial mon, mgr, and crash service specs')
5942 parser_bootstrap.add_argument(
5943 '--skip-monitoring-stack',
5944 action='store_true',
5945 help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
e306af50
TL
5946 parser_bootstrap.add_argument(
5947 '--apply-spec',
5948 help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
5949
e306af50
TL
5950 parser_bootstrap.add_argument(
5951 '--shared_ceph_folder',
5952 metavar='CEPH_SOURCE_FOLDER',
5953 help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
9f95a23c 5954
f6b5b4d7
TL
5955 parser_bootstrap.add_argument(
5956 '--registry-url',
5957 help='url for custom registry')
5958 parser_bootstrap.add_argument(
5959 '--registry-username',
5960 help='username for custom registry')
5961 parser_bootstrap.add_argument(
5962 '--registry-password',
5963 help='password for custom registry')
5964 parser_bootstrap.add_argument(
5965 '--registry-json',
5966 help='json file with custom registry login info (URL, Username, Password)')
f91f0fd5
TL
5967 parser_bootstrap.add_argument(
5968 '--container-init',
5969 action='store_true',
5970 help='Run podman/docker with `--init`')
f6b5b4d7 5971
9f95a23c
TL
5972 parser_deploy = subparsers.add_parser(
5973 'deploy', help='deploy a daemon')
5974 parser_deploy.set_defaults(func=command_deploy)
5975 parser_deploy.add_argument(
5976 '--name',
5977 required=True,
5978 action=CustomValidation,
5979 help='daemon name (type.id)')
5980 parser_deploy.add_argument(
5981 '--fsid',
5982 required=True,
5983 help='cluster FSID')
5984 parser_deploy.add_argument(
5985 '--config', '-c',
5986 help='config file for new daemon')
5987 parser_deploy.add_argument(
5988 '--config-json',
5989 help='Additional configuration information in JSON format')
5990 parser_deploy.add_argument(
5991 '--keyring',
5992 help='keyring for new daemon')
5993 parser_deploy.add_argument(
5994 '--key',
5995 help='key for new daemon')
5996 parser_deploy.add_argument(
5997 '--osd-fsid',
5998 help='OSD uuid, if creating an OSD container')
5999 parser_deploy.add_argument(
6000 '--skip-firewalld',
6001 action='store_true',
6002 help='Do not configure firewalld')
f6b5b4d7
TL
6003 parser_deploy.add_argument(
6004 '--tcp-ports',
6005 help='List of tcp ports to open in the host firewall')
9f95a23c
TL
6006 parser_deploy.add_argument(
6007 '--reconfig',
6008 action='store_true',
6009 help='Reconfigure a previously deployed daemon')
6010 parser_deploy.add_argument(
6011 '--allow-ptrace',
6012 action='store_true',
6013 help='Allow SYS_PTRACE on daemon container')
f91f0fd5
TL
6014 parser_deploy.add_argument(
6015 '--container-init',
6016 action='store_true',
6017 help='Run podman/docker with `--init`')
9f95a23c
TL
6018
6019 parser_check_host = subparsers.add_parser(
6020 'check-host', help='check host configuration')
6021 parser_check_host.set_defaults(func=command_check_host)
6022 parser_check_host.add_argument(
6023 '--expect-hostname',
6024 help='Check that hostname matches an expected value')
6025
6026 parser_prepare_host = subparsers.add_parser(
6027 'prepare-host', help='prepare a host for cephadm use')
6028 parser_prepare_host.set_defaults(func=command_prepare_host)
6029 parser_prepare_host.add_argument(
6030 '--expect-hostname',
6031 help='Set hostname')
6032
6033 parser_add_repo = subparsers.add_parser(
6034 'add-repo', help='configure package repository')
6035 parser_add_repo.set_defaults(func=command_add_repo)
6036 parser_add_repo.add_argument(
6037 '--release',
1911f103 6038 help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE))
9f95a23c
TL
6039 parser_add_repo.add_argument(
6040 '--version',
6041 help='use specific upstream version (x.y.z)')
6042 parser_add_repo.add_argument(
6043 '--dev',
6044 help='use specified bleeding edge build from git branch or tag')
6045 parser_add_repo.add_argument(
6046 '--dev-commit',
6047 help='use specified bleeding edge build from git commit')
6048 parser_add_repo.add_argument(
6049 '--gpg-url',
6050 help='specify alternative GPG key location')
6051 parser_add_repo.add_argument(
6052 '--repo-url',
6053 default='https://download.ceph.com',
6054 help='specify alternative repo location')
6055 # TODO: proxy?
6056
6057 parser_rm_repo = subparsers.add_parser(
6058 'rm-repo', help='remove package repository configuration')
6059 parser_rm_repo.set_defaults(func=command_rm_repo)
6060
6061 parser_install = subparsers.add_parser(
6062 'install', help='install ceph package(s)')
6063 parser_install.set_defaults(func=command_install)
6064 parser_install.add_argument(
6065 'packages', nargs='*',
6066 default=['cephadm'],
6067 help='packages')
6068
f6b5b4d7
TL
6069 parser_registry_login = subparsers.add_parser(
6070 'registry-login', help='log host into authenticated registry')
6071 parser_registry_login.set_defaults(func=command_registry_login)
6072 parser_registry_login.add_argument(
6073 '--registry-url',
6074 help='url for custom registry')
6075 parser_registry_login.add_argument(
6076 '--registry-username',
6077 help='username for custom registry')
6078 parser_registry_login.add_argument(
6079 '--registry-password',
6080 help='password for custom registry')
6081 parser_registry_login.add_argument(
6082 '--registry-json',
6083 help='json file with custom registry login info (URL, Username, Password)')
6084 parser_registry_login.add_argument(
6085 '--fsid',
6086 help='cluster FSID')
6087
f91f0fd5
TL
6088 parser_gather_facts = subparsers.add_parser(
6089 'gather-facts', help='gather and return host related information (JSON format)')
6090 parser_gather_facts.set_defaults(func=command_gather_facts)
6091
9f95a23c
TL
6092 return parser
6093
f6b5b4d7 6094
9f95a23c
TL
def _parse_args(av):
    # type: (List[str]) -> argparse.Namespace
    """Parse the given argv-style list into a Namespace.

    argparse.REMAINDER keeps a leading '--' separator in the captured
    ``command`` list (e.g. ``cephadm shell -- ceph -s``); strip it so the
    stored command begins with the real first token.
    """
    parser = _get_parser()
    parsed = parser.parse_args(av)
    if 'command' in parsed and parsed.command and parsed.command[0] == '--':
        parsed.command = parsed.command[1:]
    return parsed
9f95a23c 6101
f6b5b4d7 6102
9f95a23c 6103if __name__ == "__main__":
f91f0fd5
TL
6104
6105 # root?
6106 if os.geteuid() != 0:
6107 sys.stderr.write('ERROR: cephadm should be run as root\n')
6108 sys.exit(1)
6109
6110 # Logger configuration
6111 if not os.path.exists(LOG_DIR):
6112 os.makedirs(LOG_DIR)
6113 dictConfig(logging_config)
6114 logger = logging.getLogger()
6115
9f95a23c
TL
6116 # allow argv to be injected
6117 try:
f6b5b4d7 6118 av = injected_argv # type: ignore
9f95a23c
TL
6119 except NameError:
6120 av = sys.argv[1:]
f91f0fd5 6121 logger.debug("%s\ncephadm %s" % ("-" * 80, av))
9f95a23c
TL
6122 args = _parse_args(av)
6123
f91f0fd5 6124 # More verbose console output
9f95a23c 6125 if args.verbose:
f91f0fd5
TL
6126 for handler in logger.handlers:
6127 if handler.name == "console":
6128 handler.setLevel(logging.DEBUG)
9f95a23c 6129
9f95a23c
TL
6130 if 'func' not in args:
6131 sys.stderr.write('No command specified; pass -h or --help for usage\n')
6132 sys.exit(1)
6133
1911f103
TL
6134 # podman or docker?
6135 if args.func != command_check_host:
6136 if args.docker:
6137 container_path = find_program('docker')
6138 else:
6139 for i in CONTAINER_PREFERENCE:
6140 try:
6141 container_path = find_program(i)
6142 break
6143 except Exception as e:
6144 logger.debug('Could not locate %s: %s' % (i, e))
6145 if not container_path and args.func != command_prepare_host\
6146 and args.func != command_add_repo:
6147 sys.stderr.write('Unable to locate any of %s\n' % CONTAINER_PREFERENCE)
6148 sys.exit(1)
6149
9f95a23c
TL
6150 try:
6151 r = args.func()
6152 except Error as e:
6153 if args.verbose:
6154 raise
6155 sys.stderr.write('ERROR: %s\n' % e)
6156 sys.exit(1)
6157 if not r:
6158 r = 0
6159 sys.exit(r)