]> git.proxmox.com Git - ceph.git/blame - ceph/src/cephadm/cephadm
bump version to 15.2.8-pve2
[ceph.git] / ceph / src / cephadm / cephadm
CommitLineData
9f95a23c
TL
#!/usr/bin/python3

# Container image pulled when the caller does not specify one (--image).
DEFAULT_IMAGE='docker.io/ceph/ceph:v15'
# True only when DEFAULT_IMAGE points at a master/devel build rather than a release.
DEFAULT_IMAGE_IS_MASTER=False
# Most recent stable Ceph release this script knows about.
LATEST_STABLE_RELEASE = 'octopus'
# Host paths used for cluster data, logs, locks and systemd/logrotate integration.
DATA_DIR = '/var/lib/ceph'
LOG_DIR = '/var/log/ceph'
LOCK_DIR = '/run/cephadm'
LOGROTATE_DIR = '/etc/logrotate.d'
UNIT_DIR = '/etc/systemd/system'
# Permission bits applied to directories created under LOG_DIR / DATA_DIR.
LOG_DIR_MODE = 0o770
DATA_DIR_MODE = 0o700
CONTAINER_PREFERENCE = ['podman', 'docker']  # prefer podman to docker
# Shell prompt string; presumably installed as PS1 inside `cephadm shell` -- verify against caller.
CUSTOM_PS1 = r'[ceph: \u@\h \W]\$ '
DEFAULT_TIMEOUT = None  # in seconds
DEFAULT_RETRY = 10
# Default config/keyring paths offered to the containerized shell.
SHELL_DEFAULT_CONF = '/etc/ceph/ceph.conf'
SHELL_DEFAULT_KEYRING = '/etc/ceph/ceph.client.admin.keyring'
9f95a23c
TL

# NOTE: this is deliberately a bare string statement (not the module
# docstring, since code precedes it); it documents the piping invocation mode.
"""
You can invoke cephadm in two ways:

1. The normal way, at the command line.

2. By piping the script to the python3 binary. In this latter case, you should
   prepend one or more lines to the beginning of the script.

   For arguments,

       injected_argv = [...]

   e.g.,

       injected_argv = ['ls']

   For reading stdin from the '--config-json -' argument,

       injected_stdin = '...'
"""
9f95a23c
TL
40import argparse
41import datetime
42import fcntl
f6b5b4d7 43import ipaddress
9f95a23c
TL
44import json
45import logging
f91f0fd5 46from logging.config import dictConfig
9f95a23c
TL
47import os
48import platform
f6b5b4d7 49import pwd
9f95a23c
TL
50import random
51import re
52import select
53import shutil
54import socket
55import string
56import subprocess
57import sys
58import tempfile
59import time
60import errno
f91f0fd5 61import struct
9f95a23c 62try:
f6b5b4d7 63 from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, Callable, IO
9f95a23c
TL
64except ImportError:
65 pass
f91f0fd5
TL
66
67import re
9f95a23c
TL
68import uuid
69
70from functools import wraps
71from glob import glob
72from threading import Thread
73
74if sys.version_info >= (3, 0):
75 from io import StringIO
76else:
77 from StringIO import StringIO
78
79if sys.version_info >= (3, 2):
80 from configparser import ConfigParser
81else:
82 from ConfigParser import SafeConfigParser
83
84if sys.version_info >= (3, 0):
85 from urllib.request import urlopen
86 from urllib.error import HTTPError
87else:
88 from urllib2 import urlopen, HTTPError
89
f6b5b4d7
TL
# Python 2 compatibility: on Python 3 make `unicode` an alias of `str`.
if sys.version_info > (3, 0):
    unicode = str

# Absolute path of the detected container engine (podman/docker);
# filled in at runtime, empty until detection has run.
container_path = ''
# Cache for data read from stdin; presumably used by '--config-json -'
# handling elsewhere in the file -- verify against get_parm.
cached_stdin = None

# strftime/strptime format used for timestamps in this script.
DATEFMT = '%Y-%m-%dT%H:%M:%S.%f'
97
f91f0fd5
TL
# Log and console output config (dictConfig schema, version 1):
# INFO+ goes to the terminal unformatted; DEBUG+ goes to a small
# rotating file under LOG_DIR with timestamps.
logging_config = {
    'version': 1,
    'disable_existing_loggers': True,
    'formatters': {
        'cephadm': {
            'format': '%(asctime)s %(levelname)s %(message)s'
        },
    },
    'handlers': {
        # terminal output: INFO and above, default (bare) formatting
        'console':{
            'level':'INFO',
            'class':'logging.StreamHandler',
        },
        # persistent log: DEBUG and above, ~1MB with one rotated backup
        'log_file': {
            'level': 'DEBUG',
            'class': 'logging.handlers.RotatingFileHandler',
            'formatter': 'cephadm',
            'filename': '%s/cephadm.log' % LOG_DIR,
            'maxBytes': 1024000,
            'backupCount': 1,
        }
    },
    'loggers': {
        # root logger fans out to both handlers
        '': {
            'level': 'DEBUG',
            'handlers': ['console', 'log_file'],
        }
    }
}
e306af50
TL
128
class termcolor:
    """ANSI escape sequences for colorizing terminal output."""
    yellow = '\033[93m'
    red = '\033[31m'
    end = '\033[0m'  # reset all attributes
133
f6b5b4d7 134
9f95a23c
TL
class Error(Exception):
    """Base class for cephadm-specific errors raised throughout this script."""
    pass
137
f6b5b4d7 138
9f95a23c
TL
class TimeoutExpired(Error):
    """Raised when a spawned command does not finish within its timeout."""
    pass
141
142##################################
143
f6b5b4d7 144
9f95a23c
TL
class Ceph(object):
    """Namespace listing the core Ceph daemon types this script supports."""
    daemons = ('mon', 'mgr', 'mds', 'osd', 'rgw', 'rbd-mirror',
               'crash')
148
149##################################
150
f6b5b4d7 151
9f95a23c
TL
class Monitoring(object):
    """Define the configs for the monitoring containers"""

    # TCP port(s) each monitoring component listens on.
    port_map = {
        "prometheus": [9095],  # Avoid default 9090, due to conflict with cockpit UI
        "node-exporter": [9100],
        "grafana": [3000],
        "alertmanager": [9093, 9094],
    }

    # Per-component deployment settings: container image, resource hints,
    # daemon arguments, and the config files/args expected in config-json.
    components = {
        "prometheus": {
            "image": "docker.io/prom/prometheus:v2.18.1",
            "cpus": '2',
            "memory": '4GB',
            "args": [
                "--config.file=/etc/prometheus/prometheus.yml",
                "--storage.tsdb.path=/prometheus",
                "--web.listen-address=:{}".format(port_map['prometheus'][0]),
            ],
            "config-json-files": [
                "prometheus.yml",
            ],
        },
        "node-exporter": {
            "image": "docker.io/prom/node-exporter:v0.18.1",
            "cpus": "1",
            "memory": "1GB",
            "args": [
                "--no-collector.timex",
            ],
        },
        "grafana": {
            "image": "docker.io/ceph/ceph-grafana:6.6.2",
            "cpus": "2",
            "memory": "4GB",
            "args": [],
            "config-json-files": [
                "grafana.ini",
                "provisioning/datasources/ceph-dashboard.yml",
                "certs/cert_file",
                "certs/cert_key",
            ],
        },
        "alertmanager": {
            "image": "docker.io/prom/alertmanager:v0.20.0",
            "cpus": "2",
            "memory": "2GB",
            "args": [
                "--web.listen-address=:{}".format(port_map['alertmanager'][0]),
                "--cluster.listen-address=:{}".format(port_map['alertmanager'][1]),
            ],
            "config-json-files": [
                "alertmanager.yml",
            ],
            # config-json keys whose values are passed as daemon arguments
            "config-json-args": [
                "peers",
            ],
        },
    }  # type: ignore
212
213##################################
214
f6b5b4d7 215
9f95a23c
TL
class NFSGanesha(object):
    """Defines a NFS-Ganesha container"""

    daemon_type = 'nfs'
    entrypoint = '/usr/bin/ganesha.nfsd'
    # -F: foreground, -L STDERR: log to stderr (picked up by the container engine)
    daemon_args = ['-F', '-L', 'STDERR']

    # files that must be present in the config-json 'files' dict
    required_files = ['ganesha.conf']

    port_map = {
        "nfs" : 2049,
    }

    def __init__(self,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (str, Union[int, str], Dict, str) -> None
        """Parse and validate config-json options for an nfs.<id> daemon."""
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.pool = dict_get(config_json, 'pool', require=True)
        self.namespace = dict_get(config_json, 'namespace')
        self.userid = dict_get(config_json, 'userid')
        self.extra_args = dict_get(config_json, 'extra_args', [])
        self.files = dict_get(config_json, 'files', {})
        self.rgw = dict_get(config_json, 'rgw', {})

        # validate the supplied args
        self.validate()

    @classmethod
    def init(cls, fsid, daemon_id):
        # type: (str, Union[int, str]) -> NFSGanesha
        """Construct an instance from the global command-line args."""
        return cls(fsid, daemon_id, get_parm(args.config_json), args.image)

    def get_container_mounts(self, data_dir):
        # type: (str) -> Dict[str, str]
        """Map host paths (keys) under data_dir to container paths (values)."""
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z'
        if self.rgw:
            # expose the RGW keyring where radosgw expects it
            cluster = self.rgw.get('cluster', 'ceph')
            rgw_user = self.rgw.get('user', 'admin')
            mounts[os.path.join(data_dir, 'keyring.rgw')] = \
                    '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user)
        return mounts

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        """Environment variables for the ganesha container."""
        envs = [
            'CEPH_CONF=%s' % ('/etc/ceph/ceph.conf')
        ]
        return envs

    @staticmethod
    def get_version(container_id):
        # type: (str) -> Optional[str]
        """Return the ganesha release version inside the running container,
        or None if it cannot be determined."""
        version = None
        out, err, code = call(
            [container_path, 'exec', container_id,
             NFSGanesha.entrypoint, '-v'])
        if code == 0:
            match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
            if match:
                version = match.group(1)
        return version

    def validate(self):
        # type: () -> None
        """Raise Error if fsid/daemon_id/image, required files or the
        optional RGW section are missing/invalid."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

        # check for an RGW config
        if self.rgw:
            if not self.rgw.get('keyring'):
                raise Error('RGW keyring is missing')
            if not self.rgw.get('user'):
                raise Error('RGW user is missing')

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        """Container name 'ceph-<fsid>-nfs.<id>', with optional '-<desc>' suffix."""
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def get_daemon_args(self):
        # type: () -> List[str]
        return self.daemon_args + self.extra_args

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ganesha config...')

        # create the ganesha conf dir
        config_dir = os.path.join(data_dir, 'etc/ganesha')
        makedirs(config_dir, uid, gid, 0o755)

        # populate files from the config-json, owned by uid:gid, mode 0600
        for fname in self.files:
            config_file = os.path.join(config_dir, fname)
            config_content = dict_get_join(self.files, fname)
            logger.info('Write file: %s' % (config_file))
            with open(config_file, 'w') as f:
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)
                f.write(config_content)

        # write the RGW keyring
        if self.rgw:
            keyring_path = os.path.join(data_dir, 'keyring.rgw')
            with open(keyring_path, 'w') as f:
                os.fchmod(f.fileno(), 0o600)
                os.fchown(f.fileno(), uid, gid)
                f.write(self.rgw.get('keyring', ''))

    def get_rados_grace_container(self, action):
        # type: (str) -> CephContainer
        """Container for a ganesha action on the grace db"""
        entrypoint = '/usr/bin/ganesha-rados-grace'

        assert self.pool
        args=['--pool', self.pool]
        if self.namespace:
            args += ['--ns', self.namespace]
        if self.userid:
            args += ['--userid', self.userid]
        args += [action, self.get_daemon_name()]

        data_dir = get_data_dir(self.fsid, self.daemon_type, self.daemon_id)
        volume_mounts = self.get_container_mounts(data_dir)
        envs = self.get_container_envs()

        logger.info('Creating RADOS grace for action: %s' % action)
        c = CephContainer(
            image=self.image,
            entrypoint=entrypoint,
            args=args,
            volume_mounts=volume_mounts,
            cname=self.get_container_name(desc='grace-%s' % action),
            envs=envs
        )
        return c
383
384##################################
385
f6b5b4d7 386
1911f103
TL
class CephIscsi(object):
    """Defines a Ceph-Iscsi container"""

    daemon_type = 'iscsi'
    entrypoint = '/usr/bin/rbd-target-api'

    # files that must be present in the config-json 'files' dict
    required_files = ['iscsi-gateway.cfg']

    def __init__(self,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (str, Union[int, str], Dict, str) -> None
        """Parse and validate config-json options for an iscsi.<id> daemon."""
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        # validate the supplied args
        self.validate()

    @classmethod
    def init(cls, fsid, daemon_id):
        # type: (str, Union[int, str]) -> CephIscsi
        """Construct an instance from the global command-line args."""
        return cls(fsid, daemon_id, get_parm(args.config_json), args.image)

    @staticmethod
    def get_container_mounts(data_dir, log_dir):
        # type: (str, str) -> Dict[str, str]
        """Map host paths (keys) to container paths (values)."""
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
        mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
        mounts[log_dir] = '/var/log/rbd-target-api:z'
        mounts['/dev'] = '/dev'
        return mounts

    @staticmethod
    def get_container_binds():
        # type: () -> List[List[str]]
        """Bind /lib/modules read-only into the container."""
        binds = []
        lib_modules = ['type=bind',
                       'source=/lib/modules',
                       'destination=/lib/modules',
                       'ro=true']
        binds.append(lib_modules)
        return binds

    @staticmethod
    def get_version(container_id):
        # type: (str) -> Optional[str]
        """Return the ceph_iscsi package version inside the running
        container, or None if it cannot be determined."""
        version = None
        out, err, code = call(
            [container_path, 'exec', container_id,
             '/usr/bin/python3', '-c', "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"])
        if code == 0:
            version = out.strip()
        return version

    def validate(self):
        # type: () -> None
        """Raise Error if fsid/daemon_id/image or required files are invalid."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        """Container name 'ceph-<fsid>-iscsi.<id>', with optional '-<desc>' suffix."""
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ceph-iscsi config...')
        configfs_dir = os.path.join(data_dir, 'configfs')
        makedirs(configfs_dir, uid, gid, 0o755)

        # populate files from the config-json, owned by uid:gid, mode 0600
        for fname in self.files:
            config_file = os.path.join(data_dir, fname)
            config_content = dict_get_join(self.files, fname)
            logger.info('Write file: %s' % (config_file))
            with open(config_file, 'w') as f:
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)
                f.write(config_content)

    @staticmethod
    def configfs_mount_umount(data_dir, mount=True):
        # type: (str, bool) -> List[str]
        """Return a whitespace-split shell snippet that (un)mounts configfs
        under data_dir.
        NOTE(review): str.split() discards the snippet's structure; this
        assumes the caller re-joins the tokens and runs them via a shell --
        verify against the systemd unit generation code."""
        mount_path = os.path.join(data_dir, 'configfs')
        if mount:
            cmd = "if ! grep -qs {0} /proc/mounts; then " \
                  "mount -t configfs none {0}; fi".format(mount_path)
        else:
            cmd = "if grep -qs {0} /proc/mounts; then " \
                  "umount {0}; fi".format(mount_path)
        return cmd.split()

    def get_tcmu_runner_container(self):
        # type: () -> CephContainer
        """Sidecar container that runs /usr/bin/tcmu-runner for this daemon."""
        tcmu_container = get_container(self.fsid, self.daemon_type, self.daemon_id)
        tcmu_container.entrypoint = "/usr/bin/tcmu-runner"
        tcmu_container.cname = self.get_container_name(desc='tcmu')
        # remove extra container args for tcmu container.
        # extra args could cause issue with forking service type
        tcmu_container.container_args = []
        return tcmu_container
517
1911f103
TL
518##################################
519
f6b5b4d7 520
f91f0fd5
TL
class CustomContainer(object):
    """Defines a custom container"""
    daemon_type = 'container'

    def __init__(self, fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        """Parse config-json options for a generic user-defined container."""
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.entrypoint = dict_get(config_json, 'entrypoint')
        self.uid = dict_get(config_json, 'uid', 65534)  # nobody
        self.gid = dict_get(config_json, 'gid', 65534)  # nobody
        self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
        self.args = dict_get(config_json, 'args', [])
        self.envs = dict_get(config_json, 'envs', [])
        self.privileged = dict_get(config_json, 'privileged', False)
        self.bind_mounts = dict_get(config_json, 'bind_mounts', [])
        self.ports = dict_get(config_json, 'ports', [])
        self.dirs = dict_get(config_json, 'dirs', [])
        self.files = dict_get(config_json, 'files', {})

    @classmethod
    def init(cls, fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer':
        """Construct an instance from the global command-line args."""
        return cls(fsid, daemon_id, get_parm(args.config_json), args.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """
        Create dirs/files below the container data directory.
        """
        logger.info('Creating custom container configuration '
                    'dirs/files in {} ...'.format(data_dir))

        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % data_dir)

        for dir_path in self.dirs:
            logger.info('Creating directory: {}'.format(dir_path))
            # strip('/') forces the path to be relative to data_dir
            dir_path = os.path.join(data_dir, dir_path.strip('/'))
            makedirs(dir_path, uid, gid, 0o755)

        for file_path in self.files:
            logger.info('Creating file: {}'.format(file_path))
            content = dict_get_join(self.files, file_path)
            file_path = os.path.join(data_dir, file_path.strip('/'))
            with open(file_path, 'w', encoding='utf-8') as f:
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)
                f.write(content)

    def get_daemon_args(self) -> List[str]:
        return []

    def get_container_args(self) -> List[str]:
        return self.args

    def get_container_envs(self) -> List[str]:
        return self.envs

    def get_container_mounts(self, data_dir: str) -> Dict[str, str]:
        """
        Get the volume mounts. Relative source paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

        Example:
        {
            /foo/conf: /conf
            foo/conf: /conf
        }
        becomes
        {
            /foo/conf: /conf
            /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
        }
        """
        mounts = {}
        for source, destination in self.volume_mounts.items():
            # os.path.join keeps absolute sources unchanged and anchors
            # relative ones under data_dir
            source = os.path.join(data_dir, source)
            mounts[source] = destination
        return mounts

    def get_container_binds(self, data_dir: str) -> List[List[str]]:
        """
        Get the bind mounts. Relative `source=...` paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

        Example:
        [
            'type=bind',
            'source=lib/modules',
            'destination=/lib/modules',
            'ro=true'
        ]
        becomes
        [
            ...
            'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
            ...
        ]
        """
        binds = self.bind_mounts.copy()
        for bind in binds:
            for index, value in enumerate(bind):
                match = re.match(r'^source=(.+)$', value)
                if match:
                    bind[index] = 'source={}'.format(os.path.join(
                        data_dir, match.group(1)))
        return binds
630
631##################################
632
633
def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
    """
    Helper function to get a key from a dictionary.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :param default: The default value in case the key does not
        exist. Default is `None`.
    :param require: Set to `True` if the key is required. An
        exception will be raised if the key does not exist in
        the given dictionary.
    :return: Returns the value of the given key.
    :raises: :exc:`self.Error` if the given key does not exist
        and `require` is set to `True`.
    """
    if key in d:
        return d[key]
    if require:
        raise Error('{} missing from dict'.format(key))
    return default
651
652##################################
653
654
def dict_get_join(d: Dict, key: str) -> Any:
    """
    Helper function to get the value of a given key from a dictionary.
    `List` values will be converted to a string by joining them with a
    line break.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :return: Returns the value of the given key. If it was a `list`, it
        will be joining with a line break.
    """
    value = d.get(key)
    return '\n'.join(map(str, value)) if isinstance(value, list) else value
669
670##################################
671
672
def get_supported_daemons():
    # type: () -> List[str]
    """Return every daemon type cephadm knows how to deploy."""
    daemons = list(Ceph.daemons) + list(Monitoring.components)
    daemons.append(NFSGanesha.daemon_type)
    daemons.append(CephIscsi.daemon_type)
    daemons.append(CustomContainer.daemon_type)
    # daemon type names must be globally unique
    assert len(daemons) == len(set(daemons))
    return daemons
682
683##################################
684
f6b5b4d7 685
def attempt_bind(s, address, port):
    # type: (socket.socket, str, int) -> None
    """Try to bind *s* to (address, port); the socket is always closed.

    :raises OSError: if the port is already in use, or (bug fix) if the
        bind fails for any other unexpected reason (e.g. EACCES on a
        privileged port). Previously such errors were silently swallowed,
        making callers believe the port was usable. EADDRNOTAVAIL (the
        address is not configured on this host) is still ignored.
    """
    try:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((address, port))
    except (socket.error, OSError) as e:  # py2 and py3
        msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
        logger.warning(msg)
        if e.errno == errno.EADDRNOTAVAIL:
            # address not present on this host; not a port conflict
            pass
        else:
            # EADDRINUSE and any other failure: surface to the caller
            raise OSError(msg)
    finally:
        s.close()
700
f6b5b4d7 701
def port_in_use(port_num):
    # type: (int) -> bool
    """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
    logger.info('Verifying port %d ...' % port_num)
    for family, wildcard in ((socket.AF_INET, '0.0.0.0'),
                             (socket.AF_INET6, '::')):
        sock = socket.socket(family, socket.SOCK_STREAM)
        try:
            attempt_bind(sock, wildcard, port_num)
        except OSError:
            return True
    return False
716
f6b5b4d7 717
def check_ip_port(ip, port):
    # type: (str, int) -> None
    """Verify that (ip, port) can be bound locally; raise Error otherwise.

    Skipped entirely when --skip-ping-check was given. IPv6 addresses are
    unwrapped (brackets removed) before binding.
    """
    if not args.skip_ping_check:
        logger.info('Verifying IP %s port %d ...' % (ip, port))
        if is_ipv6(ip):
            s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
            ip = unwrap_ipv6(ip)
        else:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            attempt_bind(s, ip, port)
        except OSError as e:
            # re-raise as a cephadm Error so the CLI reports it cleanly
            raise Error(e)
731
732##################################
733
734# this is an abbreviated version of
735# https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py
736# that drops all of the compatibility (this is Unix/Linux only).
737
# Python 2 has no builtin TimeoutError; alias it to OSError there so the
# Timeout class below can subclass it on both interpreters.
try:
    TimeoutError
except NameError:
    TimeoutError = OSError
742
f6b5b4d7 743
9f95a23c
TL
class Timeout(TimeoutError):
    """
    Raised when the lock could not be acquired in *timeout*
    seconds.
    """

    def __init__(self, lock_file):
        #: The path of the file lock.
        self.lock_file = lock_file

    def __str__(self):
        return "The file lock '{}' could not be acquired.".format(self.lock_file)
761
762
763class _Acquire_ReturnProxy(object):
764 def __init__(self, lock):
765 self.lock = lock
766 return None
767
768 def __enter__(self):
769 return self.lock
770
771 def __exit__(self, exc_type, exc_value, traceback):
772 self.lock.release()
773 return None
774
775
class FileLock(object):
    """Re-entrant, flock(2)-based inter-process lock kept under LOCK_DIR.

    Abbreviated from py-filelock (Unix/Linux only). The lock nests within
    one object via a counter; the flock is only dropped when the counter
    returns to zero (or release(force=True)).
    """

    def __init__(self, name, timeout=-1):
        # ensure the lock directory exists (0700, root-only)
        if not os.path.exists(LOCK_DIR):
            os.mkdir(LOCK_DIR, 0o700)
        self._lock_file = os.path.join(LOCK_DIR, name + '.lock')

        # The file descriptor for the *_lock_file* as it is returned by the
        # os.open() function.
        # This file lock is only NOT None, if the object currently holds the
        # lock.
        self._lock_file_fd = None
        # default timeout in seconds; < 0 means block forever
        self.timeout = timeout
        # The lock counter is used for implementing the nested locking
        # mechanism. Whenever the lock is acquired, the counter is increased and
        # the lock is only released, when this value is 0 again.
        self._lock_counter = 0
        return None

    @property
    def is_locked(self):
        # True while this object holds the flock
        return self._lock_file_fd is not None

    def acquire(self, timeout=None, poll_intervall=0.05):
        """
        Acquires the file lock or fails with a :exc:`Timeout` error.
        .. code-block:: python
            # You can use this method in the context manager (recommended)
            with lock.acquire():
                pass
            # Or use an equivalent try-finally construct:
            lock.acquire()
            try:
                pass
            finally:
                lock.release()
        :arg float timeout:
            The maximum time waited for the file lock.
            If ``timeout < 0``, there is no timeout and this method will
            block until the lock could be acquired.
            If ``timeout`` is None, the default :attr:`~timeout` is used.
        :arg float poll_intervall:
            We check once in *poll_intervall* seconds if we can acquire the
            file lock.
        :raises Timeout:
            if the lock could not be acquired in *timeout* seconds.
        .. versionchanged:: 2.0.0
            This method returns now a *proxy* object instead of *self*,
            so that it can be used in a with statement without side effects.
        """
        # Use the default timeout, if no timeout is provided.
        if timeout is None:
            timeout = self.timeout

        # Increment the number right at the beginning.
        # We can still undo it, if something fails.
        self._lock_counter += 1

        lock_id = id(self)
        lock_filename = self._lock_file
        start_time = time.time()
        try:
            while True:
                if not self.is_locked:
                    logger.debug('Acquiring lock %s on %s', lock_id,
                                 lock_filename)
                    self._acquire()

                if self.is_locked:
                    logger.debug('Lock %s acquired on %s', lock_id,
                                 lock_filename)
                    break
                elif timeout >= 0 and time.time() - start_time > timeout:
                    logger.warning('Timeout acquiring lock %s on %s', lock_id,
                                   lock_filename)
                    raise Timeout(self._lock_file)
                else:
                    logger.debug(
                        'Lock %s not acquired on %s, waiting %s seconds ...',
                        lock_id, lock_filename, poll_intervall
                    )
                    time.sleep(poll_intervall)
        except:  # noqa
            # Something did go wrong, so decrement the counter.
            self._lock_counter = max(0, self._lock_counter - 1)

            raise
        return _Acquire_ReturnProxy(lock = self)

    def release(self, force=False):
        """
        Releases the file lock.
        Please note, that the lock is only completly released, if the lock
        counter is 0.
        Also note, that the lock file itself is not automatically deleted.
        :arg bool force:
            If true, the lock counter is ignored and the lock is released in
            every case.
        """
        if self.is_locked:
            self._lock_counter -= 1

            if self._lock_counter == 0 or force:
                lock_id = id(self)
                lock_filename = self._lock_file

                logger.debug('Releasing lock %s on %s', lock_id, lock_filename)
                self._release()
                self._lock_counter = 0
                logger.debug('Lock %s released on %s', lock_id, lock_filename)

        return None

    def __enter__(self):
        self.acquire()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.release()
        return None

    def __del__(self):
        # force-release on garbage collection so the fd is not leaked
        self.release(force=True)
        return None

    def _acquire(self):
        # low-level: open the lock file and try a non-blocking exclusive flock
        open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
        fd = os.open(self._lock_file, open_mode)

        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except (IOError, OSError):
            # somebody else holds it; leave _lock_file_fd as None
            os.close(fd)
        else:
            self._lock_file_fd = fd
        return None

    def _release(self):
        # Do not remove the lockfile:
        #
        # https://github.com/benediktschmitt/py-filelock/issues/31
        # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
        fd = self._lock_file_fd
        self._lock_file_fd = None
        fcntl.flock(fd, fcntl.LOCK_UN)  # type: ignore
        os.close(fd)  # type: ignore
        return None
922
923
924##################################
925# Popen wrappers, lifted from ceph-volume
926
def call(command,  # type: List[str]
         desc=None,  # type: Optional[str]
         verbose=False,  # type: bool
         verbose_on_failure=True,  # type: bool
         timeout=DEFAULT_TIMEOUT,  # type: Optional[int]
         **kwargs):
    """
    Wrap subprocess.Popen to

    - log stdout/stderr to a logger,
    - decode utf-8
    - cleanly return out, err, returncode

    If verbose=True, log at info (instead of debug) level.

    :param verbose_on_failure: On a non-zero exit status, it will forcefully set
                               logging ON for the terminal
    :param timeout: timeout in seconds
    """
    if not desc:
        desc = command[0]
    timeout = timeout or args.timeout

    logger.debug("Running command: %s" % ' '.join(command))
    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        close_fds=True,
        **kwargs
    )
    # get current p.stdout flags, add O_NONBLOCK
    assert process.stdout is not None
    assert process.stderr is not None
    stdout_flags = fcntl.fcntl(process.stdout, fcntl.F_GETFL)
    stderr_flags = fcntl.fcntl(process.stderr, fcntl.F_GETFL)
    fcntl.fcntl(process.stdout, fcntl.F_SETFL, stdout_flags | os.O_NONBLOCK)
    fcntl.fcntl(process.stderr, fcntl.F_SETFL, stderr_flags | os.O_NONBLOCK)

    out = ''       # accumulated stdout
    err = ''       # accumulated stderr
    reads = None   # fds reported readable by the last select()
    stop = False
    out_buffer = ''  # partial line (no newline yet)
    err_buffer = ''  # partial line (no newline yet)
    start_time = time.time()
    end_time = None
    if timeout:
        end_time = start_time + timeout
    while not stop:
        if end_time and (time.time() >= end_time):
            # deadline reached: kill the child if still running
            stop = True
            if process.poll() is None:
                logger.info(desc + ':timeout after %s seconds' % timeout)
                process.kill()
        if reads and process.poll() is not None:
            # we want to stop, but first read off anything remaining
            # on stdout/stderr
            stop = True
        else:
            reads, _, _ = select.select(
                [process.stdout.fileno(), process.stderr.fileno()],
                [], [], timeout
            )
        for fd in reads:
            try:
                message_b = os.read(fd, 1024)
                # py2 returns str, py3 returns bytes; normalize to str
                if isinstance(message_b, bytes):
                    message = message_b.decode('utf-8')
                if isinstance(message_b, str):
                    message = message_b
                if stop and message:
                    # process has terminated, but have more to read still, so not stopping yet
                    # (os.read returns '' when it encounters EOF)
                    stop = False
                if not message:
                    continue
                if fd == process.stdout.fileno():
                    out += message
                    # log only complete lines; keep the trailing partial line
                    message = out_buffer + message
                    lines = message.split('\n')
                    out_buffer = lines.pop()
                    for line in lines:
                        if verbose:
                            logger.info(desc + ':stdout ' + line)
                        else:
                            logger.debug(desc + ':stdout ' + line)
                elif fd == process.stderr.fileno():
                    err += message
                    message = err_buffer + message
                    lines = message.split('\n')
                    err_buffer = lines.pop()
                    for line in lines:
                        if verbose:
                            logger.info(desc + ':stderr ' + line)
                        else:
                            logger.debug(desc + ':stderr ' + line)
                else:
                    assert False
            except (IOError, OSError):
                # non-blocking read may raise EAGAIN; just retry next loop
                pass
        if verbose:
            logger.debug(desc + ':profile rt=%s, stop=%s, exit=%s, reads=%s'
                         % (time.time()-start_time, stop, process.poll(), reads))

    returncode = process.wait()

    # flush any trailing partial lines
    if out_buffer != '':
        if verbose:
            logger.info(desc + ':stdout ' + out_buffer)
        else:
            logger.debug(desc + ':stdout ' + out_buffer)
    if err_buffer != '':
        if verbose:
            logger.info(desc + ':stderr ' + err_buffer)
        else:
            logger.debug(desc + ':stderr ' + err_buffer)

    if returncode != 0 and verbose_on_failure and not verbose:
        # dump stdout + stderr
        logger.info('Non-zero exit code %d from %s' % (returncode, ' '.join(command)))
        for line in out.splitlines():
            logger.info(desc + ':stdout ' + line)
        for line in err.splitlines():
            logger.info(desc + ':stderr ' + line)

    return out, err, returncode
1054
1055
def call_throws(command, **kwargs):
    # type: (List[str], Any) -> Tuple[str, str, int]
    """Run *command* through call() and raise on a non-zero exit code."""
    result = call(command, **kwargs)
    if result[2] == 0:
        return result
    raise RuntimeError('Failed command: %s' % ' '.join(command))
1062
1063
def call_timeout(command, timeout):
    # type: (List[str], int) -> int
    """Run *command*, raising TimeoutExpired if it exceeds *timeout* seconds.

    Returns the process' exit code.
    """
    logger.debug('Running command (timeout=%s): %s'
                 % (timeout, ' '.join(command)))

    def timed_out(command, timeout):
        # type: (List[str], int) -> NoReturn
        msg = 'Command \'%s\' timed out after %s seconds' % (command, timeout)
        logger.debug(msg)
        raise TimeoutExpired(msg)

    def run_py2(command, timeout):
        # type: (List[str], int) -> int
        # py2's subprocess has no timeout support: emulate it with a
        # watchdog thread that kills the child when the join times out.
        proc = subprocess.Popen(command)
        waiter = Thread(target=proc.wait)
        waiter.start()
        waiter.join(timeout)
        if waiter.is_alive():
            proc.kill()
            waiter.join()
            timed_out(command, timeout)
        return proc.returncode

    def run_py3(command, timeout):
        # type: (List[str], int) -> int
        try:
            return subprocess.call(command, timeout=timeout)
        except subprocess.TimeoutExpired:
            timed_out(command, timeout)

    if sys.version_info >= (3, 3):
        return run_py3(command, timeout)
    # py2 subprocess has no timeout arg
    return run_py2(command, timeout)
1102
1103##################################
1104
f6b5b4d7 1105
9f95a23c
TL
def is_available(what, func):
    # type: (str, Callable[[], bool]) -> None
    """
    Wait for a service to become available

    :param what: the name of the service
    :param func: the callable object that determines availability
    """
    retry = args.retry
    logger.info('Waiting for %s...' % what)
    attempt = 0
    while not func():
        attempt += 1
        if attempt > retry:
            raise Error('%s not available after %s tries'
                        % (what, retry))
        logger.info('%s not available, waiting (%s/%s)...'
                    % (what, attempt, retry))
        time.sleep(1)
    logger.info('%s is available' % what)
1131
1132
def read_config(fn):
    # type: (Optional[str]) -> ConfigParser
    """Parse a ceph.conf-style file and return a ConfigParser.

    py2's ConfigParser rejects whitespace before option names
    (e.g., '\n foo = bar\n'), so leading whitespace is stripped from
    every line before parsing.
    """
    cp = ConfigParser() if sys.version_info >= (3, 2) else SafeConfigParser()

    if fn:
        with open(fn, 'r') as f:
            cleaned = re.sub(r'\n(\s)+', r'\n', f.read())
        buf = StringIO(cleaned)
        if sys.version_info >= (3, 2):
            cp.read_file(buf)
        else:
            cp.readfp(buf)

    return cp
1154
f6b5b4d7 1155
9f95a23c
TL
def pathify(p):
    # type: (str) -> str
    """Return *p* with '~' expanded and converted to an absolute path."""
    return os.path.abspath(os.path.expanduser(p))
9f95a23c 1160
f6b5b4d7 1161
def get_file_timestamp(fn):
    # type: (str) -> Optional[str]
    """Return fn's mtime rendered as DATEFMT in UTC, or None on any error."""
    try:
        stamp = os.path.getmtime(fn)
        return datetime.datetime.fromtimestamp(
            stamp, tz=datetime.timezone.utc
        ).strftime(DATEFMT)
    except Exception:
        return None
1171
f6b5b4d7 1172
def try_convert_datetime(s):
    # type: (str) -> Optional[str]
    """Normalize a container-runtime timestamp to DATEFMT in UTC, or None.

    docker and podman emit different formats (and podman even differs
    between 'inspect' and 'inspect --format {{.Created}}'), e.g.:
      docker 18.09.7:  2020-03-03T09:21:43.636153304Z
      podman 1.7.0:    2020-03-03T15:52:30.136257504-06:00
                       2020-03-03 15:52:30.136257504 -0600 CST
    None of these parse with strptime as-is.
    """
    # strptime only accepts 6 fractional-second digits; truncate the 9.
    s = re.sub(r'(\.[\d]{6})[\d]*', r'\1', s)

    # replace a trailing Z with -0000; python 3.6.8's %z won't parse 'Z'
    if s.endswith('Z'):
        s = s[:-1] + '-0000'

    # drop the redundant trailing 'CST' (keep only the first three fields)
    s = ' '.join(s.split(' ')[0:3])

    for fmt in ('%Y-%m-%dT%H:%M:%S.%f%z',
                '%Y-%m-%d %H:%M:%S.%f %z'):
        try:
            # normalize to UTC and render as DATEFMT
            return datetime.datetime.strptime(s, fmt).astimezone(
                tz=datetime.timezone.utc).strftime(DATEFMT)
        except ValueError:
            pass
    return None
1212
f6b5b4d7 1213
9f95a23c
TL
def get_podman_version():
    # type: () -> Tuple[int, ...]
    """Return the podman version as an int tuple; error if not using podman."""
    if 'podman' not in container_path:
        raise ValueError('not using podman')
    version_out, _, _ = call_throws([container_path, '--version'])
    return _parse_podman_version(version_out)
1220
f6b5b4d7 1221
9f95a23c
TL
1222def _parse_podman_version(out):
1223 # type: (str) -> Tuple[int, ...]
1224 _, _, version_str = out.strip().split()
1225
1226 def to_int(val, org_e=None):
1227 if not val and org_e:
1228 raise org_e
1229 try:
1230 return int(val)
1231 except ValueError as e:
1232 return to_int(val[0:-1], org_e or e)
1233
1234 return tuple(map(to_int, version_str.split('.')))
1235
1236
def get_hostname():
    # type: () -> str
    """Return the short hostname of this machine."""
    return socket.gethostname()
1240
f6b5b4d7 1241
9f95a23c
TL
def get_fqdn():
    # type: () -> str
    """Return the FQDN, falling back to the short hostname if empty."""
    return socket.getfqdn() or socket.gethostname()
1245
f6b5b4d7 1246
9f95a23c
TL
def get_arch():
    # type: () -> str
    """Return the machine architecture (e.g. 'x86_64')."""
    return platform.uname().machine
1250
f6b5b4d7 1251
9f95a23c
TL
def generate_service_id():
    # type: () -> str
    """Return '<hostname>.<6 random lowercase letters>'."""
    suffix = ''.join(random.choice(string.ascii_lowercase) for _ in range(6))
    return get_hostname() + '.' + suffix
1256
f6b5b4d7 1257
9f95a23c
TL
def generate_password():
    # type: () -> str
    """Return a random 10-character password drawn from [a-z0-9]."""
    alphabet = string.ascii_lowercase + string.digits
    return ''.join(random.choice(alphabet) for _ in range(10))
1262
f6b5b4d7 1263
9f95a23c
TL
def normalize_container_id(i):
    # type: (str) -> str
    """Strip the 'sha256:' prefix docker adds to container ids.

    docker (18.09.7 in bionic at least) and podman both always use
    sha256, so dropping the prefix keeps ids comparable between the two.
    """
    prefix = 'sha256:'
    return i[len(prefix):] if i.startswith(prefix) else i
1274
f6b5b4d7 1275
9f95a23c
TL
def make_fsid():
    # type: () -> str
    """Generate a fresh cluster fsid.

    Uses uuid4 (fully random) rather than uuid1: uuid1 embeds the host's
    MAC address and a timestamp into the identifier, needlessly leaking
    host information into the cluster fsid. Any valid UUID string is
    accepted by consumers (see is_fsid()).
    """
    return str(uuid.uuid4())
1279
f6b5b4d7 1280
9f95a23c
TL
def is_fsid(s):
    # type: (str) -> bool
    """Return True if *s* parses as a UUID."""
    try:
        uuid.UUID(s)
        return True
    except ValueError:
        return False
1288
f6b5b4d7 1289
9f95a23c
TL
def infer_fsid(func):
    """
    Decorator: if --fsid was not given, infer it from the daemons on disk.

    If exactly one valid fsid is found under /var/lib/ceph/*, use it;
    more than one candidate is an error.
    """
    @wraps(func)
    def _infer_fsid():
        if args.fsid:
            logger.debug('Using specified fsid: %s' % args.fsid)
            return func()

        fsids_set = set()
        for daemon in list_daemons(detail=False):
            if not is_fsid(daemon['fsid']):
                # 'unknown' fsid
                continue
            if 'name' not in args or not args.name:
                # no --name filter given: consider every daemon
                fsids_set.add(daemon['fsid'])
            elif daemon['name'] == args.name:
                # --name matches this daemon
                fsids_set.add(daemon['fsid'])
        fsids = sorted(fsids_set)

        if not fsids:
            # some commands do not always require an fsid
            pass
        elif len(fsids) == 1:
            logger.info('Inferring fsid %s' % fsids[0])
            args.fsid = fsids[0]
        else:
            raise Error('Cannot infer an fsid, one must be specified: %s' % fsids)
        return func()

    return _infer_fsid
1325
f6b5b4d7 1326
e306af50
TL
def infer_config(func):
    """
    Decorator: if --config was not given, locate one.

    Prefer the config kept by a local mon container of --fsid; otherwise
    fall back to /etc/ceph/ceph.conf when it exists.
    """
    @wraps(func)
    def _infer_config():
        if args.config:
            logger.debug('Using specified config: %s' % args.config)
            return func()

        config = None
        if args.fsid:
            name = args.name
            if not name:
                # pick any mon daemon of this cluster
                for daemon in list_daemons(detail=False):
                    if daemon['name'].startswith('mon.'):
                        name = daemon['name']
                        break
            if name:
                config = '/var/lib/ceph/{}/{}/config'.format(args.fsid, name)

        if config:
            logger.info('Inferring config %s' % config)
            args.config = config
        elif os.path.exists(SHELL_DEFAULT_CONF):
            logger.debug('Using default config: %s' % SHELL_DEFAULT_CONF)
            args.config = SHELL_DEFAULT_CONF
        return func()

    return _infer_config
1356
f6b5b4d7 1357
1911f103
TL
def _get_default_image():
    """Return DEFAULT_IMAGE, warning when it refers to a development build."""
    if DEFAULT_IMAGE_IS_MASTER:
        warn = '''This is a development version of cephadm.
For information regarding the latest stable release:
  https://docs.ceph.com/docs/{}/cephadm/install
'''.format(LATEST_STABLE_RELEASE)
        for line in warn.splitlines():
            logger.warning('{}{}{}'.format(termcolor.yellow, line, termcolor.end))
    return DEFAULT_IMAGE
1367
f6b5b4d7 1368
9f95a23c
TL
def infer_image(func):
    """
    Decorator: fill args.image from CEPHADM_IMAGE, the most recent local
    ceph image, or finally the built-in default.
    """
    @wraps(func)
    def _infer_image():
        if not args.image:
            args.image = os.environ.get('CEPHADM_IMAGE')
        if not args.image:
            args.image = get_last_local_ceph_image()
        if not args.image:
            args.image = _get_default_image()
        return func()

    return _infer_image
1384
f6b5b4d7 1385
9f95a23c
TL
def default_image(func):
    """
    Decorator: fill args.image from the monitoring component's pinned
    image (when --name targets one), CEPHADM_IMAGE, or the default.
    """
    @wraps(func)
    def _default_image():
        if not args.image:
            if 'name' in args and args.name:
                type_ = args.name.split('.', 1)[0]
                if type_ in Monitoring.components:
                    args.image = Monitoring.components[type_]['image']
            if not args.image:
                args.image = os.environ.get('CEPHADM_IMAGE')
            if not args.image:
                args.image = _get_default_image()
        return func()

    return _default_image
1402
f6b5b4d7 1403
9f95a23c
TL
def get_last_local_ceph_image():
    """
    :return: The most recent local ceph image (already pulled)
    """
    out, _, _ = call_throws(
        [container_path, 'images',
         '--filter', 'label=ceph=True',
         '--filter', 'dangling=false',
         '--format', '{{.Repository}} {{.Tag}}'])
    for line in out.splitlines():
        fields = line.split()
        if len(fields) == 2:
            image = '{}:{}'.format(fields[0], fields[1])
            logger.info('Using recent ceph image %s' % image)
            return image
    return None
1420
f6b5b4d7 1421
def write_tmp(s, uid, gid):
    # type: (str, int, int) -> Any
    """Write *s* into a NamedTemporaryFile owned by uid:gid; return it open."""
    tmp = tempfile.NamedTemporaryFile(mode='w', prefix='ceph-tmp')
    os.fchown(tmp.fileno(), uid, gid)
    tmp.write(s)
    tmp.flush()
    return tmp
1431
f6b5b4d7 1432
9f95a23c
TL
def makedirs(dir, uid, gid, mode):
    # type: (str, int, int, int) -> None
    """mkdir -p with explicit ownership and mode (applied even if it exists)."""
    if os.path.exists(dir):
        os.chmod(dir, mode)
    else:
        os.makedirs(dir, mode=mode)
    os.chown(dir, uid, gid)
    os.chmod(dir, mode)  # os.makedirs' mode is masked by the umask
1441
f6b5b4d7 1442
9f95a23c
TL
def get_data_dir(fsid, t, n):
    # type: (str, str, Union[int, str]) -> str
    """Return the daemon data dir: <data-dir>/<fsid>/<type>.<id>."""
    return os.path.join(args.data_dir, fsid, '{}.{}'.format(t, n))
1446
f6b5b4d7 1447
9f95a23c
TL
def get_log_dir(fsid):
    # type: (str) -> str
    """Return the cluster log dir: <log-dir>/<fsid>."""
    return os.path.join(args.log_dir, fsid)
1451
f6b5b4d7 1452
9f95a23c
TL
def make_data_dir_base(fsid, uid, gid):
    # type: (str, int, int) -> str
    """Create <data-dir>/<fsid> plus its crash/ and crash/posted/ subdirs."""
    base = os.path.join(args.data_dir, fsid)
    for path in (base,
                 os.path.join(base, 'crash'),
                 os.path.join(base, 'crash', 'posted')):
        makedirs(path, uid, gid, DATA_DIR_MODE)
    return base
1461
f6b5b4d7 1462
def make_data_dir(fsid, daemon_type, daemon_id, uid=None, gid=None):
    # type: (str, str, Union[int, str], Optional[int], Optional[int]) -> str
    """Create (if needed) and return a daemon's data directory."""
    if uid is None or gid is None:
        uid, gid = extract_uid_gid()
    make_data_dir_base(fsid, uid, gid)
    path = get_data_dir(fsid, daemon_type, daemon_id)
    makedirs(path, uid, gid, DATA_DIR_MODE)
    return path
1471
f6b5b4d7 1472
def make_log_dir(fsid, uid=None, gid=None):
    # type: (str, Optional[int], Optional[int]) -> str
    """Create (if needed) and return the cluster's log directory."""
    if uid is None or gid is None:
        uid, gid = extract_uid_gid()
    path = get_log_dir(fsid)
    makedirs(path, uid, gid, LOG_DIR_MODE)
    return path
1480
f6b5b4d7 1481
9f95a23c
TL
def make_var_run(fsid, uid, gid):
    # type: (str, int, int) -> None
    """Create /var/run/ceph/<fsid> (mode 0770, owned by uid:gid)."""
    call_throws(['install', '-d', '-m0770',
                 '-o', str(uid), '-g', str(gid),
                 '/var/run/ceph/%s' % fsid])
1486
f6b5b4d7 1487
def copy_tree(src, dst, uid=None, gid=None):
    # type: (List[str], str, Optional[int], Optional[int]) -> None
    """Copy each directory in *src* to *dst*, chowning everything to uid:gid."""
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid()

    for source in src:
        target = dst
        if os.path.isdir(dst):
            target = os.path.join(dst, os.path.basename(source))

        logger.debug('copy directory \'%s\' -> \'%s\'' % (source, target))
        # remove any stale copy first; dirs_exist_ok needs python 3.8
        shutil.rmtree(target, ignore_errors=True)
        shutil.copytree(source, target)

        for dirpath, dirnames, filenames in os.walk(target):
            logger.debug('chown %s:%s \'%s\'' % (uid, gid, dirpath))
            os.chown(dirpath, uid, gid)
            for filename in filenames:
                logger.debug('chown %s:%s \'%s\'' % (uid, gid, filename))
                os.chown(os.path.join(dirpath, filename), uid, gid)
1511
1512
def copy_files(src, dst, uid=None, gid=None):
    # type: (List[str], str, Optional[int], Optional[int]) -> None
    """Copy each file in *src* to *dst*, chowning the copies to uid:gid."""
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid()

    for source in src:
        target = dst
        if os.path.isdir(dst):
            target = os.path.join(dst, os.path.basename(source))

        logger.debug('copy file \'%s\' -> \'%s\'' % (source, target))
        shutil.copyfile(source, target)

        logger.debug('chown %s:%s \'%s\'' % (uid, gid, target))
        os.chown(target, uid, gid)
1531
f6b5b4d7 1532
def move_files(src, dst, uid=None, gid=None):
    # type: (List[str], str, Optional[int], Optional[int]) -> None
    """Move each file in *src* to *dst*, chowning the results to uid:gid."""
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid()

    for source in src:
        target = dst
        if os.path.isdir(dst):
            target = os.path.join(dst, os.path.basename(source))

        if os.path.islink(source):
            # py2's shutil.move() mishandles symlinks: recreate by hand
            link_target = os.readlink(source)
            logger.debug("symlink '%s' -> '%s'" % (target, link_target))
            os.symlink(link_target, target)
            os.unlink(source)
        else:
            logger.debug("move file '%s' -> '%s'" % (source, target))
            shutil.move(source, target)
        logger.debug('chown %s:%s \'%s\'' % (uid, gid, target))
        os.chown(target, uid, gid)
1557
f6b5b4d7 1558
9f95a23c
TL
1559## copied from distutils ##
## copied from distutils ##
def find_executable(executable, path=None):
    """Tries to find 'executable' in the directories listed in 'path'.
    A string listing directories separated by 'os.pathsep'; defaults to
    os.environ['PATH'].  Returns the complete filename or None if not found.
    """
    _, ext = os.path.splitext(executable)
    if (sys.platform == 'win32') and (ext != '.exe'):
        executable += '.exe'

    # an explicit path to an existing file wins outright
    if os.path.isfile(executable):
        return executable

    if path is None:
        path = os.environ.get('PATH', None)
        if path is None:
            try:
                path = os.confstr("CS_PATH")
            except (AttributeError, ValueError):
                # os.confstr() or CS_PATH is not available
                path = os.defpath
        # bpo-35755: don't use os.defpath when PATH is set to an empty
        # string; PATH='' matches nothing (PATH=':' would search the cwd)

    if not path:
        return None

    for directory in path.split(os.pathsep):
        candidate = os.path.join(directory, executable)
        if os.path.isfile(candidate):
            # the file exists, we have a shot at spawn working
            return candidate
    return None
1594
f6b5b4d7 1595
9f95a23c
TL
def find_program(filename):
    # type: (str) -> str
    """Like find_executable(), but raise ValueError when nothing is found."""
    located = find_executable(filename)
    if located is None:
        raise ValueError('%s not found' % filename)
    return located
1602
f6b5b4d7 1603
9f95a23c
TL
def get_unit_name(fsid, daemon_type, daemon_id=None):
    # type: (str, str, Optional[Union[int, str]]) -> str
    """Return the systemd unit name for a daemon (or a bare daemon type)."""
    if daemon_id is None:
        return 'ceph-%s@%s' % (fsid, daemon_type)
    return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
1611
f6b5b4d7 1612
e306af50
TL
def get_unit_name_by_daemon_name(fsid, name):
    """Look up the systemd unit recorded for a daemon by its name."""
    daemon = get_daemon_description(fsid, name)
    if 'systemd_unit' not in daemon:
        raise Error('Failed to get unit name for {}'.format(daemon))
    return daemon['systemd_unit']
1619
f6b5b4d7 1620
9f95a23c
TL
def check_unit(unit_name):
    # type: (str) -> Tuple[bool, str, bool]
    """Return (enabled, state, installed) for a systemd unit.

    systemctl's exit codes are deliberately ignored: they vary with the
    unit's state, while the textual output is explicit (and sufficient).
    """
    enabled = False
    installed = False
    try:
        out, err, code = call(['systemctl', 'is-enabled', unit_name],
                              verbose_on_failure=False)
        if code == 0:
            enabled = True
            installed = True
        elif "disabled" in out:
            installed = True
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        enabled = False
        installed = False

    state = 'unknown'
    try:
        out, err, code = call(['systemctl', 'is-active', unit_name],
                              verbose_on_failure=False)
        state = {
            'active': 'running',
            'inactive': 'stopped',
            'failed': 'error',
            'auto-restart': 'error',
        }.get(out.strip(), 'unknown')
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        state = 'unknown'
    return (enabled, state, installed)
1658
f6b5b4d7 1659
9f95a23c
TL
def check_units(units, enabler=None):
    # type: (List[str], Optional[Packager]) -> bool
    """Return True if any unit is enabled and running.

    When *enabler* is given, installed-but-not-running units are enabled
    along the way.
    """
    for unit in units:
        enabled, state, installed = check_unit(unit)
        if enabled and state == 'running':
            logger.info('Unit %s is enabled and running' % unit)
            return True
        if enabler is not None and installed:
            logger.info('Enabling unit %s' % unit)
            enabler.enable_service(unit)
    return False
1672
f6b5b4d7 1673
def get_legacy_config_fsid(cluster, legacy_dir=None):
    # type: (str, Optional[str]) -> Optional[str]
    """Read the fsid out of /etc/ceph/<cluster>.conf, if one is present."""
    config_file = '/etc/ceph/%s.conf' % cluster
    if legacy_dir is not None:
        config_file = os.path.abspath(legacy_dir + config_file)

    if not os.path.exists(config_file):
        return None
    config = read_config(config_file)
    if config.has_section('global') and config.has_option('global', 'fsid'):
        return config.get('global', 'fsid')
    return None
1685
f6b5b4d7 1686
def get_legacy_daemon_fsid(cluster, daemon_type, daemon_id, legacy_dir=None):
    # type: (str, str, Union[int, str], Optional[str]) -> Optional[str]
    """Work out the fsid of a pre-cephadm ('legacy') daemon.

    osds keep their fsid in a ceph_fsid file inside the data dir; for
    everything else (or when that file is unreadable) fall back to the
    cluster config file.
    """
    fsid = None
    if daemon_type == 'osd':
        fsid_file = os.path.join(args.data_dir,
                                 daemon_type,
                                 'ceph-%s' % daemon_id,
                                 'ceph_fsid')
        if legacy_dir is not None:
            fsid_file = os.path.abspath(legacy_dir + fsid_file)
        try:
            with open(fsid_file, 'r') as f:
                fsid = f.read().strip()
        except IOError:
            pass
    return fsid or get_legacy_config_fsid(cluster, legacy_dir=legacy_dir)
1705
f6b5b4d7 1706
9f95a23c
TL
def get_daemon_args(fsid, daemon_type, daemon_id):
    # type: (str, str, Union[int, str]) -> List[str]
    """Build the extra command-line arguments for a containerized daemon."""
    daemon_args = []  # type: List[str]

    if daemon_type in Ceph.daemons and daemon_type != 'crash':
        # log to stderr so the container runtime captures output
        daemon_args += [
            '--setuser', 'ceph',
            '--setgroup', 'ceph',
            '--default-log-to-file=false',
            '--default-log-to-stderr=true',
            '--default-log-stderr-prefix="debug "',
        ]
        if daemon_type == 'mon':
            daemon_args += [
                '--default-mon-cluster-log-to-file=false',
                '--default-mon-cluster-log-to-stderr=true',
            ]
    elif daemon_type in Monitoring.components:
        metadata = Monitoring.components[daemon_type]
        daemon_args += metadata.get('args', list())
        if daemon_type == 'alertmanager':
            config = get_parm(args.config_json)
            for peer in config.get('peers', list()):  # type: ignore
                daemon_args += ["--cluster.peer={}".format(peer)]
            # some alertmanager, by default, look elsewhere for a config
            daemon_args += ["--config.file=/etc/alertmanager/alertmanager.yml"]
    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
        daemon_args += nfs_ganesha.get_daemon_args()
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(fsid, daemon_id)
        daemon_args.extend(cc.get_daemon_args())

    return daemon_args
1742
f6b5b4d7 1743
def create_daemon_dirs(fsid, daemon_type, daemon_id, uid, gid,
                       config=None, keyring=None):
    # type: (str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None
    """Create a daemon's data/log directories and write its config/keyring.

    Monitoring daemons additionally get their component-specific directory
    layout populated from the --config-json payload; nfs-ganesha, iscsi
    and custom containers delegate to their own helpers.
    """
    data_dir = make_data_dir(fsid, daemon_type, daemon_id, uid=uid, gid=gid)
    make_log_dir(fsid, uid=uid, gid=gid)

    def write_private(path, content):
        # owner-only (0600) file, chowned via the open fd before writing
        with open(path, 'w') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(content)

    if config:
        write_private(os.path.join(data_dir, 'config'), config)
    if keyring:
        write_private(os.path.join(data_dir, 'keyring'), keyring)

    if daemon_type in Monitoring.components.keys():
        config_json = get_parm(args.config_json)  # type: Dict[str, Any]
        required_files = Monitoring.components[daemon_type].get(
            'config-json-files', list())

        # set up directories specific to the monitoring component
        config_dir = ''
        if daemon_type == 'prometheus':
            data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
            config_dir = 'etc/prometheus'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'alerting'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
        elif daemon_type == 'grafana':
            data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
            config_dir = 'etc/grafana'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
        elif daemon_type == 'alertmanager':
            data_dir_root = get_data_dir(fsid, daemon_type, daemon_id)
            config_dir = 'etc/alertmanager'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755)

        # populate the config directory for the component from the config-json
        for fname in required_files:
            if 'files' in config_json:  # type: ignore
                content = dict_get_join(config_json['files'], fname)
                write_private(os.path.join(data_dir_root, config_dir, fname),
                              content)

    elif daemon_type == NFSGanesha.daemon_type:
        NFSGanesha.init(fsid, daemon_id).create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CephIscsi.daemon_type:
        CephIscsi.init(fsid, daemon_id).create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CustomContainer.daemon_type:
        CustomContainer.init(fsid, daemon_id).create_daemon_dirs(data_dir, uid, gid)
1809
f6b5b4d7 1810
9f95a23c
TL
def get_parm(option):
    # type: (str) -> Dict[str, str]
    """Resolve a config-json option into a dict.

    *option* may be '-' (read stdin once and cache it), an inline JSON
    string, or the path of a JSON file.
    """
    if not option:
        return dict()

    global cached_stdin
    if option == '-':
        if cached_stdin is not None:
            j = cached_stdin
        else:
            try:
                j = injected_stdin  # type: ignore
            except NameError:
                j = sys.stdin.read()
            cached_stdin = j
    elif option[0] == '{' and option[-1] == '}':
        # inline json string
        j = option
    elif os.path.exists(option):
        # json file
        with open(option, 'r') as f:
            j = f.read()
    else:
        raise Error("Config file {} not found".format(option))

    try:
        return json.loads(j)
    except ValueError as e:
        raise Error("Invalid JSON in {}: {}".format(option, e))
1844
f6b5b4d7 1845
def get_config_and_keyring():
    # type: () -> Tuple[Optional[str], Optional[str]]
    """Assemble (config, keyring) text from the CLI arguments.

    --config-json supplies both; an explicit --config file overrides the
    config, and --key (inline) or --keyring (file) overrides the keyring.
    """
    config = None
    keyring = None

    if 'config_json' in args and args.config_json:
        d = get_parm(args.config_json)
        config = d.get('config')
        keyring = d.get('keyring')

    if 'config' in args and args.config:
        with open(args.config, 'r') as f:
            config = f.read()

    if 'key' in args and args.key:
        keyring = '[%s]\n\tkey = %s\n' % (args.name, args.key)
    elif 'keyring' in args and args.keyring:
        with open(args.keyring, 'r') as f:
            keyring = f.read()

    return config, keyring
1867
1868
def get_container_binds(fsid, daemon_type, daemon_id):
    # type: (str, str, Union[int, str, None]) -> List[List[str]]
    """Return --mount style bind specs for the given daemon, if any."""
    binds = []  # type: List[List[str]]

    if daemon_type == CephIscsi.daemon_type:
        binds.extend(CephIscsi.get_container_binds())
    elif daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(fsid, daemon_id)
        binds.extend(
            cc.get_container_binds(get_data_dir(fsid, daemon_type, daemon_id)))

    return binds
1882
9f95a23c
TL
1883
def get_container_mounts(fsid, daemon_type, daemon_id,
                         no_config=False):
    # type: (str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str]
    """Map host paths -> container paths for a daemon's bind mounts."""
    mounts = dict()

    if daemon_type in Ceph.daemons and fsid:
        run_path = os.path.join('/var/run/ceph', fsid)
        if os.path.exists(run_path):
            mounts[run_path] = '/var/run/ceph:z'
        mounts[get_log_dir(fsid)] = '/var/log/ceph:z'
        crash_dir = '/var/lib/ceph/%s/crash' % fsid
        if os.path.exists(crash_dir):
            mounts[crash_dir] = '/var/lib/ceph/crash:z'

    if daemon_type in Ceph.daemons and daemon_id:
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        if daemon_type == 'rgw':
            cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id)
        else:
            cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id)
        if daemon_type != 'crash':
            mounts[data_dir] = cdata_dir + ':z'
        if not no_config:
            mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
        if daemon_type in ('rbd-mirror', 'crash'):
            # these do not search for their keyrings in a data directory
            mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id)

    if daemon_type in ['mon', 'osd']:
        mounts['/dev'] = '/dev'  # FIXME: narrow this down?
        mounts['/run/udev'] = '/run/udev'
    if daemon_type == 'osd':
        mounts['/sys'] = '/sys'  # for numa.cc, pick_address, cgroups, ...
        mounts['/run/lvm'] = '/run/lvm'
        mounts['/run/lock/lvm'] = '/run/lock/lvm'

    try:
        # make manager modules / ceph-volume development easy
        if args.shared_ceph_folder:
            ceph_folder = pathify(args.shared_ceph_folder)
            if os.path.exists(ceph_folder):
                mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
                mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
                mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
                mounts[ceph_folder + '/monitoring/grafana/dashboards'] = '/etc/grafana/dashboards/ceph-dashboard'
                mounts[ceph_folder + '/monitoring/prometheus/alerts'] = '/etc/prometheus/ceph'
            else:
                logger.error('{}{}{}'.format(termcolor.red,
                             'Ceph shared source folder does not exist.',
                             termcolor.end))
    except AttributeError:
        # args has no shared_ceph_folder for this subcommand
        pass

    if daemon_type in Monitoring.components and daemon_id:
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        if daemon_type == 'prometheus':
            mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z'
            mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z'
        elif daemon_type == 'node-exporter':
            mounts['/proc'] = '/host/proc:ro'
            mounts['/sys'] = '/host/sys:ro'
            mounts['/'] = '/rootfs:ro'
        elif daemon_type == "grafana":
            mounts[os.path.join(data_dir, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
        elif daemon_type == 'alertmanager':
            mounts[os.path.join(data_dir, 'etc/alertmanager')] = '/etc/alertmanager:Z'

    if daemon_type == NFSGanesha.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
        mounts.update(nfs_ganesha.get_container_mounts(data_dir))

    if daemon_type == CephIscsi.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        mounts.update(CephIscsi.get_container_mounts(data_dir,
                                                     get_log_dir(fsid)))

    if daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(fsid, daemon_id)
        data_dir = get_data_dir(fsid, daemon_type, daemon_id)
        mounts.update(cc.get_container_mounts(data_dir))

    return mounts
1973
f6b5b4d7 1974
f91f0fd5
TL
def get_container(fsid: str, daemon_type: str, daemon_id: Union[int, str],
                  privileged: bool = False,
                  ptrace: bool = False,
                  container_args: Optional[List[str]] = None) -> 'CephContainer':
    """Build the CephContainer object used to run one daemon.

    Selects the entrypoint, auth entity name, env vars and container flags
    based on daemon_type.  Note that the flags accumulate across several
    independent if/elif chains below, so ordering matters: e.g. 'mon' and
    'osd' first force privileged mode, then also match the generic
    ceph-<type> entrypoint branch further down.
    """
    entrypoint: str = ''
    name: str = ''             # ceph auth entity name (passed via -n)
    ceph_args: List[str] = []  # extra args for ceph daemons only
    envs: List[str] = []
    host_network: bool = True

    if container_args is None:
        container_args = []
    if daemon_type in ['mon', 'osd']:
        # mon and osd need privileged in order for libudev to query devices
        privileged = True
    if daemon_type == 'rgw':
        entrypoint = '/usr/bin/radosgw'
        name = 'client.rgw.%s' % daemon_id
    elif daemon_type == 'rbd-mirror':
        entrypoint = '/usr/bin/rbd-mirror'
        name = 'client.rbd-mirror.%s' % daemon_id
    elif daemon_type == 'crash':
        entrypoint = '/usr/bin/ceph-crash'
        name = 'client.crash.%s' % daemon_id
    elif daemon_type in ['mon', 'mgr', 'mds', 'osd']:
        entrypoint = '/usr/bin/ceph-' + daemon_type
        name = '%s.%s' % (daemon_type, daemon_id)
    elif daemon_type in Monitoring.components:
        # monitoring images define their own entrypoint
        entrypoint = ''
    elif daemon_type == NFSGanesha.daemon_type:
        entrypoint = NFSGanesha.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        envs.extend(NFSGanesha.get_container_envs())
    elif daemon_type == CephIscsi.daemon_type:
        entrypoint = CephIscsi.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        # So the container can modprobe iscsi_target_mod and have write perms
        # to configfs we need to make this a privileged container.
        privileged = True
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(fsid, daemon_id)
        entrypoint = cc.entrypoint
        # custom containers manage their own networking
        host_network = False
        envs.extend(cc.get_container_envs())
        container_args.extend(cc.get_container_args())

    if daemon_type in Monitoring.components:
        # monitoring containers run as the uid/gid baked into the image
        uid, gid = extract_uid_gid_monitoring(daemon_type)
        monitoring_args = [
            '--user',
            str(uid),
            # FIXME: disable cpu/memory limits for the time being (not supported
            # by ubuntu 18.04 kernel!)
        ]
        container_args.extend(monitoring_args)
    elif daemon_type == 'crash':
        ceph_args = ['-n', name]
    elif daemon_type in Ceph.daemons:
        ceph_args = ['-n', name, '-f']

    # if using podman, set -d, --conmon-pidfile & --cidfile flags
    # so service can have Type=Forking
    if 'podman' in container_path:
        runtime_dir = '/run'
        container_args.extend(['-d',
            '--conmon-pidfile',
            runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, daemon_id),
            '--cidfile',
            runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, daemon_id)])

    return CephContainer(
        image=args.image,
        entrypoint=entrypoint,
        args=ceph_args + get_daemon_args(fsid, daemon_type, daemon_id),
        container_args=container_args,
        volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
        bind_mounts=get_container_binds(fsid, daemon_type, daemon_id),
        cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id),
        envs=envs,
        privileged=privileged,
        ptrace=ptrace,
        init=args.container_init,
        host_network=host_network,
    )
2059
f6b5b4d7 2060
def extract_uid_gid(img='', file_path='/var/lib/ceph'):
    # type: (str, Union[str, List[str]]) -> Tuple[int, int]
    """Return the (uid, gid) owning *file_path* inside the container image.

    *file_path* may be a single path or a list of candidate paths; the
    first one that can be stat'ed wins.  Raises RuntimeError when no
    candidate can be stat'ed at all.
    """
    image = img or args.image
    candidates = [file_path] if isinstance(file_path, str) else file_path

    for candidate in candidates:
        stat_container = CephContainer(
            image=image,
            entrypoint='stat',
            args=['-c', '%u %g', candidate]
        )
        try:
            owner = stat_container.run()
        except RuntimeError:
            # path missing in the image -- try the next candidate
            continue
        uid_str, gid_str = owner.split(' ')
        return int(uid_str), int(gid_str)
    raise RuntimeError('uid/gid not found')
2084
9f95a23c
TL
2085
def deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid,
                  config=None, keyring=None,
                  osd_fsid=None,
                  reconfig=False,
                  ports=None):
    # type: (str, str, Union[int, str], CephContainer, int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None
    """Create (or reconfigure) one daemon: data dir, config/keyring,
    systemd units, and firewall openings.

    Raises Error when a required TCP port is already in use, or when
    reconfig is requested for a daemon whose data dir does not exist.
    """
    ports = ports or []
    if any([port_in_use(port) for port in ports]):
        raise Error("TCP Port(s) '{}' required for {} already in use".format(",".join(map(str, ports)), daemon_type))

    data_dir = get_data_dir(fsid, daemon_type, daemon_id)
    if reconfig and not os.path.exists(data_dir):
        raise Error('cannot reconfig, data path %s does not exist' % data_dir)
    # first-time mon deployment needs a ceph-mon --mkfs run before the
    # daemon can start; other daemon types only need dirs + config files
    if daemon_type == 'mon' and not os.path.exists(data_dir):
        assert config
        assert keyring
        # tmp keyring file
        tmp_keyring = write_tmp(keyring, uid, gid)

        # tmp config file
        tmp_config = write_tmp(config, uid, gid)

        # --mkfs
        create_daemon_dirs(fsid, daemon_type, daemon_id, uid, gid)
        mon_dir = get_data_dir(fsid, 'mon', daemon_id)
        log_dir = get_log_dir(fsid)
        out = CephContainer(
            image=args.image,
            entrypoint='/usr/bin/ceph-mon',
            args=['--mkfs',
                  '-i', str(daemon_id),
                  '--fsid', fsid,
                  '-c', '/tmp/config',
                  '--keyring', '/tmp/keyring',
            ] + get_daemon_args(fsid, 'mon', daemon_id),
            volume_mounts={
                log_dir: '/var/log/ceph:z',
                mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id),
                tmp_keyring.name: '/tmp/keyring:z',
                tmp_config.name: '/tmp/config:z',
            },
        ).run()

        # write conf
        with open(mon_dir + '/config', 'w') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config)
    else:
        # dirs, conf, keyring
        create_daemon_dirs(
            fsid, daemon_type, daemon_id,
            uid, gid,
            config, keyring)

    if not reconfig:
        deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
                            osd_fsid=osd_fsid)

    # unit.created marks the first deployment; only written once
    if not os.path.exists(data_dir + '/unit.created'):
        with open(data_dir + '/unit.created', 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write('mtime is time the daemon deployment was created\n')

    # unit.configured is rewritten on every (re)configure
    with open(data_dir + '/unit.configured', 'w') as f:
        f.write('mtime is time we were last configured\n')
        os.fchmod(f.fileno(), 0o600)
        os.fchown(f.fileno(), uid, gid)

    update_firewalld(daemon_type)

    # Open ports explicitly required for the daemon
    if ports:
        fw = Firewalld()
        fw.open_ports(ports)
        fw.apply_rules()

    if reconfig and daemon_type not in Ceph.daemons:
        # ceph daemons do not need a restart; others (presumably) do to pick
        # up the new config
        call_throws(['systemctl', 'reset-failed',
                     get_unit_name(fsid, daemon_type, daemon_id)])
        call_throws(['systemctl', 'restart',
                     get_unit_name(fsid, daemon_type, daemon_id)])
2172
f6b5b4d7
TL
2173def _write_container_cmd_to_bash(file_obj, container, comment=None, background=False):
2174 # type: (IO[str], CephContainer, Optional[str], Optional[bool]) -> None
2175 if comment:
f91f0fd5 2176 # Sometimes adding a comment, especially if there are multiple containers in one
f6b5b4d7
TL
2177 # unit file, makes it easier to read and grok.
2178 file_obj.write('# ' + comment + '\n')
2179 # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
2180 file_obj.write('! '+ ' '.join(container.rm_cmd()) + '\n')
2181 # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
2182 if 'podman' in container_path:
2183 file_obj.write('! '+ ' '.join(container.rm_cmd(storage=True)) + '\n')
2184
2185 # container run command
2186 file_obj.write(' '.join(container.run_cmd()) + (' &' if background else '') + '\n')
2187
f91f0fd5 2188
9f95a23c
TL
def deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True, start=True,
                        osd_fsid=None):
    # type: (str, int, int, str, Union[int, str], CephContainer, bool, bool, Optional[str]) -> None
    """Write unit.run/unit.poststop/unit.image for the daemon, install the
    systemd unit files, and (optionally) enable and start the unit.

    All generated files are written as <name>.new and renamed into place so
    readers never observe a partially-written file.
    """
    # cmd
    data_dir = get_data_dir(fsid, daemon_type, daemon_id)
    with open(data_dir + '/unit.run.new', 'w') as f:
        f.write('set -e\n')

        if daemon_type in Ceph.daemons:
            # ceph daemons need the admin-socket dir to exist and be writable
            install_path = find_program('install')
            f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))

        # pre-start cmd(s)
        if daemon_type == 'osd':
            # osds have a pre-start step
            assert osd_fsid
            # OSDs adopted from ceph-volume "simple" mode only need a chown;
            # LVM OSDs need a full `ceph-volume lvm activate`
            simple_fn = os.path.join('/etc/ceph/osd',
                                     '%s-%s.json.adopted-by-cephadm' % (daemon_id, osd_fsid))
            if os.path.exists(simple_fn):
                f.write('# Simple OSDs need chown on startup:\n')
                for n in ['block', 'block.db', 'block.wal']:
                    p = os.path.join(data_dir, n)
                    f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
            else:
                prestart = CephContainer(
                    image=args.image,
                    entrypoint='/usr/sbin/ceph-volume',
                    args=[
                        'lvm', 'activate',
                        str(daemon_id), osd_fsid,
                        '--no-systemd'
                    ],
                    privileged=True,
                    volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
                    bind_mounts=get_container_binds(fsid, daemon_type, daemon_id),
                    cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
                )
                _write_container_cmd_to_bash(f, prestart, 'LVM OSDs use ceph-volume lvm activate')
        elif daemon_type == NFSGanesha.daemon_type:
            # add nfs to the rados grace db
            nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
            prestart = nfs_ganesha.get_rados_grace_container('add')
            _write_container_cmd_to_bash(f, prestart, 'add daemon to rados grace')
        elif daemon_type == CephIscsi.daemon_type:
            # mount configfs, then run tcmu-runner in the background
            # alongside the iscsi gateway container
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
            ceph_iscsi = CephIscsi.init(fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            _write_container_cmd_to_bash(f, tcmu_container, 'iscsi tcmu-runnter container', background=True)

        # the daemon container itself
        _write_container_cmd_to_bash(f, c, '%s.%s' % (daemon_type, str(daemon_id)))
        os.fchmod(f.fileno(), 0o600)
        os.rename(data_dir + '/unit.run.new',
                  data_dir + '/unit.run')

    # post-stop command(s)
    with open(data_dir + '/unit.poststop.new', 'w') as f:
        if daemon_type == 'osd':
            assert osd_fsid
            poststop = CephContainer(
                image=args.image,
                entrypoint='/usr/sbin/ceph-volume',
                args=[
                    'lvm', 'deactivate',
                    str(daemon_id), osd_fsid,
                ],
                privileged=True,
                volume_mounts=get_container_mounts(fsid, daemon_type, daemon_id),
                bind_mounts=get_container_binds(fsid, daemon_type, daemon_id),
                cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type,
                                                    daemon_id),
            )
            _write_container_cmd_to_bash(f, poststop, 'deactivate osd')
        elif daemon_type == NFSGanesha.daemon_type:
            # remove nfs from the rados grace db
            nfs_ganesha = NFSGanesha.init(fsid, daemon_id)
            poststop = nfs_ganesha.get_rados_grace_container('remove')
            _write_container_cmd_to_bash(f, poststop, 'remove daemon from rados grace')
        elif daemon_type == CephIscsi.daemon_type:
            # make sure we also stop the tcmu container
            ceph_iscsi = CephIscsi.init(fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            f.write('! '+ ' '.join(tcmu_container.stop_cmd()) + '\n')
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
        os.fchmod(f.fileno(), 0o600)
        os.rename(data_dir + '/unit.poststop.new',
                  data_dir + '/unit.poststop')

    # record which image this daemon was deployed with
    with open(data_dir + '/unit.image.new', 'w') as f:
        f.write(c.image + '\n')
        os.fchmod(f.fileno(), 0o600)
        os.rename(data_dir + '/unit.image.new',
                  data_dir + '/unit.image')

    # systemd
    install_base_units(fsid)
    unit = get_unit_file(fsid)
    unit_file = 'ceph-%s@.service' % (fsid)
    with open(args.unit_dir + '/' + unit_file + '.new', 'w') as f:
        f.write(unit)
        os.rename(args.unit_dir + '/' + unit_file + '.new',
                  args.unit_dir + '/' + unit_file)
    call_throws(['systemctl', 'daemon-reload'])

    # stop/reset-failed any previous incarnation before (re)starting
    unit_name = get_unit_name(fsid, daemon_type, daemon_id)
    call(['systemctl', 'stop', unit_name],
         verbose_on_failure=False)
    call(['systemctl', 'reset-failed', unit_name],
         verbose_on_failure=False)
    if enable:
        call_throws(['systemctl', 'enable', unit_name])
    if start:
        call_throws(['systemctl', 'start', unit_name])
2302
9f95a23c 2303
f6b5b4d7
TL
2304
class Firewalld(object):
    """Small wrapper around firewall-cmd for opening Ceph services/ports.

    All operations degrade to debug-logged no-ops when firewalld is not
    installed, not enabled, or not running.
    """

    def __init__(self):
        # type: () -> None
        self.available = self.check()

    def check(self):
        # type: () -> bool
        """Return True when firewall-cmd exists and firewalld.service is
        enabled and running."""
        self.cmd = find_executable('firewall-cmd')
        if not self.cmd:
            logger.debug('firewalld does not appear to be present')
            return False
        enabled, state, _ = check_unit('firewalld.service')
        if not enabled:
            logger.debug('firewalld.service is not enabled')
            return False
        if state != "running":
            logger.debug('firewalld.service is not running')
            return False
        logger.info("firewalld ready")
        return True

    def enable_service_for(self, daemon_type):
        # type: (str) -> None
        """Permanently enable the firewalld service matching daemon_type
        (if there is one) in the current zone."""
        if not self.available:
            logger.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type)
            return

        # daemon type -> firewalld service name; unknown types are a no-op
        service_map = {
            'mon': 'ceph-mon',
            'mgr': 'ceph',
            'mds': 'ceph',
            'osd': 'ceph',
            NFSGanesha.daemon_type: 'nfs',
        }
        svc = service_map.get(daemon_type)
        if svc is None:
            return

        _, err, ret = call([self.cmd, '--permanent', '--query-service', svc], verbose_on_failure=False)
        if not ret:
            logger.debug('firewalld service %s is enabled in current zone' % svc)
            return
        logger.info('Enabling firewalld service %s in current zone...' % svc)
        _, err, ret = call([self.cmd, '--permanent', '--add-service', svc])
        if ret:
            raise RuntimeError(
                'unable to add service %s to current zone: %s' % (svc, err))

    def open_ports(self, fw_ports):
        # type: (List[int]) -> None
        """Permanently open each TCP port in fw_ports in the current zone."""
        if not self.available:
            logger.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports)
            return

        for port in fw_ports:
            tcp_port = '{}/tcp'.format(port)
            _, err, ret = call([self.cmd, '--permanent', '--query-port', tcp_port], verbose_on_failure=False)
            if not ret:
                logger.debug('firewalld port %s is enabled in current zone' % tcp_port)
                continue
            logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
            _, err, ret = call([self.cmd, '--permanent', '--add-port', tcp_port])
            if ret:
                raise RuntimeError('unable to add port %s to current zone: %s' %
                                   (tcp_port, err))

    def apply_rules(self):
        # type: () -> None
        """Reload firewalld so the --permanent changes take effect."""
        if not self.available:
            return
        call_throws([self.cmd, '--reload'])
2376
2377
def update_firewalld(daemon_type):
    # type: (str) -> None
    """Open the firewalld service for daemon_type plus any monitoring ports
    it publishes, then reload the firewall."""
    fw = Firewalld()
    fw.enable_service_for(daemon_type)
    # prometheus etc: monitoring daemons list their ports in port_map
    monitoring_ports = list(Monitoring.port_map.get(daemon_type, []))
    fw.open_ports(monitoring_ports)
    fw.apply_rules()
9f95a23c
TL
2391
def install_base_units(fsid):
    # type: (str) -> None
    """
    Set up ceph.target and ceph-$fsid.target units.
    """
    # global unit
    existed = os.path.exists(args.unit_dir + '/ceph.target')
    with open(args.unit_dir + '/ceph.target.new', 'w') as f:
        f.write('[Unit]\n'
                'Description=All Ceph clusters and services\n'
                '\n'
                '[Install]\n'
                'WantedBy=multi-user.target\n')
        os.rename(args.unit_dir + '/ceph.target.new',
                  args.unit_dir + '/ceph.target')
    if not existed:
        # we disable before enable in case a different ceph.target
        # (from the traditional package) is present; while newer
        # systemd is smart enough to disable the old
        # (/lib/systemd/...) and enable the new (/etc/systemd/...),
        # some older versions of systemd error out with EEXIST.
        call_throws(['systemctl', 'disable', 'ceph.target'])
        call_throws(['systemctl', 'enable', 'ceph.target'])
        call_throws(['systemctl', 'start', 'ceph.target'])

    # cluster unit
    existed = os.path.exists(args.unit_dir + '/ceph-%s.target' % fsid)
    with open(args.unit_dir + '/ceph-%s.target.new' % fsid, 'w') as f:
        f.write('[Unit]\n'
                'Description=Ceph cluster {fsid}\n'
                'PartOf=ceph.target\n'
                'Before=ceph.target\n'
                '\n'
                '[Install]\n'
                'WantedBy=multi-user.target ceph.target\n'.format(
                    fsid=fsid)
                )
        os.rename(args.unit_dir + '/ceph-%s.target.new' % fsid,
                  args.unit_dir + '/ceph-%s.target' % fsid)
    if not existed:
        call_throws(['systemctl', 'enable', 'ceph-%s.target' % fsid])
        call_throws(['systemctl', 'start', 'ceph-%s.target' % fsid])

    # logrotate for the cluster
    with open(args.logrotate_dir + '/ceph-%s' % fsid, 'w') as f:
        """
        This is a bit sloppy in that the killall/pkill will touch all ceph daemons
        in all containers, but I don't see an elegant way to send SIGHUP *just* to
        the daemons for this cluster.  (1) systemd kill -s will get the signal to
        podman, but podman will exit.  (2) podman kill will get the signal to the
        first child (bash), but that isn't the ceph daemon.  This is simpler and
        should be harmless.
        """
        f.write("""# created by cephadm
/var/log/ceph/%s/*.log {
    rotate 7
    daily
    compress
    sharedscripts
    postrotate
        killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror || pkill -1 -x "ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror" || true
    endscript
    missingok
    notifempty
    su root root
}
""" % fsid)
2459
f6b5b4d7 2460
1911f103
TL
def get_unit_file(fsid):
    # type: (str) -> str
    """Return the templated ceph-<fsid>@.service systemd unit content.

    Under podman the container detaches (-d, see get_container), so the
    unit runs with Type=forking and tracks the conmon pid/cid files.
    """
    extra_args = ''
    if 'podman' in container_path:
        extra_args = ('ExecStartPre=-/bin/rm -f /%t/%n-pid /%t/%n-cid\n'
                      'ExecStopPost=-/bin/rm -f /%t/%n-pid /%t/%n-cid\n'
                      'Type=forking\n'
                      'PIDFile=/%t/%n-pid\n')

    u = """# generated by cephadm
[Unit]
Description=Ceph %i for {fsid}

# According to:
# http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
# these can be removed once ceph-mon will dynamically change network
# configuration.
After=network-online.target local-fs.target time-sync.target
Wants=network-online.target local-fs.target time-sync.target

PartOf=ceph-{fsid}.target
Before=ceph-{fsid}.target

[Service]
LimitNOFILE=1048576
LimitNPROC=1048576
EnvironmentFile=-/etc/environment
ExecStartPre=-{container_path} rm ceph-{fsid}-%i
ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
ExecStop=-{container_path} stop ceph-{fsid}-%i
ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
KillMode=none
Restart=on-failure
RestartSec=10s
TimeoutStartSec=120
TimeoutStopSec=120
StartLimitInterval=30min
StartLimitBurst=5
{extra_args}
[Install]
WantedBy=ceph-{fsid}.target
""".format(
        container_path=container_path,
        fsid=fsid,
        data_dir=args.data_dir,
        extra_args=extra_args)

    return u
2509
2510##################################
2511
f6b5b4d7 2512
9f95a23c
TL
class CephContainer:
    """Describes one podman/docker container and builds the command lines
    (run/shell/exec/rm/stop) needed to manage it."""

    def __init__(self,
                 image: str,
                 entrypoint: str,
                 args: Optional[List[str]] = None,
                 volume_mounts: Optional[Dict[str, str]] = None,
                 cname: str = '',
                 container_args: Optional[List[str]] = None,
                 envs: Optional[List[str]] = None,
                 privileged: bool = False,
                 ptrace: bool = False,
                 bind_mounts: Optional[List[List[str]]] = None,
                 init: bool = False,
                 host_network: bool = True,
                 ) -> None:
        self.image = image
        self.entrypoint = entrypoint
        # NOTE: args/volume_mounts/container_args previously used mutable
        # default arguments ([] / {}), which are shared between every call
        # relying on the default; use None sentinels so each instance gets
        # a fresh container.  Behavior for callers is unchanged.
        self.args = args if args is not None else []
        self.volume_mounts = volume_mounts if volume_mounts is not None else {}
        self.cname = cname
        self.container_args = container_args if container_args is not None else []
        self.envs = envs
        self.privileged = privileged
        self.ptrace = ptrace
        self.bind_mounts = bind_mounts if bind_mounts else []
        self.init = init
        self.host_network = host_network

    def run_cmd(self) -> List[str]:
        """Return the full `<engine> run ...` argv for this container."""
        cmd_args: List[str] = [
            str(container_path),
            'run',
            '--rm',
            '--ipc=host',
        ]
        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        vols: List[str] = []
        binds: List[str] = []

        if self.host_network:
            cmd_args.append('--net=host')
        if self.entrypoint:
            cmd_args.extend(['--entrypoint', self.entrypoint])
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk'])
        if self.ptrace and not self.privileged:
            # if privileged, the SYS_PTRACE cap is already added
            # in addition, --cap-add and --privileged are mutually
            # exclusive since podman >= 2.0
            cmd_args.append('--cap-add=SYS_PTRACE')
        if self.init:
            cmd_args.append('--init')
        if self.cname:
            cmd_args.extend(['--name', self.cname])
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        binds = sum([['--mount', '{}'.format(','.join(bind))]
                     for bind in self.bind_mounts], [])

        return cmd_args + self.container_args + envs + vols + binds + [
            self.image,
        ] + self.args  # type: ignore

    def shell_cmd(self, cmd: List[str]) -> List[str]:
        """Return a `run` argv using cmd[0] as entrypoint and cmd[1:] as args
        (used for interactive/one-shot shells)."""
        cmd_args: List[str] = [
            str(container_path),
            'run',
            '--rm',
            '--ipc=host',
        ]
        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        vols: List[str] = []
        binds: List[str] = []

        if self.host_network:
            cmd_args.append('--net=host')
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk',
            ])
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        binds = sum([['--mount', '{}'.format(','.join(bind))]
                     for bind in self.bind_mounts], [])

        return cmd_args + self.container_args + envs + vols + binds + [
            '--entrypoint', cmd[0],
            self.image,
        ] + cmd[1:]

    def exec_cmd(self, cmd):
        # type: (List[str]) -> List[str]
        """Return an `exec` argv that runs cmd inside the running container."""
        return [
            str(container_path),
            'exec',
        ] + self.container_args + [
            self.cname,
        ] + cmd

    def rm_cmd(self, storage=False):
        # type: (bool) -> List[str]
        """Return a forced-remove argv; storage=True adds podman's --storage."""
        ret = [
            str(container_path),
            'rm', '-f',
        ]
        if storage:
            ret.append('--storage')
        ret.append(self.cname)
        return ret

    def stop_cmd(self):
        # type: () -> List[str]
        """Return the argv that stops this container by name."""
        ret = [
            str(container_path),
            'stop', self.cname,
        ]
        return ret

    def run(self, timeout=DEFAULT_TIMEOUT):
        # type: (Optional[int]) -> str
        """Run the container to completion and return its stdout; raises on
        a non-zero exit (via call_throws)."""
        out, _, _ = call_throws(
            self.run_cmd(), desc=self.entrypoint, timeout=timeout)
        return out
2657
2658##################################
2659
f6b5b4d7 2660
9f95a23c
TL
@infer_image
def command_version():
    # type: () -> int
    """Print the version reported by `ceph --version` in args.image."""
    version_out = CephContainer(args.image, 'ceph', ['--version']).run()
    print(version_out.strip())
    return 0
2667
2668##################################
2669
f6b5b4d7 2670
9f95a23c
TL
@infer_image
def command_pull():
    # type: () -> int
    """Pull the (possibly inferred) args.image, then print its inspect info."""

    _pull_image(args.image)
    # reuse inspect-image for the user-visible report of what was pulled
    return command_inspect_image()
2677
f6b5b4d7
TL
2678
2679def _pull_image(image):
2680 # type: (str) -> None
2681 logger.info('Pulling container image %s...' % image)
2682
2683 ignorelist = [
2684 "error creating read-write layer with ID",
2685 "net/http: TLS handshake timeout",
2686 "Digest did not match, expected",
2687 ]
2688
2689 cmd = [container_path, 'pull', image]
2690 cmd_str = ' '.join(cmd)
2691
2692 for sleep_secs in [1, 4, 25]:
2693 out, err, ret = call(cmd)
2694 if not ret:
2695 return
2696
2697 if not any(pattern in err for pattern in ignorelist):
2698 raise RuntimeError('Failed command: %s' % cmd_str)
2699
2700 logger.info('"%s failed transiently. Retrying. waiting %s seconds...' % (cmd_str, sleep_secs))
2701 time.sleep(sleep_secs)
2702
2703 raise RuntimeError('Failed command: %s: maximum retries reached' % cmd_str)
9f95a23c
TL
2704##################################
2705
f6b5b4d7 2706
9f95a23c
TL
@infer_image
def command_inspect_image():
    # type: () -> int
    """Print a JSON report (image id, repo digest, ceph version) for args.image."""
    inspect_cmd = [
        container_path, 'inspect',
        '--format', '{{.ID}},{{json .RepoDigests}}',
        args.image,
    ]
    out, err, ret = call_throws(inspect_cmd)
    if ret:
        return errno.ENOENT

    image_info = get_image_info_from_inspect(out.strip(), args.image)
    # augment with the ceph version the image actually ships
    image_info['ceph_version'] = CephContainer(
        args.image, 'ceph', ['--version']).run().strip()

    print(json.dumps(image_info, indent=4, sort_keys=True))
    return 0
2723
2724
def get_image_info_from_inspect(out, image):
    # type: (str, str) -> Dict[str, str]
    """Parse `<engine> inspect --format '{{.ID}},{{json .RepoDigests}}'` output.

    Returns a dict with 'image_id' and, when the image has digests,
    'repo_digest'.  Raises Error when the inspect output is empty.
    """
    # Check for emptiness *before* splitting: ''.split(',', 1) yields ['']
    # and the tuple unpacking would raise ValueError instead of the
    # intended Error (the original code split first).
    if not out:
        raise Error('inspect {}: empty result'.format(image))
    image_id, digests = out.split(',', 1)
    r = {
        'image_id': normalize_container_id(image_id)
    }
    if digests:
        json_digests = json.loads(digests)
        if json_digests:
            r['repo_digest'] = json_digests[0]
    return r
2738
9f95a23c
TL
2739
2740##################################
2741
f91f0fd5 2742
f6b5b4d7
TL
def unwrap_ipv6(address):
    # type: (str) -> str
    """Strip surrounding square brackets from a bracketed IPv6 address."""
    is_bracketed = address.startswith('[') and address.endswith(']')
    return address[1:-1] if is_bracketed else address
2748
2749
f91f0fd5
TL
def wrap_ipv6(address):
    # type: (str) -> str
    """Wrap *address* in square brackets if it is a bare IPv6 address.

    Hostnames, IPv4 addresses and already-bracketed addresses fail to
    parse as an IP address and are returned unchanged.
    """
    # We cannot assume it's already wrapped or even an IPv6 address if
    # it's already wrapped it'll not pass (like if it's a hostname) and trigger
    # the ValueError
    try:
        # python3-only script: pass the str directly instead of the py2
        # `unicode()` wrapper (which is only defined if a compat shim is).
        if ipaddress.ip_address(address).version == 6:
            return f"[{address}]"
    except ValueError:
        pass

    return address
2763
2764
f6b5b4d7
TL
def is_ipv6(address):
    # type: (str) -> bool
    """Return True when *address* (optionally [bracketed]) is a valid IPv6
    address; logs a warning and returns False for non-IP strings."""
    address = unwrap_ipv6(address)
    try:
        # python3-only script: no need for the py2 `unicode()` wrapper;
        # ipaddress.ip_address accepts str directly.
        return ipaddress.ip_address(address).version == 6
    except ValueError:
        logger.warning("Address: {} isn't a valid IP address".format(address))
        return False
2773
2774
9f95a23c
TL
2775@default_image
2776def command_bootstrap():
2777 # type: () -> int
2778
2779 if not args.output_config:
2780 args.output_config = os.path.join(args.output_dir, 'ceph.conf')
2781 if not args.output_keyring:
2782 args.output_keyring = os.path.join(args.output_dir,
2783 'ceph.client.admin.keyring')
2784 if not args.output_pub_ssh_key:
2785 args.output_pub_ssh_key = os.path.join(args.output_dir, 'ceph.pub')
2786
2787 # verify output files
2788 for f in [args.output_config, args.output_keyring, args.output_pub_ssh_key]:
2789 if not args.allow_overwrite:
2790 if os.path.exists(f):
2791 raise Error('%s already exists; delete or pass '
2792 '--allow-overwrite to overwrite' % f)
2793 dirname = os.path.dirname(f)
2794 if dirname and not os.path.exists(dirname):
2795 raise Error('%s directory %s does not exist' % (f, dirname))
2796
2797 if not args.skip_prepare_host:
2798 command_prepare_host()
2799 else:
2800 logger.info('Skip prepare_host')
2801
2802 # initial vars
2803 fsid = args.fsid or make_fsid()
2804 hostname = get_hostname()
2805 if '.' in hostname and not args.allow_fqdn_hostname:
2806 raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0]))
2807 mon_id = args.mon_id or hostname
2808 mgr_id = args.mgr_id or generate_service_id()
f91f0fd5 2809 logger.info('Cluster fsid: %s' % fsid)
f6b5b4d7 2810 ipv6 = False
9f95a23c
TL
2811
2812 l = FileLock(fsid)
2813 l.acquire()
2814
2815 # ip
2816 r = re.compile(r':(\d+)$')
f6b5b4d7 2817 base_ip = ''
9f95a23c 2818 if args.mon_ip:
f6b5b4d7 2819 ipv6 = is_ipv6(args.mon_ip)
f91f0fd5
TL
2820 if ipv6:
2821 args.mon_ip = wrap_ipv6(args.mon_ip)
9f95a23c
TL
2822 hasport = r.findall(args.mon_ip)
2823 if hasport:
2824 port = int(hasport[0])
2825 if port == 6789:
2826 addr_arg = '[v1:%s]' % args.mon_ip
2827 elif port == 3300:
2828 addr_arg = '[v2:%s]' % args.mon_ip
2829 else:
2830 logger.warning('Using msgr2 protocol for unrecognized port %d' %
2831 port)
2832 addr_arg = '[v2:%s]' % args.mon_ip
2833 base_ip = args.mon_ip[0:-(len(str(port)))-1]
2834 check_ip_port(base_ip, port)
2835 else:
2836 base_ip = args.mon_ip
2837 addr_arg = '[v2:%s:3300,v1:%s:6789]' % (args.mon_ip, args.mon_ip)
2838 check_ip_port(args.mon_ip, 3300)
2839 check_ip_port(args.mon_ip, 6789)
2840 elif args.mon_addrv:
2841 addr_arg = args.mon_addrv
2842 if addr_arg[0] != '[' or addr_arg[-1] != ']':
2843 raise Error('--mon-addrv value %s must use square backets' %
2844 addr_arg)
f6b5b4d7 2845 ipv6 = addr_arg.count('[') > 1
9f95a23c
TL
2846 for addr in addr_arg[1:-1].split(','):
2847 hasport = r.findall(addr)
2848 if not hasport:
2849 raise Error('--mon-addrv value %s must include port number' %
2850 addr_arg)
2851 port = int(hasport[0])
2852 # strip off v1: or v2: prefix
2853 addr = re.sub(r'^\w+:', '', addr)
2854 base_ip = addr[0:-(len(str(port)))-1]
2855 check_ip_port(base_ip, port)
2856 else:
2857 raise Error('must specify --mon-ip or --mon-addrv')
2858 logger.debug('Base mon IP is %s, final addrv is %s' % (base_ip, addr_arg))
2859
2860 mon_network = None
2861 if not args.skip_mon_network:
2862 # make sure IP is configured locally, and then figure out the
2863 # CIDR network
2864 for net, ips in list_networks().items():
f6b5b4d7
TL
2865 if ipaddress.ip_address(unicode(unwrap_ipv6(base_ip))) in \
2866 [ipaddress.ip_address(unicode(ip)) for ip in ips]:
9f95a23c
TL
2867 mon_network = net
2868 logger.info('Mon IP %s is in CIDR network %s' % (base_ip,
2869 mon_network))
2870 break
2871 if not mon_network:
2872 raise Error('Failed to infer CIDR network for mon ip %s; pass '
2873 '--skip-mon-network to configure it later' % base_ip)
2874
2875 # config
2876 cp = read_config(args.config)
2877 if not cp.has_section('global'):
2878 cp.add_section('global')
2879 cp.set('global', 'fsid', fsid);
2880 cp.set('global', 'mon host', addr_arg)
2881 cp.set('global', 'container_image', args.image)
2882 cpf = StringIO()
2883 cp.write(cpf)
2884 config = cpf.getvalue()
2885
f6b5b4d7
TL
2886 if args.registry_json or args.registry_url:
2887 command_registry_login()
2888
9f95a23c 2889 if not args.skip_pull:
f6b5b4d7 2890 _pull_image(args.image)
9f95a23c
TL
2891
2892 logger.info('Extracting ceph user uid/gid from container image...')
2893 (uid, gid) = extract_uid_gid()
2894
2895 # create some initial keys
2896 logger.info('Creating initial keys...')
2897 mon_key = CephContainer(
2898 image=args.image,
2899 entrypoint='/usr/bin/ceph-authtool',
2900 args=['--gen-print-key'],
2901 ).run().strip()
2902 admin_key = CephContainer(
2903 image=args.image,
2904 entrypoint='/usr/bin/ceph-authtool',
2905 args=['--gen-print-key'],
2906 ).run().strip()
2907 mgr_key = CephContainer(
2908 image=args.image,
2909 entrypoint='/usr/bin/ceph-authtool',
2910 args=['--gen-print-key'],
2911 ).run().strip()
2912
2913 keyring = ('[mon.]\n'
2914 '\tkey = %s\n'
2915 '\tcaps mon = allow *\n'
2916 '[client.admin]\n'
2917 '\tkey = %s\n'
2918 '\tcaps mon = allow *\n'
2919 '\tcaps mds = allow *\n'
2920 '\tcaps mgr = allow *\n'
2921 '\tcaps osd = allow *\n'
2922 '[mgr.%s]\n'
2923 '\tkey = %s\n'
2924 '\tcaps mon = profile mgr\n'
2925 '\tcaps mds = allow *\n'
2926 '\tcaps osd = allow *\n'
2927 % (mon_key, admin_key, mgr_id, mgr_key))
2928
2929 # tmp keyring file
2930 tmp_bootstrap_keyring = write_tmp(keyring, uid, gid)
2931
2932 # create initial monmap, tmp monmap file
2933 logger.info('Creating initial monmap...')
2934 tmp_monmap = write_tmp('', 0, 0)
2935 out = CephContainer(
2936 image=args.image,
2937 entrypoint='/usr/bin/monmaptool',
2938 args=['--create',
2939 '--clobber',
2940 '--fsid', fsid,
2941 '--addv', mon_id, addr_arg,
2942 '/tmp/monmap'
2943 ],
2944 volume_mounts={
2945 tmp_monmap.name: '/tmp/monmap:z',
2946 },
2947 ).run()
2948
2949 # pass monmap file to ceph user for use by ceph-mon --mkfs below
2950 os.fchown(tmp_monmap.fileno(), uid, gid)
2951
2952 # create mon
2953 logger.info('Creating mon...')
2954 create_daemon_dirs(fsid, 'mon', mon_id, uid, gid)
2955 mon_dir = get_data_dir(fsid, 'mon', mon_id)
2956 log_dir = get_log_dir(fsid)
2957 out = CephContainer(
2958 image=args.image,
2959 entrypoint='/usr/bin/ceph-mon',
2960 args=['--mkfs',
2961 '-i', mon_id,
2962 '--fsid', fsid,
2963 '-c', '/dev/null',
2964 '--monmap', '/tmp/monmap',
2965 '--keyring', '/tmp/keyring',
2966 ] + get_daemon_args(fsid, 'mon', mon_id),
2967 volume_mounts={
2968 log_dir: '/var/log/ceph:z',
2969 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
2970 tmp_bootstrap_keyring.name: '/tmp/keyring:z',
2971 tmp_monmap.name: '/tmp/monmap:z',
2972 },
2973 ).run()
2974
2975 with open(mon_dir + '/config', 'w') as f:
2976 os.fchown(f.fileno(), uid, gid)
2977 os.fchmod(f.fileno(), 0o600)
2978 f.write(config)
2979
2980 make_var_run(fsid, uid, gid)
2981 mon_c = get_container(fsid, 'mon', mon_id)
2982 deploy_daemon(fsid, 'mon', mon_id, mon_c, uid, gid,
2983 config=None, keyring=None)
2984
2985 # client.admin key + config to issue various CLI commands
2986 tmp_admin_keyring = write_tmp('[client.admin]\n'
2987 '\tkey = ' + admin_key + '\n',
2988 uid, gid)
2989 tmp_config = write_tmp(config, uid, gid)
2990
2991 # a CLI helper to reduce our typing
2992 def cli(cmd, extra_mounts={}, timeout=DEFAULT_TIMEOUT):
2993 # type: (List[str], Dict[str, str], Optional[int]) -> str
2994 mounts = {
2995 log_dir: '/var/log/ceph:z',
2996 tmp_admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
2997 tmp_config.name: '/etc/ceph/ceph.conf:z',
2998 }
2999 for k, v in extra_mounts.items():
3000 mounts[k] = v
3001 timeout = timeout or args.timeout
3002 return CephContainer(
3003 image=args.image,
3004 entrypoint='/usr/bin/ceph',
3005 args=cmd,
3006 volume_mounts=mounts,
3007 ).run(timeout=timeout)
3008
3009 logger.info('Waiting for mon to start...')
3010 c = CephContainer(
3011 image=args.image,
3012 entrypoint='/usr/bin/ceph',
3013 args=[
3014 'status'],
3015 volume_mounts={
3016 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
3017 tmp_admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
3018 tmp_config.name: '/etc/ceph/ceph.conf:z',
3019 },
3020 )
3021
3022 # wait for the service to become available
3023 def is_mon_available():
3024 # type: () -> bool
f6b5b4d7 3025 timeout=args.timeout if args.timeout else 60 # seconds
9f95a23c
TL
3026 out, err, ret = call(c.run_cmd(),
3027 desc=c.entrypoint,
3028 timeout=timeout)
3029 return ret == 0
3030 is_available('mon', is_mon_available)
3031
3032 # assimilate and minimize config
3033 if not args.no_minimize_config:
3034 logger.info('Assimilating anything we can from ceph.conf...')
3035 cli([
3036 'config', 'assimilate-conf',
3037 '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
3038 ], {
3039 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
3040 })
3041 logger.info('Generating new minimal ceph.conf...')
3042 cli([
3043 'config', 'generate-minimal-conf',
3044 '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
3045 ], {
3046 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
3047 })
3048 # re-read our minimized config
3049 with open(mon_dir + '/config', 'r') as f:
3050 config = f.read()
3051 logger.info('Restarting the monitor...')
3052 call_throws([
3053 'systemctl',
3054 'restart',
3055 get_unit_name(fsid, 'mon', mon_id)
3056 ])
3057
3058 if mon_network:
3059 logger.info('Setting mon public_network...')
3060 cli(['config', 'set', 'mon', 'public_network', mon_network])
3061
f6b5b4d7
TL
3062 if ipv6:
3063 logger.info('Enabling IPv6 (ms_bind_ipv6)')
3064 cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])
3065
9f95a23c
TL
3066 # create mgr
3067 logger.info('Creating mgr...')
3068 mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key)
3069 mgr_c = get_container(fsid, 'mgr', mgr_id)
f6b5b4d7 3070 # Note:the default port used by the Prometheus node exporter is opened in fw
9f95a23c 3071 deploy_daemon(fsid, 'mgr', mgr_id, mgr_c, uid, gid,
f6b5b4d7 3072 config=config, keyring=mgr_keyring, ports=[9283])
9f95a23c
TL
3073
3074 # output files
3075 with open(args.output_keyring, 'w') as f:
3076 os.fchmod(f.fileno(), 0o600)
3077 f.write('[client.admin]\n'
3078 '\tkey = ' + admin_key + '\n')
3079 logger.info('Wrote keyring to %s' % args.output_keyring)
3080
3081 with open(args.output_config, 'w') as f:
3082 f.write(config)
3083 logger.info('Wrote config to %s' % args.output_config)
3084
3085 # wait for the service to become available
3086 logger.info('Waiting for mgr to start...')
3087 def is_mgr_available():
3088 # type: () -> bool
f6b5b4d7 3089 timeout=args.timeout if args.timeout else 60 # seconds
e306af50
TL
3090 try:
3091 out = cli(['status', '-f', 'json-pretty'], timeout=timeout)
3092 j = json.loads(out)
3093 return j.get('mgrmap', {}).get('available', False)
3094 except Exception as e:
3095 logger.debug('status failed: %s' % e)
3096 return False
9f95a23c
TL
3097 is_available('mgr', is_mgr_available)
3098
3099 # wait for mgr to restart (after enabling a module)
3100 def wait_for_mgr_restart():
3101 # first get latest mgrmap epoch from the mon
3102 out = cli(['mgr', 'dump'])
3103 j = json.loads(out)
3104 epoch = j['epoch']
3105 # wait for mgr to have it
3106 logger.info('Waiting for the mgr to restart...')
3107 def mgr_has_latest_epoch():
3108 # type: () -> bool
3109 try:
3110 out = cli(['tell', 'mgr', 'mgr_status'])
3111 j = json.loads(out)
3112 return j['mgrmap_epoch'] >= epoch
3113 except Exception as e:
3114 logger.debug('tell mgr mgr_status failed: %s' % e)
3115 return False
3116 is_available('Mgr epoch %d' % epoch, mgr_has_latest_epoch)
3117
3118 # ssh
3119 if not args.skip_ssh:
f6b5b4d7
TL
3120 cli(['config-key', 'set', 'mgr/cephadm/ssh_user', args.ssh_user])
3121
9f95a23c
TL
3122 logger.info('Enabling cephadm module...')
3123 cli(['mgr', 'module', 'enable', 'cephadm'])
3124 wait_for_mgr_restart()
3125
3126 logger.info('Setting orchestrator backend to cephadm...')
3127 cli(['orch', 'set', 'backend', 'cephadm'])
3128
e306af50
TL
3129 if args.ssh_config:
3130 logger.info('Using provided ssh config...')
3131 mounts = {
3132 pathify(args.ssh_config.name): '/tmp/cephadm-ssh-config:z',
3133 }
3134 cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts=mounts)
3135
3136 if args.ssh_private_key and args.ssh_public_key:
3137 logger.info('Using provided ssh keys...')
3138 mounts = {
3139 pathify(args.ssh_private_key.name): '/tmp/cephadm-ssh-key:z',
3140 pathify(args.ssh_public_key.name): '/tmp/cephadm-ssh-key.pub:z'
3141 }
3142 cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts)
3143 cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts=mounts)
3144 else:
3145 logger.info('Generating ssh key...')
3146 cli(['cephadm', 'generate-key'])
3147 ssh_pub = cli(['cephadm', 'get-pub-key'])
3148
3149 with open(args.output_pub_ssh_key, 'w') as f:
3150 f.write(ssh_pub)
3151 logger.info('Wrote public SSH key to to %s' % args.output_pub_ssh_key)
3152
f6b5b4d7
TL
3153 logger.info('Adding key to %s@localhost\'s authorized_keys...' % args.ssh_user)
3154 try:
3155 s_pwd = pwd.getpwnam(args.ssh_user)
3156 except KeyError as e:
3157 raise Error('Cannot find uid/gid for ssh-user: %s' % (args.ssh_user))
3158 ssh_uid = s_pwd.pw_uid
3159 ssh_gid = s_pwd.pw_gid
3160 ssh_dir = os.path.join(s_pwd.pw_dir, '.ssh')
3161
3162 if not os.path.exists(ssh_dir):
3163 makedirs(ssh_dir, ssh_uid, ssh_gid, 0o700)
3164
3165 auth_keys_file = '%s/authorized_keys' % ssh_dir
e306af50 3166 add_newline = False
f6b5b4d7 3167
e306af50
TL
3168 if os.path.exists(auth_keys_file):
3169 with open(auth_keys_file, 'r') as f:
3170 f.seek(0, os.SEEK_END)
3171 if f.tell() > 0:
3172 f.seek(f.tell()-1, os.SEEK_SET) # go to last char
3173 if f.read() != '\n':
3174 add_newline = True
f6b5b4d7 3175
e306af50 3176 with open(auth_keys_file, 'a') as f:
f6b5b4d7 3177 os.fchown(f.fileno(), ssh_uid, ssh_gid) # just in case we created it
e306af50
TL
3178 os.fchmod(f.fileno(), 0o600) # just in case we created it
3179 if add_newline:
3180 f.write('\n')
3181 f.write(ssh_pub.strip() + '\n')
9f95a23c
TL
3182
3183 host = get_hostname()
3184 logger.info('Adding host %s...' % host)
f6b5b4d7
TL
3185 try:
3186 cli(['orch', 'host', 'add', host])
3187 except RuntimeError as e:
3188 raise Error('Failed to add host <%s>: %s' % (host, e))
9f95a23c
TL
3189
3190 if not args.orphan_initial_daemons:
3191 for t in ['mon', 'mgr', 'crash']:
3192 logger.info('Deploying %s service with default placement...' % t)
3193 cli(['orch', 'apply', t])
3194
3195 if not args.skip_monitoring_stack:
3196 logger.info('Enabling mgr prometheus module...')
3197 cli(['mgr', 'module', 'enable', 'prometheus'])
3198 for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
3199 logger.info('Deploying %s service with default placement...' % t)
3200 cli(['orch', 'apply', t])
3201
f6b5b4d7
TL
3202 if args.registry_url and args.registry_username and args.registry_password:
3203 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_url', args.registry_url, '--force'])
3204 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_username', args.registry_username, '--force'])
3205 cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_password', args.registry_password, '--force'])
3206
f91f0fd5
TL
3207 if args.container_init:
3208 cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(args.container_init), '--force'])
3209
9f95a23c 3210 if not args.skip_dashboard:
f6b5b4d7
TL
3211 # Configure SSL port (cephadm only allows to configure dashboard SSL port)
3212 # if the user does not want to use SSL he can change this setting once the cluster is up
3213 cli(["config", "set", "mgr", "mgr/dashboard/ssl_server_port" , str(args.ssl_dashboard_port)])
3214
3215 # configuring dashboard parameters
9f95a23c
TL
3216 logger.info('Enabling the dashboard module...')
3217 cli(['mgr', 'module', 'enable', 'dashboard'])
3218 wait_for_mgr_restart()
3219
3220 # dashboard crt and key
3221 if args.dashboard_key and args.dashboard_crt:
3222 logger.info('Using provided dashboard certificate...')
e306af50
TL
3223 mounts = {
3224 pathify(args.dashboard_crt.name): '/tmp/dashboard.crt:z',
3225 pathify(args.dashboard_key.name): '/tmp/dashboard.key:z'
3226 }
9f95a23c
TL
3227 cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts)
3228 cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts)
3229 else:
3230 logger.info('Generating a dashboard self-signed certificate...')
3231 cli(['dashboard', 'create-self-signed-cert'])
3232
3233 logger.info('Creating initial admin user...')
3234 password = args.initial_dashboard_password or generate_password()
3235 cmd = ['dashboard', 'ac-user-create', args.initial_dashboard_user, password, 'administrator', '--force-password']
3236 if not args.dashboard_password_noupdate:
3237 cmd.append('--pwd-update-required')
1911f103 3238 cli(cmd)
9f95a23c
TL
3239 logger.info('Fetching dashboard port number...')
3240 out = cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
3241 port = int(out)
3242
f6b5b4d7
TL
3243 # Open dashboard port
3244 fw = Firewalld()
3245 fw.open_ports([port])
3246 fw.apply_rules()
3247
9f95a23c
TL
3248 logger.info('Ceph Dashboard is now available at:\n\n'
3249 '\t URL: https://%s:%s/\n'
3250 '\t User: %s\n'
3251 '\tPassword: %s\n' % (
3252 get_fqdn(), port,
3253 args.initial_dashboard_user,
3254 password))
f6b5b4d7 3255
e306af50
TL
3256 if args.apply_spec:
3257 logger.info('Applying %s to cluster' % args.apply_spec)
3258
3259 with open(args.apply_spec) as f:
3260 for line in f:
3261 if 'hostname:' in line:
3262 line = line.replace('\n', '')
3263 split = line.split(': ')
3264 if split[1] != host:
3265 logger.info('Adding ssh key to %s' % split[1])
3266
3267 ssh_key = '/etc/ceph/ceph.pub'
3268 if args.ssh_public_key:
3269 ssh_key = args.ssh_public_key.name
f6b5b4d7 3270 out, err, code = call_throws(['ssh-copy-id', '-f', '-i', ssh_key, '%s@%s' % (args.ssh_user, split[1])])
e306af50
TL
3271
3272 mounts = {}
3273 mounts[pathify(args.apply_spec)] = '/tmp/spec.yml:z'
3274
3275 out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
3276 logger.info(out)
9f95a23c
TL
3277
3278 logger.info('You can access the Ceph CLI with:\n\n'
3279 '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
3280 sys.argv[0],
3281 fsid,
3282 args.output_config,
3283 args.output_keyring))
3284 logger.info('Please consider enabling telemetry to help improve Ceph:\n\n'
3285 '\tceph telemetry on\n\n'
3286 'For more information see:\n\n'
3287 '\thttps://docs.ceph.com/docs/master/mgr/telemetry/\n')
3288 logger.info('Bootstrap complete.')
3289 return 0
3290
3291##################################
3292
f6b5b4d7
TL
def command_registry_login():
    """Log the local container engine in to a custom registry.

    Credentials come either from --registry-json (read via get_parm) or from
    the --registry-url/--registry-username/--registry-password options.
    Returns 0 on success, raises Error on bad/missing arguments.
    """
    if args.registry_json:
        logger.info("Pulling custom registry login info from %s." % args.registry_json)
        d = get_parm(args.registry_json)
        if not (d.get('url') and d.get('username') and d.get('password')):
            raise Error("json provided for custom registry login did not include all necessary fields. "
                        "Please setup json file as\n"
                        "{\n"
                        " \"url\": \"REGISTRY_URL\",\n"
                        " \"username\": \"REGISTRY_USERNAME\",\n"
                        " \"password\": \"REGISTRY_PASSWORD\"\n"
                        "}\n")
        # stash the values on args so later steps can reuse them
        args.registry_url = d.get('url')
        args.registry_username = d.get('username')
        args.registry_password = d.get('password')
        registry_login(args.registry_url, args.registry_username, args.registry_password)
        return 0
    if args.registry_url and args.registry_username and args.registry_password:
        registry_login(args.registry_url, args.registry_username, args.registry_password)
        return 0
    raise Error("Invalid custom registry arguments received. To login to a custom registry include "
                "--registry-url, --registry-username and --registry-password "
                "options or --registry-json option")
3317
def registry_login(url, username, password):
    """Run '<engine> login' against the given registry.

    :param url: registry URL to log in to
    :param username: registry account name
    :param password: registry account password
    :raises Error: if the container engine's login command fails
    """
    logger.info("Logging into custom registry.")
    try:
        out, _, _ = call_throws([container_path, 'login',
                                 '-u', username,
                                 '-p', password,
                                 url])
    except Exception:
        # was a bare 'except:', which also swallowed SystemExit/KeyboardInterrupt;
        # also report the url/username parameters actually attempted rather than
        # the global CLI args, which may differ from them or be unset.
        raise Error("Failed to login to custom registry @ %s as %s with given password" % (url, username))
3327
3328##################################
3329
3330
9f95a23c
TL
def extract_uid_gid_monitoring(daemon_type):
    # type: (str) -> Tuple[int, int]
    """Return the (uid, gid) a monitoring-stack daemon should run as."""
    if daemon_type == 'node-exporter':
        # node-exporter uses the fixed 'nobody' uid/gid
        return 65534, 65534
    # for the others, probe the ownership of a well-known path in the image
    probe_path = {
        'prometheus': '/etc/prometheus',
        'grafana': '/var/lib/grafana',
        'alertmanager': ['/etc/alertmanager', '/etc/prometheus'],
    }
    if daemon_type not in probe_path:
        raise Error("{} not implemented yet".format(daemon_type))
    return extract_uid_gid(file_path=probe_path[daemon_type])
3345
3346
@default_image
def command_deploy():
    # type: () -> None
    """Deploy (or reconfig/redeploy) a single daemon on this host.

    args.name is '<daemon_type>.<daemon_id>'.  Dispatches on the daemon type
    to the matching deploy_daemon() call, with type-specific uid/gid,
    config/keyring handling and default ports.
    """
    daemon_type, daemon_id = args.name.split('.', 1)

    # serialize cephadm operations on this cluster fsid
    l = FileLock(args.fsid)
    l.acquire()

    if daemon_type not in get_supported_daemons():
        raise Error('daemon type %s not recognized' % daemon_type)

    # if the systemd unit is already running, treat this as a redeploy;
    # this suppresses re-adding the daemon's default ports below
    redeploy = False
    unit_name = get_unit_name(args.fsid, daemon_type, daemon_id)
    (_, state, _) = check_unit(unit_name)
    if state == 'running':
        redeploy = True

    if args.reconfig:
        logger.info('%s daemon %s ...' % ('Reconfig', args.name))
    elif redeploy:
        logger.info('%s daemon %s ...' % ('Redeploy', args.name))
    else:
        logger.info('%s daemon %s ...' % ('Deploy', args.name))

    # Get and check ports explicitly required to be opened
    daemon_ports = []  # type: List[int]
    if args.tcp_ports:
        daemon_ports = list(map(int, args.tcp_ports.split()))

    if daemon_type in Ceph.daemons:
        # core ceph daemon: needs config + keyring, runs as the ceph
        # uid/gid extracted from the container image
        config, keyring = get_config_and_keyring()
        uid, gid = extract_uid_gid()
        make_var_run(args.fsid, uid, gid)

        c = get_container(args.fsid, daemon_type, daemon_id,
                          ptrace=args.allow_ptrace)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      osd_fsid=args.osd_fsid,
                      reconfig=args.reconfig,
                      ports=daemon_ports)

    elif daemon_type in Monitoring.components:
        # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
        # Default Checks
        # only add the component's default ports on a first-time deploy
        if not args.reconfig and not redeploy:
            daemon_ports.extend(Monitoring.port_map[daemon_type])

        # make sure provided config-json is sufficient
        config = get_parm(args.config_json)  # type: ignore
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())
        required_args = Monitoring.components[daemon_type].get('config-json-args', list())
        if required_files:
            if not config or not all(c in config.get('files', {}).keys() for c in required_files):  # type: ignore
                raise Error("{} deployment requires config-json which must "
                            "contain file content for {}".format(daemon_type.capitalize(), ', '.join(required_files)))
        if required_args:
            if not config or not all(c in config.keys() for c in required_args):  # type: ignore
                raise Error("{} deployment requires config-json which must "
                            "contain arg for {}".format(daemon_type.capitalize(), ', '.join(required_args)))

        uid, gid = extract_uid_gid_monitoring(daemon_type)
        c = get_container(args.fsid, daemon_type, daemon_id)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=args.reconfig,
                      ports=daemon_ports)

    elif daemon_type == NFSGanesha.daemon_type:
        if not args.reconfig and not redeploy:
            daemon_ports.extend(NFSGanesha.port_map.values())

        config, keyring = get_config_and_keyring()
        # TODO: extract ganesha uid/gid (997, 994) ?
        uid, gid = extract_uid_gid()
        c = get_container(args.fsid, daemon_type, daemon_id)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=args.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CephIscsi.daemon_type:
        config, keyring = get_config_and_keyring()
        uid, gid = extract_uid_gid()
        c = get_container(args.fsid, daemon_type, daemon_id)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=args.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CustomContainer.daemon_type:
        # user-defined container: uid/gid, ports and privileges come from
        # the CustomContainer spec rather than from a ceph image
        cc = CustomContainer.init(args.fsid, daemon_id)
        if not args.reconfig and not redeploy:
            daemon_ports.extend(cc.ports)
        c = get_container(args.fsid, daemon_type, daemon_id,
                          privileged=cc.privileged,
                          ptrace=args.allow_ptrace)
        deploy_daemon(args.fsid, daemon_type, daemon_id, c,
                      uid=cc.uid, gid=cc.gid, config=None,
                      keyring=None, reconfig=args.reconfig,
                      ports=daemon_ports)

    else:
        raise Error('daemon type {} not implemented in command_deploy function'
                    .format(daemon_type))
9f95a23c
TL
3451
3452##################################
3453
f6b5b4d7 3454
9f95a23c
TL
@infer_image
def command_run():
    # type: () -> int
    """Run the named daemon's container in the foreground; return its exit code."""
    d_type, d_id = args.name.split('.', 1)
    ctr = get_container(args.fsid, d_type, d_id)
    return call_timeout(ctr.run_cmd(), args.timeout)
3462
3463##################################
3464
f6b5b4d7 3465
@infer_fsid
@infer_config
@infer_image
def command_shell():
    # type: () -> int
    """Launch a containerized interactive shell (or run a one-off command)
    with the cluster's config, keyring and daemon mounts available inside.
    """
    if args.fsid:
        make_log_dir(args.fsid)
    # work out which daemon's mounts to borrow; a bare type (no '.') or no
    # name at all means no specific daemon instance
    if args.name:
        if '.' in args.name:
            (daemon_type, daemon_id) = args.name.split('.', 1)
        else:
            daemon_type = args.name
            daemon_id = None
    else:
        daemon_type = 'osd'  # get the most mounts
        daemon_id = None

    if daemon_id and not args.fsid:
        raise Error('must pass --fsid to specify cluster')

    # use /etc/ceph files by default, if present.  we do this instead of
    # making these defaults in the arg parser because we don't want an error
    # if they don't exist.
    if not args.keyring and os.path.exists(SHELL_DEFAULT_KEYRING):
        args.keyring = SHELL_DEFAULT_KEYRING

    container_args = []  # type: List[str]
    mounts = get_container_mounts(args.fsid, daemon_type, daemon_id,
                                  no_config=True if args.config else False)
    binds = get_container_binds(args.fsid, daemon_type, daemon_id)
    if args.config:
        mounts[pathify(args.config)] = '/etc/ceph/ceph.conf:z'
    if args.keyring:
        mounts[pathify(args.keyring)] = '/etc/ceph/ceph.keyring:z'
    if args.mount:
        # each --mount is 'src[:dst[:opt]]'; a third field makes us append
        # the ':z' SELinux relabel suffix; default dst is /mnt/<basename>
        for _mount in args.mount:
            split_src_dst = _mount.split(':')
            mount = pathify(split_src_dst[0])
            filename = os.path.basename(split_src_dst[0])
            if len(split_src_dst) > 1:
                dst = split_src_dst[1] + ':z' if len(split_src_dst) == 3 else split_src_dst[1]
                mounts[mount] = dst
            else:
                mounts[mount] = '/mnt/{}:z'.format(filename)
    if args.command:
        command = args.command
    else:
        # no explicit command: interactive bash with a custom prompt
        command = ['bash']
        container_args += [
            '-it',
            '-e', 'LANG=C',
            '-e', "PS1=%s" % CUSTOM_PS1,
        ]
    if args.fsid:
        # create a per-cluster home dir (seeded from /etc/skel's .bash*
        # files) and mount it at /root inside the container
        home = os.path.join(args.data_dir, args.fsid, 'home')
        if not os.path.exists(home):
            logger.debug('Creating root home at %s' % home)
            makedirs(home, 0, 0, 0o660)
        if os.path.exists('/etc/skel'):
            for f in os.listdir('/etc/skel'):
                if f.startswith('.bash'):
                    shutil.copyfile(os.path.join('/etc/skel', f),
                                    os.path.join(home, f))
        mounts[home] = '/root'

    c = CephContainer(
        image=args.image,
        entrypoint='doesnotmatter',
        args=[],
        container_args=container_args,
        volume_mounts=mounts,
        bind_mounts=binds,
        envs=args.env,
        privileged=True)
    command = c.shell_cmd(command)

    return call_timeout(command, args.timeout)
3543
3544##################################
3545
f6b5b4d7 3546
9f95a23c
TL
@infer_fsid
def command_enter():
    # type: () -> int
    """Exec a shell (or the given command) inside the named daemon's
    running container; return the command's exit code.
    """
    if not args.fsid:
        raise Error('must pass --fsid to specify cluster')
    daemon_type, daemon_id = args.name.split('.', 1)
    container_args = []  # type: List[str]
    command = args.command
    if not command:
        # no explicit command: interactive sh with a custom prompt
        command = ['sh']
        container_args.extend([
            '-it',
            '-e', 'LANG=C',
            '-e', "PS1=%s" % CUSTOM_PS1,
        ])
    c = CephContainer(
        image=args.image,
        entrypoint='doesnotmatter',
        container_args=container_args,
        cname='ceph-%s-%s.%s' % (args.fsid, daemon_type, daemon_id),
    )
    return call_timeout(c.exec_cmd(command), args.timeout)
3571
3572##################################
3573
f6b5b4d7 3574
9f95a23c
TL
@infer_fsid
@infer_image
def command_ceph_volume():
    # type: () -> None
    """Run ceph-volume inside a privileged container, staging the cluster
    config and bootstrap-osd keyring into it when available.
    """
    if args.fsid:
        make_log_dir(args.fsid)

        lock = FileLock(args.fsid)
        lock.acquire()

    uid, gid = 0, 0  # ceph-volume runs as root
    mounts = get_container_mounts(args.fsid, 'osd', None)

    config, keyring = get_config_and_keyring()

    # keep references to the temp files so they survive until run_cmd()
    tmp_config = None
    tmp_keyring = None
    if config:
        # tmp config file
        tmp_config = write_tmp(config, uid, gid)
        mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'
    if keyring:
        # tmp keyring file
        tmp_keyring = write_tmp(keyring, uid, gid)
        mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'

    container = CephContainer(
        image=args.image,
        entrypoint='/usr/sbin/ceph-volume',
        envs=args.env,
        args=args.command,
        privileged=True,
        volume_mounts=mounts,
    )
    out, err, code = call_throws(container.run_cmd(), verbose=True)
    if not code:
        print(out)
3614
3615##################################
3616
f6b5b4d7 3617
9f95a23c
TL
@infer_fsid
def command_unit():
    # type: () -> None
    """Pass a systemctl subcommand (start/stop/...) through to the named
    daemon's systemd unit.
    """
    if not args.fsid:
        raise Error('must pass --fsid to specify cluster')
    unit_name = get_unit_name_by_daemon_name(args.fsid, args.name)
    call_throws(['systemctl', args.command, unit_name])
3630
3631##################################
3632
f6b5b4d7 3633
9f95a23c
TL
@infer_fsid
def command_logs():
    # type: () -> None
    """Show journalctl output for the named daemon's systemd unit,
    forwarding any extra arguments to journalctl.
    """
    if not args.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(args.fsid, args.name)

    cmd = [find_program('journalctl'), '-u', unit_name]
    if args.command:
        cmd += args.command

    # call journalctl directly, without our call() wrapper, so its stdout
    # reaches the user unmolested by logger prefixing
    logger.debug("Running command: %s" % ' '.join(cmd))
    subprocess.call(cmd)  # type: ignore
3651
3652##################################
3653
f6b5b4d7 3654
9f95a23c
TL
def list_networks():
    # type: () -> Dict[str,List[str]]
    """Return a map of CIDR network -> list of local IPs in that network.

    18.04's iproute2 (4.15.0) lacks 'ip -j', so the helpers parse the plain
    text output of 'ip route'/'ip addr' with regexes instead of JSON.
    """
    networks = _list_ipv4_networks()
    networks.update(_list_ipv6_networks())
    return networks
3667
3668
def _list_ipv4_networks():
    """Collect IPv4 networks and the local addresses within them."""
    ip_bin = find_executable('ip')
    route_out, _, _ = call_throws([ip_bin, 'route', 'ls'])
    return _parse_ipv4_route(route_out)
3672
9f95a23c 3673
f6b5b4d7 3674def _parse_ipv4_route(out):
9f95a23c
TL
3675 r = {} # type: Dict[str,List[str]]
3676 p = re.compile(r'^(\S+) (.*)scope link (.*)src (\S+)')
3677 for line in out.splitlines():
3678 m = p.findall(line)
3679 if not m:
3680 continue
3681 net = m[0][0]
3682 ip = m[0][3]
3683 if net not in r:
3684 r[net] = []
3685 r[net].append(ip)
3686 return r
3687
f6b5b4d7
TL
3688
def _list_ipv6_networks():
    """Collect IPv6 networks and the local addresses within them."""
    ip_bin = find_executable('ip')
    routes, _, _ = call_throws([ip_bin, '-6', 'route', 'ls'])
    ips, _, _ = call_throws([ip_bin, '-6', 'addr', 'ls'])
    return _parse_ipv6_route(routes, ips)
3693
3694
3695def _parse_ipv6_route(routes, ips):
3696 r = {} # type: Dict[str,List[str]]
3697 route_p = re.compile(r'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$')
3698 ip_p = re.compile(r'^\s+inet6 (\S+)/(.*)scope (.*)$')
3699 for line in routes.splitlines():
3700 m = route_p.findall(line)
3701 if not m or m[0][0].lower() == 'default':
3702 continue
3703 net = m[0][0]
3704 if net not in r:
3705 r[net] = []
3706
3707 for line in ips.splitlines():
3708 m = ip_p.findall(line)
3709 if not m:
3710 continue
3711 ip = m[0][0]
3712 # find the network it belongs to
3713 net = [n for n in r.keys()
3714 if ipaddress.ip_address(unicode(ip)) in ipaddress.ip_network(unicode(n))]
3715 if net:
3716 r[net[0]].append(ip)
3717
3718 return r
3719
3720
9f95a23c
TL
def command_list_networks():
    # type: () -> None
    """Print this host's networks and their local IPs as pretty JSON."""
    print(json.dumps(list_networks(), indent=4))
3725
3726##################################
3727
f6b5b4d7 3728
9f95a23c
TL
def command_ls():
    # type: () -> None
    """Print a JSON listing of every ceph daemon configured on this host."""
    daemons = list_daemons(detail=not args.no_detail,
                           legacy_dir=args.legacy_dir)
    print(json.dumps(daemons, indent=4))
3735
f6b5b4d7 3736
9f95a23c
TL
3737def list_daemons(detail=True, legacy_dir=None):
3738 # type: (bool, Optional[str]) -> List[Dict[str, str]]
3739 host_version = None
3740 ls = []
3741
3742 data_dir = args.data_dir
3743 if legacy_dir is not None:
3744 data_dir = os.path.abspath(legacy_dir + data_dir)
3745
3746 # keep track of ceph versions we see
3747 seen_versions = {} # type: Dict[str, Optional[str]]
3748
3749 # /var/lib/ceph
3750 if os.path.exists(data_dir):
3751 for i in os.listdir(data_dir):
3752 if i in ['mon', 'osd', 'mds', 'mgr']:
3753 daemon_type = i
3754 for j in os.listdir(os.path.join(data_dir, i)):
3755 if '-' not in j:
3756 continue
3757 (cluster, daemon_id) = j.split('-', 1)
3758 fsid = get_legacy_daemon_fsid(
3759 cluster, daemon_type, daemon_id,
3760 legacy_dir=legacy_dir)
e306af50 3761 legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
9f95a23c
TL
3762 i = {
3763 'style': 'legacy',
3764 'name': '%s.%s' % (daemon_type, daemon_id),
3765 'fsid': fsid if fsid is not None else 'unknown',
e306af50 3766 'systemd_unit': legacy_unit_name,
9f95a23c
TL
3767 }
3768 if detail:
e306af50 3769 (i['enabled'], i['state'], _) = check_unit(legacy_unit_name)
9f95a23c
TL
3770 if not host_version:
3771 try:
3772 out, err, code = call(['ceph', '-v'])
3773 if not code and out.startswith('ceph version '):
3774 host_version = out.split(' ')[2]
3775 except Exception:
3776 pass
3777 i['host_version'] = host_version
3778 ls.append(i)
3779 elif is_fsid(i):
3780 fsid = str(i) # convince mypy that fsid is a str here
3781 for j in os.listdir(os.path.join(data_dir, i)):
3782 if '.' in j:
3783 name = j
3784 (daemon_type, daemon_id) = j.split('.', 1)
3785 unit_name = get_unit_name(fsid,
3786 daemon_type,
3787 daemon_id)
3788 else:
3789 continue
3790 i = {
3791 'style': 'cephadm:v1',
3792 'name': name,
3793 'fsid': fsid,
e306af50 3794 'systemd_unit': unit_name,
9f95a23c
TL
3795 }
3796 if detail:
3797 # get container id
3798 (i['enabled'], i['state'], _) = check_unit(unit_name)
3799 container_id = None
3800 image_name = None
3801 image_id = None
3802 version = None
3803 start_stamp = None
3804
3805 if 'podman' in container_path and get_podman_version() < (1, 6, 2):
3806 image_field = '.ImageID'
3807 else:
3808 image_field = '.Image'
3809
3810 out, err, code = call(
3811 [
3812 container_path, 'inspect',
3813 '--format', '{{.Id}},{{.Config.Image}},{{%s}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}' % image_field,
3814 'ceph-%s-%s' % (fsid, j)
3815 ],
3816 verbose_on_failure=False)
3817 if not code:
3818 (container_id, image_name, image_id, start,
3819 version) = out.strip().split(',')
3820 image_id = normalize_container_id(image_id)
3821 daemon_type = name.split('.', 1)[0]
3822 start_stamp = try_convert_datetime(start)
3823 if not version or '.' not in version:
3824 version = seen_versions.get(image_id, None)
3825 if daemon_type == NFSGanesha.daemon_type:
3826 version = NFSGanesha.get_version(container_id)
1911f103
TL
3827 if daemon_type == CephIscsi.daemon_type:
3828 version = CephIscsi.get_version(container_id)
9f95a23c
TL
3829 elif not version:
3830 if daemon_type in Ceph.daemons:
3831 out, err, code = call(
3832 [container_path, 'exec', container_id,
3833 'ceph', '-v'])
3834 if not code and \
3835 out.startswith('ceph version '):
3836 version = out.split(' ')[2]
3837 seen_versions[image_id] = version
3838 elif daemon_type == 'grafana':
3839 out, err, code = call(
3840 [container_path, 'exec', container_id,
3841 'grafana-server', '-v'])
3842 if not code and \
3843 out.startswith('Version '):
3844 version = out.split(' ')[1]
3845 seen_versions[image_id] = version
3846 elif daemon_type in ['prometheus',
3847 'alertmanager',
3848 'node-exporter']:
3849 cmd = daemon_type.replace('-', '_')
3850 out, err, code = call(
3851 [container_path, 'exec', container_id,
3852 cmd, '--version'])
3853 if not code and \
3854 err.startswith('%s, version ' % cmd):
3855 version = err.split(' ')[2]
3856 seen_versions[image_id] = version
f91f0fd5
TL
3857 elif daemon_type == CustomContainer.daemon_type:
3858 # Because a custom container can contain
3859 # everything, we do not know which command
3860 # to execute to get the version.
3861 pass
9f95a23c 3862 else:
f91f0fd5 3863 logger.warning('version for unknown daemon type %s' % daemon_type)
9f95a23c
TL
3864 else:
3865 vfile = os.path.join(data_dir, fsid, j, 'unit.image') # type: ignore
3866 try:
3867 with open(vfile, 'r') as f:
3868 image_name = f.read().strip() or None
3869 except IOError:
3870 pass
3871 i['container_id'] = container_id
3872 i['container_image_name'] = image_name
3873 i['container_image_id'] = image_id
3874 i['version'] = version
3875 i['started'] = start_stamp
3876 i['created'] = get_file_timestamp(
3877 os.path.join(data_dir, fsid, j, 'unit.created')
3878 )
3879 i['deployed'] = get_file_timestamp(
3880 os.path.join(data_dir, fsid, j, 'unit.image'))
3881 i['configured'] = get_file_timestamp(
3882 os.path.join(data_dir, fsid, j, 'unit.configured'))
3883
3884 ls.append(i)
3885
9f95a23c
TL
3886 return ls
3887
3888
e306af50
TL
def get_daemon_description(fsid, name, detail=False, legacy_dir=None):
    # type: (str, str, bool, Optional[str]) -> Dict[str, str]
    """Return the `cephadm ls` record for daemon *name* in cluster *fsid*.

    Raises Error if no matching daemon is found on this host.
    """
    matches = (d for d in list_daemons(detail=detail, legacy_dir=legacy_dir)
               if d['fsid'] == fsid and d['name'] == name)
    for match in matches:
        return match
    raise Error('Daemon not found: {}. See `cephadm ls`'.format(name))
3899
3900
9f95a23c
TL
3901##################################
3902
@default_image
def command_adopt():
    # type: () -> None
    """Adopt a legacy (pre-cephadm) daemon into cephadm management.

    Pulls the target image (unless --skip-pull), determines the cluster
    fsid, takes the per-cluster lock, and dispatches to the daemon-type
    specific adoption routine.
    """
    if not args.skip_pull:
        _pull_image(args.image)

    (daemon_type, daemon_id) = args.name.split('.', 1)

    # only the legacy (packaged systemd unit) style is supported
    if args.style != 'legacy':
        raise Error('adoption of style %s not implemented' % args.style)

    # determine fsid and serialize against other cephadm operations
    fsid = get_legacy_daemon_fsid(args.cluster,
                                  daemon_type,
                                  daemon_id,
                                  legacy_dir=args.legacy_dir)
    if not fsid:
        raise Error('could not detect legacy fsid; set fsid in ceph.conf')
    lock = FileLock(fsid)  # renamed from 'l' (ambiguous, flake8 E741)
    lock.acquire()

    # call correct adoption (stray trailing semicolon removed)
    if daemon_type in Ceph.daemons:
        command_adopt_ceph(daemon_type, daemon_id, fsid)
    elif daemon_type == 'prometheus':
        command_adopt_prometheus(daemon_id, fsid)
    elif daemon_type == 'grafana':
        command_adopt_grafana(daemon_id, fsid)
    elif daemon_type == 'node-exporter':
        raise Error('adoption of node-exporter not implemented')
    elif daemon_type == 'alertmanager':
        command_adopt_alertmanager(daemon_id, fsid)
    else:
        raise Error('daemon type %s not recognized' % daemon_type)
3939
3940
1911f103
TL
class AdoptOsd(object):
    """Helpers to discover the fsid and objectstore type of a legacy OSD.

    The OSD may be online (data dir populated/mounted), or offline and
    managed by ceph-volume in either 'lvm' or 'simple' mode; one probe
    method is provided for each case.
    """

    def __init__(self, osd_data_dir, osd_id):
        # type: (str, str) -> None
        self.osd_data_dir = osd_data_dir  # legacy data dir, e.g. /var/lib/ceph/osd/<cluster>-<id>
        self.osd_id = osd_id

    def check_online_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Read fsid and objectstore type from an online OSD's data dir.

        Returns (osd_fsid, osd_type); either may be None if unreadable.
        """
        osd_fsid, osd_type = None, None

        path = os.path.join(self.osd_data_dir, 'fsid')
        try:
            with open(path, 'r') as f:
                osd_fsid = f.read().strip()
            logger.info("Found online OSD at %s" % path)
        except IOError:
            logger.info('Unable to read OSD fsid from %s' % path)
        if os.path.exists(os.path.join(self.osd_data_dir, 'type')):
            with open(os.path.join(self.osd_data_dir, 'type')) as f:
                # 'type' file contains the objectstore type (e.g. bluestore)
                osd_type = f.read().strip()
        else:
            logger.info('"type" file missing for OSD data dir')

        return osd_fsid, osd_type

    def check_offline_lvm_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Probe `ceph-volume lvm list` (run in a container) for this OSD.

        Returns (osd_fsid, osd_type); (None, None) if not found.
        """
        osd_fsid, osd_type = None, None

        c = CephContainer(
            image=args.image,
            entrypoint='/usr/sbin/ceph-volume',
            args=['lvm', 'list', '--format=json'],
            privileged=True
        )
        out, err, code = call_throws(c.run_cmd(), verbose=False)
        if not code:
            try:
                js = json.loads(out)
                if self.osd_id in js:
                    logger.info("Found offline LVM OSD {}".format(self.osd_id))
                    osd_fsid = js[self.osd_id][0]['tags']['ceph.osd_fsid']
                    # infer objectstore from the device role tags:
                    # a 'block' device implies bluestore, 'data' filestore
                    for device in js[self.osd_id]:
                        if device['tags']['ceph.type'] == 'block':
                            osd_type = 'bluestore'
                            break
                        if device['tags']['ceph.type'] == 'data':
                            osd_type = 'filestore'
                            break
            except ValueError as e:
                logger.info("Invalid JSON in ceph-volume lvm list: {}".format(e))

        return osd_fsid, osd_type

    def check_offline_simple_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Probe for a ceph-volume 'simple' scan JSON under /etc/ceph/osd.

        Side effect: mounts the OSD's data device (non-filestore) so the
        subsequent adopt can move files. Returns (osd_fsid, osd_type).
        """
        osd_fsid, osd_type = None, None

        osd_file = glob("/etc/ceph/osd/{}-[a-f0-9-]*.json".format(self.osd_id))
        if len(osd_file) == 1:
            with open(osd_file[0], 'r') as f:
                try:
                    js = json.loads(f.read())
                    logger.info("Found offline simple OSD {}".format(self.osd_id))
                    osd_fsid = js["fsid"]
                    osd_type = js["type"]
                    if osd_type != "filestore":
                        # need this to be mounted for the adopt to work, as it
                        # needs to move files from this directory
                        call_throws(['mount', js["data"]["path"], self.osd_data_dir])
                except ValueError as e:
                    logger.info("Invalid JSON in {}: {}".format(osd_file, e))

        return osd_fsid, osd_type
4018
9f95a23c
TL
4019
def command_adopt_ceph(daemon_type, daemon_id, fsid):
    # type: (str, str, str) -> None
    """Adopt a legacy Ceph daemon (mon/mgr/osd/...).

    Stops and disables the old systemd unit, moves data and logs into the
    cephadm directory layout, fixes ownership, and deploys a new
    containerized systemd unit in its place.

    Fixes vs. previous revision:
    - 'num_renamed' was never incremented, so the ldb->sst rename summary
      log could never fire; it is now counted in the loop.
    - typo in error message ("alrady" -> "already").
    """
    (uid, gid) = extract_uid_gid()

    data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
                    (daemon_type, args.cluster, daemon_id))
    data_dir_src = os.path.abspath(args.legacy_dir + data_dir_src)

    if not os.path.exists(data_dir_src):
        raise Error("{}.{} data directory '{}' does not exist. "
                    "Incorrect ID specified, or daemon already adopted?".format(
                        daemon_type, daemon_id, data_dir_src))

    osd_fsid = None
    if daemon_type == 'osd':
        # an OSD may be online, or offline in lvm or 'simple' form;
        # probe in that order
        adopt_osd = AdoptOsd(data_dir_src, daemon_id)
        osd_fsid, osd_type = adopt_osd.check_online_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
        if not osd_fsid:
            raise Error('Unable to find OSD {}'.format(daemon_id))
        logger.info('objectstore_type is %s' % osd_type)
        assert osd_type
        if osd_type == 'filestore':
            raise Error('FileStore is not supported by cephadm')

    # NOTE: implicit assumption here that the units correspond to the
    # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
    # CLUSTER field.
    unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
    (enabled, state, _) = check_unit(unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(['systemctl', 'disable', unit_name])

    # data
    logger.info('Moving data...')
    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)
    move_files(glob(os.path.join(data_dir_src, '*')),
               data_dir_dst,
               uid=uid, gid=gid)
    logger.debug('Remove dir \'%s\'' % (data_dir_src))
    if os.path.ismount(data_dir_src):
        call_throws(['umount', data_dir_src])
    os.rmdir(data_dir_src)

    logger.info('Chowning content...')
    call_throws(['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])

    if daemon_type == 'mon':
        # rename *.ldb -> *.sst, in case they are coming from ubuntu
        store = os.path.join(data_dir_dst, 'store.db')
        num_renamed = 0
        if os.path.exists(store):
            for oldf in os.listdir(store):
                if oldf.endswith('.ldb'):
                    newf = oldf.replace('.ldb', '.sst')
                    oldp = os.path.join(store, oldf)
                    newp = os.path.join(store, newf)
                    logger.debug('Renaming %s -> %s' % (oldp, newp))
                    os.rename(oldp, newp)
                    num_renamed += 1  # bug fix: counter was never incremented
        if num_renamed:
            logger.info('Renamed %d leveldb *.ldb files to *.sst',
                        num_renamed)
    if daemon_type == 'osd':
        # block devices (symlink targets) must be owned by the ceph user too
        for n in ['block', 'block.db', 'block.wal']:
            p = os.path.join(data_dir_dst, n)
            if os.path.exists(p):
                logger.info('Chowning %s...' % p)
                os.chown(p, uid, gid)
        # disable the ceph-volume 'simple' mode files on the host
        simple_fn = os.path.join('/etc/ceph/osd',
                                 '%s-%s.json' % (daemon_id, osd_fsid))
        if os.path.exists(simple_fn):
            new_fn = simple_fn + '.adopted-by-cephadm'
            logger.info('Renaming %s -> %s', simple_fn, new_fn)
            os.rename(simple_fn, new_fn)
            logger.info('Disabling host unit ceph-volume@ simple unit...')
            call(['systemctl', 'disable',
                  'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
        else:
            # assume this is an 'lvm' c-v for now, but don't error
            # out if it's not.
            logger.info('Disabling host unit ceph-volume@ lvm unit...')
            call(['systemctl', 'disable',
                  'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])

    # config
    config_src = '/etc/ceph/%s.conf' % (args.cluster)
    config_src = os.path.abspath(args.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'config')
    copy_files([config_src], config_dst, uid=uid, gid=gid)

    # logs
    logger.info('Moving logs...')
    log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
                   (args.cluster, daemon_type, daemon_id))
    log_dir_src = os.path.abspath(args.legacy_dir + log_dir_src)
    log_dir_dst = make_log_dir(fsid, uid=uid, gid=gid)
    move_files(glob(log_dir_src),
               log_dir_dst,
               uid=uid, gid=gid)

    logger.info('Creating new units...')
    make_var_run(fsid, uid, gid)
    c = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True,  # unconditionally enable the new unit
                        start=(state == 'running' or args.force_start),
                        osd_fsid=osd_fsid)
    update_firewalld(daemon_type)
4138
4139
def command_adopt_prometheus(daemon_id, fsid):
    # type: (str, str) -> None
    """Adopt a legacy prometheus daemon: stop the old unit, import its
    config and metrics into the cephadm layout, and deploy a
    containerized unit."""
    daemon_type = 'prometheus'
    uid, gid = extract_uid_gid_monitoring(daemon_type)

    _stop_and_disable('prometheus')

    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # scrape configuration
    config_src = os.path.abspath(args.legacy_dir + '/etc/prometheus/prometheus.yml')
    config_dst = os.path.join(data_dir_dst, 'etc/prometheus')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files([config_src], config_dst, uid=uid, gid=gid)

    # time-series data
    data_src = os.path.abspath(args.legacy_dir + '/var/lib/prometheus/metrics/')
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree([data_src], data_dst, uid=uid, gid=gid)

    make_var_run(fsid, uid, gid)
    c = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(daemon_type)
4168
f6b5b4d7 4169
9f95a23c
TL
def command_adopt_grafana(daemon_id, fsid):
    # type: (str, str) -> None
    """Adopt a legacy grafana-server: stop the old unit, import config,
    provisioning, TLS material and data, then deploy a containerized
    unit."""
    daemon_type = 'grafana'
    uid, gid = extract_uid_gid_monitoring(daemon_type)

    _stop_and_disable('grafana-server')

    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # main configuration file
    config_src = os.path.abspath(args.legacy_dir + '/etc/grafana/grafana.ini')
    config_dst = os.path.join(data_dir_dst, 'etc/grafana')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files([config_src], config_dst, uid=uid, gid=gid)

    # provisioning tree
    prov_src = os.path.abspath(args.legacy_dir + '/etc/grafana/provisioning/')
    prov_dst = os.path.join(data_dir_dst, 'etc/grafana')
    copy_tree([prov_src], prov_dst, uid=uid, gid=gid)

    # TLS cert/key, only when both are present
    cert = '/etc/grafana/grafana.crt'
    key = '/etc/grafana/grafana.key'
    if os.path.exists(cert) and os.path.exists(key):
        cert_src = os.path.abspath(args.legacy_dir + cert)
        makedirs(os.path.join(data_dir_dst, 'etc/grafana/certs'), uid, gid, 0o755)
        cert_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_file')
        copy_files([cert_src], cert_dst, uid=uid, gid=gid)

        key_src = os.path.abspath(args.legacy_dir + key)
        key_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_key')
        copy_files([key_src], key_dst, uid=uid, gid=gid)

        # point the adopted grafana.ini at the new cert locations
        _adjust_grafana_ini(os.path.join(config_dst, 'grafana.ini'))
    else:
        logger.debug("Skipping ssl, missing cert {} or key {}".format(cert, key))

    # data - possible custom dashboards/plugins
    data_src = os.path.abspath(args.legacy_dir + '/var/lib/grafana/')
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree([data_src], data_dst, uid=uid, gid=gid)

    make_var_run(fsid, uid, gid)
    c = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(daemon_type)
4222
f6b5b4d7 4223
801d1391
TL
def command_adopt_alertmanager(daemon_id, fsid):
    # type: (str, str) -> None
    """Adopt a legacy prometheus-alertmanager: stop the old unit, import
    its config and data, then deploy a containerized unit."""
    daemon_type = 'alertmanager'
    uid, gid = extract_uid_gid_monitoring(daemon_type)

    _stop_and_disable('prometheus-alertmanager')

    data_dir_dst = make_data_dir(fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # configuration file
    config_src = os.path.abspath(args.legacy_dir + '/etc/prometheus/alertmanager.yml')
    config_dst = os.path.join(data_dir_dst, 'etc/alertmanager')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files([config_src], config_dst, uid=uid, gid=gid)

    # data
    data_src = os.path.abspath(args.legacy_dir + '/var/lib/prometheus/alertmanager/')
    data_dst = os.path.join(data_dir_dst, 'etc/alertmanager/data')
    copy_tree([data_src], data_dst, uid=uid, gid=gid)

    make_var_run(fsid, uid, gid)
    c = get_container(fsid, daemon_type, daemon_id)
    deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(daemon_type)
4252
f6b5b4d7 4253
9f95a23c
TL
4254def _adjust_grafana_ini(filename):
4255 # type: (str) -> None
4256
4257 # Update cert_file, cert_key pathnames in server section
4258 # ConfigParser does not preserve comments
4259 try:
4260 with open(filename, "r") as grafana_ini:
4261 lines = grafana_ini.readlines()
4262 with open("{}.new".format(filename), "w") as grafana_ini:
4263 server_section=False
4264 for line in lines:
4265 if line.startswith('['):
4266 server_section=False
4267 if line.startswith('[server]'):
4268 server_section=True
4269 if server_section:
4270 line = re.sub(r'^cert_file.*',
4271 'cert_file = /etc/grafana/certs/cert_file', line)
4272 line = re.sub(r'^cert_key.*',
4273 'cert_key = /etc/grafana/certs/cert_key', line)
4274 grafana_ini.write(line)
4275 os.rename("{}.new".format(filename), filename)
4276 except OSError as err:
4277 raise Error("Cannot update {}: {}".format(filename, err))
4278
4279
def _stop_and_disable(unit_name):
    # type: (str) -> None
    """Stop *unit_name* if it is running and disable it if enabled."""
    enabled, state, _ = check_unit(unit_name)
    actions = []
    if state == 'running':
        actions.append(('Stopping', 'stop'))
    if enabled:
        actions.append(('Disabling', 'disable'))
    for verb, subcmd in actions:
        logger.info('%s old systemd unit %s...' % (verb, unit_name))
        call_throws(['systemctl', subcmd, unit_name])
4290
4291
4292##################################
4293
def command_rm_daemon():
    # type: () -> None
    """Remove one daemon instance: tear down its systemd unit, then
    delete or archive its data directory.

    mon/osd require --force; mon/osd/prometheus data is archived under
    <data_dir>/<fsid>/removed unless --force-delete-data is given.
    """
    lock = FileLock(args.fsid)
    lock.acquire()

    unit_name = get_unit_name_by_daemon_name(args.fsid, args.name)

    daemon_type, daemon_id = args.name.split('.', 1)
    if daemon_type in ['mon', 'osd'] and not args.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    for action in ('stop', 'reset-failed', 'disable'):
        call(['systemctl', action, unit_name],
             verbose_on_failure=False)

    data_dir = get_data_dir(args.fsid, daemon_type, daemon_id)
    archive = daemon_type in ['mon', 'osd', 'prometheus'] and \
        not args.force_delete_data
    if archive:
        # rename it out of the way -- do not delete
        backup_dir = os.path.join(args.data_dir, args.fsid, 'removed')
        if not os.path.exists(backup_dir):
            makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
        dirname = '%s.%s_%s' % (daemon_type, daemon_id,
                                datetime.datetime.utcnow().strftime(DATEFMT))
        os.rename(data_dir,
                  os.path.join(backup_dir, dirname))
    else:
        call_throws(['rm', '-rf', data_dir])
4326
4327##################################
4328
f6b5b4d7 4329
9f95a23c
TL
def command_rm_cluster():
    # type: () -> None
    """Remove all traces of cluster args.fsid from this host: daemon and
    cluster systemd units, unit files, data, logs, logrotate config, and
    (if they match this fsid) /etc/ceph config/keyring/pubkey files.

    Requires --force since this destroys data.
    """
    if not args.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    lock = FileLock(args.fsid)  # renamed from 'l' (ambiguous, flake8 E741)
    lock.acquire()

    # stop + disable individual daemon units
    for d in list_daemons(detail=False):
        if d['fsid'] != args.fsid:
            continue
        if d['style'] != 'cephadm:v1':
            continue
        unit_name = get_unit_name(args.fsid, d['name'])
        call(['systemctl', 'stop', unit_name],
             verbose_on_failure=False)
        call(['systemctl', 'reset-failed', unit_name],
             verbose_on_failure=False)
        call(['systemctl', 'disable', unit_name],
             verbose_on_failure=False)

    # cluster units
    for unit_name in ['ceph-%s.target' % args.fsid]:
        call(['systemctl', 'stop', unit_name],
             verbose_on_failure=False)
        call(['systemctl', 'reset-failed', unit_name],
             verbose_on_failure=False)
        call(['systemctl', 'disable', unit_name],
             verbose_on_failure=False)

    # systemd escapes '-' as '\x2d' in slice names
    slice_name = 'system-%s.slice' % (('ceph-%s' % args.fsid).replace('-',
                                                                      '\\x2d'))
    call(['systemctl', 'stop', slice_name],
         verbose_on_failure=False)

    # rm units
    call_throws(['rm', '-f', args.unit_dir +
                 '/ceph-%s@.service' % args.fsid])
    call_throws(['rm', '-f', args.unit_dir +
                 '/ceph-%s.target' % args.fsid])
    call_throws(['rm', '-rf',
                 args.unit_dir + '/ceph-%s.target.wants' % args.fsid])
    # rm data
    call_throws(['rm', '-rf', args.data_dir + '/' + args.fsid])
    # rm logs
    call_throws(['rm', '-rf', args.log_dir + '/' + args.fsid])
    call_throws(['rm', '-rf', args.log_dir +
                 '/*.wants/ceph-%s@*' % args.fsid])
    # rm logrotate config
    call_throws(['rm', '-f', args.logrotate_dir + '/ceph-%s' % args.fsid])

    # clean up config, keyring, and pub key files -- but only if the
    # ceph.conf on this host actually references this fsid
    files = ['/etc/ceph/ceph.conf', '/etc/ceph/ceph.pub', '/etc/ceph/ceph.client.admin.keyring']

    if os.path.exists(files[0]):
        valid_fsid = False
        with open(files[0]) as f:
            if args.fsid in f.read():
                valid_fsid = True
        if valid_fsid:
            # idiom fix: iterate the list directly instead of range(len(...))
            for path in files:
                if os.path.exists(path):
                    os.remove(path)
4395
9f95a23c
TL
4396
4397##################################
4398
def check_time_sync(enabler=None):
    # type: (Optional[Packager]) -> bool
    """Return True if any known time-sync systemd unit is active.

    If *enabler* is given, check_units may try to enable a unit via it.
    """
    units = [
        'chrony.service',  # 18.04 (at least)
        'chronyd.service',  # el / opensuse
        'systemd-timesyncd.service',
        'ntpd.service',  # el7 (at least)
        'ntp.service',  # 18.04 (at least)
        'ntpsec.service',  # 20.04 (at least) / buster
    ]
    if check_units(units, enabler):
        return True
    logger.warning('No time sync service is running; checked for %s' % units)
    return False
4413
f6b5b4d7 4414
9f95a23c
TL
def command_check_host():
    # type: () -> None
    """Verify the host meets cephadm requirements (container engine,
    systemctl, lvcreate, time sync, expected hostname).

    Collects all problems and raises a single Error listing them.

    Fix: the 'Hostname ... matches' message was previously logged even
    when the hostname check had just failed; it is now in an else branch.
    """
    global container_path

    errors = []
    commands = ['systemctl', 'lvcreate']

    # locate a container engine (podman preferred, unless --docker)
    if args.docker:
        container_path = find_program('docker')
    else:
        for i in CONTAINER_PREFERENCE:
            try:
                container_path = find_program(i)
                break
            except Exception as e:
                logger.debug('Could not locate %s: %s' % (i, e))
        if not container_path:
            errors.append('Unable to locate any of %s' % CONTAINER_PREFERENCE)
        else:
            logger.info('podman|docker (%s) is present' % container_path)

    for command in commands:
        try:
            find_program(command)
            logger.info('%s is present' % command)
        except ValueError:
            errors.append('%s binary does not appear to be installed' % command)

    # check for configured+running chronyd or ntp
    if not check_time_sync():
        errors.append('No time synchronization is active')

    if 'expect_hostname' in args and args.expect_hostname:
        if get_hostname().lower() != args.expect_hostname.lower():
            errors.append('hostname "%s" does not match expected hostname "%s"' % (
                get_hostname(), args.expect_hostname))
        else:
            logger.info('Hostname "%s" matches what is expected.',
                        args.expect_hostname)

    if errors:
        raise Error('\n'.join(errors))

    logger.info('Host looks OK')
4458
4459##################################
4460
f6b5b4d7 4461
9f95a23c
TL
def command_prepare_host():
    # type: () -> None
    """Install anything the host check needs (container engine, lvm2,
    time sync), optionally set the expected hostname, then re-run the
    host check."""
    pkg = None  # platform packager, created lazily on first need

    logger.info('Verifying podman|docker is present...')
    if not container_path:
        pkg = pkg or create_packager()
        pkg.install_podman()

    logger.info('Verifying lvm2 is present...')
    if not find_executable('lvcreate'):
        pkg = pkg or create_packager()
        pkg.install(['lvm2'])

    logger.info('Verifying time synchronization is in place...')
    if not check_time_sync():
        pkg = pkg or create_packager()
        pkg.install(['chrony'])
        # check again, and this time try to enable
        # the service
        check_time_sync(enabler=pkg)

    if 'expect_hostname' in args and args.expect_hostname and args.expect_hostname != get_hostname():
        logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), args.expect_hostname))
        call_throws(['hostname', args.expect_hostname])
        with open('/etc/hostname', 'w') as f:
            f.write(args.expect_hostname + '\n')

    logger.info('Repeating the final host check...')
    command_check_host()
4494
4495##################################
4496
f6b5b4d7 4497
9f95a23c
TL
class CustomValidation(argparse.Action):
    """Argparse action validating selected cephadm arguments.

    Currently validates --name values of the form <type>.<id>, where
    <type> must be a supported daemon type.
    """

    def _check_name(self, values):
        parts = values.split('.', 1)
        if len(parts) != 2:
            raise argparse.ArgumentError(self,
                "must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com")
        daemon_type = parts[0]

        daemons = get_supported_daemons()
        if daemon_type not in daemons:
            raise argparse.ArgumentError(self,
                "name must declare the type of daemon e.g. "
                "{}".format(', '.join(daemons)))

    def __call__(self, parser, namespace, values, option_string=None):
        if self.dest == "name":
            self._check_name(values)
        setattr(namespace, self.dest, values)
4517
4518##################################
4519
f6b5b4d7 4520
def get_distro(os_release='/etc/os-release'):
    # type: (str) -> Tuple[Optional[str], Optional[str], Optional[str]]
    """Parse an os-release(5) file and return (id, version_id, codename),
    all lowercased; fields that are absent come back as None.

    The path is parameterized (default /etc/os-release) for testability;
    existing callers are unaffected.

    Fix: an empty value (e.g. a bare 'KEY=' line) previously raised
    IndexError on val[0]; the quote-stripping is now length-guarded.
    """
    distro = None
    distro_version = None
    distro_codename = None
    with open(os_release, 'r') as f:
        for line in f.readlines():
            line = line.strip()
            if '=' not in line or line.startswith('#'):
                continue
            (var, val) = line.split('=', 1)
            # strip surrounding double quotes, guarding against empty values
            if len(val) >= 2 and val[0] == '"' and val[-1] == '"':
                val = val[1:-1]
            if var == 'ID':
                distro = val.lower()
            elif var == 'VERSION_ID':
                distro_version = val.lower()
            elif var == 'VERSION_CODENAME':
                distro_codename = val.lower()
    return distro, distro_version, distro_codename
4541
f6b5b4d7 4542
9f95a23c
TL
class Packager(object):
    """Base class for distro-specific repo/package management.

    At most one of stable / version / branch(+commit) may be specified;
    all-None (no repo flavor) is also allowed. Subclasses implement
    add_repo/rm_repo/install for their package manager.
    """

    def __init__(self, stable=None, version=None, branch=None, commit=None):
        assert \
            (stable and not version and not branch and not commit) or \
            (not stable and version and not branch and not commit) or \
            (not stable and not version and branch) or \
            (not stable and not version and not branch and not commit)
        self.stable = stable
        self.version = version
        self.branch = branch
        self.commit = commit

    def add_repo(self):
        raise NotImplementedError

    def rm_repo(self):
        raise NotImplementedError

    def query_shaman(self, distro, distro_version, branch, commit):
        """Resolve a dev build via shaman and return the chacra repo file
        contents. Raises Error on any HTTP failure."""
        # query shaman
        logger.info('Fetching repo metadata from shaman and chacra...')
        shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
            distro=distro,
            distro_version=distro_version,
            branch=branch,
            sha1=commit or 'latest',
            arch=get_arch()
        )
        try:
            shaman_response = urlopen(shaman_url)
        except HTTPError as err:
            logger.error('repository not found in shaman (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, shaman_url))
        try:
            chacra_url = shaman_response.geturl()
            chacra_response = urlopen(chacra_url)
        except HTTPError as err:
            logger.error('repository not found in chacra (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, chacra_url))
        return chacra_response.read().decode('utf-8')

    def repo_gpgkey(self):
        """Return a (gpg key url, name) pair for the configured flavor.

        Fix: the custom --gpg-url branch previously returned a bare
        string while the other branches return 2-tuples; callers unpack
        `url, name`, so it now returns a tuple as well.
        """
        if args.gpg_url:
            return args.gpg_url, 'manual'
        if self.stable or self.version:
            return 'https://download.ceph.com/keys/release.asc', 'release'
        else:
            return 'https://download.ceph.com/keys/autobuild.asc', 'autobuild'

    def enable_service(self, service):
        """
        Start and enable the service (typically using systemd).
        """
        call_throws(['systemctl', 'enable', '--now', service])
4597
4598
class Apt(Packager):
    """Packager backend for apt-based distros (Ubuntu, Debian).

    Manages the ceph apt source and its GPG key, and (on Ubuntu) the
    kubic repo used to obtain podman.
    """

    # os-release ID -> name used in repo paths
    DISTRO_NAMES = {
        'ubuntu': 'ubuntu',
        'debian': 'debian',
    }

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version, distro_codename):
        super(Apt, self).__init__(stable=stable, version=version,
                                  branch=branch, commit=commit)
        self.distro = self.DISTRO_NAMES[distro]   # normalized distro name
        self.distro_codename = distro_codename    # e.g. 'focal'; used in deb lines
        self.distro_version = distro_version      # e.g. '20.04'; used for kubic URL

    def repo_path(self):
        # location of the ceph apt source file
        return '/etc/apt/sources.list.d/ceph.list'

    def add_repo(self):
        """Install the ceph repo GPG key and write the ceph apt source."""
        url, name = self.repo_gpgkey()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'w') as f:
            f.write(key)

        # version/stable releases come from args.repo_url; dev builds are
        # resolved through shaman/chacra
        if self.version:
            content = 'deb %s/debian-%s/ %s main\n' % (
                args.repo_url, self.version, self.distro_codename)
        elif self.stable:
            content = 'deb %s/debian-%s/ %s main\n' % (
                args.repo_url, self.stable, self.distro_codename)
        else:
            content = self.query_shaman(self.distro, self.distro_codename, self.branch,
                                        self.commit)

        logger.info('Installing repo file at %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        """Remove ceph GPG keys and apt source; on Ubuntu also the kubic repo."""
        for name in ['autobuild', 'release']:
            p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
            if os.path.exists(p):
                logger.info('Removing repo GPG key %s...' % p)
                os.unlink(p)
        if os.path.exists(self.repo_path()):
            logger.info('Removing repo at %s...' % self.repo_path())
            os.unlink(self.repo_path())

        if self.distro == 'ubuntu':
            self.rm_kubic_repo()

    def install(self, ls):
        # type: (List[str]) -> None
        """apt-install the given list of package names."""
        logger.info('Installing packages %s...' % ls)
        call_throws(['apt', 'install', '-y'] + ls)

    def install_podman(self):
        """Install podman (adding the kubic repo on Ubuntu first);
        fall back to docker.io if podman install fails."""
        if self.distro == 'ubuntu':
            logger.info('Setting up repo for podman...')
            self.add_kubic_repo()
            call_throws(['apt', 'update'])

        logger.info('Attempting podman install...')
        try:
            self.install(['podman'])
        except Error as e:
            logger.info('Podman did not work. Falling back to docker...')
            self.install(['docker.io'])

    def kubic_repo_url(self):
        # per-Ubuntu-version kubic (libcontainers) repo URL
        return 'https://download.opensuse.org/repositories/devel:/kubic:/' \
               'libcontainers:/stable/xUbuntu_%s/' % self.distro_version

    def kubic_repo_path(self):
        return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list'

    def kubric_repo_gpgkey_url(self):
        # NOTE: 'kubric' is a long-standing typo for 'kubic'; the name is
        # kept as-is since it is used consistently within this class
        return '%s/Release.key' % self.kubic_repo_url()

    def kubric_repo_gpgkey_path(self):
        return '/etc/apt/trusted.gpg.d/kubic.release.gpg'

    def add_kubic_repo(self):
        """Install the kubic repo GPG key (via apt-key) and apt source."""
        url = self.kubric_repo_gpgkey_url()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        # write key to a temp file so apt-key can import it into the keyring
        tmp_key = write_tmp(key, 0, 0)
        keyring = self.kubric_repo_gpgkey_path()
        call_throws(['apt-key', '--keyring', keyring, 'add', tmp_key.name])

        logger.info('Installing repo file at %s...' % self.kubic_repo_path())
        content = 'deb %s /\n' % self.kubic_repo_url()
        with open(self.kubic_repo_path(), 'w') as f:
            f.write(content)

    def rm_kubic_repo(self):
        """Remove the kubic repo keyring and apt source, if present."""
        keyring = self.kubric_repo_gpgkey_path()
        if os.path.exists(keyring):
            logger.info('Removing repo GPG key %s...' % keyring)
            os.unlink(keyring)

        p = self.kubic_repo_path()
        if os.path.exists(p):
            logger.info('Removing repo at %s...' % p)
            os.unlink(p)
4715
f6b5b4d7 4716
9f95a23c
TL
class YumDnf(Packager):
    """Packager implementation for RPM-based distros driven by yum or dnf."""

    # distro id -> (shaman-normalized name, repo code prefix)
    DISTRO_NAMES = {
        'centos': ('centos', 'el'),
        'rhel': ('centos', 'el'),
        'scientific': ('centos', 'el'),
        'fedora': ('fedora', 'fc'),
    }

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version):
        super(YumDnf, self).__init__(stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.major = int(distro_version.split('.')[0])
        self.distro_normalized = self.DISTRO_NAMES[distro][0]
        distro_prefix = self.DISTRO_NAMES[distro][1]
        self.distro_code = distro_prefix + str(self.major)
        # dnf replaced yum on Fedora >= 30 and EL >= 8.
        # BUGFIX: this previously compared self.distro_code (e.g. 'el8')
        # against the bare prefix ('el'/'fc'), which never matched, so
        # 'dnf' was never selected (masked on EL8 where yum is a dnf shim).
        if (distro_prefix == 'fc' and self.major >= 30) or \
           (distro_prefix == 'el' and self.major >= 8):
            self.tool = 'dnf'
        else:
            self.tool = 'yum'

    def custom_repo(self, **kw):
        """
        Repo files need special care in that a whole line should not be present
        if there is no value for it. Because we were using `format()` we could
        not conditionally add a line for a repo file. So the end result would
        contain a key with a missing value (say if we were passing `None`).

        For example, it could look like::

        [ceph repo]
        name= ceph repo
        proxy=
        gpgcheck=

        Which breaks. This function allows us to conditionally add lines,
        preserving an order and be more careful.

        Previously, and for historical purposes, this is how the template used
        to look::

        custom_repo =
        [{repo_name}]
        name={name}
        baseurl={baseurl}
        enabled={enabled}
        gpgcheck={gpgcheck}
        type={_type}
        gpgkey={gpgkey}
        proxy={proxy}

        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self):
        # type: () -> str
        """Location of the yum/dnf repo file written by add_repo()."""
        return '/etc/yum.repos.d/ceph.repo'

    def repo_baseurl(self):
        # type: () -> str
        """Base URL of the rpm repo for the requested version or stable release."""
        assert self.stable or self.version
        if self.version:
            return '%s/rpm-%s/%s' % (args.repo_url, self.version,
                                     self.distro_code)
        else:
            return '%s/rpm-%s/%s' % (args.repo_url, self.stable,
                                     self.distro_code)

    def add_repo(self):
        # type: () -> None
        """Write a ceph repo file (stable/version) or fetch one from shaman (dev)."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro_normalized, self.major,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        # ceph rpms need packages from EPEL on EL distros
        if self.distro_code.startswith('el'):
            logger.info('Enabling EPEL...')
            call_throws([self.tool, 'install', '-y', 'epel-release'])

    def rm_repo(self):
        # type: () -> None
        """Remove the ceph repo file, if present."""
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws([self.tool, 'install', '-y'] + ls)

    def install_podman(self):
        self.install(['podman'])
4845
4846
class Zypper(Packager):
    """Packager implementation for SUSE-based distros (zypper)."""

    DISTRO_NAMES = [
        'sles',
        'opensuse-tumbleweed',
        'opensuse-leap'
    ]

    def __init__(self, stable, version, branch, commit,
                 distro, distro_version):
        super(Zypper, self).__init__(stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.tool = 'zypper'
        self.distro = 'opensuse'
        # default to Leap 15.1; tumbleweed has no meaningful version number
        self.distro_version = '15.1'
        if 'tumbleweed' not in distro and distro_version is not None:
            self.distro_version = distro_version

    def custom_repo(self, **kw):
        """
        See YumDnf for format explanation.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self):
        # type: () -> str
        """Location of the zypper repo file written by add_repo()."""
        return '/etc/zypp/repos.d/ceph.repo'

    def repo_baseurl(self):
        # type: () -> str
        """Base URL of the rpm repo for the requested version or stable release."""
        assert self.stable or self.version
        if self.version:
            # BUGFIX: this branch previously used self.stable, making
            # --version silently behave like --release
            return '%s/rpm-%s/%s' % (args.repo_url, self.version, self.distro)
        else:
            return '%s/rpm-%s/%s' % (args.repo_url, self.stable, self.distro)

    def add_repo(self):
        # type: () -> None
        """Write a ceph repo file (stable/version) or fetch one from shaman (dev)."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro, self.distro_version,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        # type: () -> None
        """Remove the ceph repo file, if present."""
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws([self.tool, 'in', '-y'] + ls)

    def install_podman(self):
        self.install(['podman'])
4938
4939
def create_packager(stable=None, version=None, branch=None, commit=None):
    """Instantiate the Packager subclass that matches the local distro.

    :raises Error: when the detected distro has no packager backend.
    """
    distro, distro_version, distro_codename = get_distro()
    common = dict(stable=stable, version=version, branch=branch, commit=commit)
    if distro in YumDnf.DISTRO_NAMES:
        return YumDnf(distro=distro, distro_version=distro_version, **common)
    if distro in Apt.DISTRO_NAMES:
        return Apt(distro=distro, distro_version=distro_version,
                   distro_codename=distro_codename, **common)
    if distro in Zypper.DISTRO_NAMES:
        return Zypper(distro=distro, distro_version=distro_version, **common)
    raise Error('Distro %s version %s not supported' % (distro, distro_version))
4956
4957
def command_add_repo():
    # type: () -> None
    """Configure a ceph package repo from --release, --version, or a dev branch/commit."""
    if args.version and args.release:
        raise Error('you can specify either --release or --version but not both')
    if not args.version and not args.release and not args.dev and not args.dev_commit:
        raise Error('please supply a --release, --version, --dev or --dev-commit argument')
    if args.version:
        # versions must look like x.y.z; previously this bound an unused
        # exception and unpacked into throwaway variables
        if len(args.version.split('.')) != 3:
            raise Error('version must be in the form x.y.z (e.g., 15.2.0)')

    pkg = create_packager(stable=args.release,
                          version=args.version,
                          branch=args.dev,
                          commit=args.dev_commit)
    pkg.add_repo()
4974
f6b5b4d7 4975
9f95a23c
TL
def command_rm_repo():
    # type: () -> None
    """Remove any ceph repo definition previously installed by add-repo."""
    create_packager().rm_repo()
4979
f6b5b4d7 4980
9f95a23c
TL
def command_install():
    # type: () -> None
    """Install the packages listed in args.packages via the distro packager."""
    create_packager().install(args.packages)
4984
4985##################################
4986
f91f0fd5
TL
def get_ipv4_address(ifname):
    # type: (str) -> str
    """Return the IPv4 address of *ifname* in CIDR form, or '' if it has none.

    Queries the kernel via the SIOCGIFADDR/SIOCGIFNETMASK ioctls; the
    address bytes live at offset 20:24 of the returned sockaddr buffer.
    """
    def _extract(sock, offset):
        return socket.inet_ntop(
            socket.AF_INET,
            fcntl.ioctl(
                sock.fileno(),
                offset,
                struct.pack('256s', bytes(ifname[:15], 'utf-8'))
            )[20:24])

    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        addr = _extract(s, 35093)     # '0x8915' = SIOCGIFADDR
        dq_mask = _extract(s, 35099)  # 0x891b = SIOCGIFNETMASK
    except OSError:
        # interface does not have an ipv4 address
        return ''
    finally:
        # BUGFIX: the socket was previously leaked on every call
        s.close()

    # count the set bits of the dotted-quad netmask to get the prefix length
    dec_mask = sum([bin(int(i)).count('1')
                    for i in dq_mask.split('.')])
    return '{}/{}'.format(addr, dec_mask)
5009
5010
def get_ipv6_address(ifname):
    # type: (str) -> str
    """Return the IPv6 address of *ifname* in CIDR form, or '' if it has none."""
    if not os.path.exists('/proc/net/if_inet6'):
        return ''

    # /proc/net/if_inet6 layout, per
    # https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/ch11s04.html :
    # field 0 = address (32 hex chars), field 2 = prefix length (hex),
    # last field = interface name
    for entry in read_file(['/proc/net/if_inet6']).splitlines():
        fields = entry.split()
        if fields[-1] != ifname:
            continue
        raw_addr = fields[0]
        grouped = ":".join([raw_addr[_p:_p+4] for _p in range(0, len(raw_addr), 4)])
        # normalize (e.g. collapse zero runs) via the ipaddress module
        normalized = ipaddress.ip_address(grouped)
        return "{}/{}".format(str(normalized), int('0x{}'.format(fields[2]), 16))
    return ''
5029
5030
def bytes_to_human(num, mode='decimal'):
    # type: (float, str) -> str
    """Convert a bytes value into it's human-readable form.

    :param num: number, in bytes, to convert
    :param mode: Either decimal (default) or binary to determine divisor
    :returns: string representing the bytes value in a more readable format
    """
    if mode == 'binary':
        units = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
        step = 1024.0
        top_unit = "YiB"
    else:
        units = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
        step = 1000.0
        top_unit = "YB"

    for suffix in units:
        if abs(num) < step:
            return "%3.1f%s" % (num, suffix)
        num /= step
    # fell through every unit: report in the largest one
    return "%.1f%s" % (num, top_unit)
5053
5054
def read_file(path_list, file_name=''):
    # type: (List[str], str) -> str
    """Returns the content of the first file found within the `path_list`

    :param path_list: list of file paths to search
    :param file_name: optional file_name to be applied to a file path
    :returns: content of the file or 'Unknown'
    """
    for base in path_list:
        file_path = os.path.join(base, file_name) if file_name else base
        if not os.path.exists(file_path):
            continue
        with open(file_path, 'r') as f:
            try:
                return f.read().strip()
            except OSError:
                # sysfs may populate the file, but for devices like
                # virtio reads can fail
                return "Unknown"
    return "Unknown"
5079
5080
5081##################################
class HostFacts():
    """Collect and expose host-level metadata (cpu, memory, disks, NICs, security)."""

    _dmi_path_list = ['/sys/class/dmi/id']
    _nic_path_list = ['/sys/class/net']
    _selinux_path_list = ['/etc/selinux/config']
    _apparmor_path_list = ['/etc/apparmor']
    # sysfs reports a raw vendor id for some virtual devices; map to a name
    _disk_vendor_workarounds = {
        "0x1af4": "Virtio Block Device"
    }

    def __init__(self):
        # type: () -> None
        self.cpu_model = 'Unknown'
        self.cpu_count = 0
        self.cpu_cores = 0
        self.cpu_threads = 0
        self.interfaces = {}  # type: Dict[str, Any]

        self._meminfo = read_file(['/proc/meminfo']).splitlines()
        self._get_cpuinfo()
        self._process_nics()
        self.arch = platform.processor()
        self.kernel = platform.release()

    def _get_cpuinfo(self):
        # type: () -> None
        """Determine cpu information via /proc/cpuinfo"""
        raw = read_file(['/proc/cpuinfo'])
        output = raw.splitlines()
        cpu_set = set()

        for line in output:
            field = [l.strip() for l in line.split(':')]
            if "model name" in line:
                self.cpu_model = field[1]
            if "physical id" in line:
                cpu_set.add(field[1])
            if "siblings" in line:
                self.cpu_threads = int(field[1].strip())
            if "cpu cores" in line:
                self.cpu_cores = int(field[1].strip())
        self.cpu_count = len(cpu_set)

    def _get_block_devs(self):
        # type: () -> List[str]
        """Determine the list of block devices by looking at /sys/block"""
        # device-mapper devices are excluded
        return [dev for dev in os.listdir('/sys/block')
                if not dev.startswith('dm')]

    def _get_devs_by_type(self, rota='0'):
        # type: (str) -> List[str]
        """Filter block devices by a given rotational attribute (0=flash, 1=spinner)"""
        devs = list()
        for blk_dev in self._get_block_devs():
            rot_path = '/sys/block/{}/queue/rotational'.format(blk_dev)
            rot_value = read_file([rot_path])
            if rot_value == rota:
                devs.append(blk_dev)
        return devs

    @property
    def operating_system(self):
        # type: () -> str
        """Determine OS version"""
        raw_info = read_file(['/etc/os-release'])
        os_release = raw_info.splitlines()
        rel_str = 'Unknown'
        rel_dict = dict()

        for line in os_release:
            if "=" in line:
                # split only on the first '='; values may contain '='
                var_name, var_value = line.split('=', 1)
                rel_dict[var_name] = var_value.strip('"')

        # Would normally use PRETTY_NAME, but NAME and VERSION are more
        # consistent
        if all(_v in rel_dict for _v in ["NAME", "VERSION"]):
            rel_str = "{} {}".format(rel_dict['NAME'], rel_dict['VERSION'])
        return rel_str

    @property
    def hostname(self):
        # type: () -> str
        """Return the hostname"""
        return platform.node()

    @property
    def subscribed(self):
        # type: () -> str
        """Highlevel check to see if the host is subscribed to receive updates/support"""
        def _red_hat():
            # type: () -> str
            # RHEL 7 and RHEL 8
            entitlements_dir = '/etc/pki/entitlement'
            if os.path.exists(entitlements_dir):
                pems = glob('{}/*.pem'.format(entitlements_dir))
                if len(pems) >= 2:
                    return "Yes"

            return "No"

        os_name = self.operating_system
        if os_name.upper().startswith("RED HAT"):
            return _red_hat()

        return "Unknown"

    @property
    def hdd_count(self):
        # type: () -> int
        """Return a count of HDDs (spinners)"""
        return len(self._get_devs_by_type(rota='1'))

    def _get_capacity(self, dev):
        # type: (str) -> int
        """Determine the size of a given device"""
        size_path = os.path.join('/sys/block', dev, 'size')
        size_blocks = int(read_file([size_path]))
        blk_path = os.path.join('/sys/block', dev, 'queue', 'logical_block_size')
        blk_count = int(read_file([blk_path]))
        return size_blocks * blk_count

    def _get_capacity_by_type(self, rota='0'):
        # type: (str) -> int
        """Return the total capacity of a category of device (flash or hdd)"""
        devs = self._get_devs_by_type(rota=rota)
        capacity = 0
        for dev in devs:
            capacity += self._get_capacity(dev)
        return capacity

    def _dev_list(self, dev_list):
        # type: (List[str]) -> List[Dict[str, object]]
        """Return a 'pretty' name list for each device in the `dev_list`"""
        disk_list = list()

        for dev in dev_list:
            disk_model = read_file(['/sys/block/{}/device/model'.format(dev)]).strip()
            disk_rev = read_file(['/sys/block/{}/device/rev'.format(dev)]).strip()
            disk_wwid = read_file(['/sys/block/{}/device/wwid'.format(dev)]).strip()
            vendor = read_file(['/sys/block/{}/device/vendor'.format(dev)]).strip()
            disk_vendor = HostFacts._disk_vendor_workarounds.get(vendor, vendor)
            disk_size_bytes = self._get_capacity(dev)
            disk_list.append({
                "description": "{} {} ({})".format(disk_vendor, disk_model, bytes_to_human(disk_size_bytes)),
                "vendor": disk_vendor,
                "model": disk_model,
                "rev": disk_rev,
                "wwid": disk_wwid,
                "dev_name": dev,
                "disk_size_bytes": disk_size_bytes,
                }
            )
        return disk_list

    @property
    def hdd_list(self):
        # type: () -> List[Dict[str, object]]
        """Return a list of devices that are HDDs (spinners)"""
        devs = self._get_devs_by_type(rota='1')
        return self._dev_list(devs)

    @property
    def flash_list(self):
        # type: () -> List[Dict[str, object]]
        """Return a list of devices that are flash based (SSD, NVMe)"""
        devs = self._get_devs_by_type(rota='0')
        return self._dev_list(devs)

    @property
    def hdd_capacity_bytes(self):
        # type: () -> int
        """Return the total capacity for all HDD devices (bytes)"""
        return self._get_capacity_by_type(rota='1')

    @property
    def hdd_capacity(self):
        # type: () -> str
        """Return the total capacity for all HDD devices (human readable format)"""
        return bytes_to_human(self.hdd_capacity_bytes)

    @property
    def cpu_load(self):
        # type: () -> Dict[str, float]
        """Return the cpu load average data for the host"""
        raw = read_file(['/proc/loadavg']).strip()
        data = raw.split()
        return {
            "1min": float(data[0]),
            "5min": float(data[1]),
            "15min": float(data[2]),
        }

    @property
    def flash_count(self):
        # type: () -> int
        """Return the number of flash devices in the system (SSD, NVMe)"""
        return len(self._get_devs_by_type(rota='0'))

    @property
    def flash_capacity_bytes(self):
        # type: () -> int
        """Return the total capacity for all flash devices (bytes)"""
        return self._get_capacity_by_type(rota='0')

    @property
    def flash_capacity(self):
        # type: () -> str
        """Return the total capacity for all Flash devices (human readable format)"""
        return bytes_to_human(self.flash_capacity_bytes)

    def _process_nics(self):
        # type: () -> None
        """Look at the NIC devices and extract network related metadata"""
        # from https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
        hw_lookup = {
            "1": "ethernet",
            "32": "infiniband",
            "772": "loopback",
        }

        for nic_path in HostFacts._nic_path_list:
            if not os.path.exists(nic_path):
                continue
            for iface in os.listdir(nic_path):

                lower_devs_list = [os.path.basename(link.replace("lower_", "")) for link in glob(os.path.join(nic_path, iface, "lower_*"))]
                upper_devs_list = [os.path.basename(link.replace("upper_", "")) for link in glob(os.path.join(nic_path, iface, "upper_*"))]

                try:
                    mtu = int(read_file([os.path.join(nic_path, iface, 'mtu')]))
                except ValueError:
                    mtu = 0

                operstate = read_file([os.path.join(nic_path, iface, 'operstate')])
                try:
                    speed = int(read_file([os.path.join(nic_path, iface, 'speed')]))
                except (OSError, ValueError):
                    # OSError : device doesn't support the ethtool get_link_ksettings
                    # ValueError : raised when the read fails, and returns Unknown
                    #
                    # Either way, we show a -1 when speed isn't available
                    speed = -1

                if os.path.exists(os.path.join(nic_path, iface, 'bridge')):
                    nic_type = "bridge"
                elif os.path.exists(os.path.join(nic_path, iface, 'bonding')):
                    nic_type = "bonding"
                else:
                    nic_type = hw_lookup.get(read_file([os.path.join(nic_path, iface, 'type')]), "Unknown")

                dev_link = os.path.join(nic_path, iface, 'device')
                if os.path.exists(dev_link):
                    iftype = 'physical'
                    driver_path = os.path.join(dev_link, 'driver')
                    if os.path.exists(driver_path):
                        driver = os.path.basename(
                            os.path.realpath(driver_path))
                    else:
                        driver = 'Unknown'

                else:
                    iftype = 'logical'
                    driver = ''

                self.interfaces[iface] = {
                    "mtu": mtu,
                    "upper_devs_list": upper_devs_list,
                    "lower_devs_list": lower_devs_list,
                    "operstate": operstate,
                    "iftype": iftype,
                    "nic_type": nic_type,
                    "driver": driver,
                    "speed": speed,
                    "ipv4_address": get_ipv4_address(iface),
                    "ipv6_address": get_ipv6_address(iface),
                }

    @property
    def nic_count(self):
        # type: () -> int
        """Return a total count of all physical NICs detected in the host"""
        return len([iface for iface in self.interfaces
                    if self.interfaces[iface]["iftype"] == 'physical'])

    def _get_mem_data(self, field_name):
        # type: (str) -> int
        """Return the numeric value (kB) of *field_name* from /proc/meminfo, or 0."""
        for line in self._meminfo:
            if line.startswith(field_name):
                _d = line.split()
                return int(_d[1])
        return 0

    @property
    def memory_total_kb(self):
        # type: () -> int
        """Determine the memory installed (kb)"""
        return self._get_mem_data('MemTotal')

    @property
    def memory_free_kb(self):
        # type: () -> int
        """Determine the memory free (not cache, immediately usable)"""
        return self._get_mem_data('MemFree')

    @property
    def memory_available_kb(self):
        # type: () -> int
        """Determine the memory available to new applications without swapping"""
        return self._get_mem_data('MemAvailable')

    @property
    def vendor(self):
        # type: () -> str
        """Determine server vendor from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, "sys_vendor")

    @property
    def model(self):
        # type: () -> str
        """Determine server model information from DMI data in sysfs"""
        family = read_file(HostFacts._dmi_path_list, "product_family")
        product = read_file(HostFacts._dmi_path_list, "product_name")
        if family == 'Unknown' and product:
            return "{}".format(product)

        return "{} ({})".format(family, product)

    @property
    def bios_version(self):
        # type: () -> str
        """Determine server BIOS version from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, "bios_version")

    @property
    def bios_date(self):
        # type: () -> str
        """Determine server BIOS date from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, "bios_date")

    @property
    def timestamp(self):
        # type: () -> float
        """Return the current time as Epoch seconds"""
        return time.time()

    @property
    def system_uptime(self):
        # type: () -> float
        """Return the system uptime (in secs)"""
        raw_time = read_file(['/proc/uptime'])
        up_secs, _ = raw_time.split()
        return float(up_secs)

    @property
    def kernel_security(self):
        # type: () -> Dict[str, str]
        """Determine the security features enabled in the kernel - SELinux, AppArmor"""
        def _fetch_selinux():
            # type: () -> Dict[str, str]
            """Read the selinux config file to determine state"""
            security = {}
            for selinux_path in HostFacts._selinux_path_list:
                if os.path.exists(selinux_path):
                    selinux_config = read_file([selinux_path]).splitlines()
                    security['type'] = 'SELinux'
                    for line in selinux_config:
                        line = line.strip()
                        # BUGFIX: skip blank lines and comments, and split
                        # only on the first '=' (values may contain '=');
                        # previously such lines raised ValueError
                        if not line or line.startswith('#') or '=' not in line:
                            continue
                        k, v = line.split('=', 1)
                        security[k.strip()] = v.strip()
                    if security.get('SELINUX', '').lower() == "disabled":
                        security['description'] = "SELinux: Disabled"
                    else:
                        security['description'] = "SELinux: Enabled({}, {})".format(
                            security.get('SELINUX', 'Unknown'),
                            security.get('SELINUXTYPE', 'Unknown'))
            return security

        def _fetch_apparmor():
            # type: () -> Dict[str, str]
            """Read the apparmor profiles directly, returning an overview of AppArmor status"""
            security = {}
            for apparmor_path in HostFacts._apparmor_path_list:
                if os.path.exists(apparmor_path):
                    security['type'] = "AppArmor"
                    security['description'] = "AppArmor: Enabled"
                    try:
                        profiles = read_file(['/sys/kernel/security/apparmor/profiles'])
                    except OSError:
                        pass
                    else:
                        summary = {}  # type: Dict[str, int]
                        for line in profiles.split('\n'):
                            # each line is '<profile name> (<mode>)'; profile
                            # names may contain spaces, so split from the right.
                            # BUGFIX: the trailing empty line used to raise
                            # ValueError on unpack
                            fields = line.rsplit(' ', 1)
                            if len(fields) != 2:
                                continue
                            mode = fields[1].strip('()')
                            # BUGFIX: the first sighting of a mode used to be
                            # recorded as 0, undercounting every mode by one
                            summary[mode] = summary.get(mode, 0) + 1
                        summary_str = ",".join(["{} {}".format(v, k) for k, v in summary.items()])
                        security = {**security, **summary}  # type: ignore
                        security['description'] += "({})".format(summary_str)

            return security

        if os.path.exists('/sys/kernel/security/lsm'):
            lsm = read_file(['/sys/kernel/security/lsm']).strip()
            if 'selinux' in lsm:
                return _fetch_selinux()
            elif 'apparmor' in lsm:
                return _fetch_apparmor()
            else:
                return {
                    "type": "Unknown",
                    "description": "Linux Security Module framework is active, but is not using SELinux or AppArmor"
                }

        return {
            "type": "None",
            "description": "Linux Security Module framework is not available"
        }

    def dump(self):
        # type: () -> str
        """Return the attributes of this HostFacts object as json"""
        data = {k: getattr(self, k) for k in dir(self)
                if not k.startswith('_') and
                isinstance(getattr(self, k),
                           (float, int, str, list, dict, tuple))
                }
        return json.dumps(data, indent=2, sort_keys=True)
5513
5514##################################
5515
def command_gather_facts():
    """gather_facts is intended to provide host related metadata to the caller"""
    print(HostFacts().dump())
5520
5521
5522##################################
5523
f6b5b4d7 5524
9f95a23c
TL
5525def _get_parser():
5526 # type: () -> argparse.ArgumentParser
5527 parser = argparse.ArgumentParser(
5528 description='Bootstrap Ceph daemons with systemd and containers.',
5529 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
5530 parser.add_argument(
5531 '--image',
5532 help='container image. Can also be set via the "CEPHADM_IMAGE" '
5533 'env var')
5534 parser.add_argument(
5535 '--docker',
5536 action='store_true',
5537 help='use docker instead of podman')
5538 parser.add_argument(
5539 '--data-dir',
5540 default=DATA_DIR,
5541 help='base directory for daemon data')
5542 parser.add_argument(
5543 '--log-dir',
5544 default=LOG_DIR,
5545 help='base directory for daemon logs')
5546 parser.add_argument(
5547 '--logrotate-dir',
5548 default=LOGROTATE_DIR,
5549 help='location of logrotate configuration files')
5550 parser.add_argument(
5551 '--unit-dir',
5552 default=UNIT_DIR,
5553 help='base directory for systemd units')
5554 parser.add_argument(
5555 '--verbose', '-v',
5556 action='store_true',
5557 help='Show debug-level log messages')
5558 parser.add_argument(
5559 '--timeout',
5560 type=int,
5561 default=DEFAULT_TIMEOUT,
5562 help='timeout in seconds')
5563 parser.add_argument(
5564 '--retry',
5565 type=int,
5566 default=DEFAULT_RETRY,
5567 help='max number of retries')
e306af50
TL
5568 parser.add_argument(
5569 '--env', '-e',
5570 action='append',
5571 default=[],
5572 help='set environment variable')
9f95a23c
TL
5573
5574 subparsers = parser.add_subparsers(help='sub-command')
5575
5576 parser_version = subparsers.add_parser(
5577 'version', help='get ceph version from container')
5578 parser_version.set_defaults(func=command_version)
5579
5580 parser_pull = subparsers.add_parser(
5581 'pull', help='pull latest image version')
5582 parser_pull.set_defaults(func=command_pull)
5583
5584 parser_inspect_image = subparsers.add_parser(
5585 'inspect-image', help='inspect local container image')
5586 parser_inspect_image.set_defaults(func=command_inspect_image)
5587
5588 parser_ls = subparsers.add_parser(
5589 'ls', help='list daemon instances on this host')
5590 parser_ls.set_defaults(func=command_ls)
5591 parser_ls.add_argument(
5592 '--no-detail',
5593 action='store_true',
5594 help='Do not include daemon status')
5595 parser_ls.add_argument(
5596 '--legacy-dir',
5597 default='/',
5598 help='base directory for legacy daemon data')
5599
5600 parser_list_networks = subparsers.add_parser(
5601 'list-networks', help='list IP networks')
5602 parser_list_networks.set_defaults(func=command_list_networks)
5603
5604 parser_adopt = subparsers.add_parser(
5605 'adopt', help='adopt daemon deployed with a different tool')
5606 parser_adopt.set_defaults(func=command_adopt)
5607 parser_adopt.add_argument(
5608 '--name', '-n',
5609 required=True,
5610 help='daemon name (type.id)')
5611 parser_adopt.add_argument(
5612 '--style',
5613 required=True,
5614 help='deployment style (legacy, ...)')
5615 parser_adopt.add_argument(
5616 '--cluster',
5617 default='ceph',
5618 help='cluster name')
5619 parser_adopt.add_argument(
5620 '--legacy-dir',
5621 default='/',
5622 help='base directory for legacy daemon data')
5623 parser_adopt.add_argument(
5624 '--config-json',
5625 help='Additional configuration information in JSON format')
5626 parser_adopt.add_argument(
5627 '--skip-firewalld',
5628 action='store_true',
5629 help='Do not configure firewalld')
5630 parser_adopt.add_argument(
5631 '--skip-pull',
5632 action='store_true',
5633 help='do not pull the latest image before adopting')
1911f103
TL
5634 parser_adopt.add_argument(
5635 '--force-start',
5636 action='store_true',
5637 help="start newly adoped daemon, even if it wasn't running previously")
f91f0fd5
TL
5638 parser_adopt.add_argument(
5639 '--container-init',
5640 action='store_true',
5641 help='Run podman/docker with `--init`')
9f95a23c
TL
5642
5643 parser_rm_daemon = subparsers.add_parser(
5644 'rm-daemon', help='remove daemon instance')
5645 parser_rm_daemon.set_defaults(func=command_rm_daemon)
5646 parser_rm_daemon.add_argument(
5647 '--name', '-n',
5648 required=True,
5649 action=CustomValidation,
5650 help='daemon name (type.id)')
5651 parser_rm_daemon.add_argument(
5652 '--fsid',
5653 required=True,
5654 help='cluster FSID')
5655 parser_rm_daemon.add_argument(
5656 '--force',
5657 action='store_true',
5658 help='proceed, even though this may destroy valuable data')
5659 parser_rm_daemon.add_argument(
5660 '--force-delete-data',
5661 action='store_true',
5662 help='delete valuable daemon data instead of making a backup')
5663
5664 parser_rm_cluster = subparsers.add_parser(
5665 'rm-cluster', help='remove all daemons for a cluster')
5666 parser_rm_cluster.set_defaults(func=command_rm_cluster)
5667 parser_rm_cluster.add_argument(
5668 '--fsid',
5669 required=True,
5670 help='cluster FSID')
5671 parser_rm_cluster.add_argument(
5672 '--force',
5673 action='store_true',
5674 help='proceed, even though this may destroy valuable data')
5675
5676 parser_run = subparsers.add_parser(
5677 'run', help='run a ceph daemon, in a container, in the foreground')
5678 parser_run.set_defaults(func=command_run)
5679 parser_run.add_argument(
5680 '--name', '-n',
5681 required=True,
5682 help='daemon name (type.id)')
5683 parser_run.add_argument(
5684 '--fsid',
5685 required=True,
5686 help='cluster FSID')
5687
5688 parser_shell = subparsers.add_parser(
5689 'shell', help='run an interactive shell inside a daemon container')
5690 parser_shell.set_defaults(func=command_shell)
5691 parser_shell.add_argument(
5692 '--fsid',
5693 help='cluster FSID')
5694 parser_shell.add_argument(
5695 '--name', '-n',
5696 help='daemon name (type.id)')
5697 parser_shell.add_argument(
5698 '--config', '-c',
5699 help='ceph.conf to pass through to the container')
5700 parser_shell.add_argument(
5701 '--keyring', '-k',
5702 help='ceph.keyring to pass through to the container')
e306af50
TL
5703 parser_shell.add_argument(
5704 '--mount', '-m',
f91f0fd5
TL
5705 help=("mount a file or directory in the container. "
5706 "Support multiple mounts. "
5707 "ie: `--mount /foo /bar:/bar`. "
5708 "When no destination is passed, default is /mnt"),
5709 nargs='+')
9f95a23c
TL
5710 parser_shell.add_argument(
5711 '--env', '-e',
5712 action='append',
5713 default=[],
5714 help='set environment variable')
5715 parser_shell.add_argument(
e306af50 5716 'command', nargs=argparse.REMAINDER,
9f95a23c
TL
5717 help='command (optional)')
5718
5719 parser_enter = subparsers.add_parser(
5720 'enter', help='run an interactive shell inside a running daemon container')
5721 parser_enter.set_defaults(func=command_enter)
5722 parser_enter.add_argument(
5723 '--fsid',
5724 help='cluster FSID')
5725 parser_enter.add_argument(
5726 '--name', '-n',
5727 required=True,
5728 help='daemon name (type.id)')
5729 parser_enter.add_argument(
e306af50 5730 'command', nargs=argparse.REMAINDER,
9f95a23c
TL
5731 help='command')
5732
5733 parser_ceph_volume = subparsers.add_parser(
5734 'ceph-volume', help='run ceph-volume inside a container')
5735 parser_ceph_volume.set_defaults(func=command_ceph_volume)
5736 parser_ceph_volume.add_argument(
5737 '--fsid',
5738 help='cluster FSID')
5739 parser_ceph_volume.add_argument(
5740 '--config-json',
5741 help='JSON file with config and (client.bootrap-osd) key')
801d1391
TL
5742 parser_ceph_volume.add_argument(
5743 '--config', '-c',
5744 help='ceph conf file')
5745 parser_ceph_volume.add_argument(
5746 '--keyring', '-k',
5747 help='ceph.keyring to pass through to the container')
9f95a23c 5748 parser_ceph_volume.add_argument(
e306af50 5749 'command', nargs=argparse.REMAINDER,
9f95a23c
TL
5750 help='command')
5751
5752 parser_unit = subparsers.add_parser(
5753 'unit', help='operate on the daemon\'s systemd unit')
5754 parser_unit.set_defaults(func=command_unit)
5755 parser_unit.add_argument(
5756 'command',
5757 help='systemd command (start, stop, restart, enable, disable, ...)')
5758 parser_unit.add_argument(
5759 '--fsid',
5760 help='cluster FSID')
5761 parser_unit.add_argument(
5762 '--name', '-n',
5763 required=True,
5764 help='daemon name (type.id)')
5765
5766 parser_logs = subparsers.add_parser(
5767 'logs', help='print journald logs for a daemon container')
5768 parser_logs.set_defaults(func=command_logs)
5769 parser_logs.add_argument(
5770 '--fsid',
5771 help='cluster FSID')
5772 parser_logs.add_argument(
5773 '--name', '-n',
5774 required=True,
5775 help='daemon name (type.id)')
5776 parser_logs.add_argument(
5777 'command', nargs='*',
5778 help='additional journalctl args')
5779
5780 parser_bootstrap = subparsers.add_parser(
5781 'bootstrap', help='bootstrap a cluster (mon + mgr daemons)')
5782 parser_bootstrap.set_defaults(func=command_bootstrap)
5783 parser_bootstrap.add_argument(
5784 '--config', '-c',
5785 help='ceph conf file to incorporate')
5786 parser_bootstrap.add_argument(
5787 '--mon-id',
5788 required=False,
5789 help='mon id (default: local hostname)')
5790 parser_bootstrap.add_argument(
5791 '--mon-addrv',
5792 help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])')
5793 parser_bootstrap.add_argument(
5794 '--mon-ip',
5795 help='mon IP')
5796 parser_bootstrap.add_argument(
5797 '--mgr-id',
5798 required=False,
5799 help='mgr id (default: randomly generated)')
5800 parser_bootstrap.add_argument(
5801 '--fsid',
5802 help='cluster FSID')
5803 parser_bootstrap.add_argument(
5804 '--output-dir',
5805 default='/etc/ceph',
5806 help='directory to write config, keyring, and pub key files')
5807 parser_bootstrap.add_argument(
5808 '--output-keyring',
5809 help='location to write keyring file with new cluster admin and mon keys')
5810 parser_bootstrap.add_argument(
5811 '--output-config',
5812 help='location to write conf file to connect to new cluster')
5813 parser_bootstrap.add_argument(
5814 '--output-pub-ssh-key',
5815 help='location to write the cluster\'s public SSH key')
5816 parser_bootstrap.add_argument(
5817 '--skip-ssh',
5818 action='store_true',
5819 help='skip setup of ssh key on local host')
5820 parser_bootstrap.add_argument(
5821 '--initial-dashboard-user',
5822 default='admin',
5823 help='Initial user for the dashboard')
5824 parser_bootstrap.add_argument(
5825 '--initial-dashboard-password',
5826 help='Initial password for the initial dashboard user')
f6b5b4d7
TL
5827 parser_bootstrap.add_argument(
5828 '--ssl-dashboard-port',
5829 type=int,
5830 default = 8443,
5831 help='Port number used to connect with dashboard using SSL')
9f95a23c
TL
5832 parser_bootstrap.add_argument(
5833 '--dashboard-key',
e306af50 5834 type=argparse.FileType('r'),
9f95a23c
TL
5835 help='Dashboard key')
5836 parser_bootstrap.add_argument(
5837 '--dashboard-crt',
e306af50 5838 type=argparse.FileType('r'),
9f95a23c
TL
5839 help='Dashboard certificate')
5840
e306af50
TL
5841 parser_bootstrap.add_argument(
5842 '--ssh-config',
5843 type=argparse.FileType('r'),
5844 help='SSH config')
5845 parser_bootstrap.add_argument(
5846 '--ssh-private-key',
5847 type=argparse.FileType('r'),
5848 help='SSH private key')
5849 parser_bootstrap.add_argument(
5850 '--ssh-public-key',
5851 type=argparse.FileType('r'),
5852 help='SSH public key')
f6b5b4d7
TL
5853 parser_bootstrap.add_argument(
5854 '--ssh-user',
5855 default='root',
5856 help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users')
e306af50 5857
9f95a23c
TL
5858 parser_bootstrap.add_argument(
5859 '--skip-mon-network',
5860 action='store_true',
5861 help='set mon public_network based on bootstrap mon ip')
5862 parser_bootstrap.add_argument(
5863 '--skip-dashboard',
5864 action='store_true',
5865 help='do not enable the Ceph Dashboard')
5866 parser_bootstrap.add_argument(
5867 '--dashboard-password-noupdate',
5868 action='store_true',
5869 help='stop forced dashboard password change')
5870 parser_bootstrap.add_argument(
5871 '--no-minimize-config',
5872 action='store_true',
5873 help='do not assimilate and minimize the config file')
5874 parser_bootstrap.add_argument(
5875 '--skip-ping-check',
5876 action='store_true',
5877 help='do not verify that mon IP is pingable')
5878 parser_bootstrap.add_argument(
5879 '--skip-pull',
5880 action='store_true',
5881 help='do not pull the latest image before bootstrapping')
5882 parser_bootstrap.add_argument(
5883 '--skip-firewalld',
5884 action='store_true',
5885 help='Do not configure firewalld')
5886 parser_bootstrap.add_argument(
5887 '--allow-overwrite',
5888 action='store_true',
5889 help='allow overwrite of existing --output-* config/keyring/ssh files')
5890 parser_bootstrap.add_argument(
5891 '--allow-fqdn-hostname',
5892 action='store_true',
5893 help='allow hostname that is fully-qualified (contains ".")')
5894 parser_bootstrap.add_argument(
5895 '--skip-prepare-host',
5896 action='store_true',
5897 help='Do not prepare host')
5898 parser_bootstrap.add_argument(
5899 '--orphan-initial-daemons',
5900 action='store_true',
5901 help='Do not create initial mon, mgr, and crash service specs')
5902 parser_bootstrap.add_argument(
5903 '--skip-monitoring-stack',
5904 action='store_true',
5905 help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
e306af50
TL
5906 parser_bootstrap.add_argument(
5907 '--apply-spec',
5908 help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
5909
e306af50
TL
5910 parser_bootstrap.add_argument(
5911 '--shared_ceph_folder',
5912 metavar='CEPH_SOURCE_FOLDER',
5913 help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
9f95a23c 5914
f6b5b4d7
TL
5915 parser_bootstrap.add_argument(
5916 '--registry-url',
5917 help='url for custom registry')
5918 parser_bootstrap.add_argument(
5919 '--registry-username',
5920 help='username for custom registry')
5921 parser_bootstrap.add_argument(
5922 '--registry-password',
5923 help='password for custom registry')
5924 parser_bootstrap.add_argument(
5925 '--registry-json',
5926 help='json file with custom registry login info (URL, Username, Password)')
f91f0fd5
TL
5927 parser_bootstrap.add_argument(
5928 '--container-init',
5929 action='store_true',
5930 help='Run podman/docker with `--init`')
f6b5b4d7 5931
9f95a23c
TL
5932 parser_deploy = subparsers.add_parser(
5933 'deploy', help='deploy a daemon')
5934 parser_deploy.set_defaults(func=command_deploy)
5935 parser_deploy.add_argument(
5936 '--name',
5937 required=True,
5938 action=CustomValidation,
5939 help='daemon name (type.id)')
5940 parser_deploy.add_argument(
5941 '--fsid',
5942 required=True,
5943 help='cluster FSID')
5944 parser_deploy.add_argument(
5945 '--config', '-c',
5946 help='config file for new daemon')
5947 parser_deploy.add_argument(
5948 '--config-json',
5949 help='Additional configuration information in JSON format')
5950 parser_deploy.add_argument(
5951 '--keyring',
5952 help='keyring for new daemon')
5953 parser_deploy.add_argument(
5954 '--key',
5955 help='key for new daemon')
5956 parser_deploy.add_argument(
5957 '--osd-fsid',
5958 help='OSD uuid, if creating an OSD container')
5959 parser_deploy.add_argument(
5960 '--skip-firewalld',
5961 action='store_true',
5962 help='Do not configure firewalld')
f6b5b4d7
TL
5963 parser_deploy.add_argument(
5964 '--tcp-ports',
5965 help='List of tcp ports to open in the host firewall')
9f95a23c
TL
5966 parser_deploy.add_argument(
5967 '--reconfig',
5968 action='store_true',
5969 help='Reconfigure a previously deployed daemon')
5970 parser_deploy.add_argument(
5971 '--allow-ptrace',
5972 action='store_true',
5973 help='Allow SYS_PTRACE on daemon container')
f91f0fd5
TL
5974 parser_deploy.add_argument(
5975 '--container-init',
5976 action='store_true',
5977 help='Run podman/docker with `--init`')
9f95a23c
TL
5978
5979 parser_check_host = subparsers.add_parser(
5980 'check-host', help='check host configuration')
5981 parser_check_host.set_defaults(func=command_check_host)
5982 parser_check_host.add_argument(
5983 '--expect-hostname',
5984 help='Check that hostname matches an expected value')
5985
5986 parser_prepare_host = subparsers.add_parser(
5987 'prepare-host', help='prepare a host for cephadm use')
5988 parser_prepare_host.set_defaults(func=command_prepare_host)
5989 parser_prepare_host.add_argument(
5990 '--expect-hostname',
5991 help='Set hostname')
5992
5993 parser_add_repo = subparsers.add_parser(
5994 'add-repo', help='configure package repository')
5995 parser_add_repo.set_defaults(func=command_add_repo)
5996 parser_add_repo.add_argument(
5997 '--release',
1911f103 5998 help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE))
9f95a23c
TL
5999 parser_add_repo.add_argument(
6000 '--version',
6001 help='use specific upstream version (x.y.z)')
6002 parser_add_repo.add_argument(
6003 '--dev',
6004 help='use specified bleeding edge build from git branch or tag')
6005 parser_add_repo.add_argument(
6006 '--dev-commit',
6007 help='use specified bleeding edge build from git commit')
6008 parser_add_repo.add_argument(
6009 '--gpg-url',
6010 help='specify alternative GPG key location')
6011 parser_add_repo.add_argument(
6012 '--repo-url',
6013 default='https://download.ceph.com',
6014 help='specify alternative repo location')
6015 # TODO: proxy?
6016
6017 parser_rm_repo = subparsers.add_parser(
6018 'rm-repo', help='remove package repository configuration')
6019 parser_rm_repo.set_defaults(func=command_rm_repo)
6020
6021 parser_install = subparsers.add_parser(
6022 'install', help='install ceph package(s)')
6023 parser_install.set_defaults(func=command_install)
6024 parser_install.add_argument(
6025 'packages', nargs='*',
6026 default=['cephadm'],
6027 help='packages')
6028
f6b5b4d7
TL
6029 parser_registry_login = subparsers.add_parser(
6030 'registry-login', help='log host into authenticated registry')
6031 parser_registry_login.set_defaults(func=command_registry_login)
6032 parser_registry_login.add_argument(
6033 '--registry-url',
6034 help='url for custom registry')
6035 parser_registry_login.add_argument(
6036 '--registry-username',
6037 help='username for custom registry')
6038 parser_registry_login.add_argument(
6039 '--registry-password',
6040 help='password for custom registry')
6041 parser_registry_login.add_argument(
6042 '--registry-json',
6043 help='json file with custom registry login info (URL, Username, Password)')
6044 parser_registry_login.add_argument(
6045 '--fsid',
6046 help='cluster FSID')
6047
f91f0fd5
TL
6048 parser_gather_facts = subparsers.add_parser(
6049 'gather-facts', help='gather and return host related information (JSON format)')
6050 parser_gather_facts.set_defaults(func=command_gather_facts)
6051
9f95a23c
TL
6052 return parser
6053
f6b5b4d7 6054
9f95a23c
TL
def _parse_args(av):
    # type: (List[str]) -> argparse.Namespace
    """Parse the cephadm command line into a Namespace.

    Subcommands that take a trailing free-form ``command`` use
    ``argparse.REMAINDER``; when the user separates it with a literal
    ``--``, argparse keeps that separator as the first element, so it is
    stripped here before the args are handed to the subcommand function.
    """
    parser = _get_parser()
    parsed = parser.parse_args(av)
    remainder = getattr(parsed, 'command', None)
    if remainder and remainder[0] == "--":
        remainder.pop(0)
    return parsed
9f95a23c 6061
f6b5b4d7 6062
if __name__ == "__main__":

    # cephadm manipulates /var/lib/ceph, systemd units, containers, etc.;
    # nothing useful can be done without root privileges.
    if os.geteuid() != 0:
        sys.stderr.write('ERROR: cephadm should be run as root\n')
        sys.exit(1)

    # Logger configuration. NOTE: `logger` is a module-level global that the
    # rest of this file uses, so the name must stay as-is.
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    dictConfig(logging_config)
    logger = logging.getLogger()

    # Allow argv to be injected (used when the script is piped to python3
    # with an `injected_argv = [...]` line prepended; see module docstring).
    try:
        av = injected_argv  # type: ignore
    except NameError:
        av = sys.argv[1:]
    logger.debug('%s\ncephadm %s' % ('-' * 80, av))

    # `args` is likewise a module-level global consumed by the command
    # implementations, so it keeps its name.
    args = _parse_args(av)

    # With --verbose, raise the console handler (only) to DEBUG; file
    # logging is unaffected.
    if args.verbose:
        for h in logger.handlers:
            if h.name == 'console':
                h.setLevel(logging.DEBUG)

    # No subcommand given: set_defaults(func=...) never fired.
    if 'func' not in args:
        sys.stderr.write('No command specified; pass -h or --help for usage\n')
        sys.exit(1)

    # Resolve the container engine (podman preferred over docker) into the
    # module-level global `container_path`. `check-host` is exempt because
    # reporting a missing engine is part of its job.
    if args.func != command_check_host:
        if args.docker:
            container_path = find_program('docker')
        else:
            for engine in CONTAINER_PREFERENCE:
                try:
                    container_path = find_program(engine)
                    break
                except Exception as e:
                    logger.debug('Could not locate %s: %s' % (engine, e))
        # prepare-host / add-repo may legitimately run before any engine is
        # installed; every other command needs one.
        no_engine_ok = (args.func == command_prepare_host
                        or args.func == command_add_repo)
        if not container_path and not no_engine_ok:
            sys.stderr.write('Unable to locate any of %s\n' % CONTAINER_PREFERENCE)
            sys.exit(1)

    # Dispatch. Known errors become a one-line message (full traceback only
    # under --verbose); the command's return value is the exit status.
    try:
        r = args.func()
    except Error as e:
        if args.verbose:
            raise
        sys.stderr.write('ERROR: %s\n' % e)
        sys.exit(1)
    if not r:
        r = 0
    sys.exit(r)