]> git.proxmox.com Git - ceph.git/blame - ceph/src/cephadm/cephadm
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / cephadm / cephadm
CommitLineData
9f95a23c
TL
1#!/usr/bin/python3
2
f67539c2
TL
3import asyncio
4import asyncio.subprocess
5import argparse
6import datetime
7import fcntl
8import ipaddress
9import json
10import logging
11from logging.config import dictConfig
12import os
13import platform
14import pwd
15import random
16import shlex
17import shutil
18import socket
19import string
20import subprocess
21import sys
22import tempfile
23import time
24import errno
25import struct
26from socketserver import ThreadingMixIn
27from http.server import BaseHTTPRequestHandler, HTTPServer
28import signal
29import io
30from contextlib import redirect_stdout
31import ssl
32from enum import Enum
33
34from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, Callable, IO
35
36import re
37import uuid
38
39from configparser import ConfigParser
40from functools import wraps
41from glob import glob
42from io import StringIO
43from threading import Thread, RLock
44from urllib.error import HTTPError
45from urllib.request import urlopen
46from pathlib import Path
47
# Default container images -----------------------------------------------------
DEFAULT_IMAGE = 'docker.io/ceph/ceph:v16'
DEFAULT_IMAGE_IS_MASTER = False
DEFAULT_IMAGE_RELEASE = 'pacific'
DEFAULT_PROMETHEUS_IMAGE = 'docker.io/prom/prometheus:v2.18.1'
DEFAULT_NODE_EXPORTER_IMAGE = 'docker.io/prom/node-exporter:v0.18.1'
DEFAULT_GRAFANA_IMAGE = 'docker.io/ceph/ceph-grafana:6.7.4'
DEFAULT_ALERT_MANAGER_IMAGE = 'docker.io/prom/alertmanager:v0.20.0'
DEFAULT_REGISTRY = 'docker.io'  # normalize unqualified digests to this
# ------------------------------------------------------------------------------

LATEST_STABLE_RELEASE = 'pacific'

# Host filesystem layout for cluster data, logs, locks and systemd units
DATA_DIR = '/var/lib/ceph'
LOG_DIR = '/var/log/ceph'
LOCK_DIR = '/run/cephadm'
LOGROTATE_DIR = '/etc/logrotate.d'
UNIT_DIR = '/etc/systemd/system'
LOG_DIR_MODE = 0o770
DATA_DIR_MODE = 0o700

# Container engine requirements
CONTAINER_INIT = True
MIN_PODMAN_VERSION = (2, 0, 2)
CGROUPS_SPLIT_PODMAN_VERSION = (2, 1, 0)

# Prompt shown inside `cephadm shell`
CUSTOM_PS1 = r'[ceph: \u@\h \W]\$ '
DEFAULT_TIMEOUT = None  # in seconds
DEFAULT_RETRY = 15
SHELL_DEFAULT_CONF = '/etc/ceph/ceph.conf'
SHELL_DEFAULT_KEYRING = '/etc/ceph/ceph.client.admin.keyring'

DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ'

# None at import time; configured at runtime before use
logger: logging.Logger = None  # type: ignore

"""
You can invoke cephadm in two ways:

1. The normal way, at the command line.

2. By piping the script to the python3 binary. In this latter case, you should
   prepend one or more lines to the beginning of the script.

   For arguments,

     injected_argv = [...]

   e.g.,

     injected_argv = ['ls']

   For reading stdin from the '--config-json -' argument,

     injected_stdin = '...'
"""
cached_stdin = None
f91f0fd5 100
f67539c2 101##################################
9f95a23c 102
9f95a23c 103
class BaseConfig:
    """Default-initialized settings shared by every cephadm invocation.

    A parsed argparse.Namespace can be overlaid on top of these defaults
    via set_from_args().
    """

    def __init__(self):
        # container image / engine selection
        self.image: str = ''
        self.docker: bool = False
        # host filesystem layout
        self.data_dir: str = DATA_DIR
        self.log_dir: str = LOG_DIR
        self.logrotate_dir: str = LOGROTATE_DIR
        self.unit_dir: str = UNIT_DIR
        # runtime behavior
        self.verbose: bool = False
        self.timeout: Optional[int] = DEFAULT_TIMEOUT
        self.retry: int = DEFAULT_RETRY
        self.env: List[str] = []
        self.memory_request: Optional[int] = None
        self.memory_limit: Optional[int] = None

        self.container_init: bool = CONTAINER_INIT
        self.container_engine: Optional[ContainerEngine] = None

    def set_from_args(self, args: argparse.Namespace):
        """Copy over every attribute of *args* that this config also defines."""
        for name, value in vars(args).items():
            if hasattr(self, name):
                setattr(self, name, value)
129
class CephadmContext:
    """Merged view over CLI arguments and BaseConfig defaults.

    Attribute reads fall through to the wrapped BaseConfig first, then to
    the argparse.Namespace; writes land on whichever backing store already
    defines the attribute.  The two stores themselves are kept in
    self.__dict__ directly so that the custom __getattr__/__setattr__
    hooks do not recurse on them.
    """

    def __init__(self):
        # write through __dict__ to bypass our own __setattr__ while the
        # backing stores do not exist yet
        self.__dict__['_args'] = None
        self.__dict__['_conf'] = BaseConfig()

    def set_args(self, args: argparse.Namespace) -> None:
        # mirror known settings into the config first, then keep the raw
        # namespace for everything else
        self._conf.set_from_args(args)
        self._args = args

    def has_function(self) -> bool:
        # argparse stores a 'func' attribute for every recognized subcommand
        return 'func' in self._args

    def __contains__(self, name: str) -> bool:
        return hasattr(self, name)

    def __getattr__(self, name: str) -> Any:
        # invoked only for names not found through normal lookup;
        # config values take precedence over raw CLI arguments
        if '_conf' in self.__dict__ and hasattr(self._conf, name):
            return getattr(self._conf, name)
        elif '_args' in self.__dict__ and hasattr(self._args, name):
            return getattr(self._args, name)
        else:
            return super().__getattribute__(name)

    def __setattr__(self, name: str, value: Any) -> None:
        # route writes to whichever backing store already has the name;
        # brand-new names are stored on the context object itself
        if hasattr(self._conf, name):
            setattr(self._conf, name, value)
        elif hasattr(self._args, name):
            setattr(self._args, name, value)
        else:
            super().__setattr__(name, value)
162
class ContainerEngine:
    """Base wrapper around a container runtime binary installed on this host."""

    def __init__(self):
        # resolve the executable on PATH; find_program is expected to fail
        # loudly if the engine is not installed
        self.path = find_program(self.EXE)

    @property
    def EXE(self) -> str:
        # subclasses override this with the executable name
        raise NotImplementedError()
171
class Podman(ContainerEngine):
    """Podman runtime wrapper that lazily discovers the client version."""

    EXE = 'podman'

    def __init__(self):
        super().__init__()
        self._version = None

    @property
    def version(self):
        """Cached client version; get_version() must have been called first."""
        if self._version is not None:
            return self._version
        raise RuntimeError('Please call `get_version` first')

    def get_version(self, ctx: CephadmContext):
        """Query the podman binary and cache its parsed client version."""
        out, _, _ = call_throws(ctx, [self.path, 'version', '--format', '{{.Client.Version}}'])
        self._version = _parse_podman_version(out)
189
class Docker(ContainerEngine):
    """Docker container runtime (no version gating, unlike Podman)."""
    EXE = 'docker'


CONTAINER_PREFERENCE = (Podman, Docker)  # prefer podman to docker
9f95a23c 195
9f95a23c 196
f91f0fd5
TL
# Log and console output config
logging_config = {
    'version': 1,
    'disable_existing_loggers': True,
    'formatters': {
        'cephadm': {
            'format': '%(asctime)s %(levelname)s %(message)s'
        },
    },
    'handlers': {
        # INFO and above go to the console, unformatted (user-facing output)
        'console': {
            'level': 'INFO',
            'class': 'logging.StreamHandler',
        },
        # full DEBUG trail goes to a small rotating file under LOG_DIR
        'log_file': {
            'level': 'DEBUG',
            'class': 'logging.handlers.RotatingFileHandler',
            'formatter': 'cephadm',
            'filename': '%s/cephadm.log' % LOG_DIR,
            'maxBytes': 1024000,
            'backupCount': 1,
        }
    },
    'loggers': {
        # root logger: everything flows through both handlers
        '': {
            'level': 'DEBUG',
            'handlers': ['console', 'log_file'],
        }
    }
}
e306af50 227
f67539c2 228
e306af50
TL
class termcolor:
    """ANSI escape sequences for colorized console output."""
    yellow = '\033[93m'
    red = '\033[31m'
    end = '\033[0m'  # reset attributes
233
f6b5b4d7 234
9f95a23c
TL
class Error(Exception):
    """Base class for all errors raised by cephadm."""
    pass


class TimeoutExpired(Error):
    """Raised when a timed operation exceeds its deadline."""
    pass
241
242##################################
243
f6b5b4d7 244
9f95a23c
TL
class Ceph(object):
    # daemon types deployed from the main Ceph container image
    daemons = ('mon', 'mgr', 'mds', 'osd', 'rgw', 'rbd-mirror',
               'crash', 'cephfs-mirror')
9f95a23c
TL
248
249##################################
250
f6b5b4d7 251
9f95a23c
TL
class Monitoring(object):
    """Define the configs for the monitoring containers"""

    # daemon type -> TCP ports the daemon listens on
    port_map = {
        'prometheus': [9095],  # Avoid default 9090, due to conflict with cockpit UI
        'node-exporter': [9100],
        'grafana': [3000],
        'alertmanager': [9093, 9094],
    }

    # per-daemon deployment settings: image, resource hints, command-line
    # args, and which config-json keys carry files / extra arguments
    components = {
        'prometheus': {
            'image': DEFAULT_PROMETHEUS_IMAGE,
            'cpus': '2',
            'memory': '4GB',
            'args': [
                '--config.file=/etc/prometheus/prometheus.yml',
                '--storage.tsdb.path=/prometheus',
                '--web.listen-address=:{}'.format(port_map['prometheus'][0]),
            ],
            'config-json-files': [
                'prometheus.yml',
            ],
        },
        'node-exporter': {
            'image': DEFAULT_NODE_EXPORTER_IMAGE,
            'cpus': '1',
            'memory': '1GB',
            'args': [
                '--no-collector.timex',
            ],
        },
        'grafana': {
            'image': DEFAULT_GRAFANA_IMAGE,
            'cpus': '2',
            'memory': '4GB',
            'args': [],
            'config-json-files': [
                'grafana.ini',
                'provisioning/datasources/ceph-dashboard.yml',
                'certs/cert_file',
                'certs/cert_key',
            ],
        },
        'alertmanager': {
            'image': DEFAULT_ALERT_MANAGER_IMAGE,
            'cpus': '2',
            'memory': '2GB',
            'args': [
                '--web.listen-address=:{}'.format(port_map['alertmanager'][0]),
                '--cluster.listen-address=:{}'.format(port_map['alertmanager'][1]),
            ],
            'config-json-files': [
                'alertmanager.yml',
            ],
            'config-json-args': [
                'peers',
            ],
        },
    }  # type: ignore

    @staticmethod
    def get_version(ctx, container_id, daemon_type):
        # type: (CephadmContext, str, str) -> str
        """Extract the daemon's version by running `<binary> --version`
        inside the container and parsing the captured err output.

        :param: daemon_type Either "prometheus", "alertmanager" or "node-exporter"
        """
        assert daemon_type in ('prometheus', 'alertmanager', 'node-exporter')
        cmd = daemon_type.replace('-', '_')
        code = -1
        err = ''
        version = ''
        if daemon_type == 'alertmanager':
            # the binary name differs between packagings; try both
            for cmd in ['alertmanager', 'prometheus-alertmanager']:
                _, err, code = call(ctx, [
                    ctx.container_engine.path, 'exec', container_id, cmd,
                    '--version'
                ], verbosity=CallVerbosity.DEBUG)
                if code == 0:
                    break
            cmd = 'alertmanager'  # reset cmd for version extraction
        else:
            _, err, code = call(ctx, [
                ctx.container_engine.path, 'exec', container_id, cmd, '--version'
            ], verbosity=CallVerbosity.DEBUG)
        # banner looks like '<cmd>, version X.Y.Z ...'; take the 3rd token
        if code == 0 and \
                err.startswith('%s, version ' % cmd):
            version = err.split(' ')[2]
        return version
341
9f95a23c
TL
342##################################
343
f6b5b4d7 344
f67539c2
TL
def populate_files(config_dir, config_files, uid, gid):
    # type: (str, Dict, int, int) -> None
    """create config files for different services"""
    for fname in config_files:
        target = os.path.join(config_dir, fname)
        content = dict_get_join(config_files, fname)
        logger.info('Write file: %s' % (target))
        with open(target, 'w') as fh:
            # set ownership/permissions on the open handle before writing
            os.fchown(fh.fileno(), uid, gid)
            os.fchmod(fh.fileno(), 0o600)
            fh.write(content)
356
357
9f95a23c
TL
class NFSGanesha(object):
    """Defines a NFS-Ganesha container"""

    daemon_type = 'nfs'
    entrypoint = '/usr/bin/ganesha.nfsd'
    # run in foreground, log to stderr
    daemon_args = ['-F', '-L', 'STDERR']

    required_files = ['ganesha.conf']

    port_map = {
        'nfs': 2049,
    }

    def __init__(self,
                 ctx,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.pool = dict_get(config_json, 'pool', require=True)
        self.namespace = dict_get(config_json, 'namespace')
        self.userid = dict_get(config_json, 'userid')
        self.extra_args = dict_get(config_json, 'extra_args', [])
        self.files = dict_get(config_json, 'files', {})
        self.rgw = dict_get(config_json, 'rgw', {})

        # validate the supplied args
        self.validate()

    @classmethod
    def init(cls, ctx, fsid, daemon_id):
        # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha
        """Construct from the context's --config-json and --image."""
        return cls(ctx, fsid, daemon_id, get_parm(ctx.config_json), ctx.image)

    def get_container_mounts(self, data_dir):
        # type: (str) -> Dict[str, str]
        """Map host paths under data_dir to paths inside the container."""
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z'
        if self.rgw:
            cluster = self.rgw.get('cluster', 'ceph')
            rgw_user = self.rgw.get('user', 'admin')
            mounts[os.path.join(data_dir, 'keyring.rgw')] = \
                '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user)
        return mounts

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        envs = [
            'CEPH_CONF=%s' % ('/etc/ceph/ceph.conf')
        ]
        return envs

    @staticmethod
    def get_version(ctx, container_id):
        # type: (CephadmContext, str) -> Optional[str]
        """Return the ganesha release parsed from `ganesha.nfsd -v`, or None."""
        version = None
        out, err, code = call(ctx,
                              [ctx.container_engine.path, 'exec', container_id,
                               NFSGanesha.entrypoint, '-v'],
                              verbosity=CallVerbosity.DEBUG)
        if code == 0:
            match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
            if match:
                version = match.group(1)
        return version

    def validate(self):
        # type: () -> None
        """Raise Error if any constructor argument is malformed or missing."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

        # check for an RGW config
        if self.rgw:
            if not self.rgw.get('keyring'):
                raise Error('RGW keyring is missing')
            if not self.rgw.get('user'):
                raise Error('RGW user is missing')

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def get_daemon_args(self):
        # type: () -> List[str]
        return self.daemon_args + self.extra_args

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ganesha config...')

        # create the ganesha conf dir
        config_dir = os.path.join(data_dir, 'etc/ganesha')
        makedirs(config_dir, uid, gid, 0o755)

        # populate files from the config-json
        populate_files(config_dir, self.files, uid, gid)

        # write the RGW keyring
        if self.rgw:
            keyring_path = os.path.join(data_dir, 'keyring.rgw')
            with open(keyring_path, 'w') as f:
                os.fchmod(f.fileno(), 0o600)
                os.fchown(f.fileno(), uid, gid)
                f.write(self.rgw.get('keyring', ''))

    def get_rados_grace_container(self, action):
        # type: (str) -> CephContainer
        """Container for a ganesha action on the grace db"""
        entrypoint = '/usr/bin/ganesha-rados-grace'

        assert self.pool
        args = ['--pool', self.pool]
        if self.namespace:
            args += ['--ns', self.namespace]
        if self.userid:
            args += ['--userid', self.userid]
        args += [action, self.get_daemon_name()]

        data_dir = get_data_dir(self.fsid, self.ctx.data_dir,
                                self.daemon_type, self.daemon_id)
        volume_mounts = self.get_container_mounts(data_dir)
        envs = self.get_container_envs()

        logger.info('Creating RADOS grace for action: %s' % action)
        c = CephContainer(
            self.ctx,
            image=self.image,
            entrypoint=entrypoint,
            args=args,
            volume_mounts=volume_mounts,
            cname=self.get_container_name(desc='grace-%s' % action),
            envs=envs
        )
        return c
523
524##################################
525
f6b5b4d7 526
1911f103
TL
class CephIscsi(object):
    """Defines a Ceph-Iscsi container"""

    daemon_type = 'iscsi'
    entrypoint = '/usr/bin/rbd-target-api'

    required_files = ['iscsi-gateway.cfg']

    def __init__(self,
                 ctx,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        # validate the supplied args
        self.validate()

    @classmethod
    def init(cls, ctx, fsid, daemon_id):
        # type: (CephadmContext, str, Union[int, str]) -> CephIscsi
        """Construct from the context's --config-json and --image."""
        return cls(ctx, fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    @staticmethod
    def get_container_mounts(data_dir, log_dir):
        # type: (str, str) -> Dict[str, str]
        """Map host paths to container paths for the iscsi daemon."""
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
        mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
        mounts[log_dir] = '/var/log/rbd-target-api:z'
        mounts['/dev'] = '/dev'
        return mounts

    @staticmethod
    def get_container_binds():
        # type: () -> List[List[str]]
        # /lib/modules is bind-mounted read-only so the container can see
        # the host's kernel modules
        binds = []
        lib_modules = ['type=bind',
                       'source=/lib/modules',
                       'destination=/lib/modules',
                       'ro=true']
        binds.append(lib_modules)
        return binds

    @staticmethod
    def get_version(ctx, container_id):
        # type: (CephadmContext, str) -> Optional[str]
        """Return the installed ceph_iscsi package version, or None."""
        version = None
        out, err, code = call(ctx,
                              [ctx.container_engine.path, 'exec', container_id,
                               '/usr/bin/python3', '-c', "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"],
                              verbosity=CallVerbosity.DEBUG)
        if code == 0:
            version = out.strip()
        return version

    def validate(self):
        # type: () -> None
        """Raise Error if any constructor argument is malformed or missing."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ceph-iscsi config...')
        configfs_dir = os.path.join(data_dir, 'configfs')
        makedirs(configfs_dir, uid, gid, 0o755)

        # populate files from the config-json
        populate_files(data_dir, self.files, uid, gid)

    @staticmethod
    def configfs_mount_umount(data_dir, mount=True):
        # type: (str, bool) -> List[str]
        """Build the shell command (as an argv list) that mounts or
        unmounts the configfs directory, guarded by a /proc/mounts check."""
        mount_path = os.path.join(data_dir, 'configfs')
        if mount:
            cmd = 'if ! grep -qs {0} /proc/mounts; then ' \
                  'mount -t configfs none {0}; fi'.format(mount_path)
        else:
            cmd = 'if grep -qs {0} /proc/mounts; then ' \
                  'umount {0}; fi'.format(mount_path)
        return cmd.split()

    def get_tcmu_runner_container(self):
        # type: () -> CephContainer
        """Companion container running tcmu-runner alongside rbd-target-api."""
        tcmu_container = get_container(self.ctx, self.fsid, self.daemon_type, self.daemon_id)
        tcmu_container.entrypoint = '/usr/bin/tcmu-runner'
        tcmu_container.cname = self.get_container_name(desc='tcmu')
        # remove extra container args for tcmu container.
        # extra args could cause issue with forking service type
        tcmu_container.container_args = []
        return tcmu_container
654
1911f103
TL
655##################################
656
f6b5b4d7 657
f67539c2
TL
class HAproxy(object):
    """Defines an HAproxy container"""
    daemon_type = 'haproxy'
    required_files = ['haproxy.cfg']
    default_image = 'haproxy'

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        self.validate()

    @classmethod
    def init(cls, ctx: CephadmContext,
             fsid: str, daemon_id: Union[int, str]) -> 'HAproxy':
        """Construct from the context's --config-json and --image."""
        return cls(ctx, fsid, daemon_id, get_parm(ctx.config_json),
                   ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        # create additional directories in data dir for HAproxy to use
        if not os.path.isdir(os.path.join(data_dir, 'haproxy')):
            makedirs(os.path.join(data_dir, 'haproxy'), uid, gid, DATA_DIR_MODE)

        # config files land in the haproxy/ subdirectory
        data_dir = os.path.join(data_dir, 'haproxy')
        populate_files(data_dir, self.files, uid, gid)

    def get_daemon_args(self) -> List[str]:
        return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg']

    def validate(self):
        # type: () -> None
        """Raise Error if any constructor argument is malformed or missing."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def extract_uid_gid_haproxy(self):
        # better directory for this?
        return extract_uid_gid(self.ctx, file_path='/var/lib')

    @staticmethod
    def get_container_mounts(data_dir: str) -> Dict[str, str]:
        mounts = dict()
        mounts[os.path.join(data_dir, 'haproxy')] = '/var/lib/haproxy'
        return mounts
734
735##################################
736
737
class Keepalived(object):
    """Defines a Keepalived container"""
    daemon_type = 'keepalived'
    required_files = ['keepalived.conf']
    default_image = 'arcts/keepalived'

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        self.validate()

    @classmethod
    def init(cls, ctx: CephadmContext, fsid: str,
             daemon_id: Union[int, str]) -> 'Keepalived':
        """Construct from the context's --config-json and --image."""
        return cls(ctx, fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        # create additional directories in data dir for keepalived to use
        if not os.path.isdir(os.path.join(data_dir, 'keepalived')):
            makedirs(os.path.join(data_dir, 'keepalived'), uid, gid, DATA_DIR_MODE)

        # populate files from the config-json
        populate_files(data_dir, self.files, uid, gid)

    def validate(self):
        # type: () -> None
        """Raise Error if any constructor argument is malformed or missing."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        envs = [
            'KEEPALIVED_AUTOCONF=false',
            'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf',
            'KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf',
            'KEEPALIVED_DEBUG=false'
        ]
        return envs

    @staticmethod
    def get_prestart():
        # shell snippet run before the daemon starts
        return (
            '# keepalived needs IP forwarding and non-local bind\n'
            'sysctl net.ipv4.ip_forward=1\n'
            'sysctl net.ipv4.ip_nonlocal_bind=1\n'
        )

    def extract_uid_gid_keepalived(self):
        # better directory for this?
        return extract_uid_gid(self.ctx, file_path='/var/lib')

    @staticmethod
    def get_container_mounts(data_dir: str) -> Dict[str, str]:
        mounts = dict()
        mounts[os.path.join(data_dir, 'keepalived.conf')] = '/etc/keepalived/keepalived.conf'
        return mounts
830
831##################################
832
833
f91f0fd5
TL
class CustomContainer(object):
    """Defines a custom container"""
    daemon_type = 'container'

    def __init__(self,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.entrypoint = dict_get(config_json, 'entrypoint')
        self.uid = dict_get(config_json, 'uid', 65534)  # nobody
        self.gid = dict_get(config_json, 'gid', 65534)  # nobody
        self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
        self.args = dict_get(config_json, 'args', [])
        self.envs = dict_get(config_json, 'envs', [])
        self.privileged = dict_get(config_json, 'privileged', False)
        self.bind_mounts = dict_get(config_json, 'bind_mounts', [])
        self.ports = dict_get(config_json, 'ports', [])
        self.dirs = dict_get(config_json, 'dirs', [])
        self.files = dict_get(config_json, 'files', {})

    @classmethod
    def init(cls, ctx: CephadmContext,
             fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer':
        """Construct from the context's --config-json and --image."""
        return cls(fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """
        Create dirs/files below the container data directory.
        """
        logger.info('Creating custom container configuration '
                    'dirs/files in {} ...'.format(data_dir))

        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % data_dir)

        for dir_path in self.dirs:
            logger.info('Creating directory: {}'.format(dir_path))
            dir_path = os.path.join(data_dir, dir_path.strip('/'))
            makedirs(dir_path, uid, gid, 0o755)

        for file_path in self.files:
            logger.info('Creating file: {}'.format(file_path))
            content = dict_get_join(self.files, file_path)
            file_path = os.path.join(data_dir, file_path.strip('/'))
            with open(file_path, 'w', encoding='utf-8') as f:
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)
                f.write(content)

    def get_daemon_args(self) -> List[str]:
        return []

    def get_container_args(self) -> List[str]:
        return self.args

    def get_container_envs(self) -> List[str]:
        return self.envs

    def get_container_mounts(self, data_dir: str) -> Dict[str, str]:
        """
        Get the volume mounts. Relative source paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

        Example:
        {
            /foo/conf: /conf
            foo/conf: /conf
        }
        becomes
        {
            /foo/conf: /conf
            /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
        }
        """
        mounts = {}
        for source, destination in self.volume_mounts.items():
            # os.path.join leaves absolute sources untouched
            source = os.path.join(data_dir, source)
            mounts[source] = destination
        return mounts

    def get_container_binds(self, data_dir: str) -> List[List[str]]:
        """
        Get the bind mounts. Relative `source=...` paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

        Example:
        [
            'type=bind',
            'source=lib/modules',
            'destination=/lib/modules',
            'ro=true'
        ]
        becomes
        [
            ...
            'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
            ...
        ]
        """
        binds = self.bind_mounts.copy()
        for bind in binds:
            for index, value in enumerate(bind):
                match = re.match(r'^source=(.+)$', value)
                if match:
                    bind[index] = 'source={}'.format(os.path.join(
                        data_dir, match.group(1)))
        return binds
946
947##################################
948
949
f67539c2
TL
def touch(file_path: str, uid: Optional[int] = None, gid: Optional[int] = None) -> None:
    """Create *file_path* (or update its mtime) and optionally chown it.

    :param file_path: path of the file to create/touch.
    :param uid: numeric owner to set (requires gid as well).
    :param gid: numeric group to set (requires uid as well).
    """
    Path(file_path).touch()
    # Explicit None checks: uid/gid of 0 (root) are falsy, so a plain
    # truthiness test would silently skip the chown for root-owned files.
    if uid is not None and gid is not None:
        os.chown(file_path, uid, gid)
954
955
956##################################
957
958
f91f0fd5
TL
def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
    """
    Helper function to get a key from a dictionary.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :param default: The default value in case the key does not
        exist. Default is `None`.
    :param require: Set to `True` if the key is required. An
        exception will be raised if the key does not exist in
        the given dictionary.
    :return: Returns the value of the given key.
    :raises: :exc:`self.Error` if the given key does not exist
        and `require` is set to `True`.
    """
    # membership test on the dict directly; `d.keys()` adds nothing
    if require and key not in d:
        raise Error('{} missing from dict'.format(key))
    return d.get(key, default)  # type: ignore
f91f0fd5
TL
976
977##################################
978
979
def dict_get_join(d: Dict, key: str) -> Any:
    """
    Helper function to get the value of a given key from a dictionary.
    `List` values will be converted to a string by joining them with a
    line break.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :return: Returns the value of the given key. If it was a `list`, it
        will be joining with a line break.
    """
    raw = d.get(key)
    if not isinstance(raw, list):
        return raw
    return '\n'.join(str(item) for item in raw)
994
995##################################
996
997
def get_supported_daemons():
    # type: () -> List[str]
    """Return the list of daemon types this script knows how to deploy."""
    supported_daemons = list(Ceph.daemons)
    supported_daemons += list(Monitoring.components)
    supported_daemons += [
        NFSGanesha.daemon_type,
        CephIscsi.daemon_type,
        CustomContainer.daemon_type,
        CephadmDaemon.daemon_type,
        HAproxy.daemon_type,
        Keepalived.daemon_type,
    ]
    # daemon type names must be unique across all classes
    assert len(supported_daemons) == len(set(supported_daemons))
    return supported_daemons
1010
1011##################################
1012
f6b5b4d7 1013
f67539c2
TL
class PortOccupiedError(Error):
    """Raised when a TCP port cannot be bound because it is already in use."""
    pass
1016
1017
def attempt_bind(ctx, s, address, port):
    # type: (CephadmContext, socket.socket, str, int) -> None
    """
    Try to bind socket *s* to (*address*, *port*).

    Raises PortOccupiedError when the port is taken; any other bind
    error is re-raised unchanged. The socket is always closed.
    """
    try:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((address, port))
    except (socket.error, OSError) as e:  # py2 and py3
        if e.errno != errno.EADDRINUSE:
            raise e
        msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
        logger.warning(msg)
        raise PortOccupiedError(msg)
    finally:
        s.close()
1032
f6b5b4d7 1033
f67539c2
TL
def port_in_use(ctx, port_num):
    # type: (CephadmContext, int) -> bool
    """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
    logger.info('Verifying port %d ...' % port_num)

    def _bound_elsewhere(af: socket.AddressFamily, address: str) -> bool:
        try:
            sock = socket.socket(af, socket.SOCK_STREAM)
            attempt_bind(ctx, sock, address, port_num)
        except PortOccupiedError:
            return True
        except OSError as e:
            # EAFNOSUPPORT / EADDRNOTAVAIL mean this address family is
            # intentionally unavailable on this host; that is not an error
            # and not "port in use".
            if e.errno not in (errno.EAFNOSUPPORT, errno.EADDRNOTAVAIL):
                raise e
        return False

    checks = ((socket.AF_INET, '0.0.0.0'),
              (socket.AF_INET6, '::'))
    return any(_bound_elsewhere(af, addr) for af, addr in checks)
9f95a23c 1058
f6b5b4d7 1059
f67539c2
TL
def check_ip_port(ctx, ip, port):
    # type: (CephadmContext, str, int) -> None
    """Verify *ip*:*port* can be bound, unless ctx.skip_ping_check is set."""
    if ctx.skip_ping_check:
        return
    logger.info('Verifying IP %s port %d ...' % (ip, port))
    if is_ipv6(ip):
        family = socket.AF_INET6
        ip = unwrap_ipv6(ip)
    else:
        family = socket.AF_INET
    attempt_bind(ctx, socket.socket(family, socket.SOCK_STREAM), ip, port)
9f95a23c
TL
1070
1071##################################
1072
f67539c2 1073
9f95a23c
TL
1074# this is an abbreviated version of
1075# https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py
1076# that drops all of the compatibility (this is Unix/Linux only).
1077
9f95a23c
TL
class Timeout(TimeoutError):
    """
    Raised when the lock could not be acquired in *timeout*
    seconds.
    """

    def __init__(self, lock_file):
        """Remember which lock file timed out."""
        #: The path of the file lock.
        self.lock_file = lock_file

    def __str__(self):
        return "The file lock '{}' could not be acquired.".format(
            self.lock_file)
1095
1096
class _Acquire_ReturnProxy(object):
    """Context-manager proxy returned by FileLock.acquire().

    Entering yields the wrapped lock; leaving releases it, so
    ``with lock.acquire():`` works without extra side effects.
    """

    def __init__(self, lock):
        self.lock = lock

    def __enter__(self):
        return self.lock

    def __exit__(self, exc_type, exc_value, traceback):
        self.lock.release()
1108
1109
class FileLock(object):
    """flock(2)-based advisory file lock under LOCK_DIR (Unix/Linux only).

    Abbreviated from
    https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py
    with all cross-platform compatibility dropped. Re-entrant: nested
    acquire() calls are counted and the lock is only dropped when the
    counter reaches zero (or release(force=True) is used).
    """

    def __init__(self, ctx: CephadmContext, name, timeout=-1):
        """Prepare a lock named *name* (file LOCK_DIR/<name>.lock).

        :param timeout: default acquire timeout in seconds; negative
            means wait forever.
        """
        if not os.path.exists(LOCK_DIR):
            os.mkdir(LOCK_DIR, 0o700)
        self._lock_file = os.path.join(LOCK_DIR, name + '.lock')
        self.ctx = ctx

        # The file descriptor for the *_lock_file* as it is returned by the
        # os.open() function.
        # This file lock is only NOT None, if the object currently holds the
        # lock.
        self._lock_file_fd: Optional[int] = None
        self.timeout = timeout
        # The lock counter is used for implementing the nested locking
        # mechanism. Whenever the lock is acquired, the counter is increased and
        # the lock is only released, when this value is 0 again.
        self._lock_counter = 0
        return None

    @property
    def is_locked(self):
        # True only while this object holds the flock (fd is open).
        return self._lock_file_fd is not None

    def acquire(self, timeout=None, poll_intervall=0.05):
        """
        Acquires the file lock or fails with a :exc:`Timeout` error.
        .. code-block:: python
            # You can use this method in the context manager (recommended)
            with lock.acquire():
                pass
            # Or use an equivalent try-finally construct:
            lock.acquire()
            try:
                pass
            finally:
                lock.release()
        :arg float timeout:
            The maximum time waited for the file lock.
            If ``timeout < 0``, there is no timeout and this method will
            block until the lock could be acquired.
            If ``timeout`` is None, the default :attr:`~timeout` is used.
        :arg float poll_intervall:
            We check once in *poll_intervall* seconds if we can acquire the
            file lock.
        :raises Timeout:
            if the lock could not be acquired in *timeout* seconds.
        .. versionchanged:: 2.0.0
            This method returns now a *proxy* object instead of *self*,
            so that it can be used in a with statement without side effects.
        """

        # Use the default timeout, if no timeout is provided.
        if timeout is None:
            timeout = self.timeout

        # Increment the number right at the beginning.
        # We can still undo it, if something fails.
        self._lock_counter += 1

        lock_id = id(self)
        lock_filename = self._lock_file
        start_time = time.time()
        try:
            while True:
                if not self.is_locked:
                    logger.debug('Acquiring lock %s on %s', lock_id,
                                 lock_filename)
                    self._acquire()

                if self.is_locked:
                    logger.debug('Lock %s acquired on %s', lock_id,
                                 lock_filename)
                    break
                elif timeout >= 0 and time.time() - start_time > timeout:
                    logger.warning('Timeout acquiring lock %s on %s', lock_id,
                                   lock_filename)
                    raise Timeout(self._lock_file)
                else:
                    logger.debug(
                        'Lock %s not acquired on %s, waiting %s seconds ...',
                        lock_id, lock_filename, poll_intervall
                    )
                    time.sleep(poll_intervall)
        except:  # noqa
            # Something did go wrong, so decrement the counter.
            self._lock_counter = max(0, self._lock_counter - 1)

            raise
        return _Acquire_ReturnProxy(lock=self)

    def release(self, force=False):
        """
        Releases the file lock.
        Please note, that the lock is only completly released, if the lock
        counter is 0.
        Also note, that the lock file itself is not automatically deleted.
        :arg bool force:
            If true, the lock counter is ignored and the lock is released in
            every case.
        """
        if self.is_locked:
            self._lock_counter -= 1

            if self._lock_counter == 0 or force:
                lock_id = id(self)
                lock_filename = self._lock_file

                logger.debug('Releasing lock %s on %s', lock_id, lock_filename)
                self._release()
                self._lock_counter = 0
                logger.debug('Lock %s released on %s', lock_id, lock_filename)

        return None

    def __enter__(self):
        self.acquire()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.release()
        return None

    def __del__(self):
        # Forced release: don't leave the flock held past garbage collection.
        self.release(force=True)
        return None

    def _acquire(self):
        # Non-blocking flock attempt; on failure the fd is closed and
        # is_locked stays False so acquire() keeps polling.
        open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
        fd = os.open(self._lock_file, open_mode)

        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except (IOError, OSError):
            os.close(fd)
        else:
            self._lock_file_fd = fd
        return None

    def _release(self):
        # Do not remove the lockfile:
        #
        # https://github.com/benediktschmitt/py-filelock/issues/31
        # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
        fd = self._lock_file_fd
        self._lock_file_fd = None
        fcntl.flock(fd, fcntl.LOCK_UN)  # type: ignore
        os.close(fd)  # type: ignore
        return None
1258
1259
1260##################################
1261# Popen wrappers, lifted from ceph-volume
1262
adb31ebb
TL
class CallVerbosity(Enum):
    """How much of a subprocess's output call() forwards to the logger."""

    # never log stdout/stderr
    SILENT = 0
    # log stdout/stderr to logger.debug
    DEBUG = 1
    # On a non-zero exit status, it will forcefully set
    # logging ON for the terminal
    VERBOSE_ON_FAILURE = 2
    # log at info (instead of debug) level.
    VERBOSE = 3
1272
1273
f67539c2
TL
# Backport: Python < 3.8 lacks asyncio.ThreadedChildWatcher, which is needed
# to await subprocesses from non-main threads (see _do_waitpid note below).
if sys.version_info < (3, 8):
    import itertools
    import threading
    import warnings
    from asyncio import events

    class ThreadedChildWatcher(asyncio.AbstractChildWatcher):
        """Threaded child watcher implementation.
        The watcher uses a thread per process
        for waiting for the process finish.
        It doesn't require subscription on POSIX signal
        but a thread creation is not free.
        The watcher has O(1) complexity, its performance doesn't depend
        on amount of spawn processes.
        """

        def __init__(self):
            self._pid_counter = itertools.count(0)
            self._threads = {}

        def is_active(self):
            return True

        def close(self):
            self._join_threads()

        def _join_threads(self):
            """Internal: Join all non-daemon threads"""
            threads = [thread for thread in list(self._threads.values())
                       if thread.is_alive() and not thread.daemon]
            for thread in threads:
                thread.join()

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            pass

        def __del__(self, _warn=warnings.warn):
            threads = [thread for thread in list(self._threads.values())
                       if thread.is_alive()]
            if threads:
                _warn(f'{self.__class__} has registered but not finished child processes',
                      ResourceWarning,
                      source=self)

        def add_child_handler(self, pid, callback, *args):
            loop = events.get_event_loop()
            thread = threading.Thread(target=self._do_waitpid,
                                      name=f'waitpid-{next(self._pid_counter)}',
                                      args=(loop, pid, callback, args),
                                      daemon=True)
            self._threads[pid] = thread
            thread.start()

        def remove_child_handler(self, pid):
            # asyncio never calls remove_child_handler() !!!
            # The method is no-op but is implemented because
            # abstract base class requires it
            return True

        def attach_loop(self, loop):
            pass

        def _do_waitpid(self, loop, expected_pid, callback, args):
            assert expected_pid > 0

            try:
                pid, status = os.waitpid(expected_pid, 0)
            except ChildProcessError:
                # The child process is already reaped
                # (may happen if waitpid() is called elsewhere).
                pid = expected_pid
                returncode = 255
                logger.warning(
                    'Unknown child process pid %d, will report returncode 255',
                    pid)
            else:
                if os.WIFEXITED(status):
                    returncode = os.WEXITSTATUS(status)
                elif os.WIFSIGNALED(status):
                    # negative returncode mirrors subprocess convention for
                    # signal-terminated children
                    returncode = -os.WTERMSIG(status)
                else:
                    raise ValueError(f'unknown wait status {status}')
                if loop.get_debug():
                    logger.debug('process %s exited with returncode %s',
                                 expected_pid, returncode)

            if loop.is_closed():
                logger.warning('Loop %r that handles pid %r is closed', loop, pid)
            else:
                loop.call_soon_threadsafe(callback, pid, returncode, *args)

            self._threads.pop(expected_pid)

    # unlike SafeChildWatcher which handles SIGCHLD in the main thread,
    # ThreadedChildWatcher runs in a separated thread, hence allows us to
    # run create_subprocess_exec() in non-main thread, see
    # https://bugs.python.org/issue35621
    asyncio.set_child_watcher(ThreadedChildWatcher())
1375
1376
try:
    from asyncio import run as async_run  # type: ignore[attr-defined]
except ImportError:
    # Python < 3.7 fallback: emulate asyncio.run() by creating, using and
    # tearing down a fresh event loop around the coroutine.
    def async_run(coro):  # type: ignore
        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)
            return loop.run_until_complete(coro)
        finally:
            try:
                loop.run_until_complete(loop.shutdown_asyncgens())
            finally:
                asyncio.set_event_loop(None)
                loop.close()
1391
1392
def call(ctx: CephadmContext,
         command: List[str],
         desc: Optional[str] = None,
         verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
         timeout: Optional[int] = DEFAULT_TIMEOUT,
         **kwargs) -> Tuple[str, str, int]:
    """
    Wrap subprocess.Popen to

    - log stdout/stderr to a logger,
    - decode utf-8
    - cleanly return out, err, returncode

    :param desc: log prefix; defaults to the command name.
    :param verbosity: how stream output is logged (see CallVerbosity).
    :param timeout: timeout in seconds (falls back to ctx.timeout)
    :return: (stdout, stderr, returncode); returncode is 124 on timeout.
    """

    prefix = command[0] if desc is None else desc
    if prefix:
        prefix += ': '
    timeout = timeout or ctx.timeout

    logger.debug('Running command: %s' % ' '.join(command))

    async def tee(reader: asyncio.StreamReader) -> str:
        # Collect the whole stream while logging each line according
        # to the requested verbosity.
        collected = StringIO()
        async for line in reader:
            message = line.decode('utf-8')
            collected.write(message)
            if verbosity == CallVerbosity.VERBOSE:
                logger.info(prefix + message.rstrip())
            elif verbosity != CallVerbosity.SILENT:
                logger.debug(prefix + message.rstrip())
        return collected.getvalue()

    async def run_with_timeout() -> Tuple[str, str, int]:
        process = await asyncio.create_subprocess_exec(
            *command,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE)
        assert process.stdout
        assert process.stderr
        try:
            # drain both pipes concurrently so neither can fill and stall
            # the child
            stdout, stderr = await asyncio.gather(tee(process.stdout),
                                                  tee(process.stderr))
            returncode = await asyncio.wait_for(process.wait(), timeout)
        except asyncio.TimeoutError:
            logger.info(prefix + f'timeout after {timeout} seconds')
            # 124 follows the timeout(1) exit-status convention
            return '', '', 124
        else:
            return stdout, stderr, returncode

    stdout, stderr, returncode = async_run(run_with_timeout())
    if returncode != 0 and verbosity == CallVerbosity.VERBOSE_ON_FAILURE:
        logger.info('Non-zero exit code %d from %s',
                    returncode, ' '.join(command))
        for line in stdout.splitlines():
            logger.info(prefix + 'stdout ' + line)
        for line in stderr.splitlines():
            logger.info(prefix + 'stderr ' + line)
    return stdout, stderr, returncode
1453
1454
def call_throws(
        ctx: CephadmContext,
        command: List[str],
        desc: Optional[str] = None,
        verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
        timeout: Optional[int] = DEFAULT_TIMEOUT,
        **kwargs) -> Tuple[str, str, int]:
    """
    Run *command* via :func:`call` and raise on a non-zero exit status.

    :return: (stdout, stderr, returncode) — returncode is always 0 here.
    :raises RuntimeError: if the command fails; the captured stderr is
        appended so the failure is diagnosable from the message alone.
    """
    out, err, ret = call(ctx, command, desc, verbosity, timeout, **kwargs)
    if ret:
        # Include stderr in the error: without it the caller only learns
        # *that* the command failed, not why.
        msg = 'Failed command: %s' % ' '.join(command)
        if err:
            msg += ': %s' % err.strip()
        raise RuntimeError(msg)
    return out, err, ret
1466
1467
f67539c2
TL
def call_timeout(ctx, command, timeout):
    # type: (CephadmContext, List[str], int) -> int
    """Run *command*, raising TimeoutExpired if it exceeds *timeout* seconds."""
    logger.debug('Running command (timeout=%s): %s'
                 % (timeout, ' '.join(command)))

    try:
        return subprocess.call(command, timeout=timeout)
    except subprocess.TimeoutExpired:
        msg = 'Command `%s` timed out after %s seconds' % (command, timeout)
        logger.debug(msg)
        raise TimeoutExpired(msg)
9f95a23c
TL
1483
1484##################################
1485
f6b5b4d7 1486
f67539c2
TL
def is_available(ctx, what, func):
    # type: (CephadmContext, str, Callable[[], bool]) -> None
    """
    Wait for a service to become available

    :param what: the name of the service
    :param func: the callable object that determines availability
    :raises Error: if the service is still unavailable after ctx.retry tries
    """
    retry = ctx.retry
    logger.info('Waiting for %s...' % what)
    attempt = 1
    while not func():
        if attempt > retry:
            raise Error('%s not available after %s tries'
                        % (what, retry))
        logger.info('%s not available, waiting (%s/%s)...'
                    % (what, attempt, retry))
        attempt += 1
        time.sleep(2)
    logger.info('%s is available'
                % what)
9f95a23c
TL
1512
1513
def read_config(fn):
    # type: (Optional[str]) -> ConfigParser
    """Parse the ini-style file at *fn* (if given) into a ConfigParser."""
    parser = ConfigParser()
    if fn:
        parser.read(fn)
    return parser
1520
f6b5b4d7 1521
9f95a23c
TL
def pathify(p):
    # type: (str) -> str
    """Expand a leading '~' and return the absolute, normalized path."""
    return os.path.abspath(os.path.expanduser(p))
9f95a23c 1526
f6b5b4d7 1527
def get_file_timestamp(fn):
    # type: (str) -> Optional[str]
    """Return *fn*'s mtime as a UTC string in DATEFMT form, or None on error."""
    try:
        mtime = os.path.getmtime(fn)
        return datetime.datetime.fromtimestamp(
            mtime, tz=datetime.timezone.utc).strftime(DATEFMT)
    except Exception:
        return None
1537
f6b5b4d7 1538
def try_convert_datetime(s):
    # type: (str) -> Optional[str]
    """
    Normalize a docker/podman timestamp to UTC, rendered as DATEFMT.

    This is super irritating because
    1) podman and docker use different formats
    2) python's strptime can't parse either one
    Examples seen in the wild:
      docker 18.09.7: 2020-03-03T09:21:43.636153304Z
      podman 1.7.0:   2020-03-03T15:52:30.136257504-06:00
                      2020-03-03 15:52:30.136257504 -0600 CST
    (In the podman case, there is a different string format for
    'inspect' and 'inspect --format {{.Created}}'!!)

    Returns None when no known format matches.
    """
    # In *all* cases, the 9 digit second precision is too much for
    # python's strptime. Shorten it to 6 digits.
    s = re.sub(r'(\.[\d]{6})[\d]*', r'\1', s)

    # replace trailing Z with -0000, since (on python 3.6.8) it won't parse
    if s.endswith('Z'):
        s = s[:-1] + '-0000'

    # cut off the redundant 'CST' part that strptime can't parse, if present
    s = ' '.join(s.split(' ')[0:3])

    # try parsing with several format strings
    for fmt in ('%Y-%m-%dT%H:%M:%S.%f%z',
                '%Y-%m-%d %H:%M:%S.%f %z'):
        try:
            dt = datetime.datetime.strptime(s, fmt)
            return dt.astimezone(tz=datetime.timezone.utc).strftime(DATEFMT)
        except ValueError:
            pass
    return None
1578
f6b5b4d7 1579
def _parse_podman_version(version_str):
    # type: (str) -> Tuple[int, ...]
    """
    Parse a podman version string like '2.1.0' into an int tuple.

    Trailing non-numeric characters in a component are stripped one at a
    time ('2-dev' -> 2); a component with no digits re-raises the first
    conversion error.
    """
    def to_int(val, org_e=None):
        if not val and org_e:
            raise org_e
        try:
            return int(val)
        except ValueError as e:
            # chop the last character and retry, remembering the first error
            return to_int(val[:-1], org_e or e)

    return tuple(to_int(part) for part in version_str.split('.'))
1591
1592
def get_hostname():
    # type: () -> str
    """Return this host's hostname."""
    return socket.gethostname()
1596
f6b5b4d7 1597
9f95a23c
TL
def get_fqdn():
    # type: () -> str
    """Return the fully-qualified domain name, falling back to the hostname."""
    return socket.getfqdn() or socket.gethostname()
1601
f6b5b4d7 1602
9f95a23c
TL
def get_arch():
    # type: () -> str
    """Return the machine hardware name (e.g. 'x86_64')."""
    return platform.uname().machine
1606
f6b5b4d7 1607
9f95a23c
TL
def generate_service_id():
    # type: () -> str
    """Return '<hostname>.<6 random lowercase letters>'."""
    suffix = ''.join(random.choice(string.ascii_lowercase) for _ in range(6))
    return '%s.%s' % (get_hostname(), suffix)
1612
f6b5b4d7 1613
9f95a23c
TL
def generate_password():
    # type: () -> str
    """
    Return a random 10-character password of lowercase letters and digits.

    Uses the ``secrets`` CSPRNG: ``random`` is not suitable for
    security-sensitive values such as generated passwords.
    """
    import secrets  # stdlib; local import, used only here
    alphabet = string.ascii_lowercase + string.digits
    return ''.join(secrets.choice(alphabet) for _ in range(10))
1618
f6b5b4d7 1619
9f95a23c
TL
def normalize_container_id(i):
    # type: (str) -> str
    """
    Strip a leading 'sha256:' from a container id.

    docker adds the sha256: prefix, but AFAICS both docker (18.09.7 in
    bionic at least) and podman always use sha256, so leave off the
    prefix for consistency.
    """
    prefix = 'sha256:'
    return i[len(prefix):] if i.startswith(prefix) else i
1630
f6b5b4d7 1631
9f95a23c
TL
def make_fsid():
    # type: () -> str
    """Generate a new cluster fsid (a UUID1, rendered as a string)."""
    return str(uuid.uuid1())
1635
f6b5b4d7 1636
9f95a23c
TL
def is_fsid(s):
    # type: (str) -> bool
    """Return True if *s* parses as a UUID."""
    try:
        uuid.UUID(s)
        return True
    except ValueError:
        return False
1644
f6b5b4d7 1645
9f95a23c
TL
def infer_fsid(func):
    """
    Decorator: if ctx.fsid is unset and we only find a single fsid in
    /var/lib/ceph/*, use that before calling *func*.
    """
    @wraps(func)
    def _infer_fsid(ctx: CephadmContext):
        if ctx.fsid:
            logger.debug('Using specified fsid: %s' % ctx.fsid)
            return func(ctx)

        fsids_set = set()
        daemon_list = list_daemons(ctx, detail=False)
        for daemon in daemon_list:
            if not is_fsid(daemon['fsid']):
                # 'unknown' fsid
                continue
            elif 'name' not in ctx or not ctx.name:
                # ctx.name not specified
                fsids_set.add(daemon['fsid'])
            elif daemon['name'] == ctx.name:
                # ctx.name is a match
                fsids_set.add(daemon['fsid'])
        fsids = sorted(fsids_set)

        if not fsids:
            # some commands do not always require an fsid
            pass
        elif len(fsids) == 1:
            logger.info('Inferring fsid %s' % fsids[0])
            ctx.fsid = fsids[0]
        else:
            # ambiguous: refuse to guess between clusters
            raise Error('Cannot infer an fsid, one must be specified: %s' % fsids)
        return func(ctx)

    return _infer_fsid
1681
f6b5b4d7 1682
e306af50
TL
def infer_config(func):
    """
    Decorator: if ctx.config is unset and we find a MON daemon, use the
    config from that container; otherwise fall back to SHELL_DEFAULT_CONF.
    """
    @wraps(func)
    def _infer_config(ctx: CephadmContext):
        if ctx.config:
            logger.debug('Using specified config: %s' % ctx.config)
            return func(ctx)
        config = None
        if ctx.fsid:
            name = ctx.name
            if not name:
                # no daemon name given: pick any mon of this cluster
                daemon_list = list_daemons(ctx, detail=False)
                for daemon in daemon_list:
                    if daemon['name'].startswith('mon.'):
                        name = daemon['name']
                        break
            if name:
                config = '/var/lib/ceph/{}/{}/config'.format(ctx.fsid,
                                                             name)
        if config:
            logger.info('Inferring config %s' % config)
            ctx.config = config
        elif os.path.exists(SHELL_DEFAULT_CONF):
            logger.debug('Using default config: %s' % SHELL_DEFAULT_CONF)
            ctx.config = SHELL_DEFAULT_CONF
        return func(ctx)

    return _infer_config
1713
f6b5b4d7 1714
def _get_default_image(ctx: CephadmContext):
    """Return DEFAULT_IMAGE, warning first when this is a development build."""
    if DEFAULT_IMAGE_IS_MASTER:
        warn = """This is a development version of cephadm.
For information regarding the latest stable release:
   https://docs.ceph.com/docs/{}/cephadm/install
""".format(LATEST_STABLE_RELEASE)
        for line in warn.splitlines():
            logger.warning('{}{}{}'.format(termcolor.yellow, line, termcolor.end))
    return DEFAULT_IMAGE
1724
f6b5b4d7 1725
9f95a23c
TL
def infer_image(func):
    """
    Decorator: fill ctx.image from (in order) the CEPHADM_IMAGE env var,
    the most recent locally pulled ceph image, or the built-in default.
    """
    @wraps(func)
    def _infer_image(ctx: CephadmContext):
        if not ctx.image:
            ctx.image = (
                os.environ.get('CEPHADM_IMAGE')
                or get_last_local_ceph_image(ctx, ctx.container_engine.path)
                or _get_default_image(ctx))
        return func(ctx)

    return _infer_image
1741
f6b5b4d7 1742
9f95a23c
TL
def default_image(func):
    """
    Decorator: fill ctx.image from the daemon name (monitoring/haproxy/
    keepalived images), the CEPHADM_IMAGE env var, or the built-in default.
    """
    @wraps(func)
    def _default_image(ctx: CephadmContext):
        if not ctx.image:
            if 'name' in ctx and ctx.name:
                type_ = ctx.name.split('.', 1)[0]
                # monitoring components carry their own image
                if type_ in Monitoring.components:
                    ctx.image = Monitoring.components[type_]['image']
                if type_ == 'haproxy':
                    ctx.image = HAproxy.default_image
                if type_ == 'keepalived':
                    ctx.image = Keepalived.default_image
            if not ctx.image:
                ctx.image = (os.environ.get('CEPHADM_IMAGE')
                             or _get_default_image(ctx))

        return func(ctx)

    return _default_image
1763
f6b5b4d7 1764
def get_last_local_ceph_image(ctx: CephadmContext, container_path: str):
    """
    :return: The most recent local ceph image (already pulled)
    """
    cmd = [container_path, 'images',
           '--filter', 'label=ceph=True',
           '--filter', 'dangling=false',
           '--format', '{{.Repository}}@{{.Digest}}']
    out, _, _ = call_throws(ctx, cmd)
    return _filter_last_local_ceph_image(out)
1775
1776
def _filter_last_local_ceph_image(out):
    # type: (str) -> Optional[str]
    """Pick the first usable image line (non-empty, with a digest suffix)."""
    usable = (line for line in out.splitlines()
              if line and not line.endswith('@'))
    for image in usable:
        logger.info('Using recent ceph image %s' % image)
        return image
    return None
1784
f6b5b4d7 1785
def write_tmp(s, uid, gid):
    # type: (str, int, int) -> IO[str]
    """Write *s* to a new NamedTemporaryFile owned by uid:gid; return it open."""
    handle = tempfile.NamedTemporaryFile(mode='w', prefix='ceph-tmp')
    os.fchown(handle.fileno(), uid, gid)
    handle.write(s)
    handle.flush()
    return handle
1795
f6b5b4d7 1796
9f95a23c
TL
def makedirs(dir, uid, gid, mode):
    # type: (str, int, int, int) -> None
    """Create *dir* if needed, then force its ownership and mode."""
    if os.path.exists(dir):
        os.chmod(dir, mode)
    else:
        os.makedirs(dir, mode=mode)
    os.chown(dir, uid, gid)
    os.chmod(dir, mode)  # the above is masked by umask...
1805
f6b5b4d7 1806
f67539c2
TL
def get_data_dir(fsid, data_dir, t, n):
    # type: (str, str, str, Union[int, str]) -> str
    """Return the daemon data directory: <data_dir>/<fsid>/<type>.<id>."""
    return os.path.join(data_dir, fsid, '{}.{}'.format(t, n))
9f95a23c 1810
f6b5b4d7 1811
f67539c2
TL
def get_log_dir(fsid, log_dir):
    # type: (str, str) -> str
    """Return the per-cluster log directory: <log_dir>/<fsid>."""
    return os.path.join(log_dir, fsid)
9f95a23c 1815
f6b5b4d7 1816
f67539c2
TL
def make_data_dir_base(fsid, data_dir, uid, gid):
    # type: (str, str, int, int) -> str
    """Create the cluster data-dir skeleton (incl. crash dirs); return its path."""
    base = os.path.join(data_dir, fsid)
    for parts in ((), ('crash',), ('crash', 'posted')):
        makedirs(os.path.join(base, *parts), uid, gid, DATA_DIR_MODE)
    return base
1825
f6b5b4d7 1826
f67539c2
TL
def make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=None, gid=None):
    # type: (CephadmContext, str, str, Union[int, str], Optional[int], Optional[int]) -> str
    """Create (and return) the data directory for one daemon instance."""
    if uid is None or gid is None:
        uid, gid = extract_uid_gid(ctx)
    make_data_dir_base(fsid, ctx.data_dir, uid, gid)
    daemon_data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    makedirs(daemon_data_dir, uid, gid, DATA_DIR_MODE)
    return daemon_data_dir
1835
f6b5b4d7 1836
f67539c2
TL
def make_log_dir(ctx, fsid, uid=None, gid=None):
    # type: (CephadmContext, str, Optional[int], Optional[int]) -> str
    """Create (and return) the per-cluster log directory."""
    if uid is None or gid is None:
        uid, gid = extract_uid_gid(ctx)
    cluster_log_dir = get_log_dir(fsid, ctx.log_dir)
    makedirs(cluster_log_dir, uid, gid, LOG_DIR_MODE)
    return cluster_log_dir
1844
f6b5b4d7 1845
f67539c2
TL
def make_var_run(ctx, fsid, uid, gid):
    # type: (CephadmContext, str, int, int) -> None
    """Create /var/run/ceph/<fsid> with mode 0770, owned by uid:gid."""
    call_throws(ctx, ['install', '-d', '-m0770',
                      '-o', str(uid), '-g', str(gid),
                      '/var/run/ceph/%s' % fsid])
9f95a23c 1850
f6b5b4d7 1851
f67539c2
TL
def copy_tree(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy each directory tree in *src* to *dst*, then chown the result.
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_dir in src:
        dst_dir = dst if not os.path.isdir(dst) \
            else os.path.join(dst, os.path.basename(src_dir))

        logger.debug('copy directory `%s` -> `%s`' % (src_dir, dst_dir))
        # remove any stale destination first; dirs_exist_ok needs python 3.8
        shutil.rmtree(dst_dir, ignore_errors=True)
        shutil.copytree(src_dir, dst_dir)

        for dirpath, dirnames, filenames in os.walk(dst_dir):
            logger.debug('chown %s:%s `%s`' % (uid, gid, dirpath))
            os.chown(dirpath, uid, gid)
            for filename in filenames:
                logger.debug('chown %s:%s `%s`' % (uid, gid, filename))
                os.chown(os.path.join(dirpath, filename), uid, gid)
1875
1876
f67539c2
TL
def copy_files(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy each file in *src* to *dst*, then chown the copies.
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_file in src:
        dst_file = dst if not os.path.isdir(dst) \
            else os.path.join(dst, os.path.basename(src_file))

        logger.debug('copy file `%s` -> `%s`' % (src_file, dst_file))
        shutil.copyfile(src_file, dst_file)

        logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
        os.chown(dst_file, uid, gid)
1895
f6b5b4d7 1896
f67539c2
TL
def move_files(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """
    Move each file in *src* to *dst* (recreating symlinks), then chown.
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_file in src:
        dst_file = dst if not os.path.isdir(dst) \
            else os.path.join(dst, os.path.basename(src_file))

        if os.path.islink(src_file):
            # shutil.move() in py2 does not handle symlinks correctly
            link_target = os.readlink(src_file)
            logger.debug("symlink '%s' -> '%s'" % (dst_file, link_target))
            os.symlink(link_target, dst_file)
            os.unlink(src_file)
        else:
            logger.debug("move file '%s' -> '%s'" % (src_file, dst_file))
            shutil.move(src_file, dst_file)
        logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
        os.chown(dst_file, uid, gid)
1921
f6b5b4d7 1922
f67539c2 1923# copied from distutils
9f95a23c
TL
def find_executable(executable, path=None):
    """Tries to find 'executable' in the directories listed in 'path'.
    A string listing directories separated by 'os.pathsep'; defaults to
    os.environ['PATH']. Returns the complete filename or None if not found.
    """
    _, ext = os.path.splitext(executable)
    if sys.platform == 'win32' and ext != '.exe':
        executable += '.exe'

    # an existing (possibly relative/absolute) path wins outright
    if os.path.isfile(executable):
        return executable

    if path is None:
        path = os.environ.get('PATH', None)
        if path is None:
            try:
                path = os.confstr('CS_PATH')
            except (AttributeError, ValueError):
                # os.confstr() or CS_PATH is not available
                path = os.defpath
        # bpo-35755: don't fall back to os.defpath when PATH is set but
        # empty — PATH='' matches nothing, whereas PATH=':' would look in
        # the current directory

    if not path:
        return None

    for directory in path.split(os.pathsep):
        candidate = os.path.join(directory, executable)
        if os.path.isfile(candidate):
            # the file exists, we have a shot at spawn working
            return candidate
    return None
1958
f6b5b4d7 1959
9f95a23c
TL
def find_program(filename):
    # type: (str) -> str
    """Like find_executable(), but raise ValueError when nothing is found."""
    located = find_executable(filename)
    if located is None:
        raise ValueError('%s not found' % filename)
    return located
1966
f6b5b4d7 1967
f67539c2
TL
def find_container_engine(ctx: CephadmContext):
    """Return the first available container engine, or None if none works."""
    if ctx.docker:
        return Docker()
    for engine_cls in CONTAINER_PREFERENCE:
        try:
            return engine_cls()
        except Exception as e:
            logger.debug('Could not locate %s: %s' % (engine_cls.EXE, e))
    return None
1978
1979
def check_container_engine(ctx):
    # type: (CephadmContext) -> None
    """Validate ctx.container_engine; raise Error if missing or too old."""
    engine = ctx.container_engine
    if not isinstance(engine, CONTAINER_PREFERENCE):
        raise Error('Unable to locate any of %s' % [i.EXE for i in CONTAINER_PREFERENCE])
    if isinstance(engine, Podman):
        engine.get_version(ctx)
        if engine.version < MIN_PODMAN_VERSION:
            raise Error('podman version %d.%d.%d or later is required' % MIN_PODMAN_VERSION)
1989
1990
9f95a23c
TL
def get_unit_name(fsid, daemon_type, daemon_id=None):
    # type: (str, str, Optional[Union[int, str]]) -> str
    """Build the systemd unit name for a daemon (type alone, or type + id).

    The cephadm exporter daemon gets a non-templated unit name; everything
    else uses the ceph-<fsid>@... template.
    """
    # accept either name or type + id
    if daemon_id is not None:
        if daemon_type == CephadmDaemon.daemon_type:
            return 'ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id)
        return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
    return 'ceph-%s@%s' % (fsid, daemon_type)
2000
f6b5b4d7 2001
f67539c2
TL
def get_unit_name_by_daemon_name(ctx: CephadmContext, fsid, name):
    """Look up a daemon's systemd unit name from its description."""
    desc = get_daemon_description(ctx, fsid, name)
    if 'systemd_unit' not in desc:
        raise Error('Failed to get unit name for {}'.format(desc))
    return desc['systemd_unit']
2008
f6b5b4d7 2009
f67539c2
TL
def check_unit(ctx, unit_name):
    # type: (CephadmContext, str) -> Tuple[bool, str, bool]
    """Query systemd for a unit's (enabled, state, installed) status.

    NOTE: we ignore the exit code here because systemctl outputs
    various exit codes based on the state of the service, but the
    string result is more explicit (and sufficient).
    """
    enabled = False
    installed = False
    try:
        out, err, code = call(ctx, ['systemctl', 'is-enabled', unit_name],
                              verbosity=CallVerbosity.DEBUG)
        if code == 0:
            enabled = True
            installed = True
        elif 'disabled' in out:
            installed = True
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        enabled = False
        installed = False

    state = 'unknown'
    try:
        out, err, code = call(ctx, ['systemctl', 'is-active', unit_name],
                              verbosity=CallVerbosity.DEBUG)
        status = out.strip()
        if status == 'active':
            state = 'running'
        elif status == 'inactive':
            state = 'stopped'
        elif status in ('failed', 'auto-restart'):
            state = 'error'
        else:
            state = 'unknown'
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        state = 'unknown'
    return (enabled, state, installed)
2047
f6b5b4d7 2048
f67539c2
TL
def check_units(ctx, units, enabler=None):
    # type: (CephadmContext, List[str], Optional[Packager]) -> bool
    """Return True as soon as one unit is enabled and running; otherwise
    optionally enable any installed units via *enabler* and return False."""
    for unit in units:
        enabled, state, installed = check_unit(ctx, unit)
        if enabled and state == 'running':
            logger.info('Unit %s is enabled and running' % unit)
            return True
        if enabler is not None and installed:
            logger.info('Enabling unit %s' % unit)
            enabler.enable_service(unit)
    return False
2061
f6b5b4d7 2062
f67539c2
TL
def is_container_running(ctx: CephadmContext, name: str) -> bool:
    """Return True if a container named exactly *name* is running.

    The engine's `ps --format {{.Names}}` prints one container name per
    line; compare whole lines rather than doing a substring test, which
    would also match containers whose name merely contains *name*
    (e.g. 'ceph-mon' matching 'ceph-mon2').
    """
    out, err, ret = call_throws(ctx, [
        ctx.container_engine.path, 'ps',
        '--format', '{{.Names}}'])
    return any(name == line.strip() for line in out.splitlines())
2068
2069
def get_legacy_config_fsid(cluster, legacy_dir=None):
    # type: (str, Optional[str]) -> Optional[str]
    """Read the fsid from a legacy /etc/ceph/<cluster>.conf, if present."""
    config_file = '/etc/ceph/%s.conf' % cluster
    if legacy_dir is not None:
        config_file = os.path.abspath(legacy_dir + config_file)

    if not os.path.exists(config_file):
        return None
    config = read_config(config_file)
    if config.has_section('global') and config.has_option('global', 'fsid'):
        return config.get('global', 'fsid')
    return None
2081
f6b5b4d7 2082
f67539c2
TL
def get_legacy_daemon_fsid(ctx, cluster,
                           daemon_type, daemon_id, legacy_dir=None):
    # type: (CephadmContext, str, str, Union[int, str], Optional[str]) -> Optional[str]
    """Determine the fsid of a legacy daemon: for OSDs prefer the per-daemon
    ceph_fsid file, falling back to the cluster config's fsid."""
    fsid = None
    if daemon_type == 'osd':
        try:
            fsid_file = os.path.join(ctx.data_dir,
                                     daemon_type,
                                     'ceph-%s' % daemon_id,
                                     'ceph_fsid')
            if legacy_dir is not None:
                fsid_file = os.path.abspath(legacy_dir + fsid_file)
            with open(fsid_file, 'r') as f:
                fsid = f.read().strip()
        except IOError:
            pass
    return fsid or get_legacy_config_fsid(cluster, legacy_dir=legacy_dir)
2102
f6b5b4d7 2103
f67539c2
TL
def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
    # type: (CephadmContext, str, str, Union[int, str]) -> List[str]
    """Build the extra command-line arguments for the given daemon type.

    Ceph daemons get uid/gid and logging flags; monitoring components take
    their args from the Monitoring table (plus peer/config flags for
    alertmanager); ganesha/haproxy/custom containers delegate to their
    wrapper classes.
    """
    r = list()  # type: List[str]

    if daemon_type in Ceph.daemons and daemon_type != 'crash':
        # run as the ceph user and send logs to stderr (journald), not files
        r += [
            '--setuser', 'ceph',
            '--setgroup', 'ceph',
            '--default-log-to-file=false',
            '--default-log-to-stderr=true',
            '--default-log-stderr-prefix=debug ',
        ]
        if daemon_type == 'mon':
            r += [
                '--default-mon-cluster-log-to-file=false',
                '--default-mon-cluster-log-to-stderr=true',
            ]
    elif daemon_type in Monitoring.components:
        metadata = Monitoring.components[daemon_type]
        r += metadata.get('args', list())
        if daemon_type == 'alertmanager':
            # peers come from the deploy-time config JSON
            config = get_parm(ctx.config_json)
            peers = config.get('peers', list())  # type: ignore
            for peer in peers:
                r += ['--cluster.peer={}'.format(peer)]
            # some alertmanager, by default, look elsewhere for a config
            r += ['--config.file=/etc/alertmanager/alertmanager.yml']
    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
        r += nfs_ganesha.get_daemon_args()
    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, fsid, daemon_id)
        r += haproxy.get_daemon_args()
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        r.extend(cc.get_daemon_args())

    return r
2142
f6b5b4d7 2143
def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid,
                       config=None, keyring=None):
    # type: (CephadmContext, str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None
    """Create the on-host data/log directories for a daemon and populate
    config, keyring and component-specific files, owned by uid:gid.
    """
    data_dir = make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=uid, gid=gid)
    make_log_dir(ctx, fsid, uid=uid, gid=gid)

    if config:
        # write the ceph config with restrictive permissions (0600)
        config_path = os.path.join(data_dir, 'config')
        with open(config_path, 'w') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config)

    if keyring:
        keyring_path = os.path.join(data_dir, 'keyring')
        with open(keyring_path, 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write(keyring)

    if daemon_type in Monitoring.components.keys():
        config_json: Dict[str, Any] = get_parm(ctx.config_json)
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())

        # Set up directories specific to the monitoring component
        config_dir = ''
        data_dir_root = ''
        if daemon_type == 'prometheus':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/prometheus'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'alerting'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
        elif daemon_type == 'grafana':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/grafana'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
            # pre-create the sqlite db so the bind mount has a file target
            touch(os.path.join(data_dir_root, 'data', 'grafana.db'), uid, gid)
        elif daemon_type == 'alertmanager':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/alertmanager'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755)

        # populate the config directory for the component from the config-json
        for fname in required_files:
            if 'files' in config_json:  # type: ignore
                content = dict_get_join(config_json['files'], fname)
                with open(os.path.join(data_dir_root, config_dir, fname), 'w') as f:
                    os.fchown(f.fileno(), uid, gid)
                    os.fchmod(f.fileno(), 0o600)
                    f.write(content)

    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
        nfs_ganesha.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CephIscsi.daemon_type:
        ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
        ceph_iscsi.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, fsid, daemon_id)
        haproxy.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == Keepalived.daemon_type:
        keepalived = Keepalived.init(ctx, fsid, daemon_id)
        keepalived.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        cc.create_daemon_dirs(data_dir, uid, gid)
2222
f6b5b4d7 2223
9f95a23c
TL
def get_parm(option):
    # type: (str) -> Dict[str, str]
    """Parse a JSON config option.

    *option* may be '-' (read stdin once, cached for later calls), an
    inline '{...}' JSON string, or a path to a JSON file. An empty value
    yields an empty dict.
    """
    if not option:
        return dict()

    global cached_stdin
    if option == '-':
        # stdin can only be consumed once; cache it for repeat calls
        if cached_stdin is not None:
            raw = cached_stdin
        else:
            raw = sys.stdin.read()
            cached_stdin = raw
    elif option[0] == '{' and option[-1] == '}':
        # inline json string
        raw = option
    elif os.path.exists(option):
        # json file
        with open(option, 'r') as f:
            raw = f.read()
    else:
        raise Error('Config file {} not found'.format(option))

    try:
        parsed = json.loads(raw)
    except ValueError as e:
        raise Error('Invalid JSON in {}: {}'.format(option, e))
    return parsed
2254
f6b5b4d7 2255
f67539c2
TL
def get_config_and_keyring(ctx):
    # type: (CephadmContext) -> Tuple[Optional[str], Optional[str]]
    """Collect config and keyring text from the context.

    Sources, later ones overriding earlier: the config_json blob, an
    explicit config file, and either a raw key (wrapped into keyring
    syntax) or a keyring file.
    """
    def _slurp(path, kind):
        # type: (str, str) -> str
        # read a small text file, translating a missing file into Error
        try:
            with open(path, 'r') as f:
                return f.read()
        except FileNotFoundError:
            raise Error('%s file: %s does not exist' % (kind, path))

    config = None
    keyring = None

    if 'config_json' in ctx and ctx.config_json:
        blob = get_parm(ctx.config_json)
        config = blob.get('config')
        keyring = blob.get('keyring')

    if 'config' in ctx and ctx.config:
        config = _slurp(ctx.config, 'config')

    if 'key' in ctx and ctx.key:
        keyring = '[%s]\n\tkey = %s\n' % (ctx.name, ctx.key)
    elif 'keyring' in ctx and ctx.keyring:
        keyring = _slurp(ctx.keyring, 'keyring')

    return config, keyring
2283
2284
f67539c2
TL
def get_container_binds(ctx, fsid, daemon_type, daemon_id):
    # type: (CephadmContext, str, str, Union[int, str, None]) -> List[List[str]]
    """Collect extra container bind specifications for the given daemon."""
    binds = []  # type: List[List[str]]

    if daemon_type == CephIscsi.daemon_type:
        binds.extend(CephIscsi.get_container_binds())
    elif daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        binds.extend(cc.get_container_binds(data_dir))

    return binds
2298
9f95a23c 2299
def get_container_mounts(ctx, fsid, daemon_type, daemon_id,
                         no_config=False):
    # type: (CephadmContext, str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str]
    """Build the host-path -> container-path volume mount map for a daemon.

    ':z'/':Z' suffixes request SELinux relabeling by the container engine.
    When *no_config* is True the generated config file is not mounted over
    /etc/ceph/ceph.conf.
    """
    mounts = dict()

    if daemon_type in Ceph.daemons:
        if fsid:
            run_path = os.path.join('/var/run/ceph', fsid)
            if os.path.exists(run_path):
                mounts[run_path] = '/var/run/ceph:z'
        log_dir = get_log_dir(fsid, ctx.log_dir)
        mounts[log_dir] = '/var/log/ceph:z'
        crash_dir = '/var/lib/ceph/%s/crash' % fsid
        if os.path.exists(crash_dir):
            mounts[crash_dir] = '/var/lib/ceph/crash:z'

    if daemon_type in Ceph.daemons and daemon_id:
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        if daemon_type == 'rgw':
            cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id)
        else:
            cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id)
        if daemon_type != 'crash':
            mounts[data_dir] = cdata_dir + ':z'
        if not no_config:
            mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
        if daemon_type in ['rbd-mirror', 'cephfs-mirror', 'crash']:
            # these do not search for their keyrings in a data directory
            mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id)

    if daemon_type in ['mon', 'osd']:
        mounts['/dev'] = '/dev'  # FIXME: narrow this down?
        mounts['/run/udev'] = '/run/udev'
    if daemon_type == 'osd':
        mounts['/sys'] = '/sys'  # for numa.cc, pick_address, cgroups, ...
        # selinux-policy in the container may not match the host.
        if HostFacts(ctx).selinux_enabled:
            selinux_folder = '/var/lib/ceph/%s/selinux' % fsid
            if not os.path.exists(selinux_folder):
                os.makedirs(selinux_folder, mode=0o755)
            mounts[selinux_folder] = '/sys/fs/selinux:ro'
        mounts['/run/lvm'] = '/run/lvm'
        mounts['/run/lock/lvm'] = '/run/lock/lvm'

    try:
        if ctx.shared_ceph_folder:  # make easy manager modules/ceph-volume development
            ceph_folder = pathify(ctx.shared_ceph_folder)
            if os.path.exists(ceph_folder):
                mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
                mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
                mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
                mounts[ceph_folder + '/monitoring/grafana/dashboards'] = '/etc/grafana/dashboards/ceph-dashboard'
                mounts[ceph_folder + '/monitoring/prometheus/alerts'] = '/etc/prometheus/ceph'
            else:
                logger.error('{}{}{}'.format(termcolor.red,
                                             'Ceph shared source folder does not exist.',
                                             termcolor.end))
    except AttributeError:
        # ctx may not carry shared_ceph_folder at all; that is fine
        pass

    if daemon_type in Monitoring.components and daemon_id:
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        if daemon_type == 'prometheus':
            mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z'
            mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z'
        elif daemon_type == 'node-exporter':
            mounts['/proc'] = '/host/proc:ro'
            mounts['/sys'] = '/host/sys:ro'
            mounts['/'] = '/rootfs:ro'
        elif daemon_type == 'grafana':
            mounts[os.path.join(data_dir, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
            mounts[os.path.join(data_dir, 'data/grafana.db')] = '/var/lib/grafana/grafana.db:Z'
        elif daemon_type == 'alertmanager':
            mounts[os.path.join(data_dir, 'etc/alertmanager')] = '/etc/alertmanager:Z'

    if daemon_type == NFSGanesha.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
        mounts.update(nfs_ganesha.get_container_mounts(data_dir))

    if daemon_type == HAproxy.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        mounts.update(HAproxy.get_container_mounts(data_dir))

    if daemon_type == CephIscsi.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        log_dir = get_log_dir(fsid, ctx.log_dir)
        mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir))

    if daemon_type == Keepalived.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        mounts.update(Keepalived.get_container_mounts(data_dir))

    if daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        mounts.update(cc.get_container_mounts(data_dir))

    return mounts
2406
f6b5b4d7 2407
f67539c2
TL
def get_container(ctx: CephadmContext,
                  fsid: str, daemon_type: str, daemon_id: Union[int, str],
                  privileged: bool = False,
                  ptrace: bool = False,
                  container_args: Optional[List[str]] = None) -> 'CephContainer':
    """Construct the CephContainer used to run the given daemon.

    Chooses entrypoint, client name, env vars, privilege level and
    engine-specific flags per daemon type.
    """
    entrypoint: str = ''
    name: str = ''
    ceph_args: List[str] = []
    envs: List[str] = []
    host_network: bool = True

    if container_args is None:
        container_args = []
    if daemon_type in ['mon', 'osd']:
        # mon and osd need privileged in order for libudev to query devices
        privileged = True
    if daemon_type == 'rgw':
        entrypoint = '/usr/bin/radosgw'
        name = 'client.rgw.%s' % daemon_id
    elif daemon_type == 'rbd-mirror':
        entrypoint = '/usr/bin/rbd-mirror'
        name = 'client.rbd-mirror.%s' % daemon_id
    elif daemon_type == 'cephfs-mirror':
        entrypoint = '/usr/bin/cephfs-mirror'
        name = 'client.cephfs-mirror.%s' % daemon_id
    elif daemon_type == 'crash':
        entrypoint = '/usr/bin/ceph-crash'
        name = 'client.crash.%s' % daemon_id
    elif daemon_type in ['mon', 'mgr', 'mds', 'osd']:
        entrypoint = '/usr/bin/ceph-' + daemon_type
        name = '%s.%s' % (daemon_type, daemon_id)
    elif daemon_type in Monitoring.components:
        entrypoint = ''
    elif daemon_type == NFSGanesha.daemon_type:
        entrypoint = NFSGanesha.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        envs.extend(NFSGanesha.get_container_envs())
    elif daemon_type == HAproxy.daemon_type:
        name = '%s.%s' % (daemon_type, daemon_id)
    elif daemon_type == Keepalived.daemon_type:
        name = '%s.%s' % (daemon_type, daemon_id)
        envs.extend(Keepalived.get_container_envs())
        container_args.extend(['--cap-add=NET_ADMIN', '--cap-add=NET_RAW'])
    elif daemon_type == CephIscsi.daemon_type:
        entrypoint = CephIscsi.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        # So the container can modprobe iscsi_target_mod and have write perms
        # to configfs we need to make this a privileged container.
        privileged = True
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        entrypoint = cc.entrypoint
        host_network = False
        envs.extend(cc.get_container_envs())
        container_args.extend(cc.get_container_args())

    if daemon_type in Monitoring.components:
        uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
        monitoring_args = [
            '--user',
            str(uid),
            # FIXME: disable cpu/memory limits for the time being (not supported
            # by ubuntu 18.04 kernel!)
        ]
        container_args.extend(monitoring_args)
    elif daemon_type == 'crash':
        ceph_args = ['-n', name]
    elif daemon_type in Ceph.daemons:
        ceph_args = ['-n', name, '-f']

    # if using podman, set -d, --conmon-pidfile & --cidfile flags
    # so service can have Type=Forking
    if isinstance(ctx.container_engine, Podman):
        runtime_dir = '/run'
        container_args.extend([
            '-d', '--log-driver', 'journald',
            '--conmon-pidfile',
            runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, daemon_id),
            '--cidfile',
            runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, daemon_id),
        ])
        if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION:
            container_args.append('--cgroups=split')

    return CephContainer(
        ctx,
        image=ctx.image,
        entrypoint=entrypoint,
        args=ceph_args + get_daemon_args(ctx, fsid, daemon_type, daemon_id),
        container_args=container_args,
        volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
        bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
        cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id),
        envs=envs,
        privileged=privileged,
        ptrace=ptrace,
        host_network=host_network,
    )
2506
f6b5b4d7 2507
f67539c2
TL
def extract_uid_gid(ctx, img='', file_path='/var/lib/ceph'):
    # type: (CephadmContext, str, Union[str, List[str]]) -> Tuple[int, int]
    """Discover the uid/gid to run as by stat-ing *file_path* inside the
    image; tries each path in turn and raises RuntimeError if none work."""
    if not img:
        img = ctx.image

    paths = [file_path] if isinstance(file_path, str) else file_path

    for candidate in paths:
        try:
            out = CephContainer(
                ctx,
                image=img,
                entrypoint='stat',
                args=['-c', '%u %g', candidate]
            ).run()
            uid, gid = out.split(' ')
            return int(uid), int(gid)
        except RuntimeError:
            pass
    raise RuntimeError('uid/gid not found')
2532
9f95a23c 2533
def deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid,
                  config=None, keyring=None,
                  osd_fsid=None,
                  reconfig=False,
                  ports=None):
    # type: (CephadmContext, str, str, Union[int, str], Optional[CephContainer], int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None
    """Deploy (or reconfigure) a daemon: create dirs/config, write systemd
    units, open firewall ports, and mkfs new monitors.

    Raises Error if a required TCP port is already in use or a reconfig is
    requested for a daemon with no data directory.
    """
    ports = ports or []
    if any([port_in_use(ctx, port) for port in ports]):
        raise Error("TCP Port(s) '{}' required for {} already in use".format(','.join(map(str, ports)), daemon_type))

    data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    if reconfig and not os.path.exists(data_dir):
        raise Error('cannot reconfig, data path %s does not exist' % data_dir)
    if daemon_type == 'mon' and not os.path.exists(data_dir):
        # brand-new monitor: run ceph-mon --mkfs before deploying units
        assert config
        assert keyring
        # tmp keyring file
        tmp_keyring = write_tmp(keyring, uid, gid)

        # tmp config file
        tmp_config = write_tmp(config, uid, gid)

        # --mkfs
        create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid)
        mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', daemon_id)
        log_dir = get_log_dir(fsid, ctx.log_dir)
        CephContainer(
            ctx,
            image=ctx.image,
            entrypoint='/usr/bin/ceph-mon',
            args=[
                '--mkfs',
                '-i', str(daemon_id),
                '--fsid', fsid,
                '-c', '/tmp/config',
                '--keyring', '/tmp/keyring',
            ] + get_daemon_args(ctx, fsid, 'mon', daemon_id),
            volume_mounts={
                log_dir: '/var/log/ceph:z',
                mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id),
                tmp_keyring.name: '/tmp/keyring:z',
                tmp_config.name: '/tmp/config:z',
            },
        ).run()

        # write conf
        with open(mon_dir + '/config', 'w') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config)
    else:
        # dirs, conf, keyring
        create_daemon_dirs(
            ctx,
            fsid, daemon_type, daemon_id,
            uid, gid,
            config, keyring)

    if not reconfig:
        if daemon_type == CephadmDaemon.daemon_type:
            # the cephadm exporter has its own (container-less) unit handling
            port = next(iter(ports), None)  # get first tcp port provided or None

            if ctx.config_json == '-':
                config_js = get_parm('-')
            else:
                config_js = get_parm(ctx.config_json)
            assert isinstance(config_js, dict)

            cephadm_exporter = CephadmDaemon(ctx, fsid, daemon_id, port)
            cephadm_exporter.deploy_daemon_unit(config_js)
        else:
            if c:
                deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id,
                                    c, osd_fsid=osd_fsid, ports=ports)
            else:
                raise RuntimeError('attempting to deploy a daemon without a container image')

    if not os.path.exists(data_dir + '/unit.created'):
        with open(data_dir + '/unit.created', 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write('mtime is time the daemon deployment was created\n')

    with open(data_dir + '/unit.configured', 'w') as f:
        f.write('mtime is time we were last configured\n')
        os.fchmod(f.fileno(), 0o600)
        os.fchown(f.fileno(), uid, gid)

    update_firewalld(ctx, daemon_type)

    # Open ports explicitly required for the daemon
    if ports:
        fw = Firewalld(ctx)
        fw.open_ports(ports)
        fw.apply_rules()

    if reconfig and daemon_type not in Ceph.daemons:
        # ceph daemons do not need a restart; others (presumably) do to pick
        # up the new config
        call_throws(ctx, ['systemctl', 'reset-failed',
                          get_unit_name(fsid, daemon_type, daemon_id)])
        call_throws(ctx, ['systemctl', 'restart',
                          get_unit_name(fsid, daemon_type, daemon_id)])
2638
9f95a23c 2639
f67539c2
TL
def _write_container_cmd_to_bash(ctx, file_obj, container, comment=None, background=False):
    # type: (CephadmContext, IO[str], CephContainer, Optional[str], Optional[bool]) -> None
    """Write the shell lines that clean up and (re)start *container*."""
    if comment:
        # A comment makes multi-container unit files easier to read and grok.
        file_obj.write('# ' + comment + '\n')
    # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
    file_obj.write('! ' + ' '.join(container.rm_cmd()) + ' 2> /dev/null\n')
    # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
    if isinstance(ctx.container_engine, Podman):
        storage_rm = ' '.join(shlex.quote(a) for a in container.rm_cmd(storage=True))
        file_obj.write('! ' + storage_rm + ' 2> /dev/null\n')

    # container run command, optionally backgrounded
    run_line = ' '.join(shlex.quote(a) for a in container.run_cmd())
    if background:
        run_line += ' &'
    file_obj.write(run_line + '\n')
2659
2660
def deploy_daemon_units(
    ctx: CephadmContext,
    fsid: str,
    uid: int,
    gid: int,
    daemon_type: str,
    daemon_id: Union[int, str],
    c: 'CephContainer',
    enable: bool = True,
    start: bool = True,
    osd_fsid: Optional[str] = None,
    ports: Optional[List[int]] = None,
) -> None:
    """Write unit.run/unit.meta/unit.poststop/unit.image for a daemon,
    install its systemd unit, and enable/start it.

    Files are written to '<name>.new' and renamed into place so readers
    never see a partially written file.
    """
    # cmd
    data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    with open(data_dir + '/unit.run.new', 'w') as f, \
            open(data_dir + '/unit.meta.new', 'w') as metaf:
        f.write('set -e\n')

        if daemon_type in Ceph.daemons:
            # ensure the per-cluster run dir exists with the right ownership
            install_path = find_program('install')
            f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))

        # pre-start cmd(s)
        if daemon_type == 'osd':
            # osds have a pre-start step
            assert osd_fsid
            simple_fn = os.path.join('/etc/ceph/osd',
                                     '%s-%s.json.adopted-by-cephadm' % (daemon_id, osd_fsid))
            if os.path.exists(simple_fn):
                f.write('# Simple OSDs need chown on startup:\n')
                for n in ['block', 'block.db', 'block.wal']:
                    p = os.path.join(data_dir, n)
                    f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
            else:
                prestart = CephContainer(
                    ctx,
                    image=ctx.image,
                    entrypoint='/usr/sbin/ceph-volume',
                    args=[
                        'lvm', 'activate',
                        str(daemon_id), osd_fsid,
                        '--no-systemd'
                    ],
                    privileged=True,
                    volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                    bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                    cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
                    memory_request=ctx.memory_request,
                    memory_limit=ctx.memory_limit,
                )
                _write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate')
        elif daemon_type == NFSGanesha.daemon_type:
            # add nfs to the rados grace db
            nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
            prestart = nfs_ganesha.get_rados_grace_container('add')
            _write_container_cmd_to_bash(ctx, f, prestart, 'add daemon to rados grace')
        elif daemon_type == CephIscsi.daemon_type:
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
            ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            _write_container_cmd_to_bash(ctx, f, tcmu_container, 'iscsi tcmu-runnter container', background=True)
        elif daemon_type == Keepalived.daemon_type:
            f.write(Keepalived.get_prestart())

        _write_container_cmd_to_bash(ctx, f, c, '%s.%s' % (daemon_type, str(daemon_id)))

        # some metadata about the deploy
        meta: Dict[str, Any] = {}
        if 'meta_json' in ctx and ctx.meta_json:
            meta = json.loads(ctx.meta_json) or {}
        meta.update({
            'memory_request': int(ctx.memory_request) if ctx.memory_request else None,
            'memory_limit': int(ctx.memory_limit) if ctx.memory_limit else None,
        })
        if not meta.get('ports'):
            meta['ports'] = ports
        metaf.write(json.dumps(meta, indent=4) + '\n')

        os.fchmod(f.fileno(), 0o600)
        os.fchmod(metaf.fileno(), 0o600)
        os.rename(data_dir + '/unit.run.new',
                  data_dir + '/unit.run')
        os.rename(data_dir + '/unit.meta.new',
                  data_dir + '/unit.meta')

    # post-stop command(s)
    with open(data_dir + '/unit.poststop.new', 'w') as f:
        if daemon_type == 'osd':
            assert osd_fsid
            poststop = CephContainer(
                ctx,
                image=ctx.image,
                entrypoint='/usr/sbin/ceph-volume',
                args=[
                    'lvm', 'deactivate',
                    str(daemon_id), osd_fsid,
                ],
                privileged=True,
                volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type,
                                                    daemon_id),
            )
            _write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd')
        elif daemon_type == NFSGanesha.daemon_type:
            # remove nfs from the rados grace db
            nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
            poststop = nfs_ganesha.get_rados_grace_container('remove')
            _write_container_cmd_to_bash(ctx, f, poststop, 'remove daemon from rados grace')
        elif daemon_type == CephIscsi.daemon_type:
            # make sure we also stop the tcmu container
            ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            f.write('! ' + ' '.join(tcmu_container.stop_cmd()) + '\n')
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
        os.fchmod(f.fileno(), 0o600)
        os.rename(data_dir + '/unit.poststop.new',
                  data_dir + '/unit.poststop')

    if c:
        # record the image this daemon was deployed with
        with open(data_dir + '/unit.image.new', 'w') as f:
            f.write(c.image + '\n')
            os.fchmod(f.fileno(), 0o600)
            os.rename(data_dir + '/unit.image.new',
                      data_dir + '/unit.image')

    # systemd
    install_base_units(ctx, fsid)
    unit = get_unit_file(ctx, fsid)
    unit_file = 'ceph-%s@.service' % (fsid)
    with open(ctx.unit_dir + '/' + unit_file + '.new', 'w') as f:
        f.write(unit)
        os.rename(ctx.unit_dir + '/' + unit_file + '.new',
                  ctx.unit_dir + '/' + unit_file)
    call_throws(ctx, ['systemctl', 'daemon-reload'])

    unit_name = get_unit_name(fsid, daemon_type, daemon_id)
    call(ctx, ['systemctl', 'stop', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'reset-failed', unit_name],
         verbosity=CallVerbosity.DEBUG)
    if enable:
        call_throws(ctx, ['systemctl', 'enable', unit_name])
    if start:
        call_throws(ctx, ['systemctl', 'start', unit_name])
9f95a23c 2807
f6b5b4d7
TL
2808
class Firewalld(object):
    """Thin wrapper over firewall-cmd to manage the services and tcp ports
    that ceph daemons need in the current (permanent) firewalld zone."""

    def __init__(self, ctx):
        # type: (CephadmContext) -> None
        self.ctx = ctx
        # probed once here; every public method consults this flag first
        self.available = self.check()

    def check(self):
        # type: () -> bool
        """Return True iff firewall-cmd exists and firewalld.service is
        both enabled and running."""
        self.cmd = find_executable('firewall-cmd')
        if not self.cmd:
            logger.debug('firewalld does not appear to be present')
            return False
        enabled, state, _ = check_unit(self.ctx, 'firewalld.service')
        if enabled and state == 'running':
            logger.info('firewalld ready')
            return True
        if not enabled:
            logger.debug('firewalld.service is not enabled')
        else:
            logger.debug('firewalld.service is not running')
        return False

    def enable_service_for(self, daemon_type):
        # type: (str) -> None
        """Permanently enable the firewalld service matching daemon_type
        (no-op for daemon types with no firewalld service)."""
        if not self.available:
            logger.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type)
            return

        svc_for_daemon = {
            'mon': 'ceph-mon',
            'mgr': 'ceph',
            'mds': 'ceph',
            'osd': 'ceph',
            NFSGanesha.daemon_type: 'nfs',
        }
        svc = svc_for_daemon.get(daemon_type)
        if svc is None:
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        _, err, rc = call(self.ctx, [self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG)
        if not rc:
            logger.debug('firewalld service %s is enabled in current zone' % svc)
            return
        logger.info('Enabling firewalld service %s in current zone...' % svc)
        _, err, rc = call(self.ctx, [self.cmd, '--permanent', '--add-service', svc])
        if rc:
            raise RuntimeError(
                'unable to add service %s to current zone: %s' % (svc, err))

    def open_ports(self, fw_ports):
        # type: (List[int]) -> None
        """Permanently open each port in fw_ports as <port>/tcp."""
        if not self.available:
            logger.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports)
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        for p in fw_ports:
            spec = '{}/tcp'.format(p)
            _, err, missing = call(self.ctx, [self.cmd, '--permanent', '--query-port', spec], verbosity=CallVerbosity.DEBUG)
            if not missing:
                logger.debug('firewalld port %s is enabled in current zone' % spec)
                continue
            logger.info('Enabling firewalld port %s in current zone...' % spec)
            _, err, rc = call(self.ctx, [self.cmd, '--permanent', '--add-port', spec])
            if rc:
                raise RuntimeError('unable to add port %s to current zone: %s' %
                                   (spec, err))

    def close_ports(self, fw_ports):
        # type: (List[int]) -> None
        """Permanently close each port in fw_ports (<port>/tcp), skipping
        ports that are not currently open."""
        if not self.available:
            logger.debug('Not possible to close ports <%s>. firewalld.service is not available' % fw_ports)
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        for p in fw_ports:
            spec = '{}/tcp'.format(p)
            _, err, absent = call(self.ctx, [self.cmd, '--permanent', '--query-port', spec], verbosity=CallVerbosity.DEBUG)
            if absent:
                logger.info(f'firewalld port {spec} already closed')
                continue
            logger.info('Disabling port %s in current zone...' % spec)
            _, err, rc = call(self.ctx, [self.cmd, '--permanent', '--remove-port', spec])
            if rc:
                raise RuntimeError('unable to remove port %s from current zone: %s' %
                                   (spec, err))
            logger.info(f'Port {spec} disabled')

    def apply_rules(self):
        # type: () -> None
        """Reload firewalld so the permanent changes above take effect."""
        if not self.available:
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        call_throws(self.ctx, [self.cmd, '--reload'])
f67539c2
TL
2914
def update_firewalld(ctx, daemon_type):
    # type: (CephadmContext, str) -> None
    """Enable the firewalld service and any monitoring ports for daemon_type."""
    fw = Firewalld(ctx)
    fw.enable_service_for(daemon_type)
    # prometheus etc
    fw.open_ports(list(Monitoring.port_map.get(daemon_type, [])))
    fw.apply_rules()
9f95a23c 2928
f67539c2
TL
2929
def install_base_units(ctx, fsid):
    # type: (CephadmContext, str) -> None
    """
    Set up ceph.target and ceph-$fsid.target units.

    Writes both unit files atomically (write .new, then rename), enables and
    starts them only on first creation, and installs a logrotate config for
    the cluster's log directory.
    """
    # global unit
    existed = os.path.exists(ctx.unit_dir + '/ceph.target')
    with open(ctx.unit_dir + '/ceph.target.new', 'w') as f:
        f.write('[Unit]\n'
                'Description=All Ceph clusters and services\n'
                '\n'
                '[Install]\n'
                'WantedBy=multi-user.target\n')
        os.rename(ctx.unit_dir + '/ceph.target.new',
                  ctx.unit_dir + '/ceph.target')
    if not existed:
        # we disable before enable in case a different ceph.target
        # (from the traditional package) is present; while newer
        # systemd is smart enough to disable the old
        # (/lib/systemd/...) and enable the new (/etc/systemd/...),
        # some older versions of systemd error out with EEXIST.
        call_throws(ctx, ['systemctl', 'disable', 'ceph.target'])
        call_throws(ctx, ['systemctl', 'enable', 'ceph.target'])
        call_throws(ctx, ['systemctl', 'start', 'ceph.target'])

    # cluster unit
    existed = os.path.exists(ctx.unit_dir + '/ceph-%s.target' % fsid)
    with open(ctx.unit_dir + '/ceph-%s.target.new' % fsid, 'w') as f:
        f.write(
            '[Unit]\n'
            'Description=Ceph cluster {fsid}\n'
            'PartOf=ceph.target\n'
            'Before=ceph.target\n'
            '\n'
            '[Install]\n'
            'WantedBy=multi-user.target ceph.target\n'.format(
                fsid=fsid)
        )
        os.rename(ctx.unit_dir + '/ceph-%s.target.new' % fsid,
                  ctx.unit_dir + '/ceph-%s.target' % fsid)
    if not existed:
        call_throws(ctx, ['systemctl', 'enable', 'ceph-%s.target' % fsid])
        call_throws(ctx, ['systemctl', 'start', 'ceph-%s.target' % fsid])

    # logrotate for the cluster
    with open(ctx.logrotate_dir + '/ceph-%s' % fsid, 'w') as f:
        """
        This is a bit sloppy in that the killall/pkill will touch all ceph daemons
        in all containers, but I don't see an elegant way to send SIGHUP *just* to
        the daemons for this cluster. (1) systemd kill -s will get the signal to
        podman, but podman will exit. (2) podman kill will get the signal to the
        first child (bash), but that isn't the ceph daemon. This is simpler and
        should be harmless.
        """
        f.write("""# created by cephadm
/var/log/ceph/%s/*.log {
    rotate 7
    daily
    compress
    sharedscripts
    postrotate
        killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror cephfs-mirror || pkill -1 -x 'ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror|cephfs-mirror' || true
    endscript
    missingok
    notifempty
    su root root
}
""" % fsid)
2998
f6b5b4d7 2999
f67539c2
TL
def get_unit_file(ctx, fsid):
    # type: (CephadmContext, str) -> str
    """Return the content of the templated ceph-<fsid>@.service unit file.

    The %i systemd instance specifier is the '<daemon_type>.<daemon_id>'
    name; the unit runs the per-daemon unit.run/unit.poststop scripts
    written by deploy_daemon_units().
    """
    extra_args = ''
    if isinstance(ctx.container_engine, Podman):
        # podman forks: track the conmon pid via PIDFile and clean up the
        # pid/cid files around start/stop
        extra_args = ('ExecStartPre=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
                      'ExecStopPost=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
                      'Type=forking\n'
                      'PIDFile=%t/%n-pid\n')
        if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION:
            extra_args += 'Delegate=yes\n'

    docker = isinstance(ctx.container_engine, Docker)
    u = """# generated by cephadm
[Unit]
Description=Ceph %i for {fsid}

# According to:
# http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
# these can be removed once ceph-mon will dynamically change network
# configuration.
After=network-online.target local-fs.target time-sync.target{docker_after}
Wants=network-online.target local-fs.target time-sync.target
{docker_requires}

PartOf=ceph-{fsid}.target
Before=ceph-{fsid}.target

[Service]
LimitNOFILE=1048576
LimitNPROC=1048576
EnvironmentFile=-/etc/environment
ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
ExecStop=-{container_path} stop ceph-{fsid}-%i
ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
KillMode=none
Restart=on-failure
RestartSec=10s
TimeoutStartSec=120
TimeoutStopSec=120
StartLimitInterval=30min
StartLimitBurst=5
{extra_args}
[Install]
WantedBy=ceph-{fsid}.target
""".format(container_path=ctx.container_engine.path,
           fsid=fsid,
           data_dir=ctx.data_dir,
           extra_args=extra_args,
           # if docker, we depend on docker.service
           docker_after=' docker.service' if docker else '',
           docker_requires='Requires=docker.service\n' if docker else '')

    return u
3053
3054##################################
3055
f6b5b4d7 3056
9f95a23c
TL
class CephContainer:
    """Assembles docker/podman command lines (run/exec/stop/rm) for a single
    ceph-related container."""

    def __init__(self,
                 ctx: CephadmContext,
                 image: str,
                 entrypoint: str,
                 args: Optional[List[str]] = None,
                 volume_mounts: Optional[Dict[str, str]] = None,
                 cname: str = '',
                 container_args: Optional[List[str]] = None,
                 envs: Optional[List[str]] = None,
                 privileged: bool = False,
                 ptrace: bool = False,
                 bind_mounts: Optional[List[List[str]]] = None,
                 init: Optional[bool] = None,
                 host_network: bool = True,
                 memory_request: Optional[str] = None,
                 memory_limit: Optional[str] = None,
                 ) -> None:
        """
        :param entrypoint: binary to run inside the container ('' keeps the
            image default)
        :param args: arguments passed to the entrypoint
        :param volume_mounts: host_dir -> container_dir map ('-v' mounts)
        :param cname: container name (engine --name)
        :param bind_mounts: each entry is a list of '--mount' key=value pieces
        :param init: run under the engine's --init supervisor; defaults to
            ctx.container_init when not given
        :param memory_request/memory_limit: byte counts exported to the
            daemon (and, for the limit, enforced via --memory)
        """
        self.ctx = ctx
        self.image = image
        self.entrypoint = entrypoint
        # BUGFIX: the defaults used to be the mutable literals [] / {},
        # which are shared across every instance constructed with the
        # default; use None sentinels and create fresh objects instead.
        self.args = args if args is not None else []
        self.volume_mounts = volume_mounts if volume_mounts is not None else {}
        self.cname = cname
        self.container_args = container_args if container_args is not None else []
        self.envs = envs
        self.privileged = privileged
        self.ptrace = ptrace
        self.bind_mounts = bind_mounts if bind_mounts else []
        self.init = init if init else ctx.container_init
        self.host_network = host_network
        self.memory_request = memory_request
        self.memory_limit = memory_limit

    def run_cmd(self) -> List[str]:
        """Return the full `<engine> run ...` argv for this container."""
        cmd_args: List[str] = [
            str(self.ctx.container_engine.path),
            'run',
            '--rm',
            '--ipc=host',
        ]

        if isinstance(self.ctx.container_engine, Podman):
            # podman adds the container *name* to /etc/hosts (for 127.0.1.1)
            # by default, which makes python's socket.getfqdn() return that
            # instead of a valid hostname.
            cmd_args.append('--no-hosts')
            if os.path.exists('/etc/ceph/podman-auth.json'):
                cmd_args.append('--authfile=/etc/ceph/podman-auth.json')

        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        vols: List[str] = []
        binds: List[str] = []

        # BUGFIX: these env vars must be passed as '-e NAME=VALUE'; the old
        # form ['-e', 'NAME', value] left the value as a stray positional
        # argument, which the engine would read as the image name.
        if self.memory_request:
            cmd_args.extend(['-e', 'POD_MEMORY_REQUEST=%s' % self.memory_request])
        if self.memory_limit:
            cmd_args.extend(['-e', 'POD_MEMORY_LIMIT=%s' % self.memory_limit])
            cmd_args.extend(['--memory', str(self.memory_limit)])

        if self.host_network:
            cmd_args.append('--net=host')
        if self.entrypoint:
            cmd_args.extend(['--entrypoint', self.entrypoint])
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk'])
        if self.ptrace and not self.privileged:
            # if privileged, the SYS_PTRACE cap is already added
            # in addition, --cap-add and --privileged are mutually
            # exclusive since podman >= 2.0
            cmd_args.append('--cap-add=SYS_PTRACE')
        if self.init:
            cmd_args.append('--init')
            envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
        if self.cname:
            cmd_args.extend(['--name', self.cname])
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        binds = sum([['--mount', '{}'.format(','.join(bind))]
                     for bind in self.bind_mounts], [])

        return \
            cmd_args + self.container_args + \
            envs + vols + binds + \
            [self.image] + self.args  # type: ignore

    def shell_cmd(self, cmd: List[str]) -> List[str]:
        """Return argv running `cmd` (cmd[0] as entrypoint) in a fresh
        container with this container's mounts and env."""
        cmd_args: List[str] = [
            str(self.ctx.container_engine.path),
            'run',
            '--rm',
            '--ipc=host',
        ]
        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        vols: List[str] = []
        binds: List[str] = []

        if self.host_network:
            cmd_args.append('--net=host')
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk',
            ])
        if self.init:
            cmd_args.append('--init')
            envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        binds = sum([['--mount', '{}'.format(','.join(bind))]
                     for bind in self.bind_mounts], [])

        return cmd_args + self.container_args + envs + vols + binds + [
            '--entrypoint', cmd[0],
            self.image,
        ] + cmd[1:]

    def exec_cmd(self, cmd):
        # type: (List[str]) -> List[str]
        """Return argv that execs `cmd` inside the running container."""
        return [
            str(self.ctx.container_engine.path),
            'exec',
        ] + self.container_args + [
            self.cname,
        ] + cmd

    def rm_cmd(self, storage=False):
        # type: (bool) -> List[str]
        """Return argv that force-removes the container (and optionally
        its external storage, podman --storage)."""
        ret = [
            str(self.ctx.container_engine.path),
            'rm', '-f',
        ]
        if storage:
            ret.append('--storage')
        ret.append(self.cname)
        return ret

    def stop_cmd(self):
        # type: () -> List[str]
        """Return argv that stops the container."""
        ret = [
            str(self.ctx.container_engine.path),
            'stop', self.cname,
        ]
        return ret

    def run(self, timeout=DEFAULT_TIMEOUT):
        # type: (Optional[int]) -> str
        """Run the container to completion and return its stdout; raises
        (via call_throws) on non-zero exit."""
        out, _, _ = call_throws(self.ctx, self.run_cmd(),
                                desc=self.entrypoint, timeout=timeout)
        return out
3227
3228##################################
3229
f6b5b4d7 3230
@infer_image
def command_version(ctx):
    # type: (CephadmContext) -> int
    """Print the ceph version reported by the configured container image."""
    container = CephContainer(ctx, ctx.image, 'ceph', ['--version'])
    out, _, rc = call(ctx, container.run_cmd(), desc=container.entrypoint)
    if rc == 0:
        print(out.strip())
    return rc
9f95a23c
TL
3239
3240##################################
3241
f6b5b4d7 3242
@infer_image
def command_pull(ctx):
    # type: (CephadmContext) -> int
    """Pull ctx.image, then print its inspect metadata (id, digests, version)."""

    _pull_image(ctx, ctx.image)
    return command_inspect_image(ctx)
9f95a23c 3249
f6b5b4d7 3250
f67539c2
TL
def _pull_image(ctx, image):
    # type: (CephadmContext, str) -> None
    """Pull a container image, retrying (with backoff) on known-transient
    pull failures; raises RuntimeError on any other failure or after the
    last retry."""
    logger.info('Pulling container image %s...' % image)

    # stderr fragments that indicate a transient, retryable failure
    ignorelist = [
        'error creating read-write layer with ID',
        'net/http: TLS handshake timeout',
        'Digest did not match, expected',
    ]

    cmd = [ctx.container_engine.path, 'pull', image]
    if isinstance(ctx.container_engine, Podman) and os.path.exists('/etc/ceph/podman-auth.json'):
        cmd.append('--authfile=/etc/ceph/podman-auth.json')
    cmd_str = ' '.join(cmd)

    for delay in (1, 4, 25):
        _, err, rc = call(ctx, cmd)
        if rc == 0:
            return

        if not any(fragment in err for fragment in ignorelist):
            raise RuntimeError('Failed command: %s' % cmd_str)

        logger.info('`%s` failed transiently. Retrying. waiting %s seconds...' % (cmd_str, delay))
        time.sleep(delay)

    raise RuntimeError('Failed command: %s: maximum retries reached' % cmd_str)
f67539c2 3278
9f95a23c
TL
3279##################################
3280
f6b5b4d7 3281
@infer_image
def command_inspect_image(ctx):
    # type: (CephadmContext) -> int
    """Print image id, repo digests and ceph version for ctx.image as JSON.

    Returns 0 on success, errno.ENOENT if the image cannot be inspected.
    """
    out, err, ret = call_throws(ctx, [
        ctx.container_engine.path, 'inspect',
        '--format', '{{.ID}},{{.RepoDigests}}',
        ctx.image])
    if ret:
        return errno.ENOENT
    info_from = get_image_info_from_inspect(out.strip(), ctx.image)

    # run `ceph --version` inside the image to record the ceph release
    ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
    info_from['ceph_version'] = ver

    print(json.dumps(info_from, indent=4, sort_keys=True))
    return 0
3298
3299
f67539c2
TL
def normalize_image_digest(digest):
    # type: (str) -> str
    """Qualify an unqualified container image reference with DEFAULT_REGISTRY.

    normal case:
      ceph/ceph -> docker.io/ceph/ceph
    edge cases that shouldn't ever come up:
      ubuntu -> docker.io/ubuntu (ubuntu alias for library/ubuntu)
    no change:
      quay.ceph.io/ceph/ceph -> no change
      docker.io/ubuntu -> no change
      localhost/myimage -> no change
      registry:5000/img -> no change
    """
    bits = digest.split('/')
    # The first path component names a registry host iff it contains a '.'
    # (dns name) or ':' (port), or is the literal 'localhost'. The previous
    # `'.' not in bits[0] or len(bits) < 3` test wrongly re-qualified
    # two-component references such as docker.io/ubuntu.
    first = bits[0]
    has_registry = len(bits) > 1 and (
        '.' in first or ':' in first or first == 'localhost')
    if not has_registry:
        digest = DEFAULT_REGISTRY + '/' + digest
    return digest
3312
3313
def get_image_info_from_inspect(out, image):
    # type: (str, str) -> Dict[str, Union[str,List[str]]]
    """Parse `<engine> inspect --format {{.ID}},{{.RepoDigests}}` output.

    :param out: raw inspect output: '<image_id>,[<digest> <digest> ...]'
    :param image: image name, used only in the error message
    :returns: dict with 'image_id' and (when present) 'repo_digests'
    :raises Error: when the inspect output is empty
    """
    # BUGFIX: validate before splitting -- the empty check used to come
    # after the unpack, where it was unreachable because ''.split(',', 1)
    # yields a single element and the unpack raises ValueError first.
    if not out:
        raise Error('inspect {}: empty result'.format(image))
    image_id, digests = out.split(',', 1)
    r = {
        'image_id': normalize_container_id(image_id)
    }  # type: Dict[str, Union[str,List[str]]]
    if digests:
        # digests looks like '[d1 d2 ...]'; strip the brackets and normalize
        r['repo_digests'] = list(map(normalize_image_digest, digests[1:-1].split(' ')))
    return r
3325
9f95a23c
TL
3326##################################
3327
f91f0fd5 3328
f67539c2
TL
def check_subnet(subnets: str) -> Tuple[int, List[int], str]:
    """Determine whether the given string is a valid subnet

    :param subnets: subnet string, a single definition or comma separated list of CIDR subnets
    :returns: return code, IP version list of the subnets and msg describing any errors validation errors
    """
    rc = 0
    versions = set()
    errors = []
    for candidate in subnets.split(','):
        # require an explicit address/netmask before asking ipaddress to parse
        if re.search(r'\/\d+$', candidate) is None:
            rc = 1
            errors.append(f'{candidate} is not in CIDR format (address/netmask)')
            continue
        try:
            versions.add(ipaddress.ip_network(candidate).version)
        except ValueError as e:
            rc = 1
            errors.append(f'{candidate} invalid: {str(e)}')

    return rc, list(versions), ', '.join(errors)
3354
3355
f6b5b4d7
TL
def unwrap_ipv6(address):
    # type: (str) -> str
    """Strip surrounding square brackets from a bracketed IPv6 address;
    anything not bracketed is returned unchanged."""
    bracketed = address.startswith('[') and address.endswith(']')
    return address[1:-1] if bracketed else address
3361
3362
f91f0fd5
TL
def wrap_ipv6(address):
    # type: (str) -> str
    """Wrap a bare IPv6 address in square brackets; pass everything else
    (IPv4, hostnames, already-bracketed addresses) through unchanged.

    We cannot assume it's already wrapped or even an IPv6 address if
    it's already wrapped it'll not pass (like if it's a hostname) and trigger
    the ValueError
    """
    try:
        needs_brackets = ipaddress.ip_address(address).version == 6
    except ValueError:
        needs_brackets = False

    return f'[{address}]' if needs_brackets else address
3376
3377
f6b5b4d7
TL
def is_ipv6(address):
    # type: (str) -> bool
    """Return True when address (possibly bracketed) parses as IPv6;
    logs a warning and returns False for non-IP strings."""
    try:
        return ipaddress.ip_address(unwrap_ipv6(address)).version == 6
    except ValueError:
        logger.warning('Address: {} is not a valid IP address'.format(address))
        return False
3386
3387
f67539c2
TL
def prepare_mon_addresses(
    ctx: CephadmContext
) -> Tuple[str, bool, Optional[str]]:
    """Derive the mon addrv string, ipv6 flag and mon CIDR network from ctx.

    Uses --mon-ip (single address, optional port) or --mon-addrv (full
    bracketed addrv list); verifies the mon ports are free and, unless
    --skip-mon-network was given, infers the CIDR network containing the ip.

    :returns: (addrv argument, address is ipv6, mon network or None)
    :raises Error: on missing/invalid --mon-ip/--mon-addrv, occupied ports,
        or when the CIDR network cannot be inferred
    """
    r = re.compile(r':(\d+)$')
    base_ip = ''
    ipv6 = False

    if ctx.mon_ip:
        ipv6 = is_ipv6(ctx.mon_ip)
        if ipv6:
            ctx.mon_ip = wrap_ipv6(ctx.mon_ip)
        hasport = r.findall(ctx.mon_ip)
        if hasport:
            port = int(hasport[0])
            # an explicit port selects the matching protocol (v1 on 6789,
            # v2 on 3300); anything else is assumed to be msgr2
            if port == 6789:
                addr_arg = '[v1:%s]' % ctx.mon_ip
            elif port == 3300:
                addr_arg = '[v2:%s]' % ctx.mon_ip
            else:
                logger.warning('Using msgr2 protocol for unrecognized port %d' %
                               port)
                addr_arg = '[v2:%s]' % ctx.mon_ip
            base_ip = ctx.mon_ip[0:-(len(str(port))) - 1]
            check_ip_port(ctx, base_ip, port)
        else:
            base_ip = ctx.mon_ip
            addr_arg = '[v2:%s:3300,v1:%s:6789]' % (ctx.mon_ip, ctx.mon_ip)
            check_ip_port(ctx, ctx.mon_ip, 3300)
            check_ip_port(ctx, ctx.mon_ip, 6789)
    elif ctx.mon_addrv:
        addr_arg = ctx.mon_addrv
        if addr_arg[0] != '[' or addr_arg[-1] != ']':
            # BUGFIX: error message typo ('backets' -> 'brackets')
            raise Error('--mon-addrv value %s must use square brackets' %
                        addr_arg)
        ipv6 = addr_arg.count('[') > 1
        for addr in addr_arg[1:-1].split(','):
            hasport = r.findall(addr)
            if not hasport:
                raise Error('--mon-addrv value %s must include port number' %
                            addr_arg)
            port = int(hasport[0])
            # strip off v1: or v2: prefix
            addr = re.sub(r'^\w+:', '', addr)
            base_ip = addr[0:-(len(str(port))) - 1]
            check_ip_port(ctx, base_ip, port)
    else:
        raise Error('must specify --mon-ip or --mon-addrv')
    logger.debug('Base mon IP is %s, final addrv is %s' % (base_ip, addr_arg))

    mon_network = None
    if not ctx.skip_mon_network:
        # make sure IP is configured locally, and then figure out the
        # CIDR network
        for net, ifaces in list_networks(ctx).items():
            ips: List[str] = []
            for iface, ls in ifaces.items():
                ips.extend(ls)
            if ipaddress.ip_address(unwrap_ipv6(base_ip)) in \
                    [ipaddress.ip_address(ip) for ip in ips]:
                mon_network = net
                logger.info('Mon IP %s is in CIDR network %s' % (base_ip,
                                                                 mon_network))
                break
        if not mon_network:
            raise Error('Failed to infer CIDR network for mon ip %s; pass '
                        '--skip-mon-network to configure it later' % base_ip)

    return (addr_arg, ipv6, mon_network)
9f95a23c 3456
f6b5b4d7 3457
f67539c2
TL
def prepare_cluster_network(ctx: CephadmContext) -> Tuple[str, bool]:
    """Validate --cluster-network and report whether it contains IPv6.

    :returns: (cluster network string or '', network includes an IPv6 subnet)
    :raises Error: when the provided value is not a valid CIDR list
    """
    # the cluster network may not exist on this node, so all we can do is
    # validate that the address given is valid ipv4 or ipv6 subnet
    if not ctx.cluster_network:
        logger.info('- internal network (--cluster-network) has not '
                    'been provided, OSD replication will default to '
                    'the public_network')
        return '', False

    rc, versions, err_msg = check_subnet(ctx.cluster_network)
    if rc:
        raise Error(f'Invalid --cluster-network parameter: {err_msg}')
    return ctx.cluster_network, 6 in versions
3475
3476
def create_initial_keys(
    ctx: CephadmContext,
    uid: int, gid: int,
    mgr_id: str
) -> Tuple[str, str, str, Any, Any]:  # type: ignore
    """Generate mon/admin/mgr keys and write the bootstrap + admin keyrings.

    :returns: (mon_key, mgr_key, admin_key, bootstrap_keyring, admin_keyring)
        where the keyrings are temp-file objects owned by uid:gid
    """
    _image = ctx.image

    # create some initial keys
    logger.info('Creating initial keys...')

    def _gen_key() -> str:
        # ceph-authtool prints a fresh random key on stdout
        return CephContainer(
            ctx,
            image=_image,
            entrypoint='/usr/bin/ceph-authtool',
            args=['--gen-print-key'],
        ).run().strip()

    mon_key = _gen_key()
    admin_key = _gen_key()
    mgr_key = _gen_key()

    keyring = ('[mon.]\n'
               '\tkey = %s\n'
               '\tcaps mon = allow *\n'
               '[client.admin]\n'
               '\tkey = %s\n'
               '\tcaps mon = allow *\n'
               '\tcaps mds = allow *\n'
               '\tcaps mgr = allow *\n'
               '\tcaps osd = allow *\n'
               '[mgr.%s]\n'
               '\tkey = %s\n'
               '\tcaps mon = profile mgr\n'
               '\tcaps mds = allow *\n'
               '\tcaps osd = allow *\n'
               % (mon_key, admin_key, mgr_id, mgr_key))

    admin_keyring = write_tmp('[client.admin]\n'
                              '\tkey = ' + admin_key + '\n',
                              uid, gid)

    # tmp keyring file
    bootstrap_keyring = write_tmp(keyring, uid, gid)
    return (mon_key, mgr_key, admin_key,
            bootstrap_keyring, admin_keyring)
3530
9f95a23c 3531
f67539c2
TL
def create_initial_monmap(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str,
    mon_id: str, mon_addr: str
) -> Any:
    """Build the initial monmap with monmaptool.

    :returns: the temp file holding the monmap, chowned to uid:gid so the
        subsequent `ceph-mon --mkfs` (running as the ceph user) can read it
    """
    logger.info('Creating initial monmap...')
    # empty temp file, bind-mounted into the container as /tmp/monmap
    monmap = write_tmp('', 0, 0)
    out = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/monmaptool',
        args=[
            '--create',
            '--clobber',
            '--fsid', fsid,
            '--addv', mon_id, mon_addr,
            '/tmp/monmap'
        ],
        volume_mounts={
            monmap.name: '/tmp/monmap:z',
        },
    ).run()
    logger.debug(f'monmaptool for {mon_id} {mon_addr} on {out}')

    # pass monmap file to ceph user for use by ceph-mon --mkfs below
    os.fchown(monmap.fileno(), uid, gid)
    return monmap
9f95a23c 3560
f67539c2
TL
3561
def prepare_create_mon(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mon_id: str,
    bootstrap_keyring_path: str,
    monmap_path: str
):
    """Initialize the mon data directory via `ceph-mon --mkfs`.

    :param bootstrap_keyring_path: host path of the bootstrap keyring,
        mounted into the container as /tmp/keyring
    :param monmap_path: host path of the initial monmap, mounted as /tmp/monmap
    :returns: (mon data dir, log dir) on the host
    """
    logger.info('Creating mon...')
    create_daemon_dirs(ctx, fsid, 'mon', mon_id, uid, gid)
    mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', mon_id)
    log_dir = get_log_dir(fsid, ctx.log_dir)
    out = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/ceph-mon',
        args=[
            '--mkfs',
            '-i', mon_id,
            '--fsid', fsid,
            '-c', '/dev/null',
            '--monmap', '/tmp/monmap',
            '--keyring', '/tmp/keyring',
        ] + get_daemon_args(ctx, fsid, 'mon', mon_id),
        volume_mounts={
            log_dir: '/var/log/ceph:z',
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
            bootstrap_keyring_path: '/tmp/keyring:z',
            monmap_path: '/tmp/monmap:z',
        },
    ).run()
    logger.debug(f'create mon.{mon_id} on {out}')
    return (mon_dir, log_dir)
3594
3595
def create_mon(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mon_id: str
) -> None:
    """Deploy the mon daemon (container, systemd unit, firewall) via
    deploy_daemon()."""
    mon_c = get_container(ctx, fsid, 'mon', mon_id)
    ctx.meta_json = json.dumps({'service_name': 'mon'})
    deploy_daemon(ctx, fsid, 'mon', mon_id, mon_c, uid, gid,
                  config=None, keyring=None)
3605
9f95a23c 3606
f67539c2
TL
def wait_for_mon(
    ctx: CephadmContext,
    mon_id: str, mon_dir: str,
    admin_keyring_path: str, config_path: str
):
    """Block (via is_available) until `ceph status` succeeds against the
    freshly created mon."""
    logger.info('Waiting for mon to start...')
    status_container = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/ceph',
        args=[
            'status'],
        volume_mounts={
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
            admin_keyring_path: '/etc/ceph/ceph.client.admin.keyring:z',
            config_path: '/etc/ceph/ceph.conf:z',
        },
    )

    # wait for the service to become available
    def is_mon_available():
        # type: () -> bool
        probe_timeout = ctx.timeout if ctx.timeout else 60  # seconds
        _, _, rc = call(ctx, status_container.run_cmd(),
                        desc=status_container.entrypoint,
                        timeout=probe_timeout)
        return rc == 0

    is_available(ctx, 'mon', is_mon_available)
3636
3637
def create_mgr(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mgr_id: str, mgr_key: str,
    config: str, clifunc: Callable
) -> None:
    """Deploy the mgr daemon, then wait until the mgrmap reports it available.

    :param clifunc: callable that runs a `ceph` CLI command and returns stdout
    """
    logger.info('Creating mgr...')
    mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key)
    mgr_c = get_container(ctx, fsid, 'mgr', mgr_id)
    # Note:the default port used by the Prometheus node exporter is opened in fw
    ctx.meta_json = json.dumps({'service_name': 'mgr'})
    deploy_daemon(ctx, fsid, 'mgr', mgr_id, mgr_c, uid, gid,
                  config=config, keyring=mgr_keyring, ports=[9283])

    # wait for the service to become available
    logger.info('Waiting for mgr to start...')

    def is_mgr_available():
        # type: () -> bool
        timeout = ctx.timeout if ctx.timeout else 60  # seconds
        try:
            out = clifunc(['status', '-f', 'json-pretty'], timeout=timeout)
            j = json.loads(out)
            return j.get('mgrmap', {}).get('available', False)
        except Exception as e:
            # mon may not be answering yet; treat any failure as "not ready"
            logger.debug('status failed: %s' % e)
            return False
    is_available(ctx, 'mgr', is_mgr_available)
3666
3667
def prepare_ssh(
    ctx: CephadmContext,
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:
    """Set up the cephadm mgr module's SSH identity, authorize it for
    ssh_user@localhost, register this host with the orchestrator, and
    apply the initial service specs (mon/mgr/crash and, unless skipped,
    the monitoring stack).
    """
    cli(['cephadm', 'set-user', ctx.ssh_user])

    if ctx.ssh_config:
        # user supplied an ssh config for the orchestrator to use
        logger.info('Using provided ssh config...')
        mounts = {
            pathify(ctx.ssh_config.name): '/tmp/cephadm-ssh-config:z',
        }
        cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts=mounts)

    if ctx.ssh_private_key and ctx.ssh_public_key:
        logger.info('Using provided ssh keys...')
        mounts = {
            pathify(ctx.ssh_private_key.name): '/tmp/cephadm-ssh-key:z',
            pathify(ctx.ssh_public_key.name): '/tmp/cephadm-ssh-key.pub:z'
        }
        cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts)
        cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts=mounts)
    else:
        logger.info('Generating ssh key...')
        cli(['cephadm', 'generate-key'])
    ssh_pub = cli(['cephadm', 'get-pub-key'])

    with open(ctx.output_pub_ssh_key, 'w') as f:
        f.write(ssh_pub)
    logger.info('Wrote public SSH key to %s' % ctx.output_pub_ssh_key)

    logger.info('Adding key to %s@localhost authorized_keys...' % ctx.ssh_user)
    try:
        pw_ent = pwd.getpwnam(ctx.ssh_user)
    except KeyError:
        raise Error('Cannot find uid/gid for ssh-user: %s' % (ctx.ssh_user))
    ssh_uid = pw_ent.pw_uid
    ssh_gid = pw_ent.pw_gid
    ssh_dir = os.path.join(pw_ent.pw_dir, '.ssh')

    if not os.path.exists(ssh_dir):
        makedirs(ssh_dir, ssh_uid, ssh_gid, 0o700)

    auth_keys_file = '%s/authorized_keys' % ssh_dir

    # peek at the last byte so we only insert a separating newline when
    # the existing file does not already end with one
    add_newline = False
    if os.path.exists(auth_keys_file):
        with open(auth_keys_file, 'r') as f:
            f.seek(0, os.SEEK_END)
            if f.tell() > 0:
                f.seek(f.tell() - 1, os.SEEK_SET)  # go to last char
                if f.read() != '\n':
                    add_newline = True

    with open(auth_keys_file, 'a') as f:
        os.fchown(f.fileno(), ssh_uid, ssh_gid)  # just in case we created it
        os.fchmod(f.fileno(), 0o600)  # just in case we created it
        if add_newline:
            f.write('\n')
        f.write(ssh_pub.strip() + '\n')

    host = get_hostname()
    logger.info('Adding host %s...' % host)
    try:
        add_host_cmd = ['orch', 'host', 'add', host]
        if ctx.mon_ip:
            add_host_cmd.append(ctx.mon_ip)
        cli(add_host_cmd)
    except RuntimeError as e:
        raise Error('Failed to add host <%s>: %s' % (host, e))

    for t in ['mon', 'mgr']:
        if not ctx.orphan_initial_daemons:
            logger.info('Deploying %s service with default placement...' % t)
            cli(['orch', 'apply', t])
        else:
            logger.info('Deploying unmanaged %s service...' % t)
            cli(['orch', 'apply', t, '--unmanaged'])

    if not ctx.orphan_initial_daemons:
        logger.info('Deploying crash service with default placement...')
        cli(['orch', 'apply', 'crash'])

    if not ctx.skip_monitoring_stack:
        logger.info('Enabling mgr prometheus module...')
        cli(['mgr', 'module', 'enable', 'prometheus'])
        for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
            logger.info('Deploying %s service with default placement...' % t)
            cli(['orch', 'apply', t])
3757
3758
def enable_cephadm_mgr_module(
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:
    """Turn on the cephadm mgr module and select it as the orchestrator
    backend."""
    logger.info('Enabling cephadm module...')
    cli(['mgr', 'module', 'enable', 'cephadm'])
    # enabling a module restarts the active mgr; wait for it to come
    # back before issuing the next command
    wait_for_mgr_restart()
    logger.info('Setting orchestrator backend to cephadm...')
    cli(['orch', 'set', 'backend', 'cephadm'])
3768
3769
def prepare_dashboard(
    ctx: CephadmContext,
    uid: int, gid: int,
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:
    """Enable the dashboard mgr module, install (or self-sign) its SSL
    certificate, create the initial admin user, open the firewall port,
    and print the access URL/credentials.
    """
    # Configure SSL port (cephadm only allows to configure dashboard SSL port)
    # if the user does not want to use SSL he can change this setting once the cluster is up
    cli(['config', 'set', 'mgr', 'mgr/dashboard/ssl_server_port', str(ctx.ssl_dashboard_port)])

    # configuring dashboard parameters
    logger.info('Enabling the dashboard module...')
    cli(['mgr', 'module', 'enable', 'dashboard'])
    wait_for_mgr_restart()

    # dashboard crt and key
    if ctx.dashboard_key and ctx.dashboard_crt:
        logger.info('Using provided dashboard certificate...')
        cert_mounts = {
            pathify(ctx.dashboard_crt.name): '/tmp/dashboard.crt:z',
            pathify(ctx.dashboard_key.name): '/tmp/dashboard.key:z'
        }
        cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=cert_mounts)
        cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=cert_mounts)
    else:
        logger.info('Generating a dashboard self-signed certificate...')
        cli(['dashboard', 'create-self-signed-cert'])

    logger.info('Creating initial admin user...')
    password = ctx.initial_dashboard_password or generate_password()
    tmp_password_file = write_tmp(password, uid, gid)
    cmd = ['dashboard', 'ac-user-create', ctx.initial_dashboard_user, '-i', '/tmp/dashboard.pw', 'administrator', '--force-password']
    if not ctx.dashboard_password_noupdate:
        cmd.append('--pwd-update-required')
    cli(cmd, extra_mounts={pathify(tmp_password_file.name): '/tmp/dashboard.pw:z'})
    logger.info('Fetching dashboard port number...')
    out = cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
    port = int(out)

    # Open dashboard port
    fw = Firewalld(ctx)
    fw.open_ports([port])
    fw.apply_rules()

    logger.info('Ceph Dashboard is now available at:\n\n'
                '\t     URL: https://%s:%s/\n'
                '\t    User: %s\n'
                '\tPassword: %s\n' % (
                    get_fqdn(), port,
                    ctx.initial_dashboard_user,
                    password))
3821
3822
def prepare_bootstrap_config(
    ctx: CephadmContext,
    fsid: str, mon_addr: str, image: str

) -> str:
    """Build the initial ceph.conf text for bootstrap and, when registry
    credentials were supplied, log in to the custom registry.

    Returns the rendered config file contents as a string.
    """
    conf = read_config(ctx.config)
    if not conf.has_section('global'):
        conf.add_section('global')
    conf.set('global', 'fsid', fsid)
    conf.set('global', 'mon_host', mon_addr)
    conf.set('global', 'container_image', image)
    if not conf.has_section('mon'):
        conf.add_section('mon')
    # freshly-bootstrapped clusters should not allow insecure global_id
    # reclaim unless the user explicitly configured it
    already_set = (
        conf.has_option('mon', 'auth_allow_insecure_global_id_reclaim')
        or conf.has_option('mon', 'auth allow insecure global id reclaim')
    )
    if not already_set:
        conf.set('mon', 'auth_allow_insecure_global_id_reclaim', 'false')
    buf = StringIO()
    conf.write(buf)
    config = buf.getvalue()

    if ctx.registry_json or ctx.registry_url:
        command_registry_login(ctx)

    return config
3850
3851
def finish_bootstrap_config(
    ctx: CephadmContext,
    fsid: str,
    config: str,
    mon_id: str, mon_dir: str,
    mon_network: Optional[str], ipv6: bool,
    cli: Callable,
    cluster_network: Optional[str], ipv6_cluster_network: bool

) -> None:
    """Finalize the bootstrap ceph.conf.

    Unless --no-minimize-config was given, assimilate the provided conf
    into the mon config store, regenerate a minimal conf, and restart the
    bootstrap monitor so it picks it up.  Then set the public/cluster
    networks and IPv6 binding as needed, and write the resulting config
    to ctx.output_config.
    """
    if not ctx.no_minimize_config:
        logger.info('Assimilating anything we can from ceph.conf...')
        cli([
            'config', 'assimilate-conf',
            '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
        ], {
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
        })
        logger.info('Generating new minimal ceph.conf...')
        cli([
            'config', 'generate-minimal-conf',
            '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
        ], {
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
        })
        # re-read our minimized config
        with open(mon_dir + '/config', 'r') as f:
            config = f.read()
        logger.info('Restarting the monitor...')
        call_throws(ctx, [
            'systemctl',
            'restart',
            get_unit_name(fsid, 'mon', mon_id)
        ])

    if mon_network:
        logger.info(f'Setting mon public_network to {mon_network}')
        cli(['config', 'set', 'mon', 'public_network', mon_network])

    if cluster_network:
        logger.info(f'Setting cluster_network to {cluster_network}')
        cli(['config', 'set', 'global', 'cluster_network', cluster_network])

    if ipv6 or ipv6_cluster_network:
        logger.info('Enabling IPv6 (ms_bind_ipv6) binding')
        cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])

    with open(ctx.output_config, 'w') as f:
        f.write(config)
    logger.info('Wrote config to %s' % ctx.output_config)
3903
3904
@default_image
def command_bootstrap(ctx):
    # type: (CephadmContext) -> int
    """Bootstrap a new Ceph cluster on this host.

    Creates the initial mon and mgr, enables the cephadm orchestrator,
    and (optionally) sets up SSH, the monitoring stack, the dashboard,
    the cephadm exporter, and an initial service spec.  Returns 0 on
    success; raises Error on unrecoverable problems.
    """
    if not ctx.output_config:
        ctx.output_config = os.path.join(ctx.output_dir, 'ceph.conf')
    if not ctx.output_keyring:
        ctx.output_keyring = os.path.join(ctx.output_dir,
                                          'ceph.client.admin.keyring')
    if not ctx.output_pub_ssh_key:
        ctx.output_pub_ssh_key = os.path.join(ctx.output_dir, 'ceph.pub')

    # verify output files
    for f in [ctx.output_config, ctx.output_keyring,
              ctx.output_pub_ssh_key]:
        if not ctx.allow_overwrite:
            if os.path.exists(f):
                raise Error('%s already exists; delete or pass '
                            '--allow-overwrite to overwrite' % f)
        dirname = os.path.dirname(f)
        if dirname and not os.path.exists(dirname):
            fname = os.path.basename(f)
            logger.info(f'Creating directory {dirname} for {fname}')
            try:
                # use makedirs to create intermediate missing dirs
                os.makedirs(dirname, 0o755)
            except PermissionError:
                raise Error(f'Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.')

    if not ctx.skip_prepare_host:
        command_prepare_host(ctx)
    else:
        logger.info('Skip prepare_host')

    # initial vars
    fsid = ctx.fsid or make_fsid()
    hostname = get_hostname()
    if '.' in hostname and not ctx.allow_fqdn_hostname:
        raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0]))
    mon_id = ctx.mon_id or hostname
    mgr_id = ctx.mgr_id or generate_service_id()
    logger.info('Cluster fsid: %s' % fsid)

    lock = FileLock(ctx, fsid)
    lock.acquire()

    (addr_arg, ipv6, mon_network) = prepare_mon_addresses(ctx)
    cluster_network, ipv6_cluster_network = prepare_cluster_network(ctx)

    config = prepare_bootstrap_config(ctx, fsid, addr_arg, ctx.image)

    if not ctx.skip_pull:
        _pull_image(ctx, ctx.image)

    image_ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
    logger.info(f'Ceph version: {image_ver}')
    image_release = image_ver.split()[4]
    if (
        not ctx.allow_mismatched_release
        and image_release not in [DEFAULT_IMAGE_RELEASE, LATEST_STABLE_RELEASE]
    ):
        raise Error(
            f'Container release {image_release} != cephadm release {DEFAULT_IMAGE_RELEASE}; please use matching version of cephadm (pass --allow-mismatched-release to continue anyway)'
        )

    logger.info('Extracting ceph user uid/gid from container image...')
    (uid, gid) = extract_uid_gid(ctx)

    # create some initial keys
    (mon_key, mgr_key, admin_key, bootstrap_keyring, admin_keyring) = \
        create_initial_keys(ctx, uid, gid, mgr_id)

    monmap = create_initial_monmap(ctx, uid, gid, fsid, mon_id, addr_arg)
    (mon_dir, log_dir) = \
        prepare_create_mon(ctx, uid, gid, fsid, mon_id,
                           bootstrap_keyring.name, monmap.name)

    with open(mon_dir + '/config', 'w') as f:
        os.fchown(f.fileno(), uid, gid)
        os.fchmod(f.fileno(), 0o600)
        f.write(config)

    make_var_run(ctx, fsid, uid, gid)
    create_mon(ctx, uid, gid, fsid, mon_id)

    # config to issue various CLI commands
    tmp_config = write_tmp(config, uid, gid)

    # a CLI helper to reduce our typing
    def cli(cmd, extra_mounts=None, timeout=DEFAULT_TIMEOUT):
        # type: (List[str], Optional[Dict[str, str]], Optional[int]) -> str
        # NOTE: extra_mounts defaults to None (not {}) to avoid the
        # shared-mutable-default-argument pitfall.
        mounts = {
            log_dir: '/var/log/ceph:z',
            admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
            tmp_config.name: '/etc/ceph/ceph.conf:z',
        }
        for k, v in (extra_mounts or {}).items():
            mounts[k] = v
        timeout = timeout or ctx.timeout
        return CephContainer(
            ctx,
            image=ctx.image,
            entrypoint='/usr/bin/ceph',
            args=cmd,
            volume_mounts=mounts,
        ).run(timeout=timeout)

    wait_for_mon(ctx, mon_id, mon_dir, admin_keyring.name, tmp_config.name)

    finish_bootstrap_config(ctx, fsid, config, mon_id, mon_dir,
                            mon_network, ipv6, cli,
                            cluster_network, ipv6_cluster_network)

    # output files
    with open(ctx.output_keyring, 'w') as f:
        os.fchmod(f.fileno(), 0o600)
        f.write('[client.admin]\n'
                '\tkey = ' + admin_key + '\n')
    logger.info('Wrote keyring to %s' % ctx.output_keyring)

    # create mgr
    create_mgr(ctx, uid, gid, fsid, mgr_id, mgr_key, config, cli)

    def json_loads_retry(cli_func):
        # the mon may briefly return truncated/invalid JSON right after
        # startup; retry a few times before giving up
        for sleep_secs in [1, 4, 4]:
            try:
                return json.loads(cli_func())
            except json.JSONDecodeError:
                logger.debug('Invalid JSON. Retrying in %s seconds...' % sleep_secs)
                time.sleep(sleep_secs)
        return json.loads(cli_func())

    # wait for mgr to restart (after enabling a module)
    def wait_for_mgr_restart():
        # first get latest mgrmap epoch from the mon. try newer 'mgr
        # stat' command first, then fall back to 'mgr dump' if
        # necessary
        try:
            j = json_loads_retry(lambda: cli(['mgr', 'stat']))
        except Exception:
            j = json_loads_retry(lambda: cli(['mgr', 'dump']))
        epoch = j['epoch']

        # wait for mgr to have it
        logger.info('Waiting for the mgr to restart...')

        def mgr_has_latest_epoch():
            # type: () -> bool
            try:
                out = cli(['tell', 'mgr', 'mgr_status'])
                j = json.loads(out)
                return j['mgrmap_epoch'] >= epoch
            except Exception as e:
                logger.debug('tell mgr mgr_status failed: %s' % e)
                return False
        is_available(ctx, 'mgr epoch %d' % epoch, mgr_has_latest_epoch)

    enable_cephadm_mgr_module(cli, wait_for_mgr_restart)

    # ssh
    if not ctx.skip_ssh:
        prepare_ssh(ctx, cli, wait_for_mgr_restart)

    if ctx.registry_url and ctx.registry_username and ctx.registry_password:
        cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_url', ctx.registry_url, '--force'])
        cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_username', ctx.registry_username, '--force'])
        cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_password', ctx.registry_password, '--force'])

    cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(ctx.container_init), '--force'])

    if ctx.with_exporter:
        cli(['config-key', 'set', 'mgr/cephadm/exporter_enabled', 'true'])
        if ctx.exporter_config:
            logger.info('Applying custom cephadm exporter settings')
            # validated within the parser, so we can just apply to the store
            with tempfile.NamedTemporaryFile(buffering=0) as tmp:
                tmp.write(json.dumps(ctx.exporter_config).encode('utf-8'))
                mounts = {
                    tmp.name: '/tmp/exporter-config.json:z'
                }
                cli(['cephadm', 'set-exporter-config', '-i', '/tmp/exporter-config.json'], extra_mounts=mounts)
                logger.info('-> Use ceph orch apply cephadm-exporter to deploy')
        else:
            # generate a default SSL configuration for the exporter(s)
            logger.info('Generating a default cephadm exporter configuration (self-signed)')
            cli(['cephadm', 'generate-exporter-config'])
        #
        # deploy the service (commented out until the cephadm changes are in the ceph container build)
        logger.info('Deploying cephadm exporter service with default placement...')
        cli(['orch', 'apply', 'cephadm-exporter'])

    if not ctx.skip_dashboard:
        prepare_dashboard(ctx, uid, gid, cli, wait_for_mgr_restart)

    if ctx.apply_spec:
        logger.info('Applying %s to cluster' % ctx.apply_spec)

        with open(ctx.apply_spec) as f:
            # pre-distribute our ssh key to every other host named in
            # the spec so the orchestrator can reach them
            for line in f:
                if 'hostname:' in line:
                    line = line.replace('\n', '')
                    split = line.split(': ')
                    if split[1] != hostname:
                        logger.info('Adding ssh key to %s' % split[1])

                        ssh_key = '/etc/ceph/ceph.pub'
                        if ctx.ssh_public_key:
                            ssh_key = ctx.ssh_public_key.name
                        out, err, code = call_throws(ctx, ['sudo', '-u', ctx.ssh_user, 'ssh-copy-id', '-f', '-i', ssh_key, '-o StrictHostKeyChecking=no', '%s@%s' % (ctx.ssh_user, split[1])])

        mounts = {}
        mounts[pathify(ctx.apply_spec)] = '/tmp/spec.yml:z'

        out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
        logger.info(out)

    logger.info('You can access the Ceph CLI with:\n\n'
                '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
                    sys.argv[0],
                    fsid,
                    ctx.output_config,
                    ctx.output_keyring))
    logger.info('Please consider enabling telemetry to help improve Ceph:\n\n'
                '\tceph telemetry on\n\n'
                'For more information see:\n\n'
                '\thttps://docs.ceph.com/docs/pacific/mgr/telemetry/\n')
    logger.info('Bootstrap complete.')
    return 0
4133
4134##################################
4135
f67539c2
TL
4136
def command_registry_login(ctx: CephadmContext):
    """Log the container engine into a custom registry, taking credentials
    either from --registry-json or from the individual --registry-* flags."""
    if ctx.registry_json:
        logger.info('Pulling custom registry login info from %s.' % ctx.registry_json)
        creds = get_parm(ctx.registry_json)
        if creds.get('url') and creds.get('username') and creds.get('password'):
            # stash the values on ctx so later bootstrap steps can reuse them
            ctx.registry_url = creds.get('url')
            ctx.registry_username = creds.get('username')
            ctx.registry_password = creds.get('password')
            registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
        else:
            raise Error('json provided for custom registry login did not include all necessary fields. '
                        'Please setup json file as\n'
                        '{\n'
                        ' "url": "REGISTRY_URL",\n'
                        ' "username": "REGISTRY_USERNAME",\n'
                        ' "password": "REGISTRY_PASSWORD"\n'
                        '}\n')
    elif ctx.registry_url and ctx.registry_username and ctx.registry_password:
        registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
    else:
        raise Error('Invalid custom registry arguments received. To login to a custom registry include '
                    '--registry-url, --registry-username and --registry-password '
                    'options or --registry-json option')
    return 0
4161
f67539c2
TL
4162
def registry_login(ctx: CephadmContext, url, username, password):
    """Run `<engine> login` against a custom registry.

    :param url/username/password: the credentials actually used; note the
        error message reports these parameters (not ctx.registry_*), since
        callers may pass values that are not stored on ctx.
    """
    logger.info('Logging into custom registry.')
    try:
        engine = ctx.container_engine
        cmd = [engine.path, 'login',
               '-u', username, '-p', password,
               url]
        if isinstance(engine, Podman):
            cmd.append('--authfile=/etc/ceph/podman-auth.json')
        out, _, _ = call_throws(ctx, cmd)
        if isinstance(engine, Podman):
            # the authfile holds the registry password; keep it private
            os.chmod('/etc/ceph/podman-auth.json', 0o600)
    except Exception:
        # BUGFIX: report the url/username parameters that were actually
        # used, not ctx.registry_url/ctx.registry_username which may be
        # unset or different when credentials came from another source.
        raise Error('Failed to login to custom registry @ %s as %s with given password' % (url, username))
f6b5b4d7
TL
4177
4178##################################
4179
4180
f67539c2
TL
def extract_uid_gid_monitoring(ctx, daemon_type):
    # type: (CephadmContext, str) -> Tuple[int, int]
    """Return the (uid, gid) a monitoring-stack daemon should run as,
    probing the container image where the ids are not fixed."""
    if daemon_type == 'node-exporter':
        # node-exporter always runs as nobody/nogroup
        return 65534, 65534

    # path(s) inside the image whose ownership reveals the daemon user
    probe_paths = {
        'prometheus': '/etc/prometheus',
        'grafana': '/var/lib/grafana',
        'alertmanager': ['/etc/alertmanager', '/etc/prometheus'],
    }
    if daemon_type not in probe_paths:
        raise Error('{} not implemented yet'.format(daemon_type))
    uid, gid = extract_uid_gid(ctx, file_path=probe_paths[daemon_type])
    return uid, gid
4195
4196
@default_image
def command_deploy(ctx):
    # type: (CephadmContext) -> None
    """Deploy, redeploy, or reconfigure a single daemon on this host,
    dispatching on the daemon type encoded in ctx.name."""
    daemon_type, daemon_id = ctx.name.split('.', 1)

    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    if daemon_type not in get_supported_daemons():
        raise Error('daemon type %s not recognized' % daemon_type)

    # treat as a redeploy when the unit is running or the container exists
    redeploy = False
    unit_name = get_unit_name(ctx.fsid, daemon_type, daemon_id)
    container_name = 'ceph-%s-%s.%s' % (ctx.fsid, daemon_type, daemon_id)
    (_, state, _) = check_unit(ctx, unit_name)
    if state == 'running' or is_container_running(ctx, container_name):
        redeploy = True

    if ctx.reconfig:
        logger.info('%s daemon %s ...' % ('Reconfig', ctx.name))
    elif redeploy:
        logger.info('%s daemon %s ...' % ('Redeploy', ctx.name))
    else:
        logger.info('%s daemon %s ...' % ('Deploy', ctx.name))

    # Get and check ports explicitly required to be opened
    daemon_ports = []  # type: List[int]

    # only check port in use if not reconfig or redeploy since service
    # we are redeploying/reconfiguring will already be using the port
    if not ctx.reconfig and not redeploy:
        if ctx.tcp_ports:
            daemon_ports = list(map(int, ctx.tcp_ports.split()))

    if daemon_type in Ceph.daemons:
        config, keyring = get_config_and_keyring(ctx)
        uid, gid = extract_uid_gid(ctx)
        make_var_run(ctx, ctx.fsid, uid, gid)

        container = get_container(ctx, ctx.fsid, daemon_type, daemon_id,
                                  ptrace=ctx.allow_ptrace)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, container, uid, gid,
                      config=config, keyring=keyring,
                      osd_fsid=ctx.osd_fsid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type in Monitoring.components:
        # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
        # Default Checks
        if not ctx.reconfig and not redeploy:
            daemon_ports.extend(Monitoring.port_map[daemon_type])

        # make sure provided config-json is sufficient
        config = get_parm(ctx.config_json)  # type: ignore
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())
        required_args = Monitoring.components[daemon_type].get('config-json-args', list())
        if required_files:
            if not config or not all(c in config.get('files', {}).keys() for c in required_files):  # type: ignore
                raise Error('{} deployment requires config-json which must '
                            'contain file content for {}'.format(daemon_type.capitalize(), ', '.join(required_files)))
        if required_args:
            if not config or not all(c in config.keys() for c in required_args):  # type: ignore
                raise Error('{} deployment requires config-json which must '
                            'contain arg for {}'.format(daemon_type.capitalize(), ', '.join(required_args)))

        uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
        container = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, container, uid, gid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == NFSGanesha.daemon_type:
        if not ctx.reconfig and not redeploy:
            daemon_ports.extend(NFSGanesha.port_map.values())

        config, keyring = get_config_and_keyring(ctx)
        # TODO: extract ganesha uid/gid (997, 994) ?
        uid, gid = extract_uid_gid(ctx)
        container = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, container, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CephIscsi.daemon_type:
        config, keyring = get_config_and_keyring(ctx)
        uid, gid = extract_uid_gid(ctx)
        container = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, container, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, ctx.fsid, daemon_id)
        uid, gid = haproxy.extract_uid_gid_haproxy()
        container = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, container, uid, gid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == Keepalived.daemon_type:
        keepalived = Keepalived.init(ctx, ctx.fsid, daemon_id)
        uid, gid = keepalived.extract_uid_gid_keepalived()
        container = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, container, uid, gid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, ctx.fsid, daemon_id)
        if not ctx.reconfig and not redeploy:
            daemon_ports.extend(cc.ports)
        container = get_container(ctx, ctx.fsid, daemon_type, daemon_id,
                                  privileged=cc.privileged,
                                  ptrace=ctx.allow_ptrace)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, container,
                      uid=cc.uid, gid=cc.gid, config=None,
                      keyring=None, reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CephadmDaemon.daemon_type:
        # get current user gid and uid
        uid = os.getuid()
        gid = os.getgid()
        config_js = get_parm(ctx.config_json)  # type: Dict[str, str]
        if not daemon_ports:
            logger.info('cephadm-exporter will use default port ({})'.format(CephadmDaemon.default_port))
            daemon_ports = [CephadmDaemon.default_port]

        CephadmDaemon.validate_config(config_js)

        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, None,
                      uid, gid, ports=daemon_ports)

    else:
        raise Error('daemon type {} not implemented in command_deploy function'
                    .format(daemon_type))
9f95a23c
TL
4336
4337##################################
4338
f6b5b4d7 4339
@infer_image
def command_run(ctx):
    # type: (CephadmContext) -> int
    """Run a daemon's container in the foreground; returns its exit code."""
    daemon_type, daemon_id = ctx.name.split('.', 1)
    container = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
    return call_timeout(ctx, container.run_cmd(), ctx.timeout)
9f95a23c
TL
4347
4348##################################
4349
f6b5b4d7 4350
f67539c2
TL
def fsid_conf_mismatch(ctx):
    # type: (CephadmContext) -> bool
    """Return True when the fsid in the effective ceph.conf disagrees
    with the fsid given on the command line."""
    (config, _) = get_config_and_keyring(ctx)
    if not config:
        return False
    expected = 'fsid = ' + ctx.fsid
    for raw_line in config.split('\n'):
        stripped = raw_line.strip()
        if 'fsid = ' in stripped and stripped != expected:
            return True
    return False
4360
4361
@infer_fsid
@infer_config
@infer_image
def command_shell(ctx):
    # type: (CephadmContext) -> int
    """Launch an interactive containerized shell (or run ctx.command)
    with the cluster's config, keyring, and daemon mounts in place."""
    if fsid_conf_mismatch(ctx):
        raise Error('fsid does not match ceph conf')

    if ctx.fsid:
        make_log_dir(ctx, ctx.fsid)

    # figure out which daemon's mount set to borrow
    if ctx.name:
        if '.' in ctx.name:
            (daemon_type, daemon_id) = ctx.name.split('.', 1)
        else:
            daemon_type = ctx.name
            daemon_id = None
    else:
        daemon_type = 'osd'  # get the most mounts
        daemon_id = None

    if daemon_id and not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    # use /etc/ceph files by default, if present. we do this instead of
    # making these defaults in the arg parser because we don't want an error
    # if they don't exist.
    if not ctx.keyring and os.path.exists(SHELL_DEFAULT_KEYRING):
        ctx.keyring = SHELL_DEFAULT_KEYRING

    container_args: List[str] = ['-i']
    mounts = get_container_mounts(ctx, ctx.fsid, daemon_type, daemon_id,
                                  no_config=True if ctx.config else False)
    binds = get_container_binds(ctx, ctx.fsid, daemon_type, daemon_id)
    if ctx.config:
        mounts[pathify(ctx.config)] = '/etc/ceph/ceph.conf:z'
    if ctx.keyring:
        mounts[pathify(ctx.keyring)] = '/etc/ceph/ceph.keyring:z'
    if ctx.mount:
        for _mount in ctx.mount:
            # user mounts are SRC[:DST[:z]]
            parts = _mount.split(':')
            src = pathify(parts[0])
            if len(parts) > 1:
                dst = parts[1] + ':z' if len(parts) == 3 else parts[1]
                mounts[src] = dst
            else:
                mounts[src] = '/mnt/{}:z'.format(os.path.basename(parts[0]))
    if ctx.command:
        command = ctx.command
    else:
        # no explicit command: give the user an interactive bash with a tty
        command = ['bash']
        container_args += [
            '-t',
            '-e', 'LANG=C',
            '-e', 'PS1=%s' % CUSTOM_PS1,
        ]
    if ctx.fsid:
        # seed a root home dir (with skeleton dotfiles) inside the cluster dir
        home = os.path.join(ctx.data_dir, ctx.fsid, 'home')
        if not os.path.exists(home):
            logger.debug('Creating root home at %s' % home)
            makedirs(home, 0, 0, 0o660)
        if os.path.exists('/etc/skel'):
            for f in os.listdir('/etc/skel'):
                if f.startswith('.bash'):
                    shutil.copyfile(os.path.join('/etc/skel', f),
                                    os.path.join(home, f))
        mounts[home] = '/root'

    shell_container = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='doesnotmatter',
        args=[],
        container_args=container_args,
        volume_mounts=mounts,
        bind_mounts=binds,
        envs=ctx.env,
        privileged=True)
    command = shell_container.shell_cmd(command)

    return call_timeout(ctx, command, ctx.timeout)
9f95a23c
TL
4443
4444##################################
4445
f6b5b4d7 4446
@infer_fsid
def command_enter(ctx):
    # type: (CephadmContext) -> int
    """Exec a shell (or ctx.command) inside a running daemon's container."""
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')
    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    container_args = ['-i']  # type: List[str]
    if ctx.command:
        command = ctx.command
    else:
        # no explicit command: open an interactive sh with a tty
        command = ['sh']
        container_args += [
            '-t',
            '-e', 'LANG=C',
            '-e', 'PS1=%s' % CUSTOM_PS1,
        ]
    target = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='doesnotmatter',
        container_args=container_args,
        cname='ceph-%s-%s.%s' % (ctx.fsid, daemon_type, daemon_id),
    )
    command = target.exec_cmd(command)
    return call_timeout(ctx, command, ctx.timeout)
9f95a23c
TL
4472
4473##################################
4474
f6b5b4d7 4475
9f95a23c
TL
@infer_fsid
@infer_image
def command_ceph_volume(ctx):
    # type: (CephadmContext) -> None
    """Run ceph-volume inside a privileged container, mounting any
    provided config/keyring as temp files."""
    if ctx.fsid:
        make_log_dir(ctx, ctx.fsid)

        lock = FileLock(ctx, ctx.fsid)
        lock.acquire()

    (uid, gid) = (0, 0)  # ceph-volume runs as root
    mounts = get_container_mounts(ctx, ctx.fsid, 'osd', None)

    # keep references to the NamedTemporaryFiles so they live until the
    # container run completes
    tmp_config = None
    tmp_keyring = None

    (config, keyring) = get_config_and_keyring(ctx)

    if config:
        # tmp config file
        tmp_config = write_tmp(config, uid, gid)
        mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'

    if keyring:
        # tmp keyring file
        tmp_keyring = write_tmp(keyring, uid, gid)
        mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'

    container = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/sbin/ceph-volume',
        envs=ctx.env,
        args=ctx.command,
        privileged=True,
        volume_mounts=mounts,
    )
    verbosity = CallVerbosity.VERBOSE if ctx.log_output else CallVerbosity.VERBOSE_ON_FAILURE
    out, err, code = call_throws(ctx, container.run_cmd(), verbosity=verbosity)
    if not code:
        print(out)
4517
4518##################################
4519
f6b5b4d7 4520
@infer_fsid
def command_unit(ctx):
    # type: (CephadmContext) -> None
    """Pass a systemctl verb (start/stop/restart/...) through to the
    systemd unit of the named daemon."""
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    cmd = ['systemctl', ctx.command, unit_name]
    call_throws(ctx, cmd, verbosity=CallVerbosity.VERBOSE, desc='')
9f95a23c
TL
4536
4537##################################
4538
f6b5b4d7 4539
@infer_fsid
def command_logs(ctx):
    # type: (CephadmContext) -> None
    """Show journalctl output for a daemon's systemd unit, passing any
    extra arguments straight through to journalctl."""
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    argv = [find_program('journalctl'), '-u', unit_name]
    if ctx.command:
        argv.extend(ctx.command)

    # call this directly, without our wrapper, so that we get an unmolested
    # stdout with logger prefixing.
    logger.debug('Running command: %s' % ' '.join(argv))
    subprocess.call(argv)  # type: ignore
9f95a23c
TL
4557
4558##################################
4559
f6b5b4d7 4560
f67539c2
TL
def list_networks(ctx):
    # type: (CephadmContext) -> Dict[str,Dict[str,List[str]]]
    """Return {network: {interface: [address, ...]}} for this host.

    sadly, 18.04's iproute2 4.15.0-2ubun doesn't support the -j flag,
    so we'll need to use a regex to parse 'ip' command output.
    """
    networks = _list_ipv4_networks(ctx)
    networks.update(_list_ipv6_networks(ctx))
    return networks
4574
4575
f67539c2
TL
def _list_ipv4_networks(ctx: CephadmContext):
    """Collect IPv4 networks by parsing `ip route ls` output."""
    ip_cmd: Optional[str] = find_executable('ip')
    if not ip_cmd:
        raise FileNotFoundError("unable to find 'ip' command")
    out, _, _ = call_throws(ctx, [ip_cmd, 'route', 'ls'])
    return _parse_ipv4_route(out)
4582
9f95a23c 4583
f6b5b4d7 4584def _parse_ipv4_route(out):
f67539c2
TL
4585 r = {} # type: Dict[str,Dict[str,List[str]]]
4586 p = re.compile(r'^(\S+) dev (\S+) (.*)scope link (.*)src (\S+)')
9f95a23c
TL
4587 for line in out.splitlines():
4588 m = p.findall(line)
4589 if not m:
4590 continue
4591 net = m[0][0]
f67539c2
TL
4592 iface = m[0][1]
4593 ip = m[0][4]
9f95a23c 4594 if net not in r:
f67539c2
TL
4595 r[net] = {}
4596 if iface not in r[net]:
4597 r[net][iface] = []
4598 r[net][iface].append(ip)
9f95a23c
TL
4599 return r
4600
f6b5b4d7 4601
f67539c2
TL
def _list_ipv6_networks(ctx: CephadmContext):
    """Collect IPv6 networks by parsing `ip -6 route ls` plus
    `ip -6 addr ls` output."""
    ip_cmd: Optional[str] = find_executable('ip')
    if not ip_cmd:
        raise FileNotFoundError("unable to find 'ip' command")
    routes, _, _ = call_throws(ctx, [ip_cmd, '-6', 'route', 'ls'])
    ips, _, _ = call_throws(ctx, [ip_cmd, '-6', 'addr', 'ls'])
    return _parse_ipv6_route(routes, ips)
4609
4610
4611def _parse_ipv6_route(routes, ips):
f67539c2 4612 r = {} # type: Dict[str,Dict[str,List[str]]]
f6b5b4d7
TL
4613 route_p = re.compile(r'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$')
4614 ip_p = re.compile(r'^\s+inet6 (\S+)/(.*)scope (.*)$')
f67539c2 4615 iface_p = re.compile(r'^(\d+): (\S+): (.*)$')
f6b5b4d7
TL
4616 for line in routes.splitlines():
4617 m = route_p.findall(line)
4618 if not m or m[0][0].lower() == 'default':
4619 continue
4620 net = m[0][0]
f67539c2
TL
4621 if '/' not in net: # only consider networks with a mask
4622 continue
4623 iface = m[0][1]
f6b5b4d7 4624 if net not in r:
f67539c2
TL
4625 r[net] = {}
4626 if iface not in r[net]:
4627 r[net][iface] = []
f6b5b4d7 4628
f67539c2 4629 iface = None
f6b5b4d7
TL
4630 for line in ips.splitlines():
4631 m = ip_p.findall(line)
4632 if not m:
f67539c2
TL
4633 m = iface_p.findall(line)
4634 if m:
4635 # drop @... suffix, if present
4636 iface = m[0][1].split('@')[0]
f6b5b4d7
TL
4637 continue
4638 ip = m[0][0]
4639 # find the network it belongs to
4640 net = [n for n in r.keys()
f67539c2 4641 if ipaddress.ip_address(ip) in ipaddress.ip_network(n)]
f6b5b4d7 4642 if net:
f67539c2
TL
4643 assert(iface)
4644 r[net[0]][iface].append(ip)
f6b5b4d7
TL
4645
4646 return r
4647
4648
f67539c2
TL
def command_list_networks(ctx):
    # type: (CephadmContext) -> None
    """Print this host's networks/interfaces/addresses as JSON."""
    print(json.dumps(list_networks(ctx), indent=4))
4653
4654##################################
4655
f6b5b4d7 4656
f67539c2
TL
def command_ls(ctx):
    # type: (CephadmContext) -> None
    """Print every daemon on this host as JSON (see list_daemons)."""
    daemons = list_daemons(ctx, detail=not ctx.no_detail,
                           legacy_dir=ctx.legacy_dir)
    print(json.dumps(daemons, indent=4))
4662
f6b5b4d7 4663
f67539c2
TL
def with_units_to_int(v: str) -> int:
    """Convert a human-readable size string (e.g. '123MiB', '1.5G',
    '512k', '42B') into a byte count, using 1024-based multipliers."""
    # strip an 'iB' or 'B' unit suffix first, leaving the multiplier letter
    if v.endswith('iB'):
        v = v[:-2]
    elif v.endswith('B'):
        v = v[:-1]
    multipliers = {
        'K': 1024,
        'M': 1024 ** 2,
        'G': 1024 ** 3,
        'T': 1024 ** 4,
    }
    mult = 1
    suffix = v[-1].upper()
    if suffix in multipliers:
        mult = multipliers[suffix]
        v = v[:-1]
    return int(float(v) * mult)
4683
4684
def list_daemons(ctx, detail=True, legacy_dir=None):
    # type: (CephadmContext, bool, Optional[str]) -> List[Dict[str, str]]
    """Return a list of dicts describing every ceph daemon on this host.

    Scans the data dir for both 'legacy' daemons (<type>/<cluster>-<id>)
    and cephadm-managed ones (<fsid>/<type>.<id>).  When ``detail`` is
    true, also queries systemd and the container engine for unit state,
    image name/id/digests, daemon version, timestamps and memory usage.
    ``legacy_dir``, if given, is prepended to the data dir path.
    """
    host_version: Optional[str] = None  # lazily probed `ceph -v` of the host
    ls = []
    container_path = ctx.container_engine.path

    data_dir = ctx.data_dir
    if legacy_dir is not None:
        data_dir = os.path.abspath(legacy_dir + data_dir)

    # keep track of ceph versions we see
    seen_versions = {}  # type: Dict[str, Optional[str]]

    # keep track of image digests
    seen_digests = {}  # type: Dict[str, List[str]]

    # keep track of memory usage we've seen
    seen_memusage = {}  # type: Dict[str, int]
    out, err, code = call(
        ctx,
        [container_path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
        verbosity=CallVerbosity.DEBUG
    )
    seen_memusage_cid_len = 0
    if not code:
        for line in out.splitlines():
            (cid, usage) = line.split(',')
            (used, limit) = usage.split(' / ')
            seen_memusage[cid] = with_units_to_int(used)
            if not seen_memusage_cid_len:
                # `stats` may report truncated ids; remember their length so
                # the full ids from `inspect` can be matched by prefix below
                seen_memusage_cid_len = len(cid)

    # /var/lib/ceph
    if os.path.exists(data_dir):
        for i in os.listdir(data_dir):
            if i in ['mon', 'osd', 'mds', 'mgr']:
                # legacy layout: <type>/<cluster>-<id>
                daemon_type = i
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '-' not in j:
                        continue
                    (cluster, daemon_id) = j.split('-', 1)
                    fsid = get_legacy_daemon_fsid(ctx,
                                                  cluster, daemon_type, daemon_id,
                                                  legacy_dir=legacy_dir)
                    legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
                    val: Dict[str, Any] = {
                        'style': 'legacy',
                        'name': '%s.%s' % (daemon_type, daemon_id),
                        'fsid': fsid if fsid is not None else 'unknown',
                        'systemd_unit': legacy_unit_name,
                    }
                    if detail:
                        (val['enabled'], val['state'], _) = \
                            check_unit(ctx, legacy_unit_name)
                        if not host_version:
                            # legacy daemons run the host's ceph binaries, so
                            # probe the host version once and reuse it
                            try:
                                out, err, code = call(ctx,
                                                      ['ceph', '-v'],
                                                      verbosity=CallVerbosity.DEBUG)
                                if not code and out.startswith('ceph version '):
                                    host_version = out.split(' ')[2]
                            except Exception:
                                pass
                        val['host_version'] = host_version
                    ls.append(val)
            elif is_fsid(i):
                # cephadm layout: <fsid>/<type>.<id>
                fsid = str(i)  # convince mypy that fsid is a str here
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)):
                        name = j
                        (daemon_type, daemon_id) = j.split('.', 1)
                        unit_name = get_unit_name(fsid,
                                                  daemon_type,
                                                  daemon_id)
                    else:
                        continue
                    val = {
                        'style': 'cephadm:v1',
                        'name': name,
                        'fsid': fsid,
                        'systemd_unit': unit_name,
                    }
                    if detail:
                        # get container id
                        (val['enabled'], val['state'], _) = \
                            check_unit(ctx, unit_name)
                        container_id = None
                        image_name = None
                        image_id = None
                        image_digests = None
                        version = None
                        start_stamp = None

                        cmd = [
                            container_path, 'inspect',
                            '--format', '{{.Id}},{{.Config.Image}},{{.Image}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}',
                            'ceph-%s-%s' % (fsid, j)
                        ]
                        out, err, code = call(ctx, cmd, verbosity=CallVerbosity.DEBUG)
                        if not code:
                            # container exists: pull identity/version details
                            # out of the inspect output
                            (container_id, image_name, image_id, start,
                             version) = out.strip().split(',')
                            image_id = normalize_container_id(image_id)
                            daemon_type = name.split('.', 1)[0]
                            start_stamp = try_convert_datetime(start)

                            # collect digests for this image id
                            image_digests = seen_digests.get(image_id)
                            if not image_digests:
                                out, err, code = call(
                                    ctx,
                                    [
                                        container_path, 'image', 'inspect', image_id,
                                        '--format', '{{.RepoDigests}}',
                                    ],
                                    verbosity=CallVerbosity.DEBUG)
                                if not code:
                                    image_digests = out.strip()[1:-1].split(' ')
                                    seen_digests[image_id] = image_digests

                            # identify software version inside the container (if we can)
                            if not version or '.' not in version:
                                version = seen_versions.get(image_id, None)
                                if daemon_type == NFSGanesha.daemon_type:
                                    version = NFSGanesha.get_version(ctx, container_id)
                                if daemon_type == CephIscsi.daemon_type:
                                    version = CephIscsi.get_version(ctx, container_id)
                                elif not version:
                                    if daemon_type in Ceph.daemons:
                                        out, err, code = call(ctx,
                                                              [container_path, 'exec', container_id,
                                                               'ceph', '-v'],
                                                              verbosity=CallVerbosity.DEBUG)
                                        if not code and \
                                           out.startswith('ceph version '):
                                            version = out.split(' ')[2]
                                            seen_versions[image_id] = version
                                    elif daemon_type == 'grafana':
                                        out, err, code = call(ctx,
                                                              [container_path, 'exec', container_id,
                                                               'grafana-server', '-v'],
                                                              verbosity=CallVerbosity.DEBUG)
                                        if not code and \
                                           out.startswith('Version '):
                                            version = out.split(' ')[1]
                                            seen_versions[image_id] = version
                                    elif daemon_type in ['prometheus',
                                                         'alertmanager',
                                                         'node-exporter']:
                                        version = Monitoring.get_version(ctx, container_id, daemon_type)
                                        seen_versions[image_id] = version
                                    elif daemon_type == 'haproxy':
                                        out, err, code = call(ctx,
                                                              [container_path, 'exec', container_id,
                                                               'haproxy', '-v'],
                                                              verbosity=CallVerbosity.DEBUG)
                                        if not code and \
                                           out.startswith('HA-Proxy version '):
                                            version = out.split(' ')[2]
                                            seen_versions[image_id] = version
                                    elif daemon_type == 'keepalived':
                                        # keepalived prints its version on stderr
                                        out, err, code = call(ctx,
                                                              [container_path, 'exec', container_id,
                                                               'keepalived', '--version'],
                                                              verbosity=CallVerbosity.DEBUG)
                                        if not code and \
                                           err.startswith('Keepalived '):
                                            version = err.split(' ')[1]
                                            if version[0] == 'v':
                                                version = version[1:]
                                            seen_versions[image_id] = version
                                    elif daemon_type == CustomContainer.daemon_type:
                                        # Because a custom container can contain
                                        # everything, we do not know which command
                                        # to execute to get the version.
                                        pass
                                    else:
                                        logger.warning('version for unknown daemon type %s' % daemon_type)
                        else:
                            # container is not running; fall back to the image
                            # recorded at deploy time
                            vfile = os.path.join(data_dir, fsid, j, 'unit.image')  # type: ignore
                            try:
                                with open(vfile, 'r') as f:
                                    image_name = f.read().strip() or None
                            except IOError:
                                pass

                        # unit.meta?
                        mfile = os.path.join(data_dir, fsid, j, 'unit.meta')  # type: ignore
                        try:
                            with open(mfile, 'r') as f:
                                meta = json.loads(f.read())
                            val.update(meta)
                        except IOError:
                            pass

                        val['container_id'] = container_id
                        val['container_image_name'] = image_name
                        val['container_image_id'] = image_id
                        val['container_image_digests'] = image_digests
                        if container_id:
                            val['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len])
                        val['version'] = version
                        val['started'] = start_stamp
                        val['created'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.created')
                        )
                        val['deployed'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.image'))
                        val['configured'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.configured'))

                    ls.append(val)

    return ls
4899
4900
f67539c2
TL
def get_daemon_description(ctx, fsid, name, detail=False, legacy_dir=None):
    # type: (CephadmContext, str, str, bool, Optional[str]) -> Dict[str, str]
    """Return the `cephadm ls` entry for one daemon of one cluster,
    raising Error if no such daemon exists on this host."""
    for daemon in list_daemons(ctx, detail=detail, legacy_dir=legacy_dir):
        if daemon['fsid'] == fsid and daemon['name'] == name:
            return daemon
    raise Error('Daemon not found: {}. See `cephadm ls`'.format(name))
4911
9f95a23c
TL
4912##################################
4913
f67539c2 4914
@default_image
def command_adopt(ctx):
    # type: (CephadmContext) -> None
    """Adopt a legacy daemon into cephadm: validate the request, detect
    the cluster fsid, then dispatch to the type-specific adopter."""
    if not ctx.skip_pull:
        _pull_image(ctx, ctx.image)

    daemon_type, daemon_id = ctx.name.split('.', 1)

    # legacy check
    if ctx.style != 'legacy':
        raise Error('adoption of style %s not implemented' % ctx.style)

    # lock
    fsid = get_legacy_daemon_fsid(ctx,
                                  ctx.cluster,
                                  daemon_type,
                                  daemon_id,
                                  legacy_dir=ctx.legacy_dir)
    if not fsid:
        raise Error('could not detect legacy fsid; set fsid in ceph.conf')
    lock = FileLock(ctx, fsid)
    lock.acquire()

    # call correct adoption
    monitoring_adopters = {
        'prometheus': command_adopt_prometheus,
        'grafana': command_adopt_grafana,
        'alertmanager': command_adopt_alertmanager,
    }
    if daemon_type in Ceph.daemons:
        command_adopt_ceph(ctx, daemon_type, daemon_id, fsid)
    elif daemon_type == 'node-exporter':
        raise Error('adoption of node-exporter not implemented')
    elif daemon_type in monitoring_adopters:
        monitoring_adopters[daemon_type](ctx, daemon_id, fsid)
    else:
        raise Error('daemon type %s not recognized' % daemon_type)
4952
4953
class AdoptOsd(object):
    """Locate the fsid and objectstore type of a legacy OSD so it can be
    adopted, trying (in caller-chosen order) the live data dir, the
    ceph-volume lvm inventory, and ceph-volume 'simple' JSON files."""

    def __init__(self, ctx, osd_data_dir, osd_id):
        # type: (CephadmContext, str, str) -> None
        self.ctx = ctx
        self.osd_data_dir = osd_data_dir
        self.osd_id = osd_id

    def check_online_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Read fsid and objectstore type from the files a running
        (mounted) OSD keeps in its data dir; (None, None) pieces when
        the files are absent/unreadable."""

        osd_fsid, osd_type = None, None

        path = os.path.join(self.osd_data_dir, 'fsid')
        try:
            with open(path, 'r') as f:
                osd_fsid = f.read().strip()
            logger.info('Found online OSD at %s' % path)
        except IOError:
            logger.info('Unable to read OSD fsid from %s' % path)
        if os.path.exists(os.path.join(self.osd_data_dir, 'type')):
            with open(os.path.join(self.osd_data_dir, 'type')) as f:
                osd_type = f.read().strip()
        else:
            logger.info('"type" file missing for OSD data dir')

        return osd_fsid, osd_type

    def check_offline_lvm_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Ask `ceph-volume lvm list` (run in a privileged container)
        whether it knows this OSD id; derive the objectstore type from
        the device tags ('block' -> bluestore, 'data' -> filestore)."""
        osd_fsid, osd_type = None, None

        c = CephContainer(
            self.ctx,
            image=self.ctx.image,
            entrypoint='/usr/sbin/ceph-volume',
            args=['lvm', 'list', '--format=json'],
            privileged=True
        )
        out, err, code = call_throws(self.ctx, c.run_cmd())
        if not code:
            try:
                js = json.loads(out)
                if self.osd_id in js:
                    logger.info('Found offline LVM OSD {}'.format(self.osd_id))
                    osd_fsid = js[self.osd_id][0]['tags']['ceph.osd_fsid']
                    for device in js[self.osd_id]:
                        if device['tags']['ceph.type'] == 'block':
                            osd_type = 'bluestore'
                            break
                        if device['tags']['ceph.type'] == 'data':
                            osd_type = 'filestore'
                            break
            except ValueError as e:
                # ceph-volume emitted something that is not valid JSON
                logger.info('Invalid JSON in ceph-volume lvm list: {}'.format(e))

        return osd_fsid, osd_type

    def check_offline_simple_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Look for a ceph-volume 'simple' scan JSON for this OSD id under
        /etc/ceph/osd; for non-filestore OSDs also mount the data device
        so the subsequent adoption can move files out of it."""
        osd_fsid, osd_type = None, None

        osd_file = glob('/etc/ceph/osd/{}-[a-f0-9-]*.json'.format(self.osd_id))
        if len(osd_file) == 1:
            with open(osd_file[0], 'r') as f:
                try:
                    js = json.loads(f.read())
                    logger.info('Found offline simple OSD {}'.format(self.osd_id))
                    osd_fsid = js['fsid']
                    osd_type = js['type']
                    if osd_type != 'filestore':
                        # need this to be mounted for the adopt to work, as it
                        # needs to move files from this directory
                        call_throws(self.ctx, ['mount', js['data']['path'], self.osd_data_dir])
                except ValueError as e:
                    logger.info('Invalid JSON in {}: {}'.format(osd_file, e))

        return osd_fsid, osd_type
5031
9f95a23c 5032
f67539c2
TL
def command_adopt_ceph(ctx, daemon_type, daemon_id, fsid):
    # type: (CephadmContext, str, str, str) -> None
    """Adopt a legacy ceph daemon (mon/mgr/mds/osd/...).

    Stops and disables the old systemd unit, moves the daemon's data,
    config and logs into the cephadm layout under /var/lib/ceph/<fsid>,
    and deploys a containerized replacement unit (restarted only if the
    legacy unit was running, or --force-start was given).
    """

    (uid, gid) = extract_uid_gid(ctx)

    data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
                    (daemon_type, ctx.cluster, daemon_id))
    data_dir_src = os.path.abspath(ctx.legacy_dir + data_dir_src)

    if not os.path.exists(data_dir_src):
        raise Error("{}.{} data directory '{}' does not exist. "
                    'Incorrect ID specified, or daemon already adopted?'.format(
                        daemon_type, daemon_id, data_dir_src))

    osd_fsid = None
    if daemon_type == 'osd':
        # try every known way of locating the OSD, most direct first
        adopt_osd = AdoptOsd(ctx, data_dir_src, daemon_id)
        osd_fsid, osd_type = adopt_osd.check_online_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
        if not osd_fsid:
            raise Error('Unable to find OSD {}'.format(daemon_id))
        logger.info('objectstore_type is %s' % osd_type)
        assert osd_type
        if osd_type == 'filestore':
            raise Error('FileStore is not supported by cephadm')

    # NOTE: implicit assumption here that the units correspond to the
    # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
    # CLUSTER field.
    unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
    (enabled, state, _) = check_unit(ctx, unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'disable', unit_name])

    # data
    logger.info('Moving data...')
    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)
    move_files(ctx, glob(os.path.join(data_dir_src, '*')),
               data_dir_dst,
               uid=uid, gid=gid)
    logger.debug('Remove dir `%s`' % (data_dir_src))
    if os.path.ismount(data_dir_src):
        call_throws(ctx, ['umount', data_dir_src])
    os.rmdir(data_dir_src)

    logger.info('Chowning content...')
    call_throws(ctx, ['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])

    if daemon_type == 'mon':
        # rename *.ldb -> *.sst, in case they are coming from ubuntu
        store = os.path.join(data_dir_dst, 'store.db')
        num_renamed = 0
        if os.path.exists(store):
            for oldf in os.listdir(store):
                if oldf.endswith('.ldb'):
                    newf = oldf.replace('.ldb', '.sst')
                    oldp = os.path.join(store, oldf)
                    newp = os.path.join(store, newf)
                    logger.debug('Renaming %s -> %s' % (oldp, newp))
                    os.rename(oldp, newp)
                    # fix: the counter was previously never incremented, so
                    # the summary log below could never fire
                    num_renamed += 1
        if num_renamed:
            logger.info('Renamed %d leveldb *.ldb files to *.sst',
                        num_renamed)
    if daemon_type == 'osd':
        for n in ['block', 'block.db', 'block.wal']:
            p = os.path.join(data_dir_dst, n)
            if os.path.exists(p):
                logger.info('Chowning %s...' % p)
                os.chown(p, uid, gid)
        # disable the ceph-volume 'simple' mode files on the host
        simple_fn = os.path.join('/etc/ceph/osd',
                                 '%s-%s.json' % (daemon_id, osd_fsid))
        if os.path.exists(simple_fn):
            new_fn = simple_fn + '.adopted-by-cephadm'
            logger.info('Renaming %s -> %s', simple_fn, new_fn)
            os.rename(simple_fn, new_fn)
            logger.info('Disabling host unit ceph-volume@ simple unit...')
            call(ctx, ['systemctl', 'disable',
                       'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
        else:
            # assume this is an 'lvm' c-v for now, but don't error
            # out if it's not.
            logger.info('Disabling host unit ceph-volume@ lvm unit...')
            call(ctx, ['systemctl', 'disable',
                       'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])

    # config
    config_src = '/etc/ceph/%s.conf' % (ctx.cluster)
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'config')
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # logs
    logger.info('Moving logs...')
    log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
                   (ctx.cluster, daemon_type, daemon_id))
    log_dir_src = os.path.abspath(ctx.legacy_dir + log_dir_src)
    log_dir_dst = make_log_dir(ctx, fsid, uid=uid, gid=gid)
    move_files(ctx, glob(log_dir_src),
               log_dir_dst,
               uid=uid, gid=gid)

    logger.info('Creating new units...')
    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True,  # unconditionally enable the new unit
                        start=(state == 'running' or ctx.force_start),
                        osd_fsid=osd_fsid)
    update_firewalld(ctx, daemon_type)
9f95a23c
TL
5151
5152
f67539c2
TL
def command_adopt_prometheus(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy prometheus daemon: stop/disable the old unit,
    migrate its config and metrics into the cephadm data dir, then
    deploy a containerized replacement."""
    daemon_type = 'prometheus'
    uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)

    _stop_and_disable(ctx, 'prometheus')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = os.path.abspath(ctx.legacy_dir + '/etc/prometheus/prometheus.yml')
    config_dst = os.path.join(data_dir_dst, 'etc/prometheus')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # data
    data_src = os.path.abspath(ctx.legacy_dir + '/var/lib/prometheus/metrics/')
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    container = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, container, uid, gid)
    update_firewalld(ctx, daemon_type)
9f95a23c 5180
f6b5b4d7 5181
f67539c2
TL
def command_adopt_grafana(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy grafana-server: stop/disable the old unit, migrate
    config, provisioning, TLS material and data dirs into the cephadm
    layout, then deploy a containerized replacement."""

    daemon_type = 'grafana'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)

    _stop_and_disable(ctx, 'grafana-server')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = '/etc/grafana/grafana.ini'
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/grafana')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    prov_src = '/etc/grafana/provisioning/'
    prov_src = os.path.abspath(ctx.legacy_dir + prov_src)
    prov_dst = os.path.join(data_dir_dst, 'etc/grafana')
    copy_tree(ctx, [prov_src], prov_dst, uid=uid, gid=gid)

    # cert
    # NOTE(review): existence is checked on the bare /etc/grafana paths,
    # while the copies below read from the legacy_dir-prefixed paths —
    # these only coincide when legacy_dir is '/'; confirm intended.
    cert = '/etc/grafana/grafana.crt'
    key = '/etc/grafana/grafana.key'
    if os.path.exists(cert) and os.path.exists(key):
        cert_src = '/etc/grafana/grafana.crt'
        cert_src = os.path.abspath(ctx.legacy_dir + cert_src)
        makedirs(os.path.join(data_dir_dst, 'etc/grafana/certs'), uid, gid, 0o755)
        cert_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_file')
        copy_files(ctx, [cert_src], cert_dst, uid=uid, gid=gid)

        key_src = '/etc/grafana/grafana.key'
        key_src = os.path.abspath(ctx.legacy_dir + key_src)
        key_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_key')
        copy_files(ctx, [key_src], key_dst, uid=uid, gid=gid)

        # point the migrated grafana.ini at the new cert locations
        _adjust_grafana_ini(os.path.join(config_dst, 'grafana.ini'))
    else:
        logger.debug('Skipping ssl, missing cert {} or key {}'.format(cert, key))

    # data - possible custom dashboards/plugins
    data_src = '/var/lib/grafana/'
    data_src = os.path.abspath(ctx.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)
9f95a23c 5234
f6b5b4d7 5235
f67539c2
TL
def command_adopt_alertmanager(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy alertmanager daemon: stop/disable the old unit,
    migrate its config and silences/data into the cephadm data dir,
    then deploy a containerized replacement."""

    daemon_type = 'alertmanager'
    uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)

    _stop_and_disable(ctx, 'prometheus-alertmanager')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = os.path.abspath(ctx.legacy_dir + '/etc/prometheus/alertmanager.yml')
    config_dst = os.path.join(data_dir_dst, 'etc/alertmanager')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # data
    data_src = os.path.abspath(ctx.legacy_dir + '/var/lib/prometheus/alertmanager/')
    data_dst = os.path.join(data_dir_dst, 'etc/alertmanager/data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    container = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, container, uid, gid)
    update_firewalld(ctx, daemon_type)
801d1391 5264
f6b5b4d7 5265
9f95a23c
TL
5266def _adjust_grafana_ini(filename):
5267 # type: (str) -> None
5268
5269 # Update cert_file, cert_key pathnames in server section
5270 # ConfigParser does not preserve comments
5271 try:
f67539c2 5272 with open(filename, 'r') as grafana_ini:
9f95a23c 5273 lines = grafana_ini.readlines()
f67539c2
TL
5274 with open('{}.new'.format(filename), 'w') as grafana_ini:
5275 server_section = False
9f95a23c
TL
5276 for line in lines:
5277 if line.startswith('['):
f67539c2 5278 server_section = False
9f95a23c 5279 if line.startswith('[server]'):
f67539c2 5280 server_section = True
9f95a23c
TL
5281 if server_section:
5282 line = re.sub(r'^cert_file.*',
f67539c2 5283 'cert_file = /etc/grafana/certs/cert_file', line)
9f95a23c 5284 line = re.sub(r'^cert_key.*',
f67539c2 5285 'cert_key = /etc/grafana/certs/cert_key', line)
9f95a23c 5286 grafana_ini.write(line)
f67539c2 5287 os.rename('{}.new'.format(filename), filename)
9f95a23c 5288 except OSError as err:
f67539c2 5289 raise Error('Cannot update {}: {}'.format(filename, err))
9f95a23c
TL
5290
5291
f67539c2
TL
def _stop_and_disable(ctx, unit_name):
    # type: (CephadmContext, str) -> None
    """Stop `unit_name` if it is currently running, and disable it if it
    is enabled, so the legacy service never comes back after adoption."""
    enabled, state, _ = check_unit(ctx, unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'disable', unit_name])
9f95a23c
TL
5302
5303##################################
5304
9f95a23c 5305
f67539c2
TL
def command_rm_daemon(ctx):
    # type: (CephadmContext) -> None
    """Remove a single cephadm-managed daemon from this host.

    Stops and disables its systemd unit, then either renames its data dir
    into <data_dir>/<fsid>/removed (for mon/osd/prometheus, unless
    --force-delete-data was given) or deletes it outright.
    """
    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    # removing a mon or osd can lose cluster data; require confirmation
    if daemon_type in ['mon', 'osd'] and not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    call(ctx, ['systemctl', 'stop', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'reset-failed', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'disable', unit_name],
         verbosity=CallVerbosity.DEBUG)
    data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_id)
    if daemon_type in ['mon', 'osd', 'prometheus'] and \
       not ctx.force_delete_data:
        # rename it out of the way -- do not delete
        backup_dir = os.path.join(ctx.data_dir, ctx.fsid, 'removed')
        if not os.path.exists(backup_dir):
            makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
        dirname = '%s.%s_%s' % (daemon_type, daemon_id,
                                datetime.datetime.utcnow().strftime(DATEFMT))
        os.rename(data_dir,
                  os.path.join(backup_dir, dirname))
    else:
        if daemon_type == CephadmDaemon.daemon_type:
            # the cephadm exporter daemon has extra installed state to clean up
            CephadmDaemon.uninstall(ctx, ctx.fsid, daemon_type, daemon_id)
        call_throws(ctx, ['rm', '-rf', data_dir])
9f95a23c
TL
5339
5340##################################
5341
f6b5b4d7 5342
f67539c2
TL
def command_rm_cluster(ctx):
    # type: (CephadmContext) -> None
    """Remove all traces of the cluster identified by ctx.fsid from this host.

    Stops and disables every cephadm-managed daemon for the fsid, removes
    systemd units, data, (optionally) logs, logrotate config, and the
    /etc/ceph config/keyring files if they belong to this fsid.
    """
    if not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    # stop + disable individual daemon units
    for d in list_daemons(ctx, detail=False):
        if d['fsid'] != ctx.fsid:
            continue
        if d['style'] != 'cephadm:v1':
            continue
        unit_name = get_unit_name(ctx.fsid, d['name'])
        call(ctx, ['systemctl', 'stop', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'reset-failed', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'disable', unit_name],
             verbosity=CallVerbosity.DEBUG)

    # cluster units
    for unit_name in ['ceph-%s.target' % ctx.fsid]:
        call(ctx, ['systemctl', 'stop', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'reset-failed', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'disable', unit_name],
             verbosity=CallVerbosity.DEBUG)

    # systemd escapes '-' in slice names as \x2d
    slice_name = 'system-%s.slice' % (('ceph-%s' % ctx.fsid).replace('-', '\\x2d'))
    call(ctx, ['systemctl', 'stop', slice_name],
         verbosity=CallVerbosity.DEBUG)

    # rm units
    call_throws(ctx, ['rm', '-f', ctx.unit_dir +  # noqa: W504
                      '/ceph-%s@.service' % ctx.fsid])
    call_throws(ctx, ['rm', '-f', ctx.unit_dir +  # noqa: W504
                      '/ceph-%s.target' % ctx.fsid])
    call_throws(ctx, ['rm', '-rf',
                      ctx.unit_dir + '/ceph-%s.target.wants' % ctx.fsid])
    # rm data
    call_throws(ctx, ['rm', '-rf', ctx.data_dir + '/' + ctx.fsid])

    if not ctx.keep_logs:
        # rm logs
        call_throws(ctx, ['rm', '-rf', ctx.log_dir + '/' + ctx.fsid])
        # NOTE(review): the '*' glob below relies on shell expansion; if
        # call_throws execs without a shell this argument is passed
        # literally and matches nothing -- confirm intended behavior
        call_throws(ctx, ['rm', '-rf', ctx.log_dir +  # noqa: W504
                          '/*.wants/ceph-%s@*' % ctx.fsid])

    # rm logrotate config
    call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/ceph-%s' % ctx.fsid])

    # clean up config, keyring, and pub key files
    files = ['/etc/ceph/ceph.conf', '/etc/ceph/ceph.pub', '/etc/ceph/ceph.client.admin.keyring']

    # only delete these if ceph.conf mentions our fsid (i.e. they belong
    # to the cluster being removed, not some other cluster on this host)
    if os.path.exists(files[0]):
        valid_fsid = False
        with open(files[0]) as f:
            if ctx.fsid in f.read():
                valid_fsid = True
        if valid_fsid:
            for n in range(0, len(files)):
                if os.path.exists(files[n]):
                    os.remove(files[n])
5410
9f95a23c
TL
5411##################################
5412
f67539c2
TL
5413
def check_time_sync(ctx, enabler=None):
    # type: (CephadmContext, Optional[Packager]) -> bool
    """Return True when one of the known time-sync systemd units is active.

    If *enabler* is given, check_units may use it to enable a unit.
    """
    candidate_units = [
        'chrony.service',  # 18.04 (at least)
        'chronyd.service',  # el / opensuse
        'systemd-timesyncd.service',
        'ntpd.service',  # el7 (at least)
        'ntp.service',  # 18.04 (at least)
        'ntpsec.service',  # 20.04 (at least) / buster
    ]
    if check_units(ctx, candidate_units, enabler):
        return True
    logger.warning('No time sync service is running; checked for %s' % candidate_units)
    return False
5428
f6b5b4d7 5429
f67539c2
TL
def command_check_host(ctx: CephadmContext) -> None:
    """Verify this host meets cephadm's requirements.

    Checks for a container engine, required binaries, active time sync and
    (optionally) the expected hostname; raises Error listing every problem
    found, otherwise logs that the host looks OK.
    """
    container_path = ctx.container_engine.path

    errors = []
    commands = ['systemctl', 'lvcreate']

    try:
        check_container_engine(ctx)
        logger.info('podman|docker (%s) is present' % container_path)
    except Error as e:
        errors.append(str(e))

    for command in commands:
        try:
            find_program(command)
            logger.info('%s is present' % command)
        except ValueError:
            errors.append('%s binary does not appear to be installed' % command)

    # check for configured+running chronyd or ntp
    if not check_time_sync(ctx):
        errors.append('No time synchronization is active')

    if 'expect_hostname' in ctx and ctx.expect_hostname:
        if get_hostname().lower() != ctx.expect_hostname.lower():
            errors.append('hostname "%s" does not match expected hostname "%s"' % (
                get_hostname(), ctx.expect_hostname))
        else:
            # BUG FIX: this was previously logged unconditionally, claiming a
            # match even when the mismatch error above had just been recorded
            logger.info('Hostname "%s" matches what is expected.',
                        ctx.expect_hostname)

    if errors:
        raise Error('\nERROR: '.join(errors))

    logger.info('Host looks OK')
5464
5465##################################
5466
f6b5b4d7 5467
f67539c2 5468def command_prepare_host(ctx: CephadmContext) -> None:
9f95a23c
TL
5469 logger.info('Verifying podman|docker is present...')
5470 pkg = None
f67539c2
TL
5471 try:
5472 check_container_engine(ctx)
5473 except Error as e:
5474 logger.warning(str(e))
9f95a23c 5475 if not pkg:
f67539c2 5476 pkg = create_packager(ctx)
9f95a23c
TL
5477 pkg.install_podman()
5478
5479 logger.info('Verifying lvm2 is present...')
5480 if not find_executable('lvcreate'):
5481 if not pkg:
f67539c2 5482 pkg = create_packager(ctx)
9f95a23c
TL
5483 pkg.install(['lvm2'])
5484
5485 logger.info('Verifying time synchronization is in place...')
f67539c2 5486 if not check_time_sync(ctx):
9f95a23c 5487 if not pkg:
f67539c2 5488 pkg = create_packager(ctx)
9f95a23c
TL
5489 pkg.install(['chrony'])
5490 # check again, and this time try to enable
5491 # the service
f67539c2 5492 check_time_sync(ctx, enabler=pkg)
9f95a23c 5493
f67539c2
TL
5494 if 'expect_hostname' in ctx and ctx.expect_hostname and ctx.expect_hostname != get_hostname():
5495 logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), ctx.expect_hostname))
5496 call_throws(ctx, ['hostname', ctx.expect_hostname])
9f95a23c 5497 with open('/etc/hostname', 'w') as f:
f67539c2 5498 f.write(ctx.expect_hostname + '\n')
9f95a23c
TL
5499
5500 logger.info('Repeating the final host check...')
f67539c2 5501 command_check_host(ctx)
9f95a23c
TL
5502
5503##################################
5504
f6b5b4d7 5505
9f95a23c
TL
class CustomValidation(argparse.Action):
    """Argparse action adding extra validation for --name and --exporter-config."""

    def _check_name(self, values):
        # a daemon name must look like <type>.<id>
        try:
            daemon_type, _ = values.split('.', 1)
        except ValueError:
            raise argparse.ArgumentError(self,
                                         'must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com')

        daemons = get_supported_daemons()
        if daemon_type not in daemons:
            raise argparse.ArgumentError(self,
                                         'name must declare the type of daemon e.g. '
                                         '{}'.format(', '.join(daemons)))

    def __call__(self, parser, namespace, values, option_string=None):
        if self.dest == 'name':
            self._check_name(values)
            setattr(namespace, self.dest, values)
        elif self.dest == 'exporter_config':
            cfg = get_parm(values)
            # run the class' validate method, and convert to an argparse error
            # if problems are found
            try:
                CephadmDaemon.validate_config(cfg)
            except Error as e:
                raise argparse.ArgumentError(self, str(e))
            setattr(namespace, self.dest, cfg)
9f95a23c
TL
5535
5536##################################
5537
f6b5b4d7 5538
9f95a23c 5539def get_distro():
e306af50 5540 # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
9f95a23c
TL
5541 distro = None
5542 distro_version = None
5543 distro_codename = None
5544 with open('/etc/os-release', 'r') as f:
5545 for line in f.readlines():
5546 line = line.strip()
5547 if '=' not in line or line.startswith('#'):
5548 continue
5549 (var, val) = line.split('=', 1)
5550 if val[0] == '"' and val[-1] == '"':
5551 val = val[1:-1]
5552 if var == 'ID':
5553 distro = val.lower()
5554 elif var == 'VERSION_ID':
5555 distro_version = val.lower()
5556 elif var == 'VERSION_CODENAME':
5557 distro_codename = val.lower()
5558 return distro, distro_version, distro_codename
5559
f6b5b4d7 5560
9f95a23c 5561class Packager(object):
f67539c2
TL
5562 def __init__(self, ctx: CephadmContext,
5563 stable=None, version=None, branch=None, commit=None):
9f95a23c
TL
5564 assert \
5565 (stable and not version and not branch and not commit) or \
5566 (not stable and version and not branch and not commit) or \
5567 (not stable and not version and branch) or \
5568 (not stable and not version and not branch and not commit)
f67539c2 5569 self.ctx = ctx
9f95a23c
TL
5570 self.stable = stable
5571 self.version = version
5572 self.branch = branch
5573 self.commit = commit
5574
5575 def add_repo(self):
5576 raise NotImplementedError
5577
5578 def rm_repo(self):
5579 raise NotImplementedError
5580
5581 def query_shaman(self, distro, distro_version, branch, commit):
5582 # query shaman
f91f0fd5 5583 logger.info('Fetching repo metadata from shaman and chacra...')
9f95a23c
TL
5584 shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
5585 distro=distro,
5586 distro_version=distro_version,
5587 branch=branch,
5588 sha1=commit or 'latest',
5589 arch=get_arch()
5590 )
5591 try:
5592 shaman_response = urlopen(shaman_url)
5593 except HTTPError as err:
f91f0fd5 5594 logger.error('repository not found in shaman (might not be available yet)')
9f95a23c 5595 raise Error('%s, failed to fetch %s' % (err, shaman_url))
f67539c2 5596 chacra_url = ''
9f95a23c
TL
5597 try:
5598 chacra_url = shaman_response.geturl()
5599 chacra_response = urlopen(chacra_url)
5600 except HTTPError as err:
f91f0fd5 5601 logger.error('repository not found in chacra (might not be available yet)')
9f95a23c
TL
5602 raise Error('%s, failed to fetch %s' % (err, chacra_url))
5603 return chacra_response.read().decode('utf-8')
5604
5605 def repo_gpgkey(self):
f67539c2
TL
5606 if self.ctx.gpg_url:
5607 return self.ctx.gpg_url
9f95a23c
TL
5608 if self.stable or self.version:
5609 return 'https://download.ceph.com/keys/release.asc', 'release'
5610 else:
5611 return 'https://download.ceph.com/keys/autobuild.asc', 'autobuild'
5612
5613 def enable_service(self, service):
5614 """
5615 Start and enable the service (typically using systemd).
5616 """
f67539c2 5617 call_throws(self.ctx, ['systemctl', 'enable', '--now', service])
9f95a23c
TL
5618
5619
class Apt(Packager):
    """Repo and package management for Debian/Ubuntu via apt."""

    # os-release ID -> name used in download.ceph.com repo paths
    DISTRO_NAMES = {
        'ubuntu': 'ubuntu',
        'debian': 'debian',
    }

    def __init__(self, ctx: CephadmContext,
                 stable, version, branch, commit,
                 distro, distro_version, distro_codename):
        super(Apt, self).__init__(ctx, stable=stable, version=version,
                                  branch=branch, commit=commit)
        self.ctx = ctx
        self.distro = self.DISTRO_NAMES[distro]
        self.distro_codename = distro_codename
        self.distro_version = distro_version

    def repo_path(self):
        """Path of the apt sources file this packager manages."""
        return '/etc/apt/sources.list.d/ceph.list'

    def add_repo(self):
        """Install the repo GPG key and write the ceph.list sources file."""

        url, name = self.repo_gpgkey()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'w') as f:
            f.write(key)

        # stable/version installs point at download.ceph.com; dev builds
        # get a ready-made repo file from shaman/chacra
        if self.version:
            content = 'deb %s/debian-%s/ %s main\n' % (
                self.ctx.repo_url, self.version, self.distro_codename)
        elif self.stable:
            content = 'deb %s/debian-%s/ %s main\n' % (
                self.ctx.repo_url, self.stable, self.distro_codename)
        else:
            content = self.query_shaman(self.distro, self.distro_codename, self.branch,
                                        self.commit)

        logger.info('Installing repo file at %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        """Remove the GPG keys and sources file written by add_repo."""
        for name in ['autobuild', 'release']:
            p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
            if os.path.exists(p):
                logger.info('Removing repo GPG key %s...' % p)
                os.unlink(p)
        if os.path.exists(self.repo_path()):
            logger.info('Removing repo at %s...' % self.repo_path())
            os.unlink(self.repo_path())

        if self.distro == 'ubuntu':
            self.rm_kubic_repo()

    def install(self, ls):
        """Install the given packages with apt-get."""
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, ['apt-get', 'install', '-y'] + ls)

    def install_podman(self):
        """Install podman (via the kubic repo on Ubuntu), falling back to docker."""
        if self.distro == 'ubuntu':
            logger.info('Setting up repo for podman...')
            self.add_kubic_repo()
            call_throws(self.ctx, ['apt-get', 'update'])

        logger.info('Attempting podman install...')
        try:
            self.install(['podman'])
        except Error:
            logger.info('Podman did not work. Falling back to docker...')
            self.install(['docker.io'])

    def kubic_repo_url(self):
        """openSUSE kubic repo providing podman packages for Ubuntu."""
        return 'https://download.opensuse.org/repositories/devel:/kubic:/' \
               'libcontainers:/stable/xUbuntu_%s/' % self.distro_version

    def kubic_repo_path(self):
        return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list'

    # NOTE: 'kubric' below is a long-standing typo for 'kubic'; the method
    # names are kept as-is for compatibility
    def kubric_repo_gpgkey_url(self):
        return '%s/Release.key' % self.kubic_repo_url()

    def kubric_repo_gpgkey_path(self):
        return '/etc/apt/trusted.gpg.d/kubic.release.gpg'

    def add_kubic_repo(self):
        """Install the kubic repo GPG key and sources file."""
        url = self.kubric_repo_gpgkey_url()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        tmp_key = write_tmp(key, 0, 0)
        keyring = self.kubric_repo_gpgkey_path()
        call_throws(self.ctx, ['apt-key', '--keyring', keyring, 'add', tmp_key.name])

        logger.info('Installing repo file at %s...' % self.kubic_repo_path())
        content = 'deb %s /\n' % self.kubic_repo_url()
        with open(self.kubic_repo_path(), 'w') as f:
            f.write(content)

    def rm_kubic_repo(self):
        """Remove the kubic keyring and sources file, if present."""
        keyring = self.kubric_repo_gpgkey_path()
        if os.path.exists(keyring):
            logger.info('Removing repo GPG key %s...' % keyring)
            os.unlink(keyring)

        p = self.kubic_repo_path()
        if os.path.exists(p):
            logger.info('Removing repo at %s...' % p)
            os.unlink(p)
5739
f6b5b4d7 5740
9f95a23c
TL
class YumDnf(Packager):
    """Repo and package management for RPM distros via yum/dnf."""

    # os-release ID -> (shaman distro name, repo code prefix)
    DISTRO_NAMES = {
        'centos': ('centos', 'el'),
        'rhel': ('centos', 'el'),
        'scientific': ('centos', 'el'),
        'fedora': ('fedora', 'fc'),
    }

    def __init__(self, ctx: CephadmContext,
                 stable, version, branch, commit,
                 distro, distro_version):
        super(YumDnf, self).__init__(ctx, stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.ctx = ctx
        self.major = int(distro_version.split('.')[0])
        self.distro_normalized = self.DISTRO_NAMES[distro][0]
        self.distro_code = self.DISTRO_NAMES[distro][1] + str(self.major)
        # BUG FIX: this used to compare the full distro_code (e.g. 'el8',
        # 'fc33') against the bare prefixes 'el'/'fc', which never matched,
        # so dnf was never selected; compare the prefix instead
        code_prefix = self.DISTRO_NAMES[distro][1]
        if (code_prefix == 'fc' and self.major >= 30) or \
           (code_prefix == 'el' and self.major >= 8):
            self.tool = 'dnf'
        else:
            self.tool = 'yum'

    def custom_repo(self, **kw):
        """
        Repo files need special care in that a whole line should not be present
        if there is no value for it. Because we were using `format()` we could
        not conditionally add a line for a repo file. So the end result would
        contain a key with a missing value (say if we were passing `None`).

        For example, it could look like::

        [ceph repo]
        name= ceph repo
        proxy=
        gpgcheck=

        Which breaks. This function allows us to conditionally add lines,
        preserving an order and be more careful.

        Previously, and for historical purposes, this is how the template used
        to look::

        custom_repo =
        [{repo_name}]
        name={name}
        baseurl={baseurl}
        enabled={enabled}
        gpgcheck={gpgcheck}
        type={_type}
        gpgkey={gpgkey}
        proxy={proxy}

        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self):
        """Path of the yum repo file this packager manages."""
        return '/etc/yum.repos.d/ceph.repo'

    def repo_baseurl(self):
        """Return the rpm repo base URL for the requested version/release."""
        assert self.stable or self.version
        if self.version:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.version,
                                     self.distro_code)
        else:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.stable,
                                     self.distro_code)

    def add_repo(self):
        """Write ceph.repo (or a shaman-provided repo for dev builds) and enable EPEL."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro_normalized, self.major,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        if self.distro_code.startswith('el'):
            logger.info('Enabling EPEL...')
            call_throws(self.ctx, [self.tool, 'install', '-y', 'epel-release'])

    def rm_repo(self):
        """Remove the repo file if present."""
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls):
        """Install the given packages with yum/dnf."""
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, [self.tool, 'install', '-y'] + ls)

    def install_podman(self):
        self.install(['podman'])
5871
5872
class Zypper(Packager):
    """Repo and package management for SUSE distributions via zypper."""

    DISTRO_NAMES = [
        'sles',
        'opensuse-tumbleweed',
        'opensuse-leap'
    ]

    def __init__(self, ctx: CephadmContext,
                 stable, version, branch, commit,
                 distro, distro_version):
        super(Zypper, self).__init__(ctx, stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.ctx = ctx
        self.tool = 'zypper'
        # default to a known leap version; keep it for tumbleweed, otherwise
        # use the reported distro_version when available
        self.distro = 'opensuse'
        self.distro_version = '15.1'
        if 'tumbleweed' not in distro and distro_version is not None:
            self.distro_version = distro_version

    def custom_repo(self, **kw):
        """
        See YumDnf for format explanation.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self):
        """Path of the zypper repo file this packager manages."""
        return '/etc/zypp/repos.d/ceph.repo'

    def repo_baseurl(self):
        """Return the rpm repo base URL for the requested version/release.

        BUG FIX: the --version branch previously interpolated self.stable
        (which is None when only --version is given), producing an
        'rpm-None' URL; it now uses self.version.
        """
        assert self.stable or self.version
        if self.version:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url,
                                     self.version, self.distro)
        else:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url,
                                     self.stable, self.distro)

    def add_repo(self):
        """Write ceph.repo (or a shaman-provided repo for dev builds)."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro, self.distro_version,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        """Remove the repo file if present."""
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls):
        """Install the given packages with zypper."""
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, [self.tool, 'in', '-y'] + ls)

    def install_podman(self):
        self.install(['podman'])
5968
5969
f67539c2
TL
def create_packager(ctx: CephadmContext,
                    stable=None, version=None, branch=None, commit=None):
    """Instantiate the Packager subclass matching the host distribution."""
    distro, distro_version, distro_codename = get_distro()
    build_args = dict(stable=stable, version=version,
                      branch=branch, commit=commit)
    if distro in YumDnf.DISTRO_NAMES:
        return YumDnf(ctx, distro=distro, distro_version=distro_version,
                      **build_args)
    if distro in Apt.DISTRO_NAMES:
        return Apt(ctx, distro=distro, distro_version=distro_version,
                   distro_codename=distro_codename, **build_args)
    if distro in Zypper.DISTRO_NAMES:
        return Zypper(ctx, distro=distro, distro_version=distro_version,
                      **build_args)
    raise Error('Distro %s version %s not supported' % (distro, distro_version))
5987
5988
f67539c2
TL
def command_add_repo(ctx: CephadmContext):
    """Validate the release/version/dev options and configure a Ceph repo."""
    if ctx.version and ctx.release:
        raise Error('you can specify either --release or --version but not both')
    if not any([ctx.version, ctx.release, ctx.dev, ctx.dev_commit]):
        raise Error('please supply a --release, --version, --dev or --dev-commit argument')
    if ctx.version:
        # a version must have exactly three dot-separated components
        try:
            (x, y, z) = ctx.version.split('.')
        except Exception:
            raise Error('version must be in the form x.y.z (e.g., 15.2.0)')

    pkg = create_packager(ctx, stable=ctx.release,
                          version=ctx.version,
                          branch=ctx.dev,
                          commit=ctx.dev_commit)
    pkg.add_repo()
6005
f6b5b4d7 6006
f67539c2
TL
def command_rm_repo(ctx: CephadmContext):
    """Remove the Ceph package repository from this host."""
    create_packager(ctx).rm_repo()
6010
f6b5b4d7 6011
f67539c2
TL
def command_install(ctx: CephadmContext):
    """Install the requested packages via the native package manager."""
    packager = create_packager(ctx)
    packager.install(ctx.packages)
9f95a23c
TL
6015
6016##################################
6017
f67539c2 6018
f91f0fd5
TL
def get_ipv4_address(ifname):
    # type: (str) -> str
    """Return the IPv4 address of *ifname* in CIDR form ('a.b.c.d/nn').

    Returns '' when the interface has no IPv4 address (or doesn't exist).
    """
    def _extract(sock, offset):
        # issue the SIOC* ioctl for the interface and pull the 4 address
        # bytes out of the returned ifreq structure (offset 20..24)
        return socket.inet_ntop(
            socket.AF_INET,
            fcntl.ioctl(
                sock.fileno(),
                offset,
                struct.pack('256s', bytes(ifname[:15], 'utf-8'))
            )[20:24])

    # BUG FIX: the query socket was previously never closed (fd leak);
    # use a context manager so it is closed on every path
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        try:
            addr = _extract(s, 35093)  # '0x8915' = SIOCGIFADDR
            dq_mask = _extract(s, 35099)  # 0x891b = SIOCGIFNETMASK
        except OSError:
            # interface does not have an ipv4 address
            return ''

    # convert the dotted-quad netmask to a prefix length by counting set bits
    dec_mask = sum(bin(int(octet)).count('1')
                   for octet in dq_mask.split('.'))
    return '{}/{}'.format(addr, dec_mask)
6041
6042
def get_ipv6_address(ifname):
    # type: (str) -> str
    """Return the first IPv6 address of *ifname* in CIDR form, or '' if none.

    Parses /proc/net/if_inet6 rather than issuing ioctls.
    """
    if not os.path.exists('/proc/net/if_inet6'):
        return ''

    raw = read_file(['/proc/net/if_inet6'])
    data = raw.splitlines()
    # based on docs @ https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/ch11s04.html
    # field 0 is the bare-hex address, the last field is the interface name;
    # field 2 is used as the prefix length below (hex)
    for iface_setting in data:
        field = iface_setting.split()
        if field[-1] == ifname:
            ipv6_raw = field[0]
            # re-insert ':' separators into the 32-char hex string
            ipv6_fmtd = ':'.join([ipv6_raw[_p:_p + 4] for _p in range(0, len(field[0]), 4)])
            # apply naming rules using ipaddress module
            ipv6 = ipaddress.ip_address(ipv6_fmtd)
            return '{}/{}'.format(str(ipv6), int('0x{}'.format(field[2]), 16))
    return ''
6061
6062
def bytes_to_human(num, mode='decimal'):
    # type: (float, str) -> str
    """Render a byte count in human-readable form.

    :param num: number, in bytes, to convert
    :param mode: 'decimal' (default, powers of 1000) or 'binary' (powers of 1024)
    :returns: formatted string such as '2.5MB' or '1.0KiB'
    """
    if mode == 'binary':
        suffixes = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
        step = 1024.0
        top_unit = 'YiB'
    else:
        suffixes = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
        step = 1000.0
        top_unit = 'YB'

    value = num
    for suffix in suffixes:
        if abs(value) < step:
            return '%3.1f%s' % (value, suffix)
        value /= step
    return '%.1f%s' % (value, top_unit)
f91f0fd5
TL
6085
6086
def read_file(path_list, file_name=''):
    # type: (List[str], str) -> str
    """Return the content of the first file found within the `path_list`

    :param path_list: list of file paths to search
    :param file_name: optional file_name to be applied to a file path
    :returns: stripped content of the file, or 'Unknown'
    """
    for base in path_list:
        candidate = os.path.join(base, file_name) if file_name else base
        if not os.path.exists(candidate):
            continue
        with open(candidate, 'r') as fh:
            try:
                return fh.read().strip()
            except OSError:
                # sysfs may populate the file, but for devices like
                # virtio reads can fail
                return 'Unknown'
    return 'Unknown'
f91f0fd5
TL
6111
6112##################################
f67539c2
TL
6113
6114
f91f0fd5
TL
6115class HostFacts():
6116 _dmi_path_list = ['/sys/class/dmi/id']
6117 _nic_path_list = ['/sys/class/net']
6118 _selinux_path_list = ['/etc/selinux/config']
6119 _apparmor_path_list = ['/etc/apparmor']
6120 _disk_vendor_workarounds = {
f67539c2 6121 '0x1af4': 'Virtio Block Device'
f91f0fd5
TL
6122 }
6123
f67539c2
TL
    def __init__(self, ctx: CephadmContext):
        """Gather static host facts (cpu, nics, arch, kernel) at creation time."""
        self.ctx: CephadmContext = ctx
        self.cpu_model: str = 'Unknown'
        self.cpu_count: int = 0
        self.cpu_cores: int = 0
        self.cpu_threads: int = 0
        self.interfaces: Dict[str, Any] = {}

        # raw lines from /proc/meminfo (consumed by methods outside this view)
        self._meminfo: List[str] = read_file(['/proc/meminfo']).splitlines()
        self._get_cpuinfo()
        self._process_nics()
        self.arch: str = platform.processor()
        self.kernel: str = platform.release()
f91f0fd5
TL
6137
6138 def _get_cpuinfo(self):
6139 # type: () -> None
6140 """Determine cpu information via /proc/cpuinfo"""
6141 raw = read_file(['/proc/cpuinfo'])
6142 output = raw.splitlines()
6143 cpu_set = set()
6144
6145 for line in output:
f67539c2
TL
6146 field = [f.strip() for f in line.split(':')]
6147 if 'model name' in line:
f91f0fd5 6148 self.cpu_model = field[1]
f67539c2 6149 if 'physical id' in line:
f91f0fd5 6150 cpu_set.add(field[1])
f67539c2 6151 if 'siblings' in line:
f91f0fd5 6152 self.cpu_threads = int(field[1].strip())
f67539c2 6153 if 'cpu cores' in line:
f91f0fd5
TL
6154 self.cpu_cores = int(field[1].strip())
6155 pass
6156 self.cpu_count = len(cpu_set)
6157
6158 def _get_block_devs(self):
6159 # type: () -> List[str]
6160 """Determine the list of block devices by looking at /sys/block"""
6161 return [dev for dev in os.listdir('/sys/block')
6162 if not dev.startswith('dm')]
6163
6164 def _get_devs_by_type(self, rota='0'):
6165 # type: (str) -> List[str]
6166 """Filter block devices by a given rotational attribute (0=flash, 1=spinner)"""
6167 devs = list()
6168 for blk_dev in self._get_block_devs():
6169 rot_path = '/sys/block/{}/queue/rotational'.format(blk_dev)
6170 rot_value = read_file([rot_path])
6171 if rot_value == rota:
6172 devs.append(blk_dev)
6173 return devs
6174
6175 @property
6176 def operating_system(self):
6177 # type: () -> str
6178 """Determine OS version"""
6179 raw_info = read_file(['/etc/os-release'])
6180 os_release = raw_info.splitlines()
6181 rel_str = 'Unknown'
6182 rel_dict = dict()
6183
6184 for line in os_release:
f67539c2 6185 if '=' in line:
f91f0fd5
TL
6186 var_name, var_value = line.split('=')
6187 rel_dict[var_name] = var_value.strip('"')
6188
6189 # Would normally use PRETTY_NAME, but NAME and VERSION are more
6190 # consistent
f67539c2
TL
6191 if all(_v in rel_dict for _v in ['NAME', 'VERSION']):
6192 rel_str = '{} {}'.format(rel_dict['NAME'], rel_dict['VERSION'])
f91f0fd5
TL
6193 return rel_str
6194
    @property
    def hostname(self):
        # type: () -> str
        """Return the hostname as reported by the platform module"""
        return platform.node()
6200
6201 @property
6202 def subscribed(self):
6203 # type: () -> str
6204 """Highlevel check to see if the host is subscribed to receive updates/support"""
6205 def _red_hat():
6206 # type: () -> str
6207 # RHEL 7 and RHEL 8
6208 entitlements_dir = '/etc/pki/entitlement'
6209 if os.path.exists(entitlements_dir):
6210 pems = glob('{}/*.pem'.format(entitlements_dir))
6211 if len(pems) >= 2:
f67539c2 6212 return 'Yes'
f91f0fd5 6213
f67539c2 6214 return 'No'
f91f0fd5
TL
6215
6216 os_name = self.operating_system
f67539c2 6217 if os_name.upper().startswith('RED HAT'):
f91f0fd5
TL
6218 return _red_hat()
6219
f67539c2 6220 return 'Unknown'
f91f0fd5
TL
6221
    @property
    def hdd_count(self):
        # type: () -> int
        """Return a count of HDDs (spinners, rotational=1)"""
        return len(self._get_devs_by_type(rota='1'))
6227
6228 def _get_capacity(self, dev):
6229 # type: (str) -> int
6230 """Determine the size of a given device"""
6231 size_path = os.path.join('/sys/block', dev, 'size')
6232 size_blocks = int(read_file([size_path]))
6233 blk_path = os.path.join('/sys/block', dev, 'queue', 'logical_block_size')
6234 blk_count = int(read_file([blk_path]))
6235 return size_blocks * blk_count
6236
6237 def _get_capacity_by_type(self, rota='0'):
6238 # type: (str) -> int
6239 """Return the total capacity of a category of device (flash or hdd)"""
6240 devs = self._get_devs_by_type(rota=rota)
6241 capacity = 0
6242 for dev in devs:
6243 capacity += self._get_capacity(dev)
6244 return capacity
6245
6246 def _dev_list(self, dev_list):
6247 # type: (List[str]) -> List[Dict[str, object]]
6248 """Return a 'pretty' name list for each device in the `dev_list`"""
6249 disk_list = list()
6250
6251 for dev in dev_list:
6252 disk_model = read_file(['/sys/block/{}/device/model'.format(dev)]).strip()
6253 disk_rev = read_file(['/sys/block/{}/device/rev'.format(dev)]).strip()
6254 disk_wwid = read_file(['/sys/block/{}/device/wwid'.format(dev)]).strip()
6255 vendor = read_file(['/sys/block/{}/device/vendor'.format(dev)]).strip()
6256 disk_vendor = HostFacts._disk_vendor_workarounds.get(vendor, vendor)
6257 disk_size_bytes = self._get_capacity(dev)
6258 disk_list.append({
f67539c2
TL
6259 'description': '{} {} ({})'.format(disk_vendor, disk_model, bytes_to_human(disk_size_bytes)),
6260 'vendor': disk_vendor,
6261 'model': disk_model,
6262 'rev': disk_rev,
6263 'wwid': disk_wwid,
6264 'dev_name': dev,
6265 'disk_size_bytes': disk_size_bytes,
6266 })
f91f0fd5
TL
6267 return disk_list
6268
6269 @property
6270 def hdd_list(self):
6271 # type: () -> List[Dict[str, object]]
6272 """Return a list of devices that are HDDs (spinners)"""
6273 devs = self._get_devs_by_type(rota='1')
6274 return self._dev_list(devs)
6275
6276 @property
6277 def flash_list(self):
6278 # type: () -> List[Dict[str, object]]
6279 """Return a list of devices that are flash based (SSD, NVMe)"""
6280 devs = self._get_devs_by_type(rota='0')
6281 return self._dev_list(devs)
6282
6283 @property
6284 def hdd_capacity_bytes(self):
6285 # type: () -> int
6286 """Return the total capacity for all HDD devices (bytes)"""
6287 return self._get_capacity_by_type(rota='1')
6288
    @property
    def hdd_capacity(self):
        # type: () -> str
        """Return the total capacity for all HDD devices (human readable format)"""
        # thin wrapper: formats hdd_capacity_bytes via bytes_to_human
        return bytes_to_human(self.hdd_capacity_bytes)
6294
6295 @property
6296 def cpu_load(self):
6297 # type: () -> Dict[str, float]
6298 """Return the cpu load average data for the host"""
6299 raw = read_file(['/proc/loadavg']).strip()
6300 data = raw.split()
6301 return {
f67539c2
TL
6302 '1min': float(data[0]),
6303 '5min': float(data[1]),
6304 '15min': float(data[2]),
f91f0fd5
TL
6305 }
6306
6307 @property
6308 def flash_count(self):
6309 # type: () -> int
6310 """Return the number of flash devices in the system (SSD, NVMe)"""
6311 return len(self._get_devs_by_type(rota='0'))
6312
6313 @property
6314 def flash_capacity_bytes(self):
6315 # type: () -> int
6316 """Return the total capacity for all flash devices (bytes)"""
6317 return self._get_capacity_by_type(rota='0')
6318
    @property
    def flash_capacity(self):
        # type: () -> str
        """Return the total capacity for all Flash devices (human readable format)"""
        # thin wrapper: formats flash_capacity_bytes via bytes_to_human
        return bytes_to_human(self.flash_capacity_bytes)
6324
    def _process_nics(self):
        # type: () -> None
        """Walk the sysfs NIC entries and populate self.interfaces with metadata.

        For every interface found under the paths in HostFacts._nic_path_list
        this records mtu, operstate, speed, driver, upper/lower device
        relationships, a type classification and the IPv4/IPv6 addresses.
        """
        # ARPHRD type codes, from
        # https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
        hw_lookup = {
            '1': 'ethernet',
            '32': 'infiniband',
            '772': 'loopback',
        }

        for nic_path in HostFacts._nic_path_list:
            if not os.path.exists(nic_path):
                continue
            for iface in os.listdir(nic_path):

                # bond/bridge membership is exposed as lower_*/upper_* symlinks
                lower_devs_list = [os.path.basename(link.replace('lower_', '')) for link in glob(os.path.join(nic_path, iface, 'lower_*'))]
                upper_devs_list = [os.path.basename(link.replace('upper_', '')) for link in glob(os.path.join(nic_path, iface, 'upper_*'))]

                try:
                    mtu = int(read_file([os.path.join(nic_path, iface, 'mtu')]))
                except ValueError:
                    # read failed or produced a non-numeric value
                    mtu = 0

                operstate = read_file([os.path.join(nic_path, iface, 'operstate')])
                try:
                    speed = int(read_file([os.path.join(nic_path, iface, 'speed')]))
                except (OSError, ValueError):
                    # OSError : device doesn't support the ethtool get_link_ksettings
                    # ValueError : raised when the read fails, and returns Unknown
                    #
                    # Either way, we show a -1 when speed isn't available
                    speed = -1

                # classify: bridge/bonding sysfs dirs take precedence over the
                # generic ARPHRD type code
                if os.path.exists(os.path.join(nic_path, iface, 'bridge')):
                    nic_type = 'bridge'
                elif os.path.exists(os.path.join(nic_path, iface, 'bonding')):
                    nic_type = 'bonding'
                else:
                    nic_type = hw_lookup.get(read_file([os.path.join(nic_path, iface, 'type')]), 'Unknown')

                # only real hardware carries a 'device' symlink in sysfs
                dev_link = os.path.join(nic_path, iface, 'device')
                if os.path.exists(dev_link):
                    iftype = 'physical'
                    driver_path = os.path.join(dev_link, 'driver')
                    if os.path.exists(driver_path):
                        # the driver symlink's target name is the module name
                        driver = os.path.basename(os.path.realpath(driver_path))
                    else:
                        driver = 'Unknown'

                else:
                    iftype = 'logical'
                    driver = ''

                self.interfaces[iface] = {
                    'mtu': mtu,
                    'upper_devs_list': upper_devs_list,
                    'lower_devs_list': lower_devs_list,
                    'operstate': operstate,
                    'iftype': iftype,
                    'nic_type': nic_type,
                    'driver': driver,
                    'speed': speed,
                    'ipv4_address': get_ipv4_address(iface),
                    'ipv6_address': get_ipv6_address(iface),
                }
6390
6391 @property
6392 def nic_count(self):
6393 # type: () -> int
6394 """Return a total count of all physical NICs detected in the host"""
6395 phys_devs = []
6396 for iface in self.interfaces:
f67539c2 6397 if self.interfaces[iface]['iftype'] == 'physical':
f91f0fd5
TL
6398 phys_devs.append(iface)
6399 return len(phys_devs)
6400
f91f0fd5
TL
6401 def _get_mem_data(self, field_name):
6402 # type: (str) -> int
6403 for line in self._meminfo:
6404 if line.startswith(field_name):
6405 _d = line.split()
6406 return int(_d[1])
6407 return 0
6408
    @property
    def memory_total_kb(self):
        # type: () -> int
        """Determine the memory installed (kb)"""
        # MemTotal field of /proc/meminfo (cached in self._meminfo)
        return self._get_mem_data('MemTotal')
6414
    @property
    def memory_free_kb(self):
        # type: () -> int
        """Determine the memory free (not cache, immediately usable)"""
        # MemFree field of /proc/meminfo (cached in self._meminfo)
        return self._get_mem_data('MemFree')
6420
    @property
    def memory_available_kb(self):
        # type: () -> int
        """Determine the memory available to new applications without swapping"""
        # MemAvailable field of /proc/meminfo (cached in self._meminfo)
        return self._get_mem_data('MemAvailable')
6426
    @property
    def vendor(self):
        # type: () -> str
        """Determine server vendor from DMI data in sysfs"""
        # reads sys_vendor from the first DMI path that exists
        return read_file(HostFacts._dmi_path_list, 'sys_vendor')
6432
6433 @property
6434 def model(self):
6435 # type: () -> str
6436 """Determine server model information from DMI data in sysfs"""
f67539c2
TL
6437 family = read_file(HostFacts._dmi_path_list, 'product_family')
6438 product = read_file(HostFacts._dmi_path_list, 'product_name')
f91f0fd5 6439 if family == 'Unknown' and product:
f67539c2 6440 return '{}'.format(product)
f91f0fd5 6441
f67539c2 6442 return '{} ({})'.format(family, product)
f91f0fd5
TL
6443
    @property
    def bios_version(self):
        # type: () -> str
        """Determine server BIOS version from  DMI data in sysfs"""
        # reads bios_version from the first DMI path that exists
        return read_file(HostFacts._dmi_path_list, 'bios_version')
6449
    @property
    def bios_date(self):
        # type: () -> str
        """Determine server BIOS date from  DMI data in sysfs"""
        # reads bios_date from the first DMI path that exists
        return read_file(HostFacts._dmi_path_list, 'bios_date')
6455
6456 @property
6457 def timestamp(self):
6458 # type: () -> float
6459 """Return the current time as Epoch seconds"""
6460 return time.time()
6461
6462 @property
6463 def system_uptime(self):
6464 # type: () -> float
6465 """Return the system uptime (in secs)"""
6466 raw_time = read_file(['/proc/uptime'])
6467 up_secs, _ = raw_time.split()
6468 return float(up_secs)
6469
f67539c2 6470 @property
f91f0fd5
TL
6471 def kernel_security(self):
6472 # type: () -> Dict[str, str]
6473 """Determine the security features enabled in the kernel - SELinux, AppArmor"""
f67539c2 6474 def _fetch_selinux() -> Dict[str, str]:
f91f0fd5
TL
6475 """Read the selinux config file to determine state"""
6476 security = {}
6477 for selinux_path in HostFacts._selinux_path_list:
6478 if os.path.exists(selinux_path):
6479 selinux_config = read_file([selinux_path]).splitlines()
6480 security['type'] = 'SELinux'
6481 for line in selinux_config:
6482 if line.strip().startswith('#'):
6483 continue
6484 k, v = line.split('=')
6485 security[k] = v
f67539c2
TL
6486 if security['SELINUX'].lower() == 'disabled':
6487 security['description'] = 'SELinux: Disabled'
f91f0fd5 6488 else:
f67539c2 6489 security['description'] = 'SELinux: Enabled({}, {})'.format(security['SELINUX'], security['SELINUXTYPE'])
f91f0fd5 6490 return security
f67539c2 6491 return {}
f91f0fd5 6492
f67539c2 6493 def _fetch_apparmor() -> Dict[str, str]:
f91f0fd5
TL
6494 """Read the apparmor profiles directly, returning an overview of AppArmor status"""
6495 security = {}
6496 for apparmor_path in HostFacts._apparmor_path_list:
6497 if os.path.exists(apparmor_path):
f67539c2
TL
6498 security['type'] = 'AppArmor'
6499 security['description'] = 'AppArmor: Enabled'
f91f0fd5
TL
6500 try:
6501 profiles = read_file(['/sys/kernel/security/apparmor/profiles'])
6502 except OSError:
6503 pass
6504 else:
6505 summary = {} # type: Dict[str, int]
6506 for line in profiles.split('\n'):
6507 item, mode = line.split(' ')
f67539c2 6508 mode = mode.strip('()')
f91f0fd5
TL
6509 if mode in summary:
6510 summary[mode] += 1
6511 else:
6512 summary[mode] = 0
f67539c2
TL
6513 summary_str = ','.join(['{} {}'.format(v, k) for k, v in summary.items()])
6514 security = {**security, **summary} # type: ignore
6515 security['description'] += '({})'.format(summary_str)
f91f0fd5
TL
6516
6517 return security
f67539c2 6518 return {}
f91f0fd5 6519
f67539c2 6520 ret = {}
f91f0fd5
TL
6521 if os.path.exists('/sys/kernel/security/lsm'):
6522 lsm = read_file(['/sys/kernel/security/lsm']).strip()
6523 if 'selinux' in lsm:
f67539c2 6524 ret = _fetch_selinux()
f91f0fd5 6525 elif 'apparmor' in lsm:
f67539c2 6526 ret = _fetch_apparmor()
f91f0fd5
TL
6527 else:
6528 return {
f67539c2
TL
6529 'type': 'Unknown',
6530 'description': 'Linux Security Module framework is active, but is not using SELinux or AppArmor'
f91f0fd5
TL
6531 }
6532
f67539c2
TL
6533 if ret:
6534 return ret
6535
f91f0fd5 6536 return {
f67539c2
TL
6537 'type': 'None',
6538 'description': 'Linux Security Module framework is not available'
f91f0fd5
TL
6539 }
6540
f67539c2
TL
6541 @property
6542 def selinux_enabled(self):
6543 return (self.kernel_security['type'] == 'SELinux') and \
6544 (self.kernel_security['description'] != 'SELinux: Disabled')
6545
adb31ebb
TL
6546 @property
6547 def kernel_parameters(self):
6548 # type: () -> Dict[str, str]
6549 """Get kernel parameters required/used in Ceph clusters"""
6550
6551 k_param = {}
f67539c2 6552 out, _, _ = call_throws(self.ctx, ['sysctl', '-a'], verbosity=CallVerbosity.SILENT)
adb31ebb
TL
6553 if out:
6554 param_list = out.split('\n')
f67539c2 6555 param_dict = {param.split(' = ')[0]: param.split(' = ')[-1] for param in param_list}
adb31ebb
TL
6556
6557 # return only desired parameters
6558 if 'net.ipv4.ip_nonlocal_bind' in param_dict:
6559 k_param['net.ipv4.ip_nonlocal_bind'] = param_dict['net.ipv4.ip_nonlocal_bind']
6560
6561 return k_param
6562
f91f0fd5
TL
6563 def dump(self):
6564 # type: () -> str
6565 """Return the attributes of this HostFacts object as json"""
f67539c2
TL
6566 data = {
6567 k: getattr(self, k) for k in dir(self)
6568 if not k.startswith('_')
6569 and isinstance(getattr(self, k), (float, int, str, list, dict, tuple))
f91f0fd5
TL
6570 }
6571 return json.dumps(data, indent=2, sort_keys=True)
6572
6573##################################
6574
f67539c2
TL
6575
def command_gather_facts(ctx: CephadmContext):
    """gather-facts: print host related metadata (JSON) for the caller."""
    facts = HostFacts(ctx)
    print(facts.dump())
6580
f67539c2
TL
6581##################################
6582
6583
def command_verify_prereqs(ctx: CephadmContext):
    """Raise Error when the host does not meet the service's prerequisites."""
    if ctx.service_type in ('haproxy', 'keepalived'):
        # both daemons may need to bind a VIP that is not yet local to this host
        out, err, code = call(
            ctx, ['sysctl', '-n', 'net.ipv4.ip_nonlocal_bind']
        )
        if out.strip() != '1':
            raise Error('net.ipv4.ip_nonlocal_bind not set to 1')
6591
6592##################################
6593
6594
class CephadmCache:
    """Thread-safe store for the scrape results served by the exporter."""

    task_types = ['disks', 'daemons', 'host', 'http_server']

    def __init__(self):
        self.started_epoch_secs = time.time()
        # per-task thread state; 'inactive' until the worker thread starts
        self.tasks = {
            'daemons': 'inactive',
            'disks': 'inactive',
            'host': 'inactive',
            'http_server': 'inactive',
        }
        self.errors = []
        # most recent scrape payloads, keyed by task
        self.disks = {}
        self.daemons = {}
        self.host = {}
        self.lock = RLock()

    @property
    def health(self):
        """Snapshot of exporter health: start time, task states and errors."""
        return {
            'started_epoch_secs': self.started_epoch_secs,
            'tasks': self.tasks,
            'errors': self.errors,
        }

    def to_json(self):
        """Full cache content in a JSON-serialisable dict."""
        return {
            'health': self.health,
            'host': self.host,
            'daemons': self.daemons,
            'disks': self.disks,
        }

    def update_health(self, task_type, task_status, error_msg=None):
        """Record a task's thread state, optionally appending an error message."""
        assert task_type in CephadmCache.task_types
        with self.lock:
            self.tasks[task_type] = task_status
            if error_msg:
                self.errors.append(error_msg)

    def update_task(self, task_type, content):
        """Merge *content* into the cached data for *task_type*."""
        assert task_type in CephadmCache.task_types
        assert isinstance(content, dict)
        with self.lock:
            # merge in place - the attribute already references this dict
            getattr(self, task_type).update(content)
6644
6645
class CephadmHTTPServer(ThreadingMixIn, HTTPServer):
    """HTTPServer that serves each request on its own daemon thread."""
    # allow fast restarts without waiting out TIME_WAIT on the listen port
    allow_reuse_address = True
    # request threads must not block process shutdown
    daemon_threads = True
    # attached by CephadmDaemon.run() before the server starts serving
    cephadm_cache: CephadmCache
    token: str
6651
6652
class CephadmDaemonHandler(BaseHTTPRequestHandler):
    """Token-authenticated GET handler exposing the exporter's cached metadata."""
    # populated by CephadmHTTPServer (carries the cache and auth token)
    server: CephadmHTTPServer
    api_version = 'v1'
    # the only endpoints do_GET serves; anything else yields a 404
    valid_routes = [
        f'/{api_version}/metadata',
        f'/{api_version}/metadata/health',
        f'/{api_version}/metadata/disks',
        f'/{api_version}/metadata/daemons',
        f'/{api_version}/metadata/host',
    ]

    class Decorators:
        @classmethod
        def authorize(cls, f):
            """Implement a basic token check.

            The token is installed at deployment time and must be provided to
            ensure we only respond to callers who know our token i.e. mgr
            """
            def wrapper(self, *args, **kwargs):
                auth = self.headers.get('Authorization', None)
                # expects "Authorization: Bearer <token>"
                if auth != 'Bearer ' + self.server.token:
                    self.send_error(401)
                    return
                f(self, *args, **kwargs)
            return wrapper

    def _help_page(self):
        """Return a static HTML index documenting the available endpoints."""
        return """<!DOCTYPE html>
<html>
<head><title>cephadm metadata exporter</title></head>
<style>
body {{
    font-family: sans-serif;
    font-size: 0.8em;
}}
table {{
    border-width: 0px;
    border-spacing: 0px;
    margin-left:20px;
}}
tr:hover {{
    background: PowderBlue;
}}
td,th {{
    padding: 5px;
}}
</style>
<body>
    <h1>cephadm metadata exporter {api_version}</h1>
    <table>
      <thead>
        <tr><th>Endpoint</th><th>Methods</th><th>Response</th><th>Description</th></tr>
      </thead>
      <tr><td><a href='{api_version}/metadata'>{api_version}/metadata</a></td><td>GET</td><td>JSON</td><td>Return <b>all</b> metadata for the host</td></tr>
      <tr><td><a href='{api_version}/metadata/daemons'>{api_version}/metadata/daemons</a></td><td>GET</td><td>JSON</td><td>Return daemon and systemd states for ceph daemons (ls)</td></tr>
      <tr><td><a href='{api_version}/metadata/disks'>{api_version}/metadata/disks</a></td><td>GET</td><td>JSON</td><td>show disk inventory (ceph-volume)</td></tr>
      <tr><td><a href='{api_version}/metadata/health'>{api_version}/metadata/health</a></td><td>GET</td><td>JSON</td><td>Show current health of the exporter sub-tasks</td></tr>
      <tr><td><a href='{api_version}/metadata/host'>{api_version}/metadata/host</a></td><td>GET</td><td>JSON</td><td>Show host metadata (gather-facts)</td></tr>
    </table>
</body>
</html>""".format(api_version=CephadmDaemonHandler.api_version)

    def _fetch_root(self):
        """Serve the HTML help page."""
        self.send_response(200)
        self.send_header('Content-type', 'text/html; charset=utf-8')
        self.end_headers()
        self.wfile.write(self._help_page().encode('utf-8'))

    @Decorators.authorize
    def do_GET(self):
        """Handle *all* GET requests"""

        if self.path == '/':
            # provide a html response if someone hits the root url, to document the
            # available api endpoints
            return self._fetch_root()
        elif self.path in CephadmDaemonHandler.valid_routes:
            u = self.path.split('/')[-1]
            data = json.dumps({})
            status_code = 200

            tasks = self.server.cephadm_cache.health.get('tasks', {})
            assert tasks

            # We're using the http status code to help indicate thread health
            # - 200 (OK): request successful
            # - 204 (No Content): access to a cache relating to a dead thread
            # - 206 (Partial content): one or more theads are inactive
            # - 500 (Server Error): all threads inactive
            if u == 'metadata':
                data = json.dumps(self.server.cephadm_cache.to_json())
                if all([tasks[task_name] == 'inactive' for task_name in tasks if task_name != 'http_server']):
                    # All the subtasks are dead!
                    status_code = 500
                elif any([tasks[task_name] == 'inactive' for task_name in tasks if task_name != 'http_server']):
                    status_code = 206

            # Individual GETs against a task's endpoint return a 204 when the
            # corresponding scraper thread is inactive
            elif u == 'daemons':
                data = json.dumps(self.server.cephadm_cache.daemons)
                if tasks['daemons'] == 'inactive':
                    status_code = 204
            elif u == 'disks':
                data = json.dumps(self.server.cephadm_cache.disks)
                if tasks['disks'] == 'inactive':
                    status_code = 204
            elif u == 'host':
                data = json.dumps(self.server.cephadm_cache.host)
                if tasks['host'] == 'inactive':
                    status_code = 204

            # a GET against health will always return a 200, since the op is always successful
            elif u == 'health':
                data = json.dumps(self.server.cephadm_cache.health)

            self.send_response(status_code)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(data.encode('utf-8'))
        else:
            # Invalid GET URL
            bad_request_msg = 'Valid URLs are: {}'.format(', '.join(CephadmDaemonHandler.valid_routes))
            self.send_response(404, message=bad_request_msg)  # reason
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(json.dumps({'message': bad_request_msg}).encode('utf-8'))

    def log_message(self, format, *args):
        # route http.server access logging through the cephadm logger
        rqst = ' '.join(str(a) for a in args)
        logger.info(f'client:{self.address_string()} [{self.log_date_time_string()}] {rqst}')
6784
6785
class CephadmDaemon():
    """cephadm-exporter daemon: scrapes host state and serves it over HTTPS."""

    daemon_type = 'cephadm-exporter'
    default_port = 9443
    # file names expected inside the daemon's data directory
    key_name = 'key'
    crt_name = 'crt'
    token_name = 'token'
    # deployment config must provide all of these fields
    config_requirements = [
        key_name,
        crt_name,
        token_name,
    ]
    # seconds between main-loop wake-ups; thread health checked every 5 ticks
    loop_delay = 1
    thread_check_interval = 5
6800
6801 def __init__(self, ctx: CephadmContext, fsid, daemon_id=None, port=None):
6802 self.ctx = ctx
6803 self.fsid = fsid
6804 self.daemon_id = daemon_id
6805 if not port:
6806 self.port = CephadmDaemon.default_port
6807 else:
6808 self.port = port
6809 self.workers: List[Thread] = []
6810 self.http_server: CephadmHTTPServer
6811 self.stop = False
6812 self.cephadm_cache = CephadmCache()
6813 self.errors: List[str] = []
6814 self.token = read_file([os.path.join(self.daemon_path, CephadmDaemon.token_name)])
6815
6816 @classmethod
6817 def validate_config(cls, config):
6818 reqs = ', '.join(CephadmDaemon.config_requirements)
6819 errors = []
6820
6821 if not config or not all([k_name in config for k_name in CephadmDaemon.config_requirements]):
6822 raise Error(f'config must contain the following fields : {reqs}')
6823
6824 if not all([isinstance(config[k_name], str) for k_name in CephadmDaemon.config_requirements]):
6825 errors.append(f'the following fields must be strings: {reqs}')
6826
6827 crt = config[CephadmDaemon.crt_name]
6828 key = config[CephadmDaemon.key_name]
6829 token = config[CephadmDaemon.token_name]
6830
6831 if not crt.startswith('-----BEGIN CERTIFICATE-----') or not crt.endswith('-----END CERTIFICATE-----\n'):
6832 errors.append('crt field is not a valid SSL certificate')
6833 if not key.startswith('-----BEGIN PRIVATE KEY-----') or not key.endswith('-----END PRIVATE KEY-----\n'):
6834 errors.append('key is not a valid SSL private key')
6835 if len(token) < 8:
6836 errors.append("'token' must be more than 8 characters long")
6837
6838 if 'port' in config:
6839 try:
6840 p = int(config['port'])
6841 if p <= 1024:
6842 raise ValueError
6843 except (TypeError, ValueError):
6844 errors.append('port must be an integer > 1024')
6845
6846 if errors:
6847 raise Error('Parameter errors : {}'.format(', '.join(errors)))
6848
    @property
    def port_active(self):
        # True when something is already listening on the exporter's port
        return port_in_use(self.ctx, self.port)
6852
6853 @property
6854 def can_run(self):
6855 # if port is in use
6856 if self.port_active:
6857 self.errors.append(f'TCP port {self.port} already in use, unable to bind')
6858 if not os.path.exists(os.path.join(self.daemon_path, CephadmDaemon.key_name)):
6859 self.errors.append(f"Key file '{CephadmDaemon.key_name}' is missing from {self.daemon_path}")
6860 if not os.path.exists(os.path.join(self.daemon_path, CephadmDaemon.crt_name)):
6861 self.errors.append(f"Certificate file '{CephadmDaemon.crt_name}' is missing from {self.daemon_path}")
6862 if self.token == 'Unknown':
6863 self.errors.append(f"Authentication token '{CephadmDaemon.token_name}' is missing from {self.daemon_path}")
6864 return len(self.errors) == 0
6865
6866 @staticmethod
6867 def _unit_name(fsid, daemon_id):
6868 return '{}.service'.format(get_unit_name(fsid, CephadmDaemon.daemon_type, daemon_id))
6869
    @property
    def unit_name(self):
        # systemd unit name for this specific daemon instance
        return CephadmDaemon._unit_name(self.fsid, self.daemon_id)
6873
6874 @property
6875 def daemon_path(self):
6876 return os.path.join(
6877 self.ctx.data_dir,
6878 self.fsid,
6879 f'{self.daemon_type}.{self.daemon_id}'
6880 )
6881
    @property
    def binary_path(self):
        # resolved path of this cephadm script (copied per-cluster at deploy time)
        path = os.path.realpath(__file__)
        assert os.path.isfile(path)
        return path
6887
6888 def _handle_thread_exception(self, exc, thread_type):
6889 e_msg = f'{exc.__class__.__name__} exception: {str(exc)}'
6890 thread_info = getattr(self.cephadm_cache, thread_type)
6891 errors = thread_info.get('scrape_errors', [])
6892 errors.append(e_msg)
6893 logger.error(e_msg)
6894 logger.exception(exc)
6895 self.cephadm_cache.update_task(
6896 thread_type,
6897 {
6898 'scrape_errors': errors,
6899 'data': None,
6900 }
6901 )
6902
    def _scrape_host_facts(self, refresh_interval=10):
        """Worker loop: refresh the 'host' cache entry every *refresh_interval* secs.

        Stops when self.stop is set or after the first unhandled exception
        (which is recorded via _handle_thread_exception).
        """
        ctr = 0
        exception_encountered = False

        while True:

            if self.stop or exception_encountered:
                break

            if ctr >= refresh_interval:
                ctr = 0
                logger.debug('executing host-facts scrape')
                errors = []
                s_time = time.time()

                try:
                    facts = HostFacts(self.ctx)
                except Exception as e:
                    self._handle_thread_exception(e, 'host')
                    exception_encountered = True
                else:
                    elapsed = time.time() - s_time
                    try:
                        # round-trip through JSON to validate dump() output
                        data = json.loads(facts.dump())
                    except json.decoder.JSONDecodeError:
                        errors.append('host-facts provided invalid JSON')
                        logger.warning(errors[-1])
                        data = {}
                    self.cephadm_cache.update_task(
                        'host',
                        {
                            'scrape_timestamp': s_time,
                            'scrape_duration_secs': elapsed,
                            'scrape_errors': errors,
                            'data': data,
                        }
                    )
                    logger.debug(f'completed host-facts scrape - {elapsed}s')

            # sleep in small steps so a stop request is honoured promptly
            time.sleep(CephadmDaemon.loop_delay)
            ctr += CephadmDaemon.loop_delay
        logger.info('host-facts thread stopped')
6945
    def _scrape_ceph_volume(self, refresh_interval=15):
        """Worker loop: refresh the 'disks' cache entry via ceph-volume inventory.

        Stops when self.stop is set or after the first unhandled exception.
        """
        # we're invoking the ceph_volume command, so we need to set the args that it
        # expects to use
        self.ctx.command = 'inventory --format=json'.split()
        self.ctx.fsid = self.fsid
        self.ctx.log_output = False

        ctr = 0
        exception_encountered = False

        while True:
            if self.stop or exception_encountered:
                break

            if ctr >= refresh_interval:
                ctr = 0
                logger.debug('executing ceph-volume scrape')
                errors = []
                s_time = time.time()
                stream = io.StringIO()
                try:
                    # command_ceph_volume prints to stdout - capture it
                    with redirect_stdout(stream):
                        command_ceph_volume(self.ctx)
                except Exception as e:
                    self._handle_thread_exception(e, 'disks')
                    exception_encountered = True
                else:
                    elapsed = time.time() - s_time

                    # if the call to ceph-volume returns junk with the
                    # json, it won't parse
                    stdout = stream.getvalue()

                    data = []
                    if stdout:
                        try:
                            data = json.loads(stdout)
                        except json.decoder.JSONDecodeError:
                            errors.append('ceph-volume thread provided bad json data')
                            logger.warning(errors[-1])
                    else:
                        errors.append('ceph-volume did not return any data')
                        logger.warning(errors[-1])

                    self.cephadm_cache.update_task(
                        'disks',
                        {
                            'scrape_timestamp': s_time,
                            'scrape_duration_secs': elapsed,
                            'scrape_errors': errors,
                            'data': data,
                        }
                    )

                    logger.debug(f'completed ceph-volume scrape - {elapsed}s')
            # sleep in small steps so a stop request is honoured promptly
            time.sleep(CephadmDaemon.loop_delay)
            ctr += CephadmDaemon.loop_delay

        logger.info('ceph-volume thread stopped')
7005
    def _scrape_list_daemons(self, refresh_interval=20):
        """Worker loop: refresh the 'daemons' cache entry from list_daemons().

        Stops when self.stop is set or after the first unhandled exception.
        """
        ctr = 0
        exception_encountered = False
        while True:
            if self.stop or exception_encountered:
                break

            if ctr >= refresh_interval:
                ctr = 0
                logger.debug('executing list-daemons scrape')
                errors = []
                s_time = time.time()

                try:
                    # list daemons should ideally be invoked with a fsid
                    data = list_daemons(self.ctx)
                except Exception as e:
                    self._handle_thread_exception(e, 'daemons')
                    exception_encountered = True
                else:
                    if not isinstance(data, list):
                        errors.append('list-daemons did not supply a list?')
                        logger.warning(errors[-1])
                        data = []
                    elapsed = time.time() - s_time
                    self.cephadm_cache.update_task(
                        'daemons',
                        {
                            'scrape_timestamp': s_time,
                            'scrape_duration_secs': elapsed,
                            'scrape_errors': errors,
                            'data': data,
                        }
                    )
                    logger.debug(f'completed list-daemons scrape - {elapsed}s')

            # sleep in small steps so a stop request is honoured promptly
            time.sleep(CephadmDaemon.loop_delay)
            ctr += CephadmDaemon.loop_delay
        logger.info('list-daemons thread stopped')
7045
7046 def _create_thread(self, target, name, refresh_interval=None):
7047 if refresh_interval:
7048 t = Thread(target=target, args=(refresh_interval,))
7049 else:
7050 t = Thread(target=target)
7051 t.daemon = True
7052 t.name = name
7053 self.cephadm_cache.update_health(name, 'active')
7054 t.start()
7055
7056 start_msg = f'Started {name} thread'
7057 if refresh_interval:
7058 logger.info(f'{start_msg}, with a refresh interval of {refresh_interval}s')
7059 else:
7060 logger.info(f'{start_msg}')
7061 return t
7062
    def reload(self, *args):
        """reload -HUP received

        This is a placeholder function only, and serves to provide the hook that could
        be exploited later if the exporter evolves to incorporate a config file
        """
        logger.info('Reload request received - ignoring, no action needed')
7070
    def shutdown(self, *args):
        # SIGTERM/SIGINT handler: stop the scraper loops and the HTTP server
        logger.info('Shutdown request received')
        self.stop = True
        self.http_server.shutdown()
7075
    def run(self):
        """Main entry point: start the scraper threads and serve HTTPS until stopped."""
        logger.info(f"cephadm exporter starting for FSID '{self.fsid}'")
        if not self.can_run:
            logger.error('Unable to start the exporter daemon')
            for e in self.errors:
                logger.error(e)
            return

        # register signal handlers for running under systemd control
        signal.signal(signal.SIGTERM, self.shutdown)
        signal.signal(signal.SIGINT, self.shutdown)
        signal.signal(signal.SIGHUP, self.reload)
        logger.debug('Signal handlers attached')

        host_facts = self._create_thread(self._scrape_host_facts, 'host', 5)
        self.workers.append(host_facts)

        daemons = self._create_thread(self._scrape_list_daemons, 'daemons', 20)
        self.workers.append(daemons)

        disks = self._create_thread(self._scrape_ceph_volume, 'disks', 20)
        self.workers.append(disks)

        self.http_server = CephadmHTTPServer(('0.0.0.0', self.port), CephadmDaemonHandler)  # IPv4 only
        # NOTE(review): ssl.wrap_socket is deprecated since Python 3.7 in
        # favour of SSLContext.wrap_socket - candidate for a follow-up change
        self.http_server.socket = ssl.wrap_socket(self.http_server.socket,
                                                  keyfile=os.path.join(self.daemon_path, CephadmDaemon.key_name),
                                                  certfile=os.path.join(self.daemon_path, CephadmDaemon.crt_name),
                                                  server_side=True)

        self.http_server.cephadm_cache = self.cephadm_cache
        self.http_server.token = self.token
        server_thread = self._create_thread(self.http_server.serve_forever, 'http_server')
        logger.info(f'https server listening on {self.http_server.server_address[0]}:{self.http_server.server_port}')

        # supervise the worker threads until shutdown is requested
        ctr = 0
        while server_thread.is_alive():
            if self.stop:
                break

            if ctr >= CephadmDaemon.thread_check_interval:
                ctr = 0
                for worker in self.workers:
                    # a thread already flagged inactive has been reported; skip it
                    if self.cephadm_cache.tasks[worker.name] == 'inactive':
                        continue
                    if not worker.is_alive():
                        logger.warning(f'{worker.name} thread not running')
                        stop_time = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')
                        self.cephadm_cache.update_health(worker.name, 'inactive', f'{worker.name} stopped at {stop_time}')

            time.sleep(CephadmDaemon.loop_delay)
            ctr += CephadmDaemon.loop_delay

        logger.info('Main http server thread stopped')
7129
7130 @property
7131 def unit_run(self):
7132
7133 return """set -e
7134{py3} {bin_path} exporter --fsid {fsid} --id {daemon_id} --port {port} &""".format(
7135 py3=shutil.which('python3'),
7136 bin_path=self.binary_path,
7137 fsid=self.fsid,
7138 daemon_id=self.daemon_id,
7139 port=self.port
7140 )
7141
    @property
    def unit_file(self):
        """systemd unit definition for the exporter service."""
        # with the docker engine the unit must also depend on docker.service
        docker = isinstance(self.ctx.container_engine, Docker)
        return """#generated by cephadm
[Unit]
Description=cephadm exporter service for cluster {fsid}
After=network-online.target{docker_after}
Wants=network-online.target
{docker_requires}

PartOf=ceph-{fsid}.target
Before=ceph-{fsid}.target

[Service]
Type=forking
ExecStart=/bin/bash {daemon_path}/unit.run
ExecReload=/bin/kill -HUP $MAINPID
Restart=on-failure
RestartSec=10s

[Install]
WantedBy=ceph-{fsid}.target
""".format(fsid=self.fsid,
           daemon_path=self.daemon_path,
           # if docker, we depend on docker.service
           docker_after=' docker.service' if docker else '',
           docker_requires='Requires=docker.service\n' if docker else '')
7169
    def deploy_daemon_unit(self, config=None):
        """deploy a specific unit file for cephadm

        The normal deploy_daemon_units doesn't apply for this
        daemon since it's not a container, so we just create a
        simple service definition and add it to the fsid's target

        :param config: dict of file name -> content (crt/key/token)
        :raises Error: when no config is provided
        """
        if not config:
            raise Error('Attempting to deploy cephadm daemon without a config')
        assert isinstance(config, dict)

        # Create the required config files in the daemons dir, with restricted permissions
        for filename in config:
            with open(os.open(os.path.join(self.daemon_path, filename), os.O_CREAT | os.O_WRONLY, mode=0o600), 'w') as f:
                f.write(config[filename])

        # When __file__ is <stdin> we're being invoked over remoto via the orchestrator, so
        # we pick up the file from where the orchestrator placed it - otherwise we'll
        # copy it to the binary location for this cluster
        if not __file__ == '<stdin>':
            shutil.copy(__file__,
                        self.binary_path)

        with open(os.path.join(self.daemon_path, 'unit.run'), 'w') as f:
            f.write(self.unit_run)

        # write the unit file to a .new path first, then rename it into place
        # so systemd never sees a partially written file
        with open(
            os.path.join(self.ctx.unit_dir,
                         f'{self.unit_name}.new'),
            'w'
        ) as f:
            f.write(self.unit_file)
            os.rename(
                os.path.join(self.ctx.unit_dir, f'{self.unit_name}.new'),
                os.path.join(self.ctx.unit_dir, self.unit_name))

        # pick up the new unit, clear any failed state, then enable + start it
        call_throws(self.ctx, ['systemctl', 'daemon-reload'])
        call(self.ctx, ['systemctl', 'stop', self.unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(self.ctx, ['systemctl', 'reset-failed', self.unit_name],
             verbosity=CallVerbosity.DEBUG)
        call_throws(self.ctx, ['systemctl', 'enable', '--now', self.unit_name])
7212
7213 @classmethod
7214 def uninstall(cls, ctx: CephadmContext, fsid, daemon_type, daemon_id):
7215 unit_name = CephadmDaemon._unit_name(fsid, daemon_id)
7216 unit_path = os.path.join(ctx.unit_dir, unit_name)
7217 unit_run = os.path.join(ctx.data_dir, fsid, f'{daemon_type}.{daemon_id}', 'unit.run')
7218 port = None
7219 try:
7220 with open(unit_run, 'r') as u:
7221 contents = u.read().strip(' &')
7222 except OSError:
7223 logger.warning(f'Unable to access the unit.run file @ {unit_run}')
7224 return
7225
7226 port = None
7227 for line in contents.split('\n'):
7228 if '--port ' in line:
7229 try:
7230 port = int(line.split('--port ')[-1])
7231 except ValueError:
7232 logger.warning('Unexpected format in unit.run file: port is not numeric')
7233 logger.warning('Unable to remove the systemd file and close the port')
7234 return
7235 break
7236
7237 if port:
7238 fw = Firewalld(ctx)
7239 try:
7240 fw.close_ports([port])
7241 except RuntimeError:
7242 logger.error(f'Unable to close port {port}')
7243
7244 stdout, stderr, rc = call(ctx, ['rm', '-f', unit_path])
7245 if rc:
7246 logger.error(f'Unable to remove the systemd file @ {unit_path}')
7247 else:
7248 logger.info(f'removed systemd unit file @ {unit_path}')
7249 stdout, stderr, rc = call(ctx, ['systemctl', 'daemon-reload'])
7250
7251
def command_exporter(ctx: CephadmContext):
    """Entry point for the `exporter` subcommand: run the metadata exporter."""
    daemon = CephadmDaemon(ctx, ctx.fsid, daemon_id=ctx.id, port=ctx.port)

    # the fsid must correspond to a cluster directory under data_dir
    known_fsids = os.listdir(ctx.data_dir)
    if ctx.fsid not in known_fsids:
        raise Error(f"cluster fsid '{ctx.fsid}' not found in '{ctx.data_dir}'")

    daemon.run()
7259
7260##################################
7261
7262
def systemd_target_state(target_name: str, subsystem: str = 'ceph') -> bool:
    # TODO: UNITTEST
    """Return True when `target_name` is wanted by the subsystem's systemd target."""
    wants_path = os.path.join(UNIT_DIR, f'{subsystem}.target.wants', target_name)
    return os.path.exists(wants_path)
7272
7273
@infer_fsid
def command_maintenance(ctx: CephadmContext):
    """Enter or exit host maintenance by disabling/enabling the cluster target.

    Returns a short status string beginning with 'success', 'skipped' or
    'failed' describing the outcome.

    :raises Error: if no fsid was passed or inferred
    """
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    target = f'ceph-{ctx.fsid}.target'

    if ctx.maintenance_action.lower() == 'enter':
        logger.info('Requested to place host into maintenance')
        if not systemd_target_state(target):
            return 'skipped - target already disabled'

        _out, _err, code = call(ctx,
                                ['systemctl', 'disable', target],
                                verbosity=CallVerbosity.DEBUG)
        if code:
            logger.error(f'Failed to disable the {target} target')
            return 'failed - to disable the target'

        # stopping a target waits by default
        _out, _err, code = call(ctx,
                                ['systemctl', 'stop', target],
                                verbosity=CallVerbosity.DEBUG)
        if code:
            logger.error(f'Failed to stop the {target} target')
            # message previously (incorrectly) referred to 'disable'
            return 'failed - to stop the target'

        return f'success - systemd target {target} disabled'

    else:
        logger.info('Requested to exit maintenance state')
        # exit maintenance request; previously this branch fell through and
        # returned None when the target was already enabled
        if systemd_target_state(target):
            return 'skipped - target already enabled'

        _out, _err, code = call(ctx,
                                ['systemctl', 'enable', target],
                                verbosity=CallVerbosity.DEBUG)
        if code:
            logger.error(f'Failed to enable the {target} target')
            return 'failed - unable to enable the target'

        # starting a target waits by default
        _out, _err, code = call(ctx,
                                ['systemctl', 'start', target],
                                verbosity=CallVerbosity.DEBUG)
        if code:
            logger.error(f'Failed to start the {target} target')
            return 'failed - unable to start the target'

        return f'success - systemd target {target} enabled and started'
f91f0fd5
TL
7324
7325##################################
7326
f6b5b4d7 7327
9f95a23c
TL
def _get_parser() -> argparse.ArgumentParser:
    """Build the top-level cephadm argument parser with all subcommands.

    Global options (image, directories, verbosity, timeouts) live on the
    root parser; each subcommand registers its handler via set_defaults(func=...).
    """
    parser = argparse.ArgumentParser(
        description='Bootstrap Ceph daemons with systemd and containers.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--image',
        help='container image. Can also be set via the "CEPHADM_IMAGE" '
        'env var')
    parser.add_argument(
        '--docker',
        action='store_true',
        help='use docker instead of podman')
    parser.add_argument(
        '--data-dir',
        default=DATA_DIR,
        help='base directory for daemon data')
    parser.add_argument(
        '--log-dir',
        default=LOG_DIR,
        help='base directory for daemon logs')
    parser.add_argument(
        '--logrotate-dir',
        default=LOGROTATE_DIR,
        help='location of logrotate configuration files')
    parser.add_argument(
        '--unit-dir',
        default=UNIT_DIR,
        help='base directory for systemd units')
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Show debug-level log messages')
    parser.add_argument(
        '--timeout',
        type=int,
        default=DEFAULT_TIMEOUT,
        help='timeout in seconds')
    parser.add_argument(
        '--retry',
        type=int,
        default=DEFAULT_RETRY,
        help='max number of retries')
    parser.add_argument(
        '--env', '-e',
        action='append',
        default=[],
        help='set environment variable')
    parser.add_argument(
        '--no-container-init',
        action='store_true',
        default=not CONTAINER_INIT,
        help='Do not run podman/docker with `--init`')

    subparsers = parser.add_subparsers(help='sub-command')

    parser_version = subparsers.add_parser(
        'version', help='get ceph version from container')
    parser_version.set_defaults(func=command_version)

    parser_pull = subparsers.add_parser(
        'pull', help='pull latest image version')
    parser_pull.set_defaults(func=command_pull)

    parser_inspect_image = subparsers.add_parser(
        'inspect-image', help='inspect local container image')
    parser_inspect_image.set_defaults(func=command_inspect_image)

    parser_ls = subparsers.add_parser(
        'ls', help='list daemon instances on this host')
    parser_ls.set_defaults(func=command_ls)
    parser_ls.add_argument(
        '--no-detail',
        action='store_true',
        help='Do not include daemon status')
    parser_ls.add_argument(
        '--legacy-dir',
        default='/',
        help='base directory for legacy daemon data')

    parser_list_networks = subparsers.add_parser(
        'list-networks', help='list IP networks')
    parser_list_networks.set_defaults(func=command_list_networks)

    parser_adopt = subparsers.add_parser(
        'adopt', help='adopt daemon deployed with a different tool')
    parser_adopt.set_defaults(func=command_adopt)
    parser_adopt.add_argument(
        '--name', '-n',
        required=True,
        help='daemon name (type.id)')
    parser_adopt.add_argument(
        '--style',
        required=True,
        help='deployment style (legacy, ...)')
    parser_adopt.add_argument(
        '--cluster',
        default='ceph',
        help='cluster name')
    parser_adopt.add_argument(
        '--legacy-dir',
        default='/',
        help='base directory for legacy daemon data')
    parser_adopt.add_argument(
        '--config-json',
        help='Additional configuration information in JSON format')
    parser_adopt.add_argument(
        '--skip-firewalld',
        action='store_true',
        help='Do not configure firewalld')
    parser_adopt.add_argument(
        '--skip-pull',
        action='store_true',
        help='do not pull the latest image before adopting')
    parser_adopt.add_argument(
        '--force-start',
        action='store_true',
        help='start newly adopted daemon, even if it was not running previously')
    parser_adopt.add_argument(
        '--container-init',
        action='store_true',
        default=CONTAINER_INIT,
        help=argparse.SUPPRESS)

    parser_rm_daemon = subparsers.add_parser(
        'rm-daemon', help='remove daemon instance')
    parser_rm_daemon.set_defaults(func=command_rm_daemon)
    parser_rm_daemon.add_argument(
        '--name', '-n',
        required=True,
        action=CustomValidation,
        help='daemon name (type.id)')
    parser_rm_daemon.add_argument(
        '--fsid',
        required=True,
        help='cluster FSID')
    parser_rm_daemon.add_argument(
        '--force',
        action='store_true',
        help='proceed, even though this may destroy valuable data')
    parser_rm_daemon.add_argument(
        '--force-delete-data',
        action='store_true',
        help='delete valuable daemon data instead of making a backup')

    parser_rm_cluster = subparsers.add_parser(
        'rm-cluster', help='remove all daemons for a cluster')
    parser_rm_cluster.set_defaults(func=command_rm_cluster)
    parser_rm_cluster.add_argument(
        '--fsid',
        required=True,
        help='cluster FSID')
    parser_rm_cluster.add_argument(
        '--force',
        action='store_true',
        help='proceed, even though this may destroy valuable data')
    parser_rm_cluster.add_argument(
        '--keep-logs',
        action='store_true',
        help='do not remove log files')

    parser_run = subparsers.add_parser(
        'run', help='run a ceph daemon, in a container, in the foreground')
    parser_run.set_defaults(func=command_run)
    parser_run.add_argument(
        '--name', '-n',
        required=True,
        help='daemon name (type.id)')
    parser_run.add_argument(
        '--fsid',
        required=True,
        help='cluster FSID')

    parser_shell = subparsers.add_parser(
        'shell', help='run an interactive shell inside a daemon container')
    parser_shell.set_defaults(func=command_shell)
    parser_shell.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_shell.add_argument(
        '--name', '-n',
        help='daemon name (type.id)')
    parser_shell.add_argument(
        '--config', '-c',
        help='ceph.conf to pass through to the container')
    parser_shell.add_argument(
        '--keyring', '-k',
        help='ceph.keyring to pass through to the container')
    parser_shell.add_argument(
        '--mount', '-m',
        help=('mount a file or directory in the container. '
              'Support multiple mounts. '
              'ie: `--mount /foo /bar:/bar`. '
              'When no destination is passed, default is /mnt'),
        nargs='+')
    parser_shell.add_argument(
        '--env', '-e',
        action='append',
        default=[],
        help='set environment variable')
    parser_shell.add_argument(
        'command', nargs=argparse.REMAINDER,
        help='command (optional)')

    parser_enter = subparsers.add_parser(
        'enter', help='run an interactive shell inside a running daemon container')
    parser_enter.set_defaults(func=command_enter)
    parser_enter.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_enter.add_argument(
        '--name', '-n',
        required=True,
        help='daemon name (type.id)')
    parser_enter.add_argument(
        'command', nargs=argparse.REMAINDER,
        help='command')

    parser_ceph_volume = subparsers.add_parser(
        'ceph-volume', help='run ceph-volume inside a container')
    parser_ceph_volume.set_defaults(func=command_ceph_volume)
    parser_ceph_volume.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_ceph_volume.add_argument(
        '--config-json',
        help='JSON file with config and (client.bootstrap-osd) key')
    parser_ceph_volume.add_argument(
        '--config', '-c',
        help='ceph conf file')
    parser_ceph_volume.add_argument(
        '--keyring', '-k',
        help='ceph.keyring to pass through to the container')
    parser_ceph_volume.add_argument(
        '--log-output',
        action='store_true',
        default=True,
        help='suppress ceph volume output from the log')
    parser_ceph_volume.add_argument(
        'command', nargs=argparse.REMAINDER,
        help='command')

    parser_unit = subparsers.add_parser(
        'unit', help="operate on the daemon's systemd unit")
    parser_unit.set_defaults(func=command_unit)
    parser_unit.add_argument(
        'command',
        help='systemd command (start, stop, restart, enable, disable, ...)')
    parser_unit.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_unit.add_argument(
        '--name', '-n',
        required=True,
        help='daemon name (type.id)')

    parser_logs = subparsers.add_parser(
        'logs', help='print journald logs for a daemon container')
    parser_logs.set_defaults(func=command_logs)
    parser_logs.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_logs.add_argument(
        '--name', '-n',
        required=True,
        help='daemon name (type.id)')
    parser_logs.add_argument(
        'command', nargs='*',
        help='additional journalctl args')

    parser_bootstrap = subparsers.add_parser(
        'bootstrap', help='bootstrap a cluster (mon + mgr daemons)')
    parser_bootstrap.set_defaults(func=command_bootstrap)
    parser_bootstrap.add_argument(
        '--config', '-c',
        help='ceph conf file to incorporate')
    parser_bootstrap.add_argument(
        '--mon-id',
        required=False,
        help='mon id (default: local hostname)')
    parser_bootstrap.add_argument(
        '--mon-addrv',
        help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])')
    parser_bootstrap.add_argument(
        '--mon-ip',
        help='mon IP')
    parser_bootstrap.add_argument(
        '--mgr-id',
        required=False,
        help='mgr id (default: randomly generated)')
    parser_bootstrap.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_bootstrap.add_argument(
        '--output-dir',
        default='/etc/ceph',
        help='directory to write config, keyring, and pub key files')
    parser_bootstrap.add_argument(
        '--output-keyring',
        help='location to write keyring file with new cluster admin and mon keys')
    parser_bootstrap.add_argument(
        '--output-config',
        help='location to write conf file to connect to new cluster')
    parser_bootstrap.add_argument(
        '--output-pub-ssh-key',
        help="location to write the cluster's public SSH key")
    parser_bootstrap.add_argument(
        '--skip-ssh',
        action='store_true',
        help='skip setup of ssh key on local host')
    parser_bootstrap.add_argument(
        '--initial-dashboard-user',
        default='admin',
        help='Initial user for the dashboard')
    parser_bootstrap.add_argument(
        '--initial-dashboard-password',
        help='Initial password for the initial dashboard user')
    parser_bootstrap.add_argument(
        '--ssl-dashboard-port',
        type=int,
        default=8443,
        help='Port number used to connect with dashboard using SSL')
    parser_bootstrap.add_argument(
        '--dashboard-key',
        type=argparse.FileType('r'),
        help='Dashboard key')
    parser_bootstrap.add_argument(
        '--dashboard-crt',
        type=argparse.FileType('r'),
        help='Dashboard certificate')

    parser_bootstrap.add_argument(
        '--ssh-config',
        type=argparse.FileType('r'),
        help='SSH config')
    parser_bootstrap.add_argument(
        '--ssh-private-key',
        type=argparse.FileType('r'),
        help='SSH private key')
    parser_bootstrap.add_argument(
        '--ssh-public-key',
        type=argparse.FileType('r'),
        help='SSH public key')
    parser_bootstrap.add_argument(
        '--ssh-user',
        default='root',
        help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users')

    parser_bootstrap.add_argument(
        '--skip-mon-network',
        action='store_true',
        help='set mon public_network based on bootstrap mon ip')
    parser_bootstrap.add_argument(
        '--skip-dashboard',
        action='store_true',
        help='do not enable the Ceph Dashboard')
    parser_bootstrap.add_argument(
        '--dashboard-password-noupdate',
        action='store_true',
        help='stop forced dashboard password change')
    parser_bootstrap.add_argument(
        '--no-minimize-config',
        action='store_true',
        help='do not assimilate and minimize the config file')
    parser_bootstrap.add_argument(
        '--skip-ping-check',
        action='store_true',
        help='do not verify that mon IP is pingable')
    parser_bootstrap.add_argument(
        '--skip-pull',
        action='store_true',
        help='do not pull the latest image before bootstrapping')
    parser_bootstrap.add_argument(
        '--skip-firewalld',
        action='store_true',
        help='Do not configure firewalld')
    parser_bootstrap.add_argument(
        '--allow-overwrite',
        action='store_true',
        help='allow overwrite of existing --output-* config/keyring/ssh files')
    parser_bootstrap.add_argument(
        '--allow-fqdn-hostname',
        action='store_true',
        help='allow hostname that is fully-qualified (contains ".")')
    parser_bootstrap.add_argument(
        '--allow-mismatched-release',
        action='store_true',
        help="allow bootstrap of ceph that doesn't match this version of cephadm")
    parser_bootstrap.add_argument(
        '--skip-prepare-host',
        action='store_true',
        help='Do not prepare host')
    parser_bootstrap.add_argument(
        '--orphan-initial-daemons',
        action='store_true',
        help='Set mon and mgr service to `unmanaged`, Do not create the crash service')
    parser_bootstrap.add_argument(
        '--skip-monitoring-stack',
        action='store_true',
        help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
    parser_bootstrap.add_argument(
        '--apply-spec',
        help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')

    parser_bootstrap.add_argument(
        '--shared_ceph_folder',
        metavar='CEPH_SOURCE_FOLDER',
        help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')

    parser_bootstrap.add_argument(
        '--registry-url',
        help='url for custom registry')
    parser_bootstrap.add_argument(
        '--registry-username',
        help='username for custom registry')
    parser_bootstrap.add_argument(
        '--registry-password',
        help='password for custom registry')
    parser_bootstrap.add_argument(
        '--registry-json',
        help='json file with custom registry login info (URL, Username, Password)')
    parser_bootstrap.add_argument(
        '--container-init',
        action='store_true',
        default=CONTAINER_INIT,
        help=argparse.SUPPRESS)
    parser_bootstrap.add_argument(
        '--with-exporter',
        action='store_true',
        help='Automatically deploy cephadm metadata exporter to each node')
    parser_bootstrap.add_argument(
        '--exporter-config',
        action=CustomValidation,
        help=f'Exporter configuration information in JSON format (providing: {", ".join(CephadmDaemon.config_requirements)}, port information)')
    parser_bootstrap.add_argument(
        '--cluster-network',
        help='subnet to use for cluster replication, recovery and heartbeats (in CIDR notation network/mask)')

    parser_deploy = subparsers.add_parser(
        'deploy', help='deploy a daemon')
    parser_deploy.set_defaults(func=command_deploy)
    parser_deploy.add_argument(
        '--name',
        required=True,
        action=CustomValidation,
        help='daemon name (type.id)')
    parser_deploy.add_argument(
        '--fsid',
        required=True,
        help='cluster FSID')
    parser_deploy.add_argument(
        '--config', '-c',
        help='config file for new daemon')
    parser_deploy.add_argument(
        '--config-json',
        help='Additional configuration information in JSON format')
    parser_deploy.add_argument(
        '--keyring',
        help='keyring for new daemon')
    parser_deploy.add_argument(
        '--key',
        help='key for new daemon')
    parser_deploy.add_argument(
        '--osd-fsid',
        help='OSD uuid, if creating an OSD container')
    parser_deploy.add_argument(
        '--skip-firewalld',
        action='store_true',
        help='Do not configure firewalld')
    parser_deploy.add_argument(
        '--tcp-ports',
        help='List of tcp ports to open in the host firewall')
    parser_deploy.add_argument(
        '--reconfig',
        action='store_true',
        help='Reconfigure a previously deployed daemon')
    parser_deploy.add_argument(
        '--allow-ptrace',
        action='store_true',
        help='Allow SYS_PTRACE on daemon container')
    parser_deploy.add_argument(
        '--container-init',
        action='store_true',
        default=CONTAINER_INIT,
        help=argparse.SUPPRESS)
    parser_deploy.add_argument(
        '--memory-request',
        help='Container memory request/target'
    )
    parser_deploy.add_argument(
        '--memory-limit',
        help='Container memory hard limit'
    )
    parser_deploy.add_argument(
        '--meta-json',
        help='JSON dict of additional metadata'
    )

    parser_check_host = subparsers.add_parser(
        'check-host', help='check host configuration')
    parser_check_host.set_defaults(func=command_check_host)
    parser_check_host.add_argument(
        '--expect-hostname',
        help='Check that hostname matches an expected value')

    parser_prepare_host = subparsers.add_parser(
        'prepare-host', help='prepare a host for cephadm use')
    parser_prepare_host.set_defaults(func=command_prepare_host)
    parser_prepare_host.add_argument(
        '--expect-hostname',
        help='Set hostname')

    parser_add_repo = subparsers.add_parser(
        'add-repo', help='configure package repository')
    parser_add_repo.set_defaults(func=command_add_repo)
    parser_add_repo.add_argument(
        '--release',
        help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE))
    parser_add_repo.add_argument(
        '--version',
        help='use specific upstream version (x.y.z)')
    parser_add_repo.add_argument(
        '--dev',
        help='use specified bleeding edge build from git branch or tag')
    parser_add_repo.add_argument(
        '--dev-commit',
        help='use specified bleeding edge build from git commit')
    parser_add_repo.add_argument(
        '--gpg-url',
        help='specify alternative GPG key location')
    parser_add_repo.add_argument(
        '--repo-url',
        default='https://download.ceph.com',
        help='specify alternative repo location')
    # TODO: proxy?

    parser_rm_repo = subparsers.add_parser(
        'rm-repo', help='remove package repository configuration')
    parser_rm_repo.set_defaults(func=command_rm_repo)

    parser_install = subparsers.add_parser(
        'install', help='install ceph package(s)')
    parser_install.set_defaults(func=command_install)
    parser_install.add_argument(
        'packages', nargs='*',
        default=['cephadm'],
        help='packages')

    parser_registry_login = subparsers.add_parser(
        'registry-login', help='log host into authenticated registry')
    parser_registry_login.set_defaults(func=command_registry_login)
    parser_registry_login.add_argument(
        '--registry-url',
        help='url for custom registry')
    parser_registry_login.add_argument(
        '--registry-username',
        help='username for custom registry')
    parser_registry_login.add_argument(
        '--registry-password',
        help='password for custom registry')
    parser_registry_login.add_argument(
        '--registry-json',
        help='json file with custom registry login info (URL, Username, Password)')
    parser_registry_login.add_argument(
        '--fsid',
        help='cluster FSID')

    parser_gather_facts = subparsers.add_parser(
        'gather-facts', help='gather and return host related information (JSON format)')
    parser_gather_facts.set_defaults(func=command_gather_facts)

    parser_exporter = subparsers.add_parser(
        'exporter', help='Start cephadm in exporter mode (web service), providing host/daemon/disk metadata')
    parser_exporter.add_argument(
        '--fsid',
        required=True,
        type=str,
        help='fsid of the cephadm exporter to run against')
    parser_exporter.add_argument(
        '--port',
        type=int,
        default=int(CephadmDaemon.default_port),
        help='port number for the cephadm exporter service')
    parser_exporter.add_argument(
        '--id',
        type=str,
        default=get_hostname().split('.')[0],
        help='daemon identifier for the exporter')
    parser_exporter.set_defaults(func=command_exporter)

    parser_maintenance = subparsers.add_parser(
        'host-maintenance', help='Manage the maintenance state of a host')
    parser_maintenance.add_argument(
        '--fsid',
        help='cluster FSID')
    parser_maintenance.add_argument(
        'maintenance_action',
        type=str,
        choices=['enter', 'exit'],
        help='Maintenance action - enter maintenance, or exit maintenance')
    parser_maintenance.set_defaults(func=command_maintenance)

    parser_verify_prereqs = subparsers.add_parser(
        'verify-prereqs',
        help='verify system prerequisites for a given service are met on this host')
    parser_verify_prereqs.set_defaults(func=command_verify_prereqs)
    parser_verify_prereqs.add_argument(
        '--daemon-type',
        required=True,
        help='service type of service to whose prereqs will be checked')

    return parser
7940
f6b5b4d7 7941
9f95a23c
TL
def _parse_args(av):
    """Parse the cephadm command line.

    Strips a leading '--' from subcommand REMAINDER args and reconciles the
    deprecated subparser `--container-init` flag with `--no-container-init`,
    keeping the two attributes strictly mutually exclusive.
    """
    parser = _get_parser()

    args = parser.parse_args(av)
    cmd = getattr(args, 'command', None)
    if cmd and cmd[0] == '--':
        cmd.pop(0)

    # workaround argparse to deprecate the subparser `--container-init` flag
    # container_init and no_container_init must always be mutually exclusive
    init_flags = ('--container-init', '--no-container-init')
    if all(flag in av for flag in init_flags):
        parser.error('argument %s: not allowed with argument %s' % (init_flags))
    elif '--container-init' in av:
        args.no_container_init = not args.container_init
    else:
        args.container_init = not args.no_container_init
    assert args.container_init is not args.no_container_init

    return args
9f95a23c 7961
f6b5b4d7 7962
f67539c2 7963def cephadm_init_ctx(args: List[str]) -> Optional[CephadmContext]:
f91f0fd5 7964
f67539c2
TL
7965 ctx = CephadmContext()
7966 ctx.set_args(_parse_args(args))
7967 return ctx
7968
7969
def cephadm_init(args: List[str]) -> Optional[CephadmContext]:
    """Build the execution context and configure logging.

    Returns the populated CephadmContext, or None when no subcommand was
    supplied (the caller treats None as a usage error).
    """
    global logger
    ctx = cephadm_init_ctx(args)
    assert ctx is not None

    # Logger configuration
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(LOG_DIR, exist_ok=True)
    dictConfig(logging_config)
    logger = logging.getLogger()

    if ctx.verbose:
        # only the console handler is bumped; file logging keeps its level
        for handler in logger.handlers:
            if handler.name == 'console':
                handler.setLevel(logging.DEBUG)

    if not ctx.has_function():
        sys.stderr.write('No command specified; pass -h or --help for usage\n')
        return None

    return ctx
7992
7993
def main():
    """Command-line entry point: check privileges, initialize, dispatch."""

    # cephadm manages system state (systemd units, /var/lib/ceph, containers)
    # so it must run as root
    if os.geteuid() != 0:
        sys.stderr.write('ERROR: cephadm should be run as root\n')
        sys.exit(1)

    av: List[str] = sys.argv[1:]

    ctx = cephadm_init(av)
    if not ctx:  # error, exit
        sys.exit(1)

    try:
        # podman or docker?
        ctx.container_engine = find_container_engine(ctx)
        # these commands must work even before a container engine is installed
        if ctx.func not in \
                [command_check_host, command_prepare_host, command_add_repo]:
            check_container_engine(ctx)
        # command handler
        r = ctx.func(ctx)
    except Error as e:
        if ctx.verbose:
            raise
        logger.error('ERROR: %s' % e)
        sys.exit(1)
    # handlers may return None/'' for success; normalize to exit code 0
    if not r:
        r = 0
    sys.exit(r)
f67539c2
TL
8024
8025
# standard script entry-point guard
if __name__ == '__main__':
    main()