#!/usr/bin/python3

import asyncio
import asyncio.subprocess
import argparse
import datetime
import fcntl
import ipaddress
import io
import json
import logging
from logging.config import dictConfig
import os
import platform
import pwd
import random
import shlex
import shutil
import socket
import string
import subprocess
import sys
import tempfile
import time
import errno
import struct
import ssl
from enum import Enum
from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, Callable, IO, Sequence, TypeVar, cast, Set, Iterable

import re
import uuid

from configparser import ConfigParser
from contextlib import redirect_stdout
from functools import wraps
from glob import glob
from io import StringIO
from threading import Thread, Event
from urllib.error import HTTPError, URLError
from urllib.request import urlopen, Request
from pathlib import Path

FuncT = TypeVar('FuncT', bound=Callable)

# Default container images -----------------------------------------------------
DEFAULT_IMAGE = 'quay.io/ceph/ceph:v17'
DEFAULT_IMAGE_IS_MASTER = False
DEFAULT_IMAGE_RELEASE = 'quincy'
DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.33.4'
DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.4.0'
DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.4.0'
DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.3.1'
DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.23.0'
DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/ceph-grafana:8.3.5'
DEFAULT_HAPROXY_IMAGE = 'docker.io/library/haproxy:2.3'
DEFAULT_KEEPALIVED_IMAGE = 'docker.io/arcts/keepalived'
DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1'
DEFAULT_REGISTRY = 'docker.io'   # normalize unqualified digests to this
# ------------------------------------------------------------------------------

LATEST_STABLE_RELEASE = 'quincy'
DATA_DIR = '/var/lib/ceph'
LOG_DIR = '/var/log/ceph'
LOCK_DIR = '/run/cephadm'
LOGROTATE_DIR = '/etc/logrotate.d'
SYSCTL_DIR = '/etc/sysctl.d'
UNIT_DIR = '/etc/systemd/system'
CEPH_CONF_DIR = 'config'
CEPH_CONF = 'ceph.conf'
CEPH_PUBKEY = 'ceph.pub'
CEPH_KEYRING = 'ceph.client.admin.keyring'
CEPH_DEFAULT_CONF = f'/etc/ceph/{CEPH_CONF}'
CEPH_DEFAULT_KEYRING = f'/etc/ceph/{CEPH_KEYRING}'
CEPH_DEFAULT_PUBKEY = f'/etc/ceph/{CEPH_PUBKEY}'
LOG_DIR_MODE = 0o770
DATA_DIR_MODE = 0o700
CONTAINER_INIT = True
MIN_PODMAN_VERSION = (2, 0, 2)
CGROUPS_SPLIT_PODMAN_VERSION = (2, 1, 0)
CUSTOM_PS1 = r'[ceph: \u@\h \W]\$ '
DEFAULT_TIMEOUT = None  # in seconds
DEFAULT_RETRY = 15
DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ'

logger: logging.Logger = None  # type: ignore

"""
You can invoke cephadm in two ways:

1. The normal way, at the command line.

2. By piping the script to the python3 binary. In this latter case, you should
   prepend one or more lines to the beginning of the script.

   For arguments,

     injected_argv = [...]

   e.g.,

     injected_argv = ['ls']

   For reading stdin from the '--config-json -' argument,

     injected_stdin = '...'
"""
cached_stdin = None

##################################


class EndPoint:
    """EndPoint representing an ip:port format"""

    def __init__(self, ip: str, port: int) -> None:
        self.ip = ip
        self.port = port

    def __str__(self) -> str:
        return f'{self.ip}:{self.port}'

    def __repr__(self) -> str:
        return f'{self.ip}:{self.port}'
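
# Illustrative usage (added commentary, not in the upstream script):
#     str(EndPoint('10.1.2.3', 6789)) -> '10.1.2.3:6789'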


class ContainerInfo:
    def __init__(self, container_id: str,
                 image_name: str,
                 image_id: str,
                 start: str,
                 version: str) -> None:
        self.container_id = container_id
        self.image_name = image_name
        self.image_id = image_id
        self.start = start
        self.version = version

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, ContainerInfo):
            return NotImplemented
        return (self.container_id == other.container_id
                and self.image_name == other.image_name
                and self.image_id == other.image_id
                and self.start == other.start
                and self.version == other.version)


class BaseConfig:

    def __init__(self) -> None:
        self.image: str = ''
        self.docker: bool = False
        self.data_dir: str = DATA_DIR
        self.log_dir: str = LOG_DIR
        self.logrotate_dir: str = LOGROTATE_DIR
        self.sysctl_dir: str = SYSCTL_DIR
        self.unit_dir: str = UNIT_DIR
        self.verbose: bool = False
        self.timeout: Optional[int] = DEFAULT_TIMEOUT
        self.retry: int = DEFAULT_RETRY
        self.env: List[str] = []
        self.memory_request: Optional[int] = None
        self.memory_limit: Optional[int] = None
        self.log_to_journald: Optional[bool] = None

        self.container_init: bool = CONTAINER_INIT
        self.container_engine: Optional[ContainerEngine] = None

    def set_from_args(self, args: argparse.Namespace) -> None:
        argdict: Dict[str, Any] = vars(args)
        for k, v in argdict.items():
            if hasattr(self, k):
                setattr(self, k, v)


class CephadmContext:

    def __init__(self) -> None:
        self.__dict__['_args'] = None
        self.__dict__['_conf'] = BaseConfig()

    def set_args(self, args: argparse.Namespace) -> None:
        self._conf.set_from_args(args)
        self._args = args

    def has_function(self) -> bool:
        return 'func' in self._args

    def __contains__(self, name: str) -> bool:
        return hasattr(self, name)

    def __getattr__(self, name: str) -> Any:
        if '_conf' in self.__dict__ and hasattr(self._conf, name):
            return getattr(self._conf, name)
        elif '_args' in self.__dict__ and hasattr(self._args, name):
            return getattr(self._args, name)
        else:
            return super().__getattribute__(name)

    def __setattr__(self, name: str, value: Any) -> None:
        if hasattr(self._conf, name):
            setattr(self._conf, name, value)
        elif hasattr(self._args, name):
            setattr(self._args, name, value)
        else:
            super().__setattr__(name, value)
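
# Note (added commentary, not in the upstream script): attribute access on a
# CephadmContext is resolved against the BaseConfig first and then against the
# parsed argparse Namespace, so e.g. ctx.image reads the config value, and
# assignments follow the same precedence before falling back to normal
# attribute storage.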


class ContainerEngine:
    def __init__(self) -> None:
        self.path = find_program(self.EXE)

    @classmethod
    @property
    def EXE(cls) -> str:
        raise NotImplementedError()

    def __str__(self) -> str:
        return f'{self.EXE} ({self.path})'


class Podman(ContainerEngine):
    EXE = 'podman'

    def __init__(self) -> None:
        super().__init__()
        self._version: Optional[Tuple[int, ...]] = None

    @property
    def version(self) -> Tuple[int, ...]:
        if self._version is None:
            raise RuntimeError('Please call `get_version` first')
        return self._version

    def get_version(self, ctx: CephadmContext) -> None:
        out, _, _ = call_throws(ctx, [self.path, 'version', '--format', '{{.Client.Version}}'])
        self._version = _parse_podman_version(out)

    def __str__(self) -> str:
        version = '.'.join(map(str, self.version))
        return f'{self.EXE} ({self.path}) version {version}'


class Docker(ContainerEngine):
    EXE = 'docker'


CONTAINER_PREFERENCE = (Podman, Docker)  # prefer podman to docker
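
# Illustrative usage (added commentary, not in the upstream script): the engine
# version is populated lazily; Podman.version raises until get_version() runs:
#     engine = Podman()
#     engine.get_version(ctx)
#     engine.version   # e.g. (3, 4, 2)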


# During normal cephadm operations (cephadm ls, gather-facts, etc.) we use:
# stdout: for JSON output only
# stderr: for error, debug, info, etc.
logging_config = {
    'version': 1,
    'disable_existing_loggers': True,
    'formatters': {
        'cephadm': {
            'format': '%(asctime)s %(thread)x %(levelname)s %(message)s'
        },
    },
    'handlers': {
        'console': {
            'level': 'INFO',
            'class': 'logging.StreamHandler',
        },
        'log_file': {
            'level': 'DEBUG',
            'class': 'logging.handlers.WatchedFileHandler',
            'formatter': 'cephadm',
            'filename': '%s/cephadm.log' % LOG_DIR,
        }
    },
    'loggers': {
        '': {
            'level': 'DEBUG',
            'handlers': ['console', 'log_file'],
        }
    }
}


class ExcludeErrorsFilter(logging.Filter):
    def filter(self, record: logging.LogRecord) -> bool:
        """Only lets through log messages with log level below WARNING."""
        return record.levelno < logging.WARNING


# When cephadm is used as a standard binary (bootstrap, rm-cluster, etc.) we use:
# stdout: for debug and info
# stderr: for errors and warnings
interactive_logging_config = {
    'version': 1,
    'filters': {
        'exclude_errors': {
            '()': ExcludeErrorsFilter
        }
    },
    'disable_existing_loggers': True,
    'formatters': {
        'cephadm': {
            'format': '%(asctime)s %(thread)x %(levelname)s %(message)s'
        },
    },
    'handlers': {
        'console_stdout': {
            'level': 'INFO',
            'class': 'logging.StreamHandler',
            'filters': ['exclude_errors'],
            'stream': sys.stdout
        },
        'console_stderr': {
            'level': 'WARNING',
            'class': 'logging.StreamHandler',
            'stream': sys.stderr
        },
        'log_file': {
            'level': 'DEBUG',
            'class': 'logging.handlers.WatchedFileHandler',
            'formatter': 'cephadm',
            'filename': '%s/cephadm.log' % LOG_DIR,
        }
    },
    'loggers': {
        '': {
            'level': 'DEBUG',
            'handlers': ['console_stdout', 'console_stderr', 'log_file'],
        }
    }
}


class termcolor:
    yellow = '\033[93m'
    red = '\033[31m'
    end = '\033[0m'


class Error(Exception):
    pass


class TimeoutExpired(Error):
    pass


class UnauthorizedRegistryError(Error):
    pass

##################################


class Ceph(object):
    daemons = ('mon', 'mgr', 'osd', 'mds', 'rgw', 'rbd-mirror',
               'crash', 'cephfs-mirror')

##################################


class OSD(object):
    @staticmethod
    def get_sysctl_settings() -> List[str]:
        return [
            '# allow a large number of OSDs',
            'fs.aio-max-nr = 1048576',
            'kernel.pid_max = 4194304',
        ]


##################################


class SNMPGateway:
    """Defines an SNMP gateway between Prometheus and SNMP monitoring frameworks"""
    daemon_type = 'snmp-gateway'
    SUPPORTED_VERSIONS = ['V2c', 'V3']
    default_image = DEFAULT_SNMP_GATEWAY_IMAGE
    DEFAULT_PORT = 9464
    env_filename = 'snmp-gateway.conf'

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str,
                 daemon_id: Union[int, str],
                 config_json: Dict[str, Any],
                 image: Optional[str] = None) -> None:
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image or SNMPGateway.default_image

        self.uid = config_json.get('uid', 0)
        self.gid = config_json.get('gid', 0)

        self.destination = config_json.get('destination', '')
        self.snmp_version = config_json.get('snmp_version', 'V2c')
        self.snmp_community = config_json.get('snmp_community', 'public')
        self.log_level = config_json.get('log_level', 'info')
        self.snmp_v3_auth_username = config_json.get('snmp_v3_auth_username', '')
        self.snmp_v3_auth_password = config_json.get('snmp_v3_auth_password', '')
        self.snmp_v3_auth_protocol = config_json.get('snmp_v3_auth_protocol', '')
        self.snmp_v3_priv_protocol = config_json.get('snmp_v3_priv_protocol', '')
        self.snmp_v3_priv_password = config_json.get('snmp_v3_priv_password', '')
        self.snmp_v3_engine_id = config_json.get('snmp_v3_engine_id', '')

        self.validate()

    @classmethod
    def init(cls, ctx: CephadmContext, fsid: str,
             daemon_id: Union[int, str]) -> 'SNMPGateway':
        assert ctx.config_json
        return cls(ctx, fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    @staticmethod
    def get_version(ctx: CephadmContext, fsid: str, daemon_id: str) -> Optional[str]:
        """Return the version of the notifier from its http endpoint"""
        path = os.path.join(ctx.data_dir, fsid, f'snmp-gateway.{daemon_id}', 'unit.meta')
        try:
            with open(path, 'r') as env:
                metadata = json.loads(env.read())
        except (OSError, json.JSONDecodeError):
            return None

        ports = metadata.get('ports', [])
        if not ports:
            return None

        try:
            with urlopen(f'http://127.0.0.1:{ports[0]}/') as r:
                html = r.read().decode('utf-8').split('\n')
        except (HTTPError, URLError):
            return None

        for h in html:
            stripped = h.strip()
            if stripped.startswith(('<pre>', '<PRE>')) and \
               stripped.endswith(('</pre>', '</PRE>')):
                # <pre>(version=1.2.1, branch=HEAD, revision=7...
                return stripped.split(',')[0].split('version=')[1]

        return None

    @property
    def port(self) -> int:
        if not self.ctx.tcp_ports:
            return self.DEFAULT_PORT
        else:
            if len(self.ctx.tcp_ports) > 0:
                return int(self.ctx.tcp_ports.split()[0])
            else:
                return self.DEFAULT_PORT

    def get_daemon_args(self) -> List[str]:
        v3_args = []
        base_args = [
            f'--web.listen-address=:{self.port}',
            f'--snmp.destination={self.destination}',
            f'--snmp.version={self.snmp_version}',
            f'--log.level={self.log_level}',
            '--snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl'
        ]

        if self.snmp_version == 'V3':
            # common auth settings
            v3_args.extend([
                '--snmp.authentication-enabled',
                f'--snmp.authentication-protocol={self.snmp_v3_auth_protocol}',
                f'--snmp.security-engine-id={self.snmp_v3_engine_id}'
            ])
            # authPriv setting is applied if we have a privacy protocol setting
            if self.snmp_v3_priv_protocol:
                v3_args.extend([
                    '--snmp.private-enabled',
                    f'--snmp.private-protocol={self.snmp_v3_priv_protocol}'
                ])

        return base_args + v3_args
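
    # Illustrative example (added commentary, not in the upstream script): for a
    # default V2c gateway with destination '192.168.1.10:162' this returns roughly
    #     ['--web.listen-address=:9464',
    #      '--snmp.destination=192.168.1.10:162',
    #      '--snmp.version=V2c',
    #      '--log.level=info',
    #      '--snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl']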

    @property
    def data_dir(self) -> str:
        return os.path.join(self.ctx.data_dir, self.ctx.fsid, f'{self.daemon_type}.{self.daemon_id}')

    @property
    def conf_file_path(self) -> str:
        return os.path.join(self.data_dir, self.env_filename)

    def create_daemon_conf(self) -> None:
        """Creates the environment file holding 'secrets' passed to the snmp-notifier daemon"""
        with open(os.open(self.conf_file_path, os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
            if self.snmp_version == 'V2c':
                f.write(f'SNMP_NOTIFIER_COMMUNITY={self.snmp_community}\n')
            else:
                f.write(f'SNMP_NOTIFIER_AUTH_USERNAME={self.snmp_v3_auth_username}\n')
                f.write(f'SNMP_NOTIFIER_AUTH_PASSWORD={self.snmp_v3_auth_password}\n')
                if self.snmp_v3_priv_password:
                    f.write(f'SNMP_NOTIFIER_PRIV_PASSWORD={self.snmp_v3_priv_password}\n')

    def validate(self) -> None:
        """Validate the settings

        Raises:
            Error: if the fsid doesn't look like an fsid
            Error: if the snmp version is not supported
            Error: if the destination IP:port is missing
        """
        if not is_fsid(self.fsid):
            raise Error(f'not a valid fsid: {self.fsid}')

        if self.snmp_version not in SNMPGateway.SUPPORTED_VERSIONS:
            raise Error(f'not a valid snmp version: {self.snmp_version}')

        if not self.destination:
            raise Error('config is missing destination attribute (<ip>:<port>) of the target SNMP listener')


##################################


class Monitoring(object):
    """Define the configs for the monitoring containers"""

    port_map = {
        'prometheus': [9095],  # Avoid default 9090, due to conflict with cockpit UI
        'node-exporter': [9100],
        'grafana': [3000],
        'alertmanager': [9093, 9094],
        'loki': [3100],
        'promtail': [9080]
    }

    components = {
        'prometheus': {
            'image': DEFAULT_PROMETHEUS_IMAGE,
            'cpus': '2',
            'memory': '4GB',
            'args': [
                '--config.file=/etc/prometheus/prometheus.yml',
                '--storage.tsdb.path=/prometheus',
            ],
            'config-json-files': [
                'prometheus.yml',
            ],
        },
        'loki': {
            'image': DEFAULT_LOKI_IMAGE,
            'cpus': '1',
            'memory': '1GB',
            'args': [
                '--config.file=/etc/loki/loki.yml',
            ],
            'config-json-files': [
                'loki.yml'
            ],
        },
        'promtail': {
            'image': DEFAULT_PROMTAIL_IMAGE,
            'cpus': '1',
            'memory': '1GB',
            'args': [
                '--config.file=/etc/promtail/promtail.yml',
            ],
            'config-json-files': [
                'promtail.yml',
            ],
        },
        'node-exporter': {
            'image': DEFAULT_NODE_EXPORTER_IMAGE,
            'cpus': '1',
            'memory': '1GB',
            'args': [
                '--no-collector.timex',
            ],
        },
        'grafana': {
            'image': DEFAULT_GRAFANA_IMAGE,
            'cpus': '2',
            'memory': '4GB',
            'args': [],
            'config-json-files': [
                'grafana.ini',
                'provisioning/datasources/ceph-dashboard.yml',
                'certs/cert_file',
                'certs/cert_key',
            ],
        },
        'alertmanager': {
            'image': DEFAULT_ALERT_MANAGER_IMAGE,
            'cpus': '2',
            'memory': '2GB',
            'args': [
                '--cluster.listen-address=:{}'.format(port_map['alertmanager'][1]),
            ],
            'config-json-files': [
                'alertmanager.yml',
            ],
            'config-json-args': [
                'peers',
            ],
        },
    }  # type: ignore

    @staticmethod
    def get_version(ctx, container_id, daemon_type):
        # type: (CephadmContext, str, str) -> str
        """
        :param daemon_type: Either "prometheus", "alertmanager", "loki", "promtail" or "node-exporter"
        """
        assert daemon_type in ('prometheus', 'alertmanager', 'node-exporter', 'loki', 'promtail')
        cmd = daemon_type.replace('-', '_')
        code = -1
        err = ''
        version = ''
        if daemon_type == 'alertmanager':
            for cmd in ['alertmanager', 'prometheus-alertmanager']:
                _, err, code = call(ctx, [
                    ctx.container_engine.path, 'exec', container_id, cmd,
                    '--version'
                ], verbosity=CallVerbosity.DEBUG)
                if code == 0:
                    break
            cmd = 'alertmanager'  # reset cmd for version extraction
        else:
            _, err, code = call(ctx, [
                ctx.container_engine.path, 'exec', container_id, cmd, '--version'
            ], verbosity=CallVerbosity.DEBUG)
        if code == 0 and \
                err.startswith('%s, version ' % cmd):
            version = err.split(' ')[2]
        return version
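
    # Note (added commentary, not in the upstream script): these daemons print a
    # version banner to stderr, roughly '<name>, version X.Y.Z (branch: ...)',
    # so the third whitespace-separated token of stderr is taken as the version.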

##################################


def populate_files(config_dir, config_files, uid, gid):
    # type: (str, Dict, int, int) -> None
    """create config files for different services"""
    for fname in config_files:
        config_file = os.path.join(config_dir, fname)
        config_content = dict_get_join(config_files, fname)
        logger.info('Write file: %s' % (config_file))
        with open(config_file, 'w', encoding='utf-8') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config_content)


class NFSGanesha(object):
    """Defines an NFS-Ganesha container"""

    daemon_type = 'nfs'
    entrypoint = '/usr/bin/ganesha.nfsd'
    daemon_args = ['-F', '-L', 'STDERR']

    required_files = ['ganesha.conf']

    port_map = {
        'nfs': 2049,
    }

    def __init__(self,
                 ctx,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.pool = dict_get(config_json, 'pool', require=True)
        self.namespace = dict_get(config_json, 'namespace')
        self.userid = dict_get(config_json, 'userid')
        self.extra_args = dict_get(config_json, 'extra_args', [])
        self.files = dict_get(config_json, 'files', {})
        self.rgw = dict_get(config_json, 'rgw', {})

        # validate the supplied args
        self.validate()

    @classmethod
    def init(cls, ctx, fsid, daemon_id):
        # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha
        return cls(ctx, fsid, daemon_id, get_parm(ctx.config_json), ctx.image)

    def get_container_mounts(self, data_dir):
        # type: (str) -> Dict[str, str]
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z'
        if self.rgw:
            cluster = self.rgw.get('cluster', 'ceph')
            rgw_user = self.rgw.get('user', 'admin')
            mounts[os.path.join(data_dir, 'keyring.rgw')] = \
                '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user)
        return mounts

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        envs = [
            'CEPH_CONF=%s' % (CEPH_DEFAULT_CONF)
        ]
        return envs

    @staticmethod
    def get_version(ctx, container_id):
        # type: (CephadmContext, str) -> Optional[str]
        version = None
        out, err, code = call(ctx,
                              [ctx.container_engine.path, 'exec', container_id,
                               NFSGanesha.entrypoint, '-v'],
                              verbosity=CallVerbosity.DEBUG)
        if code == 0:
            match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
            if match:
                version = match.group(1)
        return version

    def validate(self):
        # type: () -> None
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

        # check for an RGW config
        if self.rgw:
            if not self.rgw.get('keyring'):
                raise Error('RGW keyring is missing')
            if not self.rgw.get('user'):
                raise Error('RGW user is missing')

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def get_daemon_args(self):
        # type: () -> List[str]
        return self.daemon_args + self.extra_args

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ganesha config...')

        # create the ganesha conf dir
        config_dir = os.path.join(data_dir, 'etc/ganesha')
        makedirs(config_dir, uid, gid, 0o755)

        # populate files from the config-json
        populate_files(config_dir, self.files, uid, gid)

        # write the RGW keyring
        if self.rgw:
            keyring_path = os.path.join(data_dir, 'keyring.rgw')
            with open(keyring_path, 'w') as f:
                os.fchmod(f.fileno(), 0o600)
                os.fchown(f.fileno(), uid, gid)
                f.write(self.rgw.get('keyring', ''))

##################################


class CephIscsi(object):
    """Defines a Ceph-Iscsi container"""

    daemon_type = 'iscsi'
    entrypoint = '/usr/bin/rbd-target-api'

    required_files = ['iscsi-gateway.cfg']

    def __init__(self,
                 ctx,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        # validate the supplied args
        self.validate()

    @classmethod
    def init(cls, ctx, fsid, daemon_id):
        # type: (CephadmContext, str, Union[int, str]) -> CephIscsi
        return cls(ctx, fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    @staticmethod
    def get_container_mounts(data_dir, log_dir):
        # type: (str, str) -> Dict[str, str]
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
        mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
        mounts[log_dir] = '/var/log:z'
        mounts['/dev'] = '/dev'
        return mounts

    @staticmethod
    def get_container_binds():
        # type: () -> List[List[str]]
        binds = []
        lib_modules = ['type=bind',
                       'source=/lib/modules',
                       'destination=/lib/modules',
                       'ro=true']
        binds.append(lib_modules)
        return binds

    @staticmethod
    def get_version(ctx, container_id):
        # type: (CephadmContext, str) -> Optional[str]
        version = None
        out, err, code = call(ctx,
                              [ctx.container_engine.path, 'exec', container_id,
                               '/usr/bin/python3', '-c', "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"],
                              verbosity=CallVerbosity.DEBUG)
        if code == 0:
            version = out.strip()
        return version

    def validate(self):
        # type: () -> None
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ceph-iscsi config...')
        configfs_dir = os.path.join(data_dir, 'configfs')
        makedirs(configfs_dir, uid, gid, 0o755)

        # populate files from the config-json
        populate_files(data_dir, self.files, uid, gid)

    @staticmethod
    def configfs_mount_umount(data_dir, mount=True):
        # type: (str, bool) -> List[str]
        mount_path = os.path.join(data_dir, 'configfs')
        if mount:
            cmd = 'if ! grep -qs {0} /proc/mounts; then ' \
                  'mount -t configfs none {0}; fi'.format(mount_path)
        else:
            cmd = 'if grep -qs {0} /proc/mounts; then ' \
                  'umount {0}; fi'.format(mount_path)
        return cmd.split()

    def get_tcmu_runner_container(self):
        # type: () -> CephContainer
        tcmu_container = get_container(self.ctx, self.fsid, self.daemon_type, self.daemon_id)
        tcmu_container.entrypoint = '/usr/bin/tcmu-runner'
        tcmu_container.cname = self.get_container_name(desc='tcmu')
        # remove extra container args for tcmu container.
        # extra args could cause issue with forking service type
        tcmu_container.container_args = []
        return tcmu_container

##################################


class HAproxy(object):
    """Defines an HAproxy container"""
    daemon_type = 'haproxy'
    required_files = ['haproxy.cfg']
    default_image = DEFAULT_HAPROXY_IMAGE

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        self.validate()

    @classmethod
    def init(cls, ctx: CephadmContext,
             fsid: str, daemon_id: Union[int, str]) -> 'HAproxy':
        return cls(ctx, fsid, daemon_id, get_parm(ctx.config_json),
                   ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        # create additional directories in data dir for HAproxy to use
        if not os.path.isdir(os.path.join(data_dir, 'haproxy')):
            makedirs(os.path.join(data_dir, 'haproxy'), uid, gid, DATA_DIR_MODE)

        data_dir = os.path.join(data_dir, 'haproxy')
        populate_files(data_dir, self.files, uid, gid)

    def get_daemon_args(self) -> List[str]:
        return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg']

    def validate(self):
        # type: () -> None
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def extract_uid_gid_haproxy(self) -> Tuple[int, int]:
        # better directory for this?
        return extract_uid_gid(self.ctx, file_path='/var/lib')

    @staticmethod
    def get_container_mounts(data_dir: str) -> Dict[str, str]:
        mounts = dict()
        mounts[os.path.join(data_dir, 'haproxy')] = '/var/lib/haproxy'
        return mounts

    @staticmethod
    def get_sysctl_settings() -> List[str]:
        return [
            '# IP forwarding',
            'net.ipv4.ip_forward = 1',
        ]

##################################


class Keepalived(object):
    """Defines a Keepalived container"""
    daemon_type = 'keepalived'
    required_files = ['keepalived.conf']
    default_image = DEFAULT_KEEPALIVED_IMAGE

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        self.validate()

    @classmethod
    def init(cls, ctx: CephadmContext, fsid: str,
             daemon_id: Union[int, str]) -> 'Keepalived':
        return cls(ctx, fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        # create additional directories in data dir for keepalived to use
        if not os.path.isdir(os.path.join(data_dir, 'keepalived')):
            makedirs(os.path.join(data_dir, 'keepalived'), uid, gid, DATA_DIR_MODE)

        # populate files from the config-json
        populate_files(data_dir, self.files, uid, gid)

    def validate(self):
        # type: () -> None
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        envs = [
            'KEEPALIVED_AUTOCONF=false',
            'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf',
            'KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf',
            'KEEPALIVED_DEBUG=false'
        ]
        return envs

    @staticmethod
    def get_sysctl_settings() -> List[str]:
        return [
            '# IP forwarding and non-local bind',
            'net.ipv4.ip_forward = 1',
            'net.ipv4.ip_nonlocal_bind = 1',
        ]

    def extract_uid_gid_keepalived(self) -> Tuple[int, int]:
        # better directory for this?
        return extract_uid_gid(self.ctx, file_path='/var/lib')

    @staticmethod
    def get_container_mounts(data_dir: str) -> Dict[str, str]:
        mounts = dict()
        mounts[os.path.join(data_dir, 'keepalived.conf')] = '/etc/keepalived/keepalived.conf'
        return mounts

##################################


class CustomContainer(object):
    """Defines a custom container"""
    daemon_type = 'container'

    def __init__(self,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.entrypoint = dict_get(config_json, 'entrypoint')
        self.uid = dict_get(config_json, 'uid', 65534)  # nobody
        self.gid = dict_get(config_json, 'gid', 65534)  # nobody
        self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
        self.args = dict_get(config_json, 'args', [])
        self.envs = dict_get(config_json, 'envs', [])
        self.privileged = dict_get(config_json, 'privileged', False)
        self.bind_mounts = dict_get(config_json, 'bind_mounts', [])
        self.ports = dict_get(config_json, 'ports', [])
        self.dirs = dict_get(config_json, 'dirs', [])
        self.files = dict_get(config_json, 'files', {})

    @classmethod
    def init(cls, ctx: CephadmContext,
             fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer':
        return cls(fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """
        Create dirs/files below the container data directory.
        """
        logger.info('Creating custom container configuration '
                    'dirs/files in {} ...'.format(data_dir))

        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % data_dir)

        for dir_path in self.dirs:
            logger.info('Creating directory: {}'.format(dir_path))
            dir_path = os.path.join(data_dir, dir_path.strip('/'))
            makedirs(dir_path, uid, gid, 0o755)

        for file_path in self.files:
            logger.info('Creating file: {}'.format(file_path))
            content = dict_get_join(self.files, file_path)
            file_path = os.path.join(data_dir, file_path.strip('/'))
            with open(file_path, 'w', encoding='utf-8') as f:
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)
                f.write(content)

    def get_daemon_args(self) -> List[str]:
        return []

    def get_container_args(self) -> List[str]:
        return self.args

    def get_container_envs(self) -> List[str]:
        return self.envs

    def get_container_mounts(self, data_dir: str) -> Dict[str, str]:
        """
        Get the volume mounts. Relative source paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

        Example:
        {
            /foo/conf: /conf
            foo/conf: /conf
        }
        becomes
        {
            /foo/conf: /conf
            /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
        }
        """
        mounts = {}
        for source, destination in self.volume_mounts.items():
            source = os.path.join(data_dir, source)
            mounts[source] = destination
        return mounts

    def get_container_binds(self, data_dir: str) -> List[List[str]]:
        """
        Get the bind mounts. Relative `source=...` paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

        Example:
            [
                'type=bind',
                'source=lib/modules',
                'destination=/lib/modules',
                'ro=true'
            ]
        becomes
            [
                ...
                'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
                ...
            ]
        """
        binds = self.bind_mounts.copy()
        for bind in binds:
            for index, value in enumerate(bind):
                match = re.match(r'^source=(.+)$', value)
                if match:
                    bind[index] = 'source={}'.format(os.path.join(
                        data_dir, match.group(1)))
        return binds

##################################


def touch(file_path: str, uid: Optional[int] = None, gid: Optional[int] = None) -> None:
    Path(file_path).touch()
    if uid and gid:
        os.chown(file_path, uid, gid)


##################################


def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
    """
    Helper function to get a key from a dictionary.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :param default: The default value in case the key does not
        exist. Default is `None`.
    :param require: Set to `True` if the key is required. An
        exception will be raised if the key does not exist in
        the given dictionary.
    :return: Returns the value of the given key.
    :raises: :exc:`self.Error` if the given key does not exist
        and `require` is set to `True`.
    """
    if require and key not in d.keys():
        raise Error('{} missing from dict'.format(key))
    return d.get(key, default)  # type: ignore
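
# Illustrative usage (added commentary, not in the upstream script):
#     dict_get({'a': 1}, 'a')               -> 1
#     dict_get({'a': 1}, 'b', default=2)    -> 2
#     dict_get({'a': 1}, 'b', require=True) raises Error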

##################################


def dict_get_join(d: Dict, key: str) -> Any:
    """
    Helper function to get the value of a given key from a dictionary.
    `List` values will be converted to a string by joining them with a
    line break.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :return: Returns the value of the given key. If it was a `list`, it
        will be joined with a line break.
    """
    value = d.get(key)
    if isinstance(value, list):
        value = '\n'.join(map(str, value))
    return value
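
# Illustrative usage (added commentary, not in the upstream script):
#     dict_get_join({'files': ['line1', 'line2']}, 'files')  -> 'line1\nline2'
#     dict_get_join({'files': 'as-is'}, 'files')             -> 'as-is'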

##################################


def get_supported_daemons():
    # type: () -> List[str]
    supported_daemons = list(Ceph.daemons)
    supported_daemons.extend(Monitoring.components)
    supported_daemons.append(NFSGanesha.daemon_type)
    supported_daemons.append(CephIscsi.daemon_type)
    supported_daemons.append(CustomContainer.daemon_type)
    supported_daemons.append(HAproxy.daemon_type)
    supported_daemons.append(Keepalived.daemon_type)
    supported_daemons.append(CephadmAgent.daemon_type)
    supported_daemons.append(SNMPGateway.daemon_type)
    assert len(supported_daemons) == len(set(supported_daemons))
    return supported_daemons

##################################


class PortOccupiedError(Error):
    pass


def attempt_bind(ctx, s, address, port):
    # type: (CephadmContext, socket.socket, str, int) -> None
    try:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((address, port))
    except OSError as e:
        if e.errno == errno.EADDRINUSE:
            msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
            logger.warning(msg)
            raise PortOccupiedError(msg)
        else:
            raise Error(e)
    except Exception as e:
        raise Error(e)
    finally:
        s.close()


def port_in_use(ctx, port_num):
    # type: (CephadmContext, int) -> bool
    """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
    logger.info('Verifying port %d ...' % port_num)

    def _port_in_use(af: socket.AddressFamily, address: str) -> bool:
        try:
            s = socket.socket(af, socket.SOCK_STREAM)
            attempt_bind(ctx, s, address, port_num)
        except PortOccupiedError:
            return True
        except OSError as e:
            if e.errno in (errno.EAFNOSUPPORT, errno.EADDRNOTAVAIL):
                # Ignore EAFNOSUPPORT and EADDRNOTAVAIL as two interfaces are
                # being tested here and one might intentionally be disabled.
                # In that case no error should be raised.
                return False
            else:
                raise e
        return False
    return any(_port_in_use(af, address) for af, address in (
        (socket.AF_INET, '0.0.0.0'),
        (socket.AF_INET6, '::')
    ))
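
# Note (added commentary, not in the upstream script): availability is probed by
# actually binding a short-lived socket to the IPv4 and IPv6 wildcard addresses;
# a PortOccupiedError raised by attempt_bind() is what marks the port as in use.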


def check_ip_port(ctx, ep):
    # type: (CephadmContext, EndPoint) -> None
    if not ctx.skip_ping_check:
        logger.info(f'Verifying IP {ep.ip} port {ep.port} ...')
        if is_ipv6(ep.ip):
            s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
            ip = unwrap_ipv6(ep.ip)
        else:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            ip = ep.ip
        attempt_bind(ctx, s, ip, ep.port)

##################################


# this is an abbreviated version of
# https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py
# that drops all of the compatibility (this is Unix/Linux only).

class Timeout(TimeoutError):
    """
    Raised when the lock could not be acquired in *timeout*
    seconds.
    """

    def __init__(self, lock_file: str) -> None:
        #: The path of the file lock.
        self.lock_file = lock_file
        return None

    def __str__(self) -> str:
        temp = "The file lock '{}' could not be acquired."\
               .format(self.lock_file)
        return temp


class _Acquire_ReturnProxy(object):
    def __init__(self, lock: 'FileLock') -> None:
        self.lock = lock
        return None

    def __enter__(self) -> 'FileLock':
        return self.lock

    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
        self.lock.release()
        return None


class FileLock(object):
    def __init__(self, ctx: CephadmContext, name: str, timeout: int = -1) -> None:
        if not os.path.exists(LOCK_DIR):
            os.mkdir(LOCK_DIR, 0o700)
        self._lock_file = os.path.join(LOCK_DIR, name + '.lock')
        self.ctx = ctx

        # The file descriptor for the *_lock_file* as it is returned by the
        # os.open() function.
        # This descriptor is only not None if the object currently holds the
        # lock.
        self._lock_file_fd: Optional[int] = None
        self.timeout = timeout
        # The lock counter is used for implementing the nested locking
        # mechanism. Whenever the lock is acquired, the counter is increased and
        # the lock is only released, when this value is 0 again.
        self._lock_counter = 0
        return None

    @property
    def is_locked(self) -> bool:
        return self._lock_file_fd is not None

    def acquire(self, timeout: Optional[int] = None, poll_intervall: float = 0.05) -> _Acquire_ReturnProxy:
        """
        Acquires the file lock or fails with a :exc:`Timeout` error.
        .. code-block:: python
            # You can use this method in the context manager (recommended)
            with lock.acquire():
                pass
            # Or use an equivalent try-finally construct:
            lock.acquire()
            try:
                pass
            finally:
                lock.release()
        :arg float timeout:
            The maximum time waited for the file lock.
            If ``timeout < 0``, there is no timeout and this method will
            block until the lock could be acquired.
            If ``timeout`` is None, the default :attr:`~timeout` is used.
        :arg float poll_intervall:
            We check once in *poll_intervall* seconds if we can acquire the
            file lock.
        :raises Timeout:
            if the lock could not be acquired in *timeout* seconds.
        .. versionchanged:: 2.0.0
            This method now returns a *proxy* object instead of *self*,
            so that it can be used in a with statement without side effects.
        """

        # Use the default timeout, if no timeout is provided.
        if timeout is None:
            timeout = self.timeout

        # Increment the number right at the beginning.
        # We can still undo it, if something fails.
        self._lock_counter += 1

        lock_id = id(self)
        lock_filename = self._lock_file
        start_time = time.time()
        try:
            while True:
                if not self.is_locked:
                    logger.debug('Acquiring lock %s on %s', lock_id,
                                 lock_filename)
                    self._acquire()

                if self.is_locked:
                    logger.debug('Lock %s acquired on %s', lock_id,
                                 lock_filename)
                    break
                elif timeout >= 0 and time.time() - start_time > timeout:
                    logger.warning('Timeout acquiring lock %s on %s', lock_id,
                                   lock_filename)
                    raise Timeout(self._lock_file)
                else:
                    logger.debug(
                        'Lock %s not acquired on %s, waiting %s seconds ...',
                        lock_id, lock_filename, poll_intervall
                    )
                    time.sleep(poll_intervall)
        except Exception:
            # Something did go wrong, so decrement the counter.
            self._lock_counter = max(0, self._lock_counter - 1)

            raise
        return _Acquire_ReturnProxy(lock=self)

    def release(self, force: bool = False) -> None:
        """
        Releases the file lock.
        Please note that the lock is only completely released if the lock
        counter is 0.
        Also note that the lock file itself is not automatically deleted.
        :arg bool force:
            If true, the lock counter is ignored and the lock is released in
            every case.
        """
        if self.is_locked:
            self._lock_counter -= 1

            if self._lock_counter == 0 or force:
                # lock_id = id(self)
                # lock_filename = self._lock_file

                # Can't log in shutdown:
                #   File "/usr/lib64/python3.9/logging/__init__.py", line 1175, in _open
                #   NameError: name 'open' is not defined
                # logger.debug('Releasing lock %s on %s', lock_id, lock_filename)
                self._release()
                self._lock_counter = 0
                # logger.debug('Lock %s released on %s', lock_id, lock_filename)

        return None

    def __enter__(self) -> 'FileLock':
        self.acquire()
        return self

    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
        self.release()
        return None

    def __del__(self) -> None:
        self.release(force=True)
        return None

    def _acquire(self) -> None:
        open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
        fd = os.open(self._lock_file, open_mode)

        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except (IOError, OSError):
            os.close(fd)
        else:
            self._lock_file_fd = fd
        return None

    def _release(self) -> None:
        # Do not remove the lockfile:
        #
        # https://github.com/benediktschmitt/py-filelock/issues/31
        # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
        fd = self._lock_file_fd
        self._lock_file_fd = None
        fcntl.flock(fd, fcntl.LOCK_UN)  # type: ignore
        os.close(fd)  # type: ignore
        return None
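
# Illustrative usage (added commentary, not in the upstream script): callers
# typically serialize cluster-wide operations by locking on the fsid, e.g.
#     with FileLock(ctx, fsid):
#         ...  # exclusive section
# The flock(2)-based lock is advisory and re-entrant within a single FileLock
# object via the internal counter.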


##################################
# Popen wrappers, lifted from ceph-volume

class CallVerbosity(Enum):
    SILENT = 0
    # log stdout/stderr to logger.debug
    DEBUG = 1
    # On a non-zero exit status, it will forcefully set
    # logging ON for the terminal
    VERBOSE_ON_FAILURE = 2
    # log at info (instead of debug) level.
    VERBOSE = 3


if sys.version_info < (3, 8):
    import itertools
    import threading
    import warnings
    from asyncio import events

    class ThreadedChildWatcher(asyncio.AbstractChildWatcher):
        """Threaded child watcher implementation.
        The watcher uses a thread per process
        for waiting for the process to finish.
        It doesn't require subscription to the POSIX signal
        but thread creation is not free.
        The watcher has O(1) complexity, its performance doesn't depend
        on the number of spawned processes.
        """

        def __init__(self) -> None:
            self._pid_counter = itertools.count(0)
            self._threads: Dict[Any, Any] = {}

        def is_active(self) -> bool:
            return True

        def close(self) -> None:
            self._join_threads()

        def _join_threads(self) -> None:
            """Internal: Join all non-daemon threads"""
            threads = [thread for thread in list(self._threads.values())
                       if thread.is_alive() and not thread.daemon]
            for thread in threads:
                thread.join()

        def __enter__(self) -> Any:
            return self

        def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
            pass

        def __del__(self, _warn: Any = warnings.warn) -> None:
            threads = [thread for thread in list(self._threads.values())
                       if thread.is_alive()]
            if threads:
                _warn(f'{self.__class__} has registered but not finished child processes',
                      ResourceWarning,
                      source=self)

        def add_child_handler(self, pid: Any, callback: Any, *args: Any) -> None:
            loop = events.get_event_loop()
            thread = threading.Thread(target=self._do_waitpid,
                                      name=f'waitpid-{next(self._pid_counter)}',
                                      args=(loop, pid, callback, args),
                                      daemon=True)
            self._threads[pid] = thread
            thread.start()

        def remove_child_handler(self, pid: Any) -> bool:
            # asyncio never calls remove_child_handler() !!!
            # The method is no-op but is implemented because
            # abstract base classes require it
            return True

        def attach_loop(self, loop: Any) -> None:
            pass

        def _do_waitpid(self, loop: Any, expected_pid: Any, callback: Any, args: Any) -> None:
            assert expected_pid > 0

            try:
                pid, status = os.waitpid(expected_pid, 0)
            except ChildProcessError:
                # The child process is already reaped
                # (may happen if waitpid() is called elsewhere).
                pid = expected_pid
                returncode = 255
                logger.warning(
                    'Unknown child process pid %d, will report returncode 255',
                    pid)
            else:
                if os.WIFEXITED(status):
                    returncode = os.WEXITSTATUS(status)
                elif os.WIFSIGNALED(status):
                    returncode = -os.WTERMSIG(status)
                else:
                    raise ValueError(f'unknown wait status {status}')
                if loop.get_debug():
                    logger.debug('process %s exited with returncode %s',
                                 expected_pid, returncode)

            if loop.is_closed():
                logger.warning('Loop %r that handles pid %r is closed', loop, pid)
            else:
                loop.call_soon_threadsafe(callback, pid, returncode, *args)

            self._threads.pop(expected_pid)

    # unlike SafeChildWatcher which handles SIGCHLD in the main thread,
    # ThreadedChildWatcher runs in a separate thread, hence allowing us to
    # run create_subprocess_exec() in a non-main thread, see
    # https://bugs.python.org/issue35621
    asyncio.set_child_watcher(ThreadedChildWatcher())


try:
    from asyncio import run as async_run  # type: ignore[attr-defined]
except ImportError:
    def async_run(coro):  # type: ignore
        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)
            return loop.run_until_complete(coro)
        finally:
            try:
                loop.run_until_complete(loop.shutdown_asyncgens())
            finally:
                asyncio.set_event_loop(None)
                loop.close()
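
# Note (added commentary, not in the upstream script): asyncio.run() first
# appeared in Python 3.7, so the ImportError branch above re-creates an
# equivalent helper for older interpreters by driving a fresh event loop
# manually and closing it afterwards.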


def call(ctx: CephadmContext,
         command: List[str],
         desc: Optional[str] = None,
         verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
         timeout: Optional[int] = DEFAULT_TIMEOUT,
         **kwargs: Any) -> Tuple[str, str, int]:
    """
    Wrap subprocess.Popen to

    - log stdout/stderr to a logger,
    - decode utf-8
    - cleanly return out, err, returncode

    :param timeout: timeout in seconds
    """

    prefix = command[0] if desc is None else desc
    if prefix:
        prefix += ': '
    timeout = timeout or ctx.timeout

    async def tee(reader: asyncio.StreamReader) -> str:
        collected = StringIO()
        async for line in reader:
            message = line.decode('utf-8')
            collected.write(message)
            if verbosity == CallVerbosity.VERBOSE:
                logger.info(prefix + message.rstrip())
            elif verbosity != CallVerbosity.SILENT:
                logger.debug(prefix + message.rstrip())
        return collected.getvalue()

    async def run_with_timeout() -> Tuple[str, str, int]:
        process = await asyncio.create_subprocess_exec(
            *command,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=os.environ.copy())
        assert process.stdout
        assert process.stderr
        try:
            stdout, stderr = await asyncio.gather(tee(process.stdout),
                                                  tee(process.stderr))
            returncode = await asyncio.wait_for(process.wait(), timeout)
        except asyncio.TimeoutError:
            logger.info(prefix + f'timeout after {timeout} seconds')
            return '', '', 124
        else:
            return stdout, stderr, returncode

    stdout, stderr, returncode = async_run(run_with_timeout())
    if returncode != 0 and verbosity == CallVerbosity.VERBOSE_ON_FAILURE:
        logger.info('Non-zero exit code %d from %s',
                    returncode, ' '.join(command))
        for line in stdout.splitlines():
            logger.info(prefix + 'stdout ' + line)
        for line in stderr.splitlines():
            logger.info(prefix + 'stderr ' + line)
    return stdout, stderr, returncode
1725
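# Illustrative usage of call() (the unit name below is hypothetical):
#   out, err, code = call(ctx, ['systemctl', 'is-active', 'ceph.target'],
#                         verbosity=CallVerbosity.DEBUG, timeout=30)
# On timeout the subprocess is abandoned and ('', '', 124) is returned,
# mirroring the exit code convention of the GNU timeout(1) utility.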
1726
1727def call_throws(
1728 ctx: CephadmContext,
1729 command: List[str],
1730 desc: Optional[str] = None,
1731 verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
1732 timeout: Optional[int] = DEFAULT_TIMEOUT,
522d829b 1733 **kwargs: Any) -> Tuple[str, str, int]:
f67539c2 1734 out, err, ret = call(ctx, command, desc, verbosity, timeout, **kwargs)
9f95a23c 1735 if ret:
20effc67
TL
1736 for s in (out, err):
1737 if s.strip() and len(s.splitlines()) <= 2: # readable message?
1738 raise RuntimeError(f'Failed command: {" ".join(command)}: {s}')
9f95a23c
TL
1739 raise RuntimeError('Failed command: %s' % ' '.join(command))
1740 return out, err, ret
1741
1742
f67539c2
TL
1743def call_timeout(ctx, command, timeout):
1744 # type: (CephadmContext, List[str], int) -> int
9f95a23c 1745 logger.debug('Running command (timeout=%s): %s'
f67539c2 1746 % (timeout, ' '.join(command)))
9f95a23c
TL
1747
1748 def raise_timeout(command, timeout):
1749 # type: (List[str], int) -> NoReturn
f67539c2 1750 msg = 'Command `%s` timed out after %s seconds' % (command, timeout)
9f95a23c
TL
1751 logger.debug(msg)
1752 raise TimeoutExpired(msg)
1753
f67539c2 1754 try:
522d829b 1755 return subprocess.call(command, timeout=timeout, env=os.environ.copy())
f67539c2
TL
1756 except subprocess.TimeoutExpired:
1757 raise_timeout(command, timeout)
9f95a23c
TL
1758
1759##################################
1760
f6b5b4d7 1761
522d829b 1762def json_loads_retry(cli_func: Callable[[], str]) -> Any:
b3b6e05e
TL
1763 for sleep_secs in [1, 4, 4]:
1764 try:
1765 return json.loads(cli_func())
1766 except json.JSONDecodeError:
1767 logger.debug('Invalid JSON. Retrying in %s seconds...' % sleep_secs)
1768 time.sleep(sleep_secs)
1769 return json.loads(cli_func())
1770
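# Illustrative usage (the cli_json callable below is hypothetical); invalid
# JSON triggers sleeps of 1s, 4s and 4s before a final, uncaught attempt:
#   status = json_loads_retry(lambda: cli_json('status --format json'))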
1771
f67539c2
TL
1772def is_available(ctx, what, func):
1773 # type: (CephadmContext, str, Callable[[], bool]) -> None
9f95a23c
TL
1774 """
1775 Wait for a service to become available
1776
1777 :param what: the name of the service
1778 :param func: the callable object that determines availability
1779 """
f67539c2 1780 retry = ctx.retry
f6b5b4d7 1781 logger.info('Waiting for %s...' % what)
9f95a23c
TL
1782 num = 1
1783 while True:
1784 if func():
e306af50 1785 logger.info('%s is available'
f6b5b4d7 1786 % what)
9f95a23c
TL
1787 break
1788 elif num > retry:
1789 raise Error('%s not available after %s tries'
f67539c2 1790 % (what, retry))
9f95a23c
TL
1791
1792 logger.info('%s not available, waiting (%s/%s)...'
f67539c2 1793 % (what, num, retry))
9f95a23c
TL
1794
1795 num += 1
f67539c2 1796 time.sleep(2)
9f95a23c
TL
1797
1798
1799def read_config(fn):
1800 # type: (Optional[str]) -> ConfigParser
f67539c2 1801 cp = ConfigParser()
9f95a23c 1802 if fn:
f67539c2 1803 cp.read(fn)
9f95a23c
TL
1804 return cp
1805
f6b5b4d7 1806
9f95a23c
TL
1807def pathify(p):
1808 # type: (str) -> str
e306af50
TL
1809 p = os.path.expanduser(p)
1810 return os.path.abspath(p)
9f95a23c 1811
f6b5b4d7 1812
9f95a23c 1813def get_file_timestamp(fn):
e306af50 1814 # type: (str) -> Optional[str]
9f95a23c
TL
1815 try:
1816 mt = os.path.getmtime(fn)
1817 return datetime.datetime.fromtimestamp(
1818 mt, tz=datetime.timezone.utc
1819 ).strftime(DATEFMT)
adb31ebb 1820 except Exception:
9f95a23c
TL
1821 return None
1822
f6b5b4d7 1823
9f95a23c 1824def try_convert_datetime(s):
e306af50 1825 # type: (str) -> Optional[str]
9f95a23c
TL
1826 # This is super irritating because
1827 # 1) podman and docker use different formats
1828 # 2) python's strptime can't parse either one
1829 #
1830 # I've seen:
1831 # docker 18.09.7: 2020-03-03T09:21:43.636153304Z
1832 # podman 1.7.0: 2020-03-03T15:52:30.136257504-06:00
1833 # 2020-03-03 15:52:30.136257504 -0600 CST
1834 # (In the podman case, there is a different string format for
1835 # 'inspect' and 'inspect --format {{.Created}}'!!)
1836
1837 # In *all* cases, the 9-digit fractional-second precision is too much for
1838 # python's strptime. Shorten it to 6 digits.
1839 p = re.compile(r'(\.[\d]{6})[\d]*')
1840 s = p.sub(r'\1', s)
1841
adb31ebb 1842 # replace trailing Z with -0000, since (on python 3.6.8) it won't parse
9f95a23c
TL
1843 if s and s[-1] == 'Z':
1844 s = s[:-1] + '-0000'
1845
adb31ebb 1846 # cut off the redundant 'CST' part that strptime can't parse, if
9f95a23c
TL
1847 # present.
1848 v = s.split(' ')
1849 s = ' '.join(v[0:3])
1850
1851 # try parsing with several format strings
1852 fmts = [
1853 '%Y-%m-%dT%H:%M:%S.%f%z',
1854 '%Y-%m-%d %H:%M:%S.%f %z',
1855 ]
1856 for f in fmts:
1857 try:
1858 # return timestamp normalized to UTC, rendered as DATEFMT.
1859 return datetime.datetime.strptime(s, f).astimezone(tz=datetime.timezone.utc).strftime(DATEFMT)
1860 except ValueError:
1861 pass
1862 return None
1863
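# Illustrative conversion (docker-style input), rendered in the default DATEFMT:
#   try_convert_datetime('2020-03-03T09:21:43.636153304Z')
#   -> '2020-03-03T09:21:43.636153Z'
# Unparseable inputs yield None.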
f6b5b4d7 1864
f67539c2 1865def _parse_podman_version(version_str):
9f95a23c 1866 # type: (str) -> Tuple[int, ...]
522d829b 1867 def to_int(val: str, org_e: Optional[Exception] = None) -> int:
9f95a23c
TL
1868 if not val and org_e:
1869 raise org_e
1870 try:
1871 return int(val)
1872 except ValueError as e:
1873 return to_int(val[0:-1], org_e or e)
1874
1875 return tuple(map(to_int, version_str.split('.')))
1876
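# Illustrative parses (version strings below are hypothetical):
#   _parse_podman_version('2.0.2')     -> (2, 0, 2)
#   _parse_podman_version('3.4.4-dev') -> (3, 4, 4)   # trailing suffix trimmed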
1877
1878def get_hostname():
1879 # type: () -> str
1880 return socket.gethostname()
1881
f6b5b4d7 1882
9f95a23c
TL
1883def get_fqdn():
1884 # type: () -> str
1885 return socket.getfqdn() or socket.gethostname()
1886
f6b5b4d7 1887
9f95a23c
TL
1888def get_arch():
1889 # type: () -> str
1890 return platform.uname().machine
1891
f6b5b4d7 1892
9f95a23c
TL
1893def generate_service_id():
1894 # type: () -> str
1895 return get_hostname() + '.' + ''.join(random.choice(string.ascii_lowercase)
1896 for _ in range(6))
1897
f6b5b4d7 1898
9f95a23c
TL
1899def generate_password():
1900 # type: () -> str
1901 return ''.join(random.choice(string.ascii_lowercase + string.digits)
1902 for i in range(10))
1903
f6b5b4d7 1904
9f95a23c
TL
1905def normalize_container_id(i):
1906 # type: (str) -> str
1907 # docker adds the sha256: prefix, but AFAICS both
1908 # docker (18.09.7 in bionic at least) and podman
1909 # always use sha256, so leave off the prefix
1910 # for consistency.
1911 prefix = 'sha256:'
1912 if i.startswith(prefix):
1913 i = i[len(prefix):]
1914 return i
1915
f6b5b4d7 1916
9f95a23c
TL
1917def make_fsid():
1918 # type: () -> str
1919 return str(uuid.uuid1())
1920
f6b5b4d7 1921
9f95a23c
TL
1922def is_fsid(s):
1923 # type: (str) -> bool
1924 try:
1925 uuid.UUID(s)
1926 except ValueError:
1927 return False
1928 return True
1929
f6b5b4d7 1930
522d829b
TL
1931def validate_fsid(func: FuncT) -> FuncT:
1932 @wraps(func)
1933 def _validate_fsid(ctx: CephadmContext) -> Any:
1934 if 'fsid' in ctx and ctx.fsid:
1935 if not is_fsid(ctx.fsid):
1936 raise Error('not an fsid: %s' % ctx.fsid)
1937 return func(ctx)
1938 return cast(FuncT, _validate_fsid)
1939
1940
1941def infer_fsid(func: FuncT) -> FuncT:
9f95a23c
TL
1942 """
1943 If we only find a single fsid in /var/lib/ceph/*, use that
1944 """
522d829b 1945 @infer_config
9f95a23c 1946 @wraps(func)
522d829b
TL
1947 def _infer_fsid(ctx: CephadmContext) -> Any:
1948 if 'fsid' in ctx and ctx.fsid:
f67539c2
TL
1949 logger.debug('Using specified fsid: %s' % ctx.fsid)
1950 return func(ctx)
9f95a23c 1951
522d829b
TL
1952 fsids = set()
1953
1954 cp = read_config(ctx.config)
1955 if cp.has_option('global', 'fsid'):
1956 fsids.add(cp.get('global', 'fsid'))
1957
f67539c2 1958 daemon_list = list_daemons(ctx, detail=False)
9f95a23c 1959 for daemon in daemon_list:
f6b5b4d7
TL
1960 if not is_fsid(daemon['fsid']):
1961 # 'unknown' fsid
1962 continue
f67539c2
TL
1963 elif 'name' not in ctx or not ctx.name:
1964 # ctx.name not specified
522d829b 1965 fsids.add(daemon['fsid'])
f67539c2
TL
1966 elif daemon['name'] == ctx.name:
1967 # ctx.name is a match
522d829b
TL
1968 fsids.add(daemon['fsid'])
1969 fsids = sorted(fsids)
9f95a23c
TL
1970
1971 if not fsids:
1972 # some commands do not always require an fsid
1973 pass
1974 elif len(fsids) == 1:
1975 logger.info('Inferring fsid %s' % fsids[0])
f67539c2 1976 ctx.fsid = fsids[0]
9f95a23c 1977 else:
33c7a0ef 1978 raise Error('Cannot infer an fsid, one must be specified (using --fsid): %s' % fsids)
f67539c2 1979 return func(ctx)
9f95a23c 1980
522d829b 1981 return cast(FuncT, _infer_fsid)
9f95a23c 1982
f6b5b4d7 1983
522d829b 1984def infer_config(func: FuncT) -> FuncT:
e306af50 1985 """
33c7a0ef
TL
1986 Infer the cluster configuration using the following priority order:
1987 1- if the user has provided custom conf file (-c option) use it
1988 2- otherwise if daemon --name has been provided use daemon conf
1989 3- otherwise find the mon daemon conf file and use it (if v1)
1990 4- otherwise if {ctx.data_dir}/{fsid}/{CEPH_CONF_DIR} dir exists use it
1991 5- finally: fall back to the default file /etc/ceph/ceph.conf
e306af50
TL
1992 """
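# For illustration, with the default DATA_DIR and a hypothetical fsid/host,
# the cases above resolve to paths such as:
#   2/3- /var/lib/ceph/<fsid>/mon.host1/config   (per-daemon config)
#   4-   /var/lib/ceph/<fsid>/config/ceph.conf
#   5-   /etc/ceph/ceph.conf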
1993 @wraps(func)
522d829b 1994 def _infer_config(ctx: CephadmContext) -> Any:
33c7a0ef
TL
1995
1996 def config_path(daemon_type: str, daemon_name: str) -> str:
1997 data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_name)
1998 return os.path.join(data_dir, 'config')
1999
2000 def get_mon_daemon_name(fsid: str) -> Optional[str]:
2001 daemon_list = list_daemons(ctx, detail=False)
2002 for daemon in daemon_list:
2003 if (
2004 daemon.get('name', '').startswith('mon.')
2005 and daemon.get('fsid', '') == fsid
2006 and daemon.get('style', '') == 'cephadm:v1'
2007 and os.path.exists(config_path('mon', daemon['name'].split('.', 1)[1]))
2008 ):
2009 return daemon['name']
2010 return None
2011
522d829b 2012 ctx.config = ctx.config if 'config' in ctx else None
33c7a0ef
TL
2013 # check if user has provided conf by using -c option
2014 if ctx.config and (ctx.config != CEPH_DEFAULT_CONF):
2015 logger.debug(f'Using specified config: {ctx.config}')
f67539c2 2016 return func(ctx)
33c7a0ef 2017
522d829b 2018 if 'fsid' in ctx and ctx.fsid:
33c7a0ef
TL
2019 name = ctx.name if ('name' in ctx and ctx.name) else get_mon_daemon_name(ctx.fsid)
2020 if name is not None:
2021 # daemon name has been specified (or inferred from mon), let's use its conf
2022 ctx.config = config_path(name.split('.', 1)[0], name.split('.', 1)[1])
2023 else:
2024 # no daemon; if the cluster has a config dir then use it
2025 ceph_conf = f'{ctx.data_dir}/{ctx.fsid}/{CEPH_CONF_DIR}/{CEPH_CONF}'
2026 if os.path.exists(ceph_conf):
2027 ctx.config = ceph_conf
2028
522d829b 2029 if ctx.config:
33c7a0ef
TL
2030 logger.info(f'Inferring config {ctx.config}')
2031 elif os.path.exists(CEPH_DEFAULT_CONF):
2032 logger.debug(f'Using default config {CEPH_DEFAULT_CONF}')
2033 ctx.config = CEPH_DEFAULT_CONF
f67539c2 2034 return func(ctx)
e306af50 2035
522d829b 2036 return cast(FuncT, _infer_config)
e306af50 2037
f6b5b4d7 2038
522d829b 2039def _get_default_image(ctx: CephadmContext) -> str:
1911f103 2040 if DEFAULT_IMAGE_IS_MASTER:
f67539c2 2041 warn = """This is a development version of cephadm.
1911f103
TL
2042For information regarding the latest stable release:
2043 https://docs.ceph.com/docs/{}/cephadm/install
f67539c2 2044""".format(LATEST_STABLE_RELEASE)
1911f103 2045 for line in warn.splitlines():
e306af50 2046 logger.warning('{}{}{}'.format(termcolor.yellow, line, termcolor.end))
1911f103
TL
2047 return DEFAULT_IMAGE
2048
f6b5b4d7 2049
522d829b 2050def infer_image(func: FuncT) -> FuncT:
9f95a23c
TL
2051 """
2052 Use the most recent ceph image
2053 """
2054 @wraps(func)
522d829b 2055 def _infer_image(ctx: CephadmContext) -> Any:
f67539c2
TL
2056 if not ctx.image:
2057 ctx.image = os.environ.get('CEPHADM_IMAGE')
2058 if not ctx.image:
33c7a0ef 2059 ctx.image = infer_local_ceph_image(ctx, ctx.container_engine.path)
f67539c2
TL
2060 if not ctx.image:
2061 ctx.image = _get_default_image(ctx)
2062 return func(ctx)
9f95a23c 2063
522d829b 2064 return cast(FuncT, _infer_image)
9f95a23c 2065
f6b5b4d7 2066
522d829b 2067def default_image(func: FuncT) -> FuncT:
9f95a23c 2068 @wraps(func)
522d829b 2069 def _default_image(ctx: CephadmContext) -> Any:
f67539c2
TL
2070 if not ctx.image:
2071 if 'name' in ctx and ctx.name:
2072 type_ = ctx.name.split('.', 1)[0]
9f95a23c 2073 if type_ in Monitoring.components:
f67539c2
TL
2074 ctx.image = Monitoring.components[type_]['image']
2075 if type_ == 'haproxy':
2076 ctx.image = HAproxy.default_image
2077 if type_ == 'keepalived':
2078 ctx.image = Keepalived.default_image
20effc67
TL
2079 if type_ == SNMPGateway.daemon_type:
2080 ctx.image = SNMPGateway.default_image
f67539c2
TL
2081 if not ctx.image:
2082 ctx.image = os.environ.get('CEPHADM_IMAGE')
2083 if not ctx.image:
2084 ctx.image = _get_default_image(ctx)
2085
2086 return func(ctx)
9f95a23c 2087
522d829b 2088 return cast(FuncT, _default_image)
9f95a23c 2089
f6b5b4d7 2090
33c7a0ef
TL
2091def get_container_info(ctx: CephadmContext, daemon_filter: str, by_name: bool) -> Optional[ContainerInfo]:
2092 """
2093 :param ctx: Cephadm context
2094 :param daemon_filter: daemon name or type
2095 :param by_name: must be set to True if daemon name is provided
2096 :return: Container information or None
9f95a23c 2097 """
33c7a0ef
TL
2098 def daemon_name_or_type(daemon: Dict[str, str]) -> str:
2099 return daemon['name'] if by_name else daemon['name'].split('.', 1)[0]
2100
2101 if by_name and '.' not in daemon_filter:
2102 logger.warning(f'Trying to get container info using invalid daemon name {daemon_filter}')
2103 return None
2104 daemons = list_daemons(ctx, detail=False)
2105 matching_daemons = [d for d in daemons if daemon_name_or_type(d) == daemon_filter and d['fsid'] == ctx.fsid]
2106 if matching_daemons:
2107 d_type, d_id = matching_daemons[0]['name'].split('.', 1)
2108 out, _, code = get_container_stats(ctx, ctx.container_engine.path, ctx.fsid, d_type, d_id)
2109 if not code:
2110 (container_id, image_name, image_id, start, version) = out.strip().split(',')
2111 return ContainerInfo(container_id, image_name, image_id, start, version)
2112 return None
2113
2114
2115def infer_local_ceph_image(ctx: CephadmContext, container_path: str) -> Optional[str]:
2116 """
2117 Infer the local ceph image based on the following priority criteria:
2118 1- the image specified by --image arg (if provided).
2119 2- the same image as the daemon container specified by --name arg (if provided).
2120 3- image used by any ceph container running on the host. In this case we use daemon types.
2121 4- if no container is found then we use the most recent ceph image on the host.
2122
2123 Note: any selected container must match the fsid inferred previously.
2124
9f95a23c
TL
2125 :return: The most recent local ceph image (already pulled)
2126 """
33c7a0ef
TL
2127 # '|' special character is used to separate the output fields into:
2128 # - Repository@digest
2129 # - Image Id
2130 # - Image Tag
2131 # - Image creation date
f67539c2
TL
2132 out, _, _ = call_throws(ctx,
2133 [container_path, 'images',
2134 '--filter', 'label=ceph=True',
2135 '--filter', 'dangling=false',
33c7a0ef
TL
2136 '--format', '{{.Repository}}@{{.Digest}}|{{.ID}}|{{.Tag}}|{{.CreatedAt}}'])
2137
2138 container_info = None
2139 daemon_name = ctx.name if ('name' in ctx and ctx.name and '.' in ctx.name) else None
2140 daemons_ls = [daemon_name] if daemon_name is not None else Ceph.daemons # daemon types: 'mon', 'mgr', etc
2141 for daemon in daemons_ls:
2142 container_info = get_container_info(ctx, daemon, daemon_name is not None)
2143 if container_info is not None:
2144 logger.debug(f"Using container info for daemon '{daemon}'")
2145 break
adb31ebb 2146
adb31ebb 2147 for image in out.splitlines():
33c7a0ef
TL
2148 if image and not image.isspace():
2149 (digest, image_id, tag, created_date) = image.lstrip().split('|')
2150 if container_info is not None and image_id not in container_info.image_id:
2151 continue
2152 if digest and not digest.endswith('@'):
2153 logger.info(f"Using ceph image with id '{image_id}' and tag '{tag}' created on {created_date}\n{digest}")
2154 return digest
9f95a23c
TL
2155 return None
2156
f6b5b4d7 2157
9f95a23c 2158def write_tmp(s, uid, gid):
f67539c2 2159 # type: (str, int, int) -> IO[str]
9f95a23c
TL
2160 tmp_f = tempfile.NamedTemporaryFile(mode='w',
2161 prefix='ceph-tmp')
2162 os.fchown(tmp_f.fileno(), uid, gid)
2163 tmp_f.write(s)
2164 tmp_f.flush()
2165
2166 return tmp_f
2167
f6b5b4d7 2168
9f95a23c
TL
2169def makedirs(dir, uid, gid, mode):
2170 # type: (str, int, int, int) -> None
2171 if not os.path.exists(dir):
2172 os.makedirs(dir, mode=mode)
2173 else:
2174 os.chmod(dir, mode)
2175 os.chown(dir, uid, gid)
2176 os.chmod(dir, mode) # the above is masked by umask...
2177
f6b5b4d7 2178
f67539c2
TL
2179def get_data_dir(fsid, data_dir, t, n):
2180 # type: (str, str, str, Union[int, str]) -> str
2181 return os.path.join(data_dir, fsid, '%s.%s' % (t, n))
9f95a23c 2182
f6b5b4d7 2183
f67539c2
TL
2184def get_log_dir(fsid, log_dir):
2185 # type: (str, str) -> str
2186 return os.path.join(log_dir, fsid)
9f95a23c 2187
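# Illustrative layouts under the default directories (fsid is hypothetical):
#   get_data_dir('<fsid>', '/var/lib/ceph', 'osd', 0) -> '/var/lib/ceph/<fsid>/osd.0'
#   get_log_dir('<fsid>', '/var/log/ceph')            -> '/var/log/ceph/<fsid>'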
f6b5b4d7 2188
f67539c2
TL
2189def make_data_dir_base(fsid, data_dir, uid, gid):
2190 # type: (str, str, int, int) -> str
2191 data_dir_base = os.path.join(data_dir, fsid)
9f95a23c
TL
2192 makedirs(data_dir_base, uid, gid, DATA_DIR_MODE)
2193 makedirs(os.path.join(data_dir_base, 'crash'), uid, gid, DATA_DIR_MODE)
2194 makedirs(os.path.join(data_dir_base, 'crash', 'posted'), uid, gid,
2195 DATA_DIR_MODE)
2196 return data_dir_base
2197
f6b5b4d7 2198
f67539c2
TL
2199def make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=None, gid=None):
2200 # type: (CephadmContext, str, str, Union[int, str], Optional[int], Optional[int]) -> str
f6b5b4d7 2201 if uid is None or gid is None:
f67539c2
TL
2202 uid, gid = extract_uid_gid(ctx)
2203 make_data_dir_base(fsid, ctx.data_dir, uid, gid)
2204 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
9f95a23c
TL
2205 makedirs(data_dir, uid, gid, DATA_DIR_MODE)
2206 return data_dir
2207
f6b5b4d7 2208
f67539c2
TL
2209def make_log_dir(ctx, fsid, uid=None, gid=None):
2210 # type: (CephadmContext, str, Optional[int], Optional[int]) -> str
f6b5b4d7 2211 if uid is None or gid is None:
f67539c2
TL
2212 uid, gid = extract_uid_gid(ctx)
2213 log_dir = get_log_dir(fsid, ctx.log_dir)
9f95a23c
TL
2214 makedirs(log_dir, uid, gid, LOG_DIR_MODE)
2215 return log_dir
2216
f6b5b4d7 2217
f67539c2
TL
2218def make_var_run(ctx, fsid, uid, gid):
2219 # type: (CephadmContext, str, int, int) -> None
2220 call_throws(ctx, ['install', '-d', '-m0770', '-o', str(uid), '-g', str(gid),
2221 '/var/run/ceph/%s' % fsid])
9f95a23c 2222
f6b5b4d7 2223
f67539c2
TL
2224def copy_tree(ctx, src, dst, uid=None, gid=None):
2225 # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
9f95a23c
TL
2226 """
2227 Copy a directory tree from src to dst
2228 """
f91f0fd5 2229 if uid is None or gid is None:
f67539c2 2230 (uid, gid) = extract_uid_gid(ctx)
9f95a23c
TL
2231
2232 for src_dir in src:
2233 dst_dir = dst
2234 if os.path.isdir(dst):
2235 dst_dir = os.path.join(dst, os.path.basename(src_dir))
2236
f67539c2 2237 logger.debug('copy directory `%s` -> `%s`' % (src_dir, dst_dir))
9f95a23c 2238 shutil.rmtree(dst_dir, ignore_errors=True)
f67539c2 2239 shutil.copytree(src_dir, dst_dir) # dirs_exist_ok needs python 3.8
9f95a23c
TL
2240
2241 for dirpath, dirnames, filenames in os.walk(dst_dir):
f67539c2 2242 logger.debug('chown %s:%s `%s`' % (uid, gid, dirpath))
9f95a23c
TL
2243 os.chown(dirpath, uid, gid)
2244 for filename in filenames:
f67539c2 2245 logger.debug('chown %s:%s `%s`' % (uid, gid, filename))
9f95a23c
TL
2246 os.chown(os.path.join(dirpath, filename), uid, gid)
2247
2248
f67539c2
TL
2249def copy_files(ctx, src, dst, uid=None, gid=None):
2250 # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
9f95a23c
TL
2251 """
2252 Copy files from src to dst
2253 """
f91f0fd5 2254 if uid is None or gid is None:
f67539c2 2255 (uid, gid) = extract_uid_gid(ctx)
9f95a23c
TL
2256
2257 for src_file in src:
2258 dst_file = dst
2259 if os.path.isdir(dst):
2260 dst_file = os.path.join(dst, os.path.basename(src_file))
2261
f67539c2 2262 logger.debug('copy file `%s` -> `%s`' % (src_file, dst_file))
9f95a23c
TL
2263 shutil.copyfile(src_file, dst_file)
2264
f67539c2 2265 logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
9f95a23c
TL
2266 os.chown(dst_file, uid, gid)
2267
f6b5b4d7 2268
f67539c2
TL
2269def move_files(ctx, src, dst, uid=None, gid=None):
2270 # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
9f95a23c
TL
2271 """
2272 Move files from src to dst
2273 """
f91f0fd5 2274 if uid is None or gid is None:
f67539c2 2275 (uid, gid) = extract_uid_gid(ctx)
9f95a23c
TL
2276
2277 for src_file in src:
2278 dst_file = dst
2279 if os.path.isdir(dst):
2280 dst_file = os.path.join(dst, os.path.basename(src_file))
2281
2282 if os.path.islink(src_file):
2283 # shutil.move() in py2 does not handle symlinks correctly
2284 src_rl = os.readlink(src_file)
2285 logger.debug("symlink '%s' -> '%s'" % (dst_file, src_rl))
2286 os.symlink(src_rl, dst_file)
2287 os.unlink(src_file)
2288 else:
2289 logger.debug("move file '%s' -> '%s'" % (src_file, dst_file))
2290 shutil.move(src_file, dst_file)
f67539c2 2291 logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
9f95a23c
TL
2292 os.chown(dst_file, uid, gid)
2293
f6b5b4d7 2294
33c7a0ef
TL
2295def recursive_chown(path: str, uid: int, gid: int) -> None:
2296 for dirpath, dirnames, filenames in os.walk(path):
2297 os.chown(dirpath, uid, gid)
2298 for filename in filenames:
2299 os.chown(os.path.join(dirpath, filename), uid, gid)
2300
2301
f67539c2 2302# copied from distutils
522d829b 2303def find_executable(executable: str, path: Optional[str] = None) -> Optional[str]:
9f95a23c
TL
2304 """Tries to find 'executable' in the directories listed in 'path'.
2305 A string listing directories separated by 'os.pathsep'; defaults to
2306 os.environ['PATH']. Returns the complete filename or None if not found.
2307 """
2308 _, ext = os.path.splitext(executable)
2309 if (sys.platform == 'win32') and (ext != '.exe'):
2310 executable = executable + '.exe'
2311
2312 if os.path.isfile(executable):
2313 return executable
2314
2315 if path is None:
2316 path = os.environ.get('PATH', None)
2317 if path is None:
2318 try:
f67539c2 2319 path = os.confstr('CS_PATH')
9f95a23c
TL
2320 except (AttributeError, ValueError):
2321 # os.confstr() or CS_PATH is not available
2322 path = os.defpath
2323 # bpo-35755: Don't use os.defpath if the PATH environment variable is
2324 # set to an empty string
2325
2326 # PATH='' doesn't match, whereas PATH=':' looks in the current directory
2327 if not path:
2328 return None
2329
2330 paths = path.split(os.pathsep)
2331 for p in paths:
2332 f = os.path.join(p, executable)
2333 if os.path.isfile(f):
2334 # the file exists, we have a shot at spawn working
2335 return f
2336 return None
2337
f6b5b4d7 2338
9f95a23c
TL
2339def find_program(filename):
2340 # type: (str) -> str
2341 name = find_executable(filename)
2342 if name is None:
2343 raise ValueError('%s not found' % filename)
2344 return name
2345
f6b5b4d7 2346
522d829b 2347def find_container_engine(ctx: CephadmContext) -> Optional[ContainerEngine]:
f67539c2
TL
2348 if ctx.docker:
2349 return Docker()
2350 else:
2351 for i in CONTAINER_PREFERENCE:
2352 try:
2353 return i()
a4b75251
TL
2354 except Exception:
2355 pass
f67539c2
TL
2356 return None
2357
2358
a4b75251 2359def check_container_engine(ctx: CephadmContext) -> ContainerEngine:
f67539c2
TL
2360 engine = ctx.container_engine
2361 if not isinstance(engine, CONTAINER_PREFERENCE):
522d829b
TL
2362 # See https://github.com/python/mypy/issues/8993
2363 exes: List[str] = [i.EXE for i in CONTAINER_PREFERENCE] # type: ignore
2364 raise Error('No container engine binary found ({}). Try running `apt/dnf/yum/zypper install <container engine>`'.format(' or '.join(exes)))
f67539c2
TL
2365 elif isinstance(engine, Podman):
2366 engine.get_version(ctx)
2367 if engine.version < MIN_PODMAN_VERSION:
2368 raise Error('podman version %d.%d.%d or later is required' % MIN_PODMAN_VERSION)
a4b75251 2369 return engine
f67539c2
TL
2370
2371
9f95a23c
TL
2372def get_unit_name(fsid, daemon_type, daemon_id=None):
2373 # type: (str, str, Optional[Union[int, str]]) -> str
2374 # accept either name or type + id
20effc67 2375 if daemon_id is not None:
9f95a23c
TL
2376 return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
2377 else:
2378 return 'ceph-%s@%s' % (fsid, daemon_type)
2379
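# Illustrative unit names (fsid and id are hypothetical):
#   get_unit_name('<fsid>', 'mon', 'host1') -> 'ceph-<fsid>@mon.host1'
#   get_unit_name('<fsid>', 'osd')          -> 'ceph-<fsid>@osd'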
f6b5b4d7 2380
522d829b 2381def get_unit_name_by_daemon_name(ctx: CephadmContext, fsid: str, name: str) -> str:
f67539c2 2382 daemon = get_daemon_description(ctx, fsid, name)
e306af50
TL
2383 try:
2384 return daemon['systemd_unit']
2385 except KeyError:
2386 raise Error('Failed to get unit name for {}'.format(daemon))
2387
f6b5b4d7 2388
f67539c2
TL
2389def check_unit(ctx, unit_name):
2390 # type: (CephadmContext, str) -> Tuple[bool, str, bool]
9f95a23c
TL
2391 # NOTE: we ignore the exit code here because systemctl outputs
2392 # various exit codes based on the state of the service, but the
2393 # string result is more explicit (and sufficient).
2394 enabled = False
2395 installed = False
2396 try:
f67539c2 2397 out, err, code = call(ctx, ['systemctl', 'is-enabled', unit_name],
adb31ebb 2398 verbosity=CallVerbosity.DEBUG)
9f95a23c
TL
2399 if code == 0:
2400 enabled = True
2401 installed = True
f67539c2 2402 elif 'disabled' in out:
9f95a23c
TL
2403 installed = True
2404 except Exception as e:
2405 logger.warning('unable to run systemctl: %s' % e)
2406 enabled = False
2407 installed = False
2408
2409 state = 'unknown'
2410 try:
f67539c2 2411 out, err, code = call(ctx, ['systemctl', 'is-active', unit_name],
adb31ebb 2412 verbosity=CallVerbosity.DEBUG)
9f95a23c
TL
2413 out = out.strip()
2414 if out in ['active']:
2415 state = 'running'
2416 elif out in ['inactive']:
2417 state = 'stopped'
2418 elif out in ['failed', 'auto-restart']:
2419 state = 'error'
2420 else:
2421 state = 'unknown'
2422 except Exception as e:
2423 logger.warning('unable to run systemctl: %s' % e)
2424 state = 'unknown'
2425 return (enabled, state, installed)
2426
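# Illustrative result, e.g. for an enabled and active unit (name hypothetical):
#   check_unit(ctx, 'ceph-<fsid>@mon.host1.service') -> (True, 'running', True)
# state is one of 'running', 'stopped', 'error' or 'unknown'.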
f6b5b4d7 2427
f67539c2
TL
2428def check_units(ctx, units, enabler=None):
2429 # type: (CephadmContext, List[str], Optional[Packager]) -> bool
9f95a23c 2430 for u in units:
f67539c2 2431 (enabled, state, installed) = check_unit(ctx, u)
9f95a23c
TL
2432 if enabled and state == 'running':
2433 logger.info('Unit %s is enabled and running' % u)
2434 return True
2435 if enabler is not None:
2436 if installed:
2437 logger.info('Enabling unit %s' % u)
2438 enabler.enable_service(u)
2439 return False
2440
f6b5b4d7 2441
522d829b 2442def is_container_running(ctx: CephadmContext, c: 'CephContainer') -> bool:
20effc67
TL
2443 if ctx.name.split('.', 1)[0] in ['agent', 'cephadm-exporter']:
2444 # these are non-containerized daemon types
2445 return False
522d829b
TL
2446 return bool(get_running_container_name(ctx, c))
2447
2448
2449def get_running_container_name(ctx: CephadmContext, c: 'CephContainer') -> Optional[str]:
2450 for name in [c.cname, c.old_cname]:
2451 out, err, ret = call(ctx, [
2452 ctx.container_engine.path, 'container', 'inspect',
2453 '--format', '{{.State.Status}}', name
2454 ])
2455 if out.strip() == 'running':
2456 return name
2457 return None
f67539c2
TL
2458
2459
9f95a23c 2460def get_legacy_config_fsid(cluster, legacy_dir=None):
f6b5b4d7 2461 # type: (str, Optional[str]) -> Optional[str]
9f95a23c
TL
2462 config_file = '/etc/ceph/%s.conf' % cluster
2463 if legacy_dir is not None:
2464 config_file = os.path.abspath(legacy_dir + config_file)
2465
2466 if os.path.exists(config_file):
2467 config = read_config(config_file)
2468 if config.has_section('global') and config.has_option('global', 'fsid'):
2469 return config.get('global', 'fsid')
2470 return None
2471
f6b5b4d7 2472
f67539c2
TL
2473def get_legacy_daemon_fsid(ctx, cluster,
2474 daemon_type, daemon_id, legacy_dir=None):
2475 # type: (CephadmContext, str, str, Union[int, str], Optional[str]) -> Optional[str]
9f95a23c
TL
2476 fsid = None
2477 if daemon_type == 'osd':
2478 try:
f67539c2 2479 fsid_file = os.path.join(ctx.data_dir,
9f95a23c
TL
2480 daemon_type,
2481 'ceph-%s' % daemon_id,
2482 'ceph_fsid')
2483 if legacy_dir is not None:
2484 fsid_file = os.path.abspath(legacy_dir + fsid_file)
2485 with open(fsid_file, 'r') as f:
2486 fsid = f.read().strip()
2487 except IOError:
2488 pass
2489 if not fsid:
2490 fsid = get_legacy_config_fsid(cluster, legacy_dir=legacy_dir)
2491 return fsid
2492
f6b5b4d7 2493
20effc67
TL
2494def should_log_to_journald(ctx: CephadmContext) -> bool:
2495 if ctx.log_to_journald is not None:
2496 return ctx.log_to_journald
2497 return isinstance(ctx.container_engine, Podman) and \
2498 ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION
2499
2500
f67539c2
TL
2501def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
2502 # type: (CephadmContext, str, str, Union[int, str]) -> List[str]
9f95a23c
TL
2503 r = list() # type: List[str]
2504
2505 if daemon_type in Ceph.daemons and daemon_type != 'crash':
2506 r += [
2507 '--setuser', 'ceph',
2508 '--setgroup', 'ceph',
2509 '--default-log-to-file=false',
9f95a23c 2510 ]
20effc67
TL
2511 log_to_journald = should_log_to_journald(ctx)
2512 if log_to_journald:
2513 r += [
2514 '--default-log-to-journald=true',
2515 '--default-log-to-stderr=false',
2516 ]
2517 else:
2518 r += [
2519 '--default-log-to-stderr=true',
2520 '--default-log-stderr-prefix=debug ',
2521 ]
9f95a23c
TL
2522 if daemon_type == 'mon':
2523 r += [
2524 '--default-mon-cluster-log-to-file=false',
9f95a23c 2525 ]
20effc67
TL
2526 if log_to_journald:
2527 r += [
2528 '--default-mon-cluster-log-to-journald=true',
2529 '--default-mon-cluster-log-to-stderr=false',
2530 ]
2531 else:
2532 r += ['--default-mon-cluster-log-to-stderr=true']
9f95a23c
TL
2533 elif daemon_type in Monitoring.components:
2534 metadata = Monitoring.components[daemon_type]
2535 r += metadata.get('args', list())
b3b6e05e 2536 # set ip and port to bind to for node-exporter, alertmanager, prometheus
33c7a0ef 2537 if daemon_type not in ['grafana', 'loki', 'promtail']:
b3b6e05e
TL
2538 ip = ''
2539 port = Monitoring.port_map[daemon_type][0]
2540 if 'meta_json' in ctx and ctx.meta_json:
2541 meta = json.loads(ctx.meta_json) or {}
2542 if 'ip' in meta and meta['ip']:
2543 ip = meta['ip']
2544 if 'ports' in meta and meta['ports']:
2545 port = meta['ports'][0]
2546 r += [f'--web.listen-address={ip}:{port}']
33c7a0ef
TL
2547 if daemon_type == 'prometheus':
2548 scheme = 'http'
2549 host = get_fqdn()
2550 r += [f'--web.external-url={scheme}://{host}:{port}']
9f95a23c 2551 if daemon_type == 'alertmanager':
f67539c2 2552 config = get_parm(ctx.config_json)
9f95a23c
TL
2553 peers = config.get('peers', list()) # type: ignore
2554 for peer in peers:
f67539c2 2555 r += ['--cluster.peer={}'.format(peer)]
f6b5b4d7 2556 # some alertmanager versions, by default, look elsewhere for a config
f67539c2 2557 r += ['--config.file=/etc/alertmanager/alertmanager.yml']
33c7a0ef
TL
2558 if daemon_type == 'loki':
2559 r += ['--config.file=/etc/loki/loki.yml']
2560 if daemon_type == 'promtail':
2561 r += ['--config.file=/etc/promtail/promtail.yml']
2562 if daemon_type == 'node-exporter':
2563 r += ['--path.procfs=/host/proc',
2564 '--path.sysfs=/host/sys',
2565 '--path.rootfs=/rootfs']
9f95a23c 2566 elif daemon_type == NFSGanesha.daemon_type:
f67539c2 2567 nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
1911f103 2568 r += nfs_ganesha.get_daemon_args()
f67539c2
TL
2569 elif daemon_type == HAproxy.daemon_type:
2570 haproxy = HAproxy.init(ctx, fsid, daemon_id)
2571 r += haproxy.get_daemon_args()
f91f0fd5 2572 elif daemon_type == CustomContainer.daemon_type:
f67539c2 2573 cc = CustomContainer.init(ctx, fsid, daemon_id)
f91f0fd5 2574 r.extend(cc.get_daemon_args())
20effc67
TL
2575 elif daemon_type == SNMPGateway.daemon_type:
2576 sc = SNMPGateway.init(ctx, fsid, daemon_id)
2577 r.extend(sc.get_daemon_args())
9f95a23c
TL
2578
2579 return r
2580
f6b5b4d7 2581
f67539c2 2582def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid,
e306af50 2583 config=None, keyring=None):
f67539c2
TL
2584 # type: (CephadmContext, str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None
2585 data_dir = make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=uid, gid=gid)
20effc67
TL
2586
2587 if daemon_type in Ceph.daemons:
2588 make_log_dir(ctx, fsid, uid=uid, gid=gid)
9f95a23c
TL
2589
2590 if config:
2591 config_path = os.path.join(data_dir, 'config')
2592 with open(config_path, 'w') as f:
2593 os.fchown(f.fileno(), uid, gid)
2594 os.fchmod(f.fileno(), 0o600)
2595 f.write(config)
f91f0fd5 2596
9f95a23c
TL
2597 if keyring:
2598 keyring_path = os.path.join(data_dir, 'keyring')
2599 with open(keyring_path, 'w') as f:
2600 os.fchmod(f.fileno(), 0o600)
2601 os.fchown(f.fileno(), uid, gid)
2602 f.write(keyring)
2603
2604 if daemon_type in Monitoring.components.keys():
522d829b
TL
2605 config_json: Dict[str, Any] = dict()
2606 if 'config_json' in ctx:
2607 config_json = get_parm(ctx.config_json)
9f95a23c
TL
2608
2609 # Set up directories specific to the monitoring component
2610 config_dir = ''
f67539c2 2611 data_dir_root = ''
9f95a23c 2612 if daemon_type == 'prometheus':
f67539c2
TL
2613 data_dir_root = get_data_dir(fsid, ctx.data_dir,
2614 daemon_type, daemon_id)
9f95a23c
TL
2615 config_dir = 'etc/prometheus'
2616 makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
2617 makedirs(os.path.join(data_dir_root, config_dir, 'alerting'), uid, gid, 0o755)
2618 makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
33c7a0ef
TL
2619 recursive_chown(os.path.join(data_dir_root, 'etc'), uid, gid)
2620 recursive_chown(os.path.join(data_dir_root, 'data'), uid, gid)
9f95a23c 2621 elif daemon_type == 'grafana':
f67539c2
TL
2622 data_dir_root = get_data_dir(fsid, ctx.data_dir,
2623 daemon_type, daemon_id)
9f95a23c
TL
2624 config_dir = 'etc/grafana'
2625 makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
2626 makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755)
2627 makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755)
2628 makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
f67539c2 2629 touch(os.path.join(data_dir_root, 'data', 'grafana.db'), uid, gid)
9f95a23c 2630 elif daemon_type == 'alertmanager':
f67539c2
TL
2631 data_dir_root = get_data_dir(fsid, ctx.data_dir,
2632 daemon_type, daemon_id)
9f95a23c
TL
2633 config_dir = 'etc/alertmanager'
2634 makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
2635 makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755)
33c7a0ef
TL
2636 elif daemon_type == 'promtail':
2637 data_dir_root = get_data_dir(fsid, ctx.data_dir,
2638 daemon_type, daemon_id)
2639 config_dir = 'etc/promtail'
2640 makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
2641 makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
2642 elif daemon_type == 'loki':
2643 data_dir_root = get_data_dir(fsid, ctx.data_dir,
2644 daemon_type, daemon_id)
2645 config_dir = 'etc/loki'
2646 makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
2647 makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
9f95a23c 2648
9f95a23c 2649 # populate the config directory for the component from the config-json
b3b6e05e
TL
2650 if 'files' in config_json:
2651 for fname in config_json['files']:
f91f0fd5 2652 content = dict_get_join(config_json['files'], fname)
b3b6e05e
TL
2653 if os.path.isabs(fname):
2654 fpath = os.path.join(data_dir_root, fname.lstrip(os.path.sep))
2655 else:
2656 fpath = os.path.join(data_dir_root, config_dir, fname)
2657 with open(fpath, 'w', encoding='utf-8') as f:
9f95a23c
TL
2658 os.fchown(f.fileno(), uid, gid)
2659 os.fchmod(f.fileno(), 0o600)
2660 f.write(content)
2661
f91f0fd5 2662 elif daemon_type == NFSGanesha.daemon_type:
f67539c2 2663 nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
9f95a23c
TL
2664 nfs_ganesha.create_daemon_dirs(data_dir, uid, gid)
2665
f91f0fd5 2666 elif daemon_type == CephIscsi.daemon_type:
f67539c2 2667 ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
1911f103
TL
2668 ceph_iscsi.create_daemon_dirs(data_dir, uid, gid)
2669
f67539c2
TL
2670 elif daemon_type == HAproxy.daemon_type:
2671 haproxy = HAproxy.init(ctx, fsid, daemon_id)
2672 haproxy.create_daemon_dirs(data_dir, uid, gid)
2673
2674 elif daemon_type == Keepalived.daemon_type:
2675 keepalived = Keepalived.init(ctx, fsid, daemon_id)
2676 keepalived.create_daemon_dirs(data_dir, uid, gid)
2677
f91f0fd5 2678 elif daemon_type == CustomContainer.daemon_type:
f67539c2 2679 cc = CustomContainer.init(ctx, fsid, daemon_id)
f91f0fd5
TL
2680 cc.create_daemon_dirs(data_dir, uid, gid)
2681
20effc67
TL
2682 elif daemon_type == SNMPGateway.daemon_type:
2683 sg = SNMPGateway.init(ctx, fsid, daemon_id)
2684 sg.create_daemon_conf()
2685
f6b5b4d7 2686
9f95a23c
TL
2687def get_parm(option):
2688 # type: (str) -> Dict[str, str]
2689
2690 if not option:
2691 return dict()
2692
2693 global cached_stdin
2694 if option == '-':
2695 if cached_stdin is not None:
2696 j = cached_stdin
2697 else:
f67539c2
TL
2698 j = sys.stdin.read()
2699 cached_stdin = j
9f95a23c
TL
2700 else:
2701 # inline json string
2702 if option[0] == '{' and option[-1] == '}':
2703 j = option
2704 # json file
2705 elif os.path.exists(option):
2706 with open(option, 'r') as f:
2707 j = f.read()
2708 else:
f67539c2 2709 raise Error('Config file {} not found'.format(option))
9f95a23c
TL
2710
2711 try:
2712 js = json.loads(j)
2713 except ValueError as e:
f67539c2 2714 raise Error('Invalid JSON in {}: {}'.format(option, e))
9f95a23c
TL
2715 else:
2716 return js
2717
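# Illustrative inputs (values below are hypothetical):
#   get_parm('-')                                   # read and cache JSON from stdin
#   get_parm('{"config": "...", "keyring": "..."}') # inline JSON string
#   get_parm('/tmp/mon.config.json')                # path to a JSON file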
f6b5b4d7 2718
f67539c2
TL
2719def get_config_and_keyring(ctx):
2720 # type: (CephadmContext) -> Tuple[Optional[str], Optional[str]]
801d1391
TL
2721 config = None
2722 keyring = None
2723
f67539c2
TL
2724 if 'config_json' in ctx and ctx.config_json:
2725 d = get_parm(ctx.config_json)
9f95a23c
TL
2726 config = d.get('config')
2727 keyring = d.get('keyring')
a4b75251
TL
2728 if config and keyring:
2729 return config, keyring
9f95a23c 2730
f67539c2
TL
2731 if 'config' in ctx and ctx.config:
2732 try:
2733 with open(ctx.config, 'r') as f:
2734 config = f.read()
b3b6e05e
TL
2735 except FileNotFoundError as e:
2736 raise Error(e)
f67539c2
TL
2737
2738 if 'key' in ctx and ctx.key:
2739 keyring = '[%s]\n\tkey = %s\n' % (ctx.name, ctx.key)
2740 elif 'keyring' in ctx and ctx.keyring:
2741 try:
2742 with open(ctx.keyring, 'r') as f:
2743 keyring = f.read()
b3b6e05e
TL
2744 except FileNotFoundError as e:
2745 raise Error(e)
9f95a23c 2746
f6b5b4d7
TL
2747 return config, keyring
2748
2749
f67539c2
TL
2750def get_container_binds(ctx, fsid, daemon_type, daemon_id):
2751 # type: (CephadmContext, str, str, Union[int, str, None]) -> List[List[str]]
f6b5b4d7
TL
2752 binds = list()
2753
2754 if daemon_type == CephIscsi.daemon_type:
f6b5b4d7 2755 binds.extend(CephIscsi.get_container_binds())
f91f0fd5
TL
2756 elif daemon_type == CustomContainer.daemon_type:
2757 assert daemon_id
f67539c2
TL
2758 cc = CustomContainer.init(ctx, fsid, daemon_id)
2759 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
f91f0fd5 2760 binds.extend(cc.get_container_binds(data_dir))
f6b5b4d7
TL
2761
2762 return binds
2763
9f95a23c 2764
f67539c2 2765def get_container_mounts(ctx, fsid, daemon_type, daemon_id,
9f95a23c 2766 no_config=False):
f67539c2 2767 # type: (CephadmContext, str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str]
9f95a23c
TL
2768 mounts = dict()
2769
2770 if daemon_type in Ceph.daemons:
2771 if fsid:
f67539c2 2772 run_path = os.path.join('/var/run/ceph', fsid)
9f95a23c
TL
2773 if os.path.exists(run_path):
2774 mounts[run_path] = '/var/run/ceph:z'
f67539c2 2775 log_dir = get_log_dir(fsid, ctx.log_dir)
9f95a23c
TL
2776 mounts[log_dir] = '/var/log/ceph:z'
2777 crash_dir = '/var/lib/ceph/%s/crash' % fsid
2778 if os.path.exists(crash_dir):
2779 mounts[crash_dir] = '/var/lib/ceph/crash:z'
20effc67
TL
2780 if daemon_type != 'crash' and should_log_to_journald(ctx):
2781 journald_sock_dir = '/run/systemd/journal'
2782 mounts[journald_sock_dir] = journald_sock_dir
9f95a23c
TL
2783
2784 if daemon_type in Ceph.daemons and daemon_id:
f67539c2 2785 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
9f95a23c
TL
2786 if daemon_type == 'rgw':
2787 cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id)
2788 else:
2789 cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id)
2790 if daemon_type != 'crash':
2791 mounts[data_dir] = cdata_dir + ':z'
2792 if not no_config:
2793 mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
f67539c2 2794 if daemon_type in ['rbd-mirror', 'cephfs-mirror', 'crash']:
9f95a23c
TL
2795 # these do not search for their keyrings in a data directory
2796 mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id)
2797
b3b6e05e 2798 if daemon_type in ['mon', 'osd', 'clusterless-ceph-volume']:
9f95a23c
TL
2799 mounts['/dev'] = '/dev' # FIXME: narrow this down?
2800 mounts['/run/udev'] = '/run/udev'
b3b6e05e 2801 if daemon_type in ['osd', 'clusterless-ceph-volume']:
9f95a23c 2802 mounts['/sys'] = '/sys' # for numa.cc, pick_address, cgroups, ...
b3b6e05e
TL
2803 mounts['/run/lvm'] = '/run/lvm'
2804 mounts['/run/lock/lvm'] = '/run/lock/lvm'
2805 if daemon_type == 'osd':
f67539c2
TL
2806 # selinux-policy in the container may not match the host.
2807 if HostFacts(ctx).selinux_enabled:
2808 selinux_folder = '/var/lib/ceph/%s/selinux' % fsid
2809 if not os.path.exists(selinux_folder):
2810 os.makedirs(selinux_folder, mode=0o755)
2811 mounts[selinux_folder] = '/sys/fs/selinux:ro'
20effc67 2812 mounts['/'] = '/rootfs'
9f95a23c 2813
e306af50 2814 try:
f67539c2
TL
2815 if ctx.shared_ceph_folder: # ease manager module/ceph-volume development
2816 ceph_folder = pathify(ctx.shared_ceph_folder)
e306af50
TL
2817 if os.path.exists(ceph_folder):
2818 mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
522d829b 2819 mounts[ceph_folder + '/src/cephadm/cephadm'] = '/usr/sbin/cephadm'
e306af50
TL
2820 mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
2821 mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
20effc67
TL
2822 mounts[ceph_folder + '/monitoring/ceph-mixin/dashboards_out'] = '/etc/grafana/dashboards/ceph-dashboard'
2823 mounts[ceph_folder + '/monitoring/ceph-mixin/prometheus_alerts.yml'] = '/etc/prometheus/ceph/ceph_default_alerts.yml'
e306af50
TL
2824 else:
2825 logger.error('{}{}{}'.format(termcolor.red,
f67539c2
TL
2826 'Ceph shared source folder does not exist.',
2827 termcolor.end))
e306af50
TL
2828 except AttributeError:
2829 pass
2830
9f95a23c 2831 if daemon_type in Monitoring.components and daemon_id:
f67539c2 2832 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
33c7a0ef 2833 log_dir = get_log_dir(fsid, ctx.log_dir)
9f95a23c
TL
2834 if daemon_type == 'prometheus':
2835 mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z'
2836 mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z'
33c7a0ef
TL
2837 elif daemon_type == 'loki':
2838 mounts[os.path.join(data_dir, 'etc/loki')] = '/etc/loki:Z'
2839 mounts[os.path.join(data_dir, 'data')] = '/loki:Z'
2840 elif daemon_type == 'promtail':
2841 mounts[os.path.join(data_dir, 'etc/promtail')] = '/etc/promtail:Z'
2842 mounts[log_dir] = '/var/log/ceph:z'
2843 mounts[os.path.join(data_dir, 'data')] = '/promtail:Z'
9f95a23c
TL
2844 elif daemon_type == 'node-exporter':
2845 mounts['/proc'] = '/host/proc:ro'
2846 mounts['/sys'] = '/host/sys:ro'
2847 mounts['/'] = '/rootfs:ro'
f67539c2 2848 elif daemon_type == 'grafana':
9f95a23c
TL
2849 mounts[os.path.join(data_dir, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
2850 mounts[os.path.join(data_dir, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
2851 mounts[os.path.join(data_dir, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
f67539c2 2852 mounts[os.path.join(data_dir, 'data/grafana.db')] = '/var/lib/grafana/grafana.db:Z'
9f95a23c 2853 elif daemon_type == 'alertmanager':
f6b5b4d7 2854 mounts[os.path.join(data_dir, 'etc/alertmanager')] = '/etc/alertmanager:Z'
9f95a23c
TL
2855
2856 if daemon_type == NFSGanesha.daemon_type:
2857 assert daemon_id
f67539c2
TL
2858 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
2859 nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
f91f0fd5 2860 mounts.update(nfs_ganesha.get_container_mounts(data_dir))
9f95a23c 2861
f67539c2
TL
2862 if daemon_type == HAproxy.daemon_type:
2863 assert daemon_id
2864 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
2865 mounts.update(HAproxy.get_container_mounts(data_dir))
2866
1911f103
TL
2867 if daemon_type == CephIscsi.daemon_type:
2868 assert daemon_id
f67539c2
TL
2869 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
2870 log_dir = get_log_dir(fsid, ctx.log_dir)
1911f103
TL
2871 mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir))
2872
f67539c2
TL
2873 if daemon_type == Keepalived.daemon_type:
2874 assert daemon_id
2875 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
2876 mounts.update(Keepalived.get_container_mounts(data_dir))
2877
f91f0fd5
TL
2878 if daemon_type == CustomContainer.daemon_type:
2879 assert daemon_id
f67539c2
TL
2880 cc = CustomContainer.init(ctx, fsid, daemon_id)
2881 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
f91f0fd5
TL
2882 mounts.update(cc.get_container_mounts(data_dir))
2883
9f95a23c
TL
2884 return mounts
2885
f6b5b4d7 2886
20effc67
TL
2887def get_ceph_volume_container(ctx: CephadmContext,
2888 privileged: bool = True,
2889 cname: str = '',
2890 volume_mounts: Dict[str, str] = {},
2891 bind_mounts: Optional[List[List[str]]] = None,
2892 args: List[str] = [],
2893 envs: Optional[List[str]] = None) -> 'CephContainer':
2894 if envs is None:
2895 envs = []
2896 envs.append('CEPH_VOLUME_SKIP_RESTORECON=yes')
2897 envs.append('CEPH_VOLUME_DEBUG=1')
2898
2899 return CephContainer(
2900 ctx,
2901 image=ctx.image,
2902 entrypoint='/usr/sbin/ceph-volume',
2903 args=args,
2904 volume_mounts=volume_mounts,
2905 bind_mounts=bind_mounts,
2906 envs=envs,
2907 privileged=privileged,
2908 cname=cname,
2909 memory_request=ctx.memory_request,
2910 memory_limit=ctx.memory_limit,
2911 )
2912
2913
f67539c2
TL
2914def get_container(ctx: CephadmContext,
2915 fsid: str, daemon_type: str, daemon_id: Union[int, str],
f91f0fd5
TL
2916 privileged: bool = False,
2917 ptrace: bool = False,
2918 container_args: Optional[List[str]] = None) -> 'CephContainer':
2919 entrypoint: str = ''
2920 name: str = ''
2921 ceph_args: List[str] = []
522d829b 2922 envs: List[str] = []
f91f0fd5
TL
2923 host_network: bool = True
2924
522d829b
TL
2925 if daemon_type in Ceph.daemons:
2926 envs.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728')
f91f0fd5
TL
2927 if container_args is None:
2928 container_args = []
9f95a23c
TL
2929 if daemon_type in ['mon', 'osd']:
2930 # mon and osd need privileged in order for libudev to query devices
2931 privileged = True
2932 if daemon_type == 'rgw':
2933 entrypoint = '/usr/bin/radosgw'
2934 name = 'client.rgw.%s' % daemon_id
2935 elif daemon_type == 'rbd-mirror':
2936 entrypoint = '/usr/bin/rbd-mirror'
2937 name = 'client.rbd-mirror.%s' % daemon_id
f67539c2
TL
2938 elif daemon_type == 'cephfs-mirror':
2939 entrypoint = '/usr/bin/cephfs-mirror'
2940 name = 'client.cephfs-mirror.%s' % daemon_id
9f95a23c
TL
2941 elif daemon_type == 'crash':
2942 entrypoint = '/usr/bin/ceph-crash'
2943 name = 'client.crash.%s' % daemon_id
2944 elif daemon_type in ['mon', 'mgr', 'mds', 'osd']:
2945 entrypoint = '/usr/bin/ceph-' + daemon_type
2946 name = '%s.%s' % (daemon_type, daemon_id)
2947 elif daemon_type in Monitoring.components:
2948 entrypoint = ''
9f95a23c
TL
2949 elif daemon_type == NFSGanesha.daemon_type:
2950 entrypoint = NFSGanesha.entrypoint
2951 name = '%s.%s' % (daemon_type, daemon_id)
f91f0fd5 2952 envs.extend(NFSGanesha.get_container_envs())
f67539c2
TL
2953 elif daemon_type == HAproxy.daemon_type:
2954 name = '%s.%s' % (daemon_type, daemon_id)
522d829b 2955 container_args.extend(['--user=root']) # haproxy 2.4 defaults to a different user
f67539c2
TL
2956 elif daemon_type == Keepalived.daemon_type:
2957 name = '%s.%s' % (daemon_type, daemon_id)
2958 envs.extend(Keepalived.get_container_envs())
2959 container_args.extend(['--cap-add=NET_ADMIN', '--cap-add=NET_RAW'])
1911f103
TL
2960 elif daemon_type == CephIscsi.daemon_type:
2961 entrypoint = CephIscsi.entrypoint
2962 name = '%s.%s' % (daemon_type, daemon_id)
e306af50
TL
2963 # So the container can modprobe iscsi_target_mod and have write perms
2964 # to configfs, we need to make this a privileged container.
2965 privileged = True
f91f0fd5 2966 elif daemon_type == CustomContainer.daemon_type:
f67539c2 2967 cc = CustomContainer.init(ctx, fsid, daemon_id)
f91f0fd5
TL
2968 entrypoint = cc.entrypoint
2969 host_network = False
2970 envs.extend(cc.get_container_envs())
2971 container_args.extend(cc.get_container_args())
9f95a23c 2972
9f95a23c 2973 if daemon_type in Monitoring.components:
f67539c2 2974 uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
9f95a23c
TL
2975 monitoring_args = [
2976 '--user',
2977 str(uid),
2978 # FIXME: disable cpu/memory limits for the time being (not supported
2979 # by ubuntu 18.04 kernel!)
9f95a23c
TL
2980 ]
2981 container_args.extend(monitoring_args)
33c7a0ef
TL
2982 if daemon_type == 'node-exporter':
2983 # in order to support setting '--path.procfs=/host/proc','--path.sysfs=/host/sys',
2984 # '--path.rootfs=/rootfs' for node-exporter we need to disable selinux separation
2985 # between the node-exporter container and the host to avoid selinux denials
2986 container_args.extend(['--security-opt', 'label=disable'])
9f95a23c
TL
2987 elif daemon_type == 'crash':
2988 ceph_args = ['-n', name]
2989 elif daemon_type in Ceph.daemons:
2990 ceph_args = ['-n', name, '-f']
20effc67
TL
2991 elif daemon_type == SNMPGateway.daemon_type:
2992 sg = SNMPGateway.init(ctx, fsid, daemon_id)
2993 container_args.append(
2994 f'--env-file={sg.conf_file_path}'
2995 )
9f95a23c 2996
f91f0fd5
TL
2997 # if using podman, set -d, --conmon-pidfile & --cidfile flags
2998 # so service can have Type=Forking
f67539c2 2999 if isinstance(ctx.container_engine, Podman):
f91f0fd5 3000 runtime_dir = '/run'
f67539c2
TL
3001 container_args.extend([
3002 '-d', '--log-driver', 'journald',
f91f0fd5
TL
3003 '--conmon-pidfile',
3004 runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, daemon_id),
3005 '--cidfile',
f67539c2
TL
3006 runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, daemon_id),
3007 ])
3008 if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION:
3009 container_args.append('--cgroups=split')
9f95a23c 3010
522d829b 3011 return CephContainer.for_daemon(
f67539c2 3012 ctx,
522d829b
TL
3013 fsid=fsid,
3014 daemon_type=daemon_type,
3015 daemon_id=str(daemon_id),
9f95a23c 3016 entrypoint=entrypoint,
f67539c2 3017 args=ceph_args + get_daemon_args(ctx, fsid, daemon_type, daemon_id),
9f95a23c 3018 container_args=container_args,
f67539c2
TL
3019 volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
3020 bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
9f95a23c
TL
3021 envs=envs,
3022 privileged=privileged,
3023 ptrace=ptrace,
f91f0fd5 3024 host_network=host_network,
9f95a23c
TL
3025 )
3026
f6b5b4d7 3027
f67539c2
TL
3028def extract_uid_gid(ctx, img='', file_path='/var/lib/ceph'):
3029 # type: (CephadmContext, str, Union[str, List[str]]) -> Tuple[int, int]
9f95a23c
TL
3030
3031 if not img:
f67539c2 3032 img = ctx.image
9f95a23c 3033
f6b5b4d7
TL
3034 if isinstance(file_path, str):
3035 paths = [file_path]
3036 else:
3037 paths = file_path
3038
20effc67
TL
3039 ex: Optional[Tuple[str, RuntimeError]] = None
3040
f6b5b4d7
TL
3041 for fp in paths:
3042 try:
3043 out = CephContainer(
f67539c2 3044 ctx,
f6b5b4d7
TL
3045 image=img,
3046 entrypoint='stat',
3047 args=['-c', '%u %g', fp]
3048 ).run()
3049 uid, gid = out.split(' ')
3050 return int(uid), int(gid)
20effc67
TL
3051 except RuntimeError as e:
3052 ex = (fp, e)
3053 if ex:
3054 raise Error(f'Failed to extract uid/gid for path {ex[0]}: {ex[1]}')
3055
f6b5b4d7
TL
3056 raise RuntimeError('uid/gid not found')
3057
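# Illustrative behaviour: a throwaway container runs `stat -c '%u %g' /var/lib/ceph`
# against the given image; with the stock ceph images this typically prints
# '167 167', which is returned as (167, 167).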
9f95a23c 3058
f67539c2 3059def deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid,
9f95a23c
TL
3060 config=None, keyring=None,
3061 osd_fsid=None,
f6b5b4d7
TL
3062 reconfig=False,
3063 ports=None):
f67539c2 3064 # type: (CephadmContext, str, str, Union[int, str], Optional[CephContainer], int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None
f6b5b4d7
TL
3065
3066 ports = ports or []
f67539c2 3067 if any([port_in_use(ctx, port) for port in ports]):
b3b6e05e
TL
3068 if daemon_type == 'mgr':
3069 # non-fatal for mgr when we are in mgr_standby_modules=false, but we can't
3070 # tell whether that is the case here.
3071 logger.warning(
3072 f"ceph-mgr TCP port(s) {','.join(map(str, ports))} already in use"
3073 )
3074 else:
3075 raise Error("TCP Port(s) '{}' required for {} already in use".format(','.join(map(str, ports)), daemon_type))
f6b5b4d7 3076
f67539c2 3077 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
9f95a23c
TL
3078 if reconfig and not os.path.exists(data_dir):
3079 raise Error('cannot reconfig, data path %s does not exist' % data_dir)
3080 if daemon_type == 'mon' and not os.path.exists(data_dir):
3081 assert config
3082 assert keyring
3083 # tmp keyring file
3084 tmp_keyring = write_tmp(keyring, uid, gid)
3085
3086 # tmp config file
3087 tmp_config = write_tmp(config, uid, gid)
3088
3089 # --mkfs
f67539c2
TL
3090 create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid)
3091 mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', daemon_id)
3092 log_dir = get_log_dir(fsid, ctx.log_dir)
3093 CephContainer(
3094 ctx,
3095 image=ctx.image,
9f95a23c 3096 entrypoint='/usr/bin/ceph-mon',
f67539c2
TL
3097 args=[
3098 '--mkfs',
3099 '-i', str(daemon_id),
3100 '--fsid', fsid,
3101 '-c', '/tmp/config',
3102 '--keyring', '/tmp/keyring',
3103 ] + get_daemon_args(ctx, fsid, 'mon', daemon_id),
9f95a23c
TL
3104 volume_mounts={
3105 log_dir: '/var/log/ceph:z',
3106 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id),
3107 tmp_keyring.name: '/tmp/keyring:z',
3108 tmp_config.name: '/tmp/config:z',
3109 },
3110 ).run()
3111
3112 # write conf
3113 with open(mon_dir + '/config', 'w') as f:
3114 os.fchown(f.fileno(), uid, gid)
3115 os.fchmod(f.fileno(), 0o600)
3116 f.write(config)
3117 else:
3118 # dirs, conf, keyring
3119 create_daemon_dirs(
f67539c2 3120 ctx,
9f95a23c
TL
3121 fsid, daemon_type, daemon_id,
3122 uid, gid,
3123 config, keyring)
3124
3125 if not reconfig:
20effc67 3126 if daemon_type == CephadmAgent.daemon_type:
f67539c2
TL
3127 if ctx.config_json == '-':
3128 config_js = get_parm('-')
3129 else:
3130 config_js = get_parm(ctx.config_json)
3131 assert isinstance(config_js, dict)
3132
20effc67
TL
3133 cephadm_agent = CephadmAgent(ctx, fsid, daemon_id)
3134 cephadm_agent.deploy_daemon_unit(config_js)
f67539c2
TL
3135 else:
3136 if c:
3137 deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id,
3138 c, osd_fsid=osd_fsid, ports=ports)
3139 else:
3140 raise RuntimeError('attempting to deploy a daemon without a container image')
9f95a23c
TL
3141
3142 if not os.path.exists(data_dir + '/unit.created'):
3143 with open(data_dir + '/unit.created', 'w') as f:
3144 os.fchmod(f.fileno(), 0o600)
3145 os.fchown(f.fileno(), uid, gid)
3146 f.write('mtime is time the daemon deployment was created\n')
3147
3148 with open(data_dir + '/unit.configured', 'w') as f:
3149 f.write('mtime is time we were last configured\n')
3150 os.fchmod(f.fileno(), 0o600)
3151 os.fchown(f.fileno(), uid, gid)
3152
f67539c2 3153 update_firewalld(ctx, daemon_type)
9f95a23c 3154
f6b5b4d7
TL
3155 # Open ports explicitly required for the daemon
3156 if ports:
f67539c2 3157 fw = Firewalld(ctx)
f6b5b4d7
TL
3158 fw.open_ports(ports)
3159 fw.apply_rules()
3160
9f95a23c
TL
3161 if reconfig and daemon_type not in Ceph.daemons:
3162 # ceph daemons do not need a restart; others (presumably) do to pick
3163 # up the new config
f67539c2
TL
3164 call_throws(ctx, ['systemctl', 'reset-failed',
3165 get_unit_name(fsid, daemon_type, daemon_id)])
3166 call_throws(ctx, ['systemctl', 'restart',
3167 get_unit_name(fsid, daemon_type, daemon_id)])
3168
9f95a23c 3169
f67539c2
TL
3170def _write_container_cmd_to_bash(ctx, file_obj, container, comment=None, background=False):
3171 # type: (CephadmContext, IO[str], CephContainer, Optional[str], Optional[bool]) -> None
f6b5b4d7 3172 if comment:
f91f0fd5 3173 # Sometimes adding a comment, especially if there are multiple containers in one
f6b5b4d7
TL
3174 # unit file, makes it easier to read and grok.
3175 file_obj.write('# ' + comment + '\n')
3176 # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
522d829b 3177 file_obj.write('! ' + ' '.join(container.rm_cmd(old_cname=True)) + ' 2> /dev/null\n')
f67539c2 3178 file_obj.write('! ' + ' '.join(container.rm_cmd()) + ' 2> /dev/null\n')
f6b5b4d7 3179 # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
f67539c2
TL
3180 if isinstance(ctx.container_engine, Podman):
3181 file_obj.write(
3182 '! '
3183 + ' '.join([shlex.quote(a) for a in container.rm_cmd(storage=True)])
3184 + ' 2> /dev/null\n')
522d829b
TL
3185 file_obj.write(
3186 '! '
3187 + ' '.join([shlex.quote(a) for a in container.rm_cmd(old_cname=True, storage=True)])
3188 + ' 2> /dev/null\n')
f6b5b4d7
TL
3189
3190 # container run command
f67539c2
TL
3191 file_obj.write(
3192 ' '.join([shlex.quote(a) for a in container.run_cmd()])
3193 + (' &' if background else '') + '\n')
3194
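# Editor's illustration (hedged, not part of the upstream script): the lines that
# _write_container_cmd_to_bash() emits into unit.run look roughly like the sketch
# below. The engine path and container name are made-up placeholders; the leading
# '!' keeps the 'set -e' shell from aborting when the rm fails because the
# container does not exist, and ' &' is appended to the run line only when
# background=True.
def _example_unit_run_lines() -> str:
    import io
    buf = io.StringIO()
    buf.write('# mon.myhost\n')  # optional comment passed by the caller
    buf.write('! /usr/bin/podman rm -f ceph-FSID-mon-myhost 2> /dev/null\n')
    buf.write('/usr/bin/podman run --rm --ipc=host --name ceph-FSID-mon-myhost ...\n')
    return buf.getvalue()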
3195
522d829b
TL
3196def clean_cgroup(ctx: CephadmContext, fsid: str, unit_name: str) -> None:
3197 # systemd may fail to clean up cgroups from a previously stopped unit, which will cause the next "systemctl start" to fail.
3198 # see https://tracker.ceph.com/issues/50998
3199
3200 CGROUPV2_PATH = Path('/sys/fs/cgroup')
3201 if not (CGROUPV2_PATH / 'system.slice').exists():
3202 # Only unified cgroup is affected, skip if not the case
3203 return
3204
3205 slice_name = 'system-ceph\\x2d{}.slice'.format(fsid.replace('-', '\\x2d'))
3206 cg_path = CGROUPV2_PATH / 'system.slice' / slice_name / f'{unit_name}.service'
3207 if not cg_path.exists():
3208 return
3209
3210 def cg_trim(path: Path) -> None:
3211 for p in path.iterdir():
3212 if p.is_dir():
3213 cg_trim(p)
3214 path.rmdir()
3215 try:
3216 cg_trim(cg_path)
3217 except OSError:
3218 logger.warning(f'Failed to trim old cgroups {cg_path}')
3219
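# Editor's sketch (hedged): only empty cgroup directories can be removed, so the
# trim above recurses depth-first and rmdir's children before their parent. A
# standalone equivalent over a plain directory tree:
def _example_trim_empty_tree(root: str) -> None:
    from pathlib import Path

    def trim(path: Path) -> None:
        for child in path.iterdir():
            if child.is_dir():
                trim(child)      # remove grandchildren first
        path.rmdir()             # a directory is removable only once it is empty

    trim(Path(root))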
3220
f67539c2
TL
3221def deploy_daemon_units(
3222 ctx: CephadmContext,
3223 fsid: str,
3224 uid: int,
3225 gid: int,
3226 daemon_type: str,
3227 daemon_id: Union[int, str],
3228 c: 'CephContainer',
3229 enable: bool = True,
3230 start: bool = True,
3231 osd_fsid: Optional[str] = None,
3232 ports: Optional[List[int]] = None,
3233) -> None:
9f95a23c 3234 # cmd
f67539c2
TL
3235 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
3236 with open(data_dir + '/unit.run.new', 'w') as f, \
b3b6e05e 3237 open(data_dir + '/unit.meta.new', 'w') as metaf:
f6b5b4d7 3238 f.write('set -e\n')
f91f0fd5
TL
3239
3240 if daemon_type in Ceph.daemons:
3241 install_path = find_program('install')
3242 f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))
3243
9f95a23c
TL
3244 # pre-start cmd(s)
3245 if daemon_type == 'osd':
3246 # osds have a pre-start step
3247 assert osd_fsid
f6b5b4d7
TL
3248 simple_fn = os.path.join('/etc/ceph/osd',
3249 '%s-%s.json.adopted-by-cephadm' % (daemon_id, osd_fsid))
3250 if os.path.exists(simple_fn):
3251 f.write('# Simple OSDs need chown on startup:\n')
3252 for n in ['block', 'block.db', 'block.wal']:
3253 p = os.path.join(data_dir, n)
3254 f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
3255 else:
20effc67
TL
3256 # if ceph-volume does not support 'ceph-volume activate', we must
3257 # do 'ceph-volume lvm activate'.
3258 test_cv = get_ceph_volume_container(
f67539c2 3259 ctx,
20effc67
TL
3260 args=['activate', '--bad-option'],
3261 volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
3262 bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
3263 cname='ceph-%s-%s.%s-activate-test' % (fsid, daemon_type, daemon_id),
3264 )
3265 out, err, ret = call(ctx, test_cv.run_cmd(), verbosity=CallVerbosity.SILENT)
3266 # bad: ceph-volume: error: unrecognized arguments: activate --bad-option
3267 # good: ceph-volume: error: unrecognized arguments: --bad-option
3268 if 'unrecognized arguments: activate' in err:
3269 # older ceph-volume without top-level activate or --no-tmpfs
3270 cmd = [
f6b5b4d7
TL
3271 'lvm', 'activate',
3272 str(daemon_id), osd_fsid,
20effc67
TL
3273 '--no-systemd',
3274 ]
3275 else:
3276 cmd = [
3277 'activate',
3278 '--osd-id', str(daemon_id),
3279 '--osd-uuid', osd_fsid,
3280 '--no-systemd',
3281 '--no-tmpfs',
3282 ]
3283
3284 prestart = get_ceph_volume_container(
3285 ctx,
3286 args=cmd,
f67539c2
TL
3287 volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
3288 bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
f6b5b4d7
TL
3289 cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
3290 )
33c7a0ef
TL
3291 if 'cluster' in ctx and ctx.cluster:
3292 # ctx.cluster is only set during adoption of a daemon from a cluster
3293 # with a custom name (not "ceph"). The initial activate command the first
3294 # time we start the new cephadm based systemd unit for this osd must account
3295 # for this by mounting to the correct data dir in the container. Otherwise
3296 # necessary files from the old data dir of the daemon won't be copied over
3297 # to the new data dir on the host. After the first start (e.g. on any redeploys)
3298 # this is no longer necessary as we will have these files in the data dir on the host
3299 if data_dir in prestart.volume_mounts:
3300 prestart.volume_mounts[data_dir] = f'/var/lib/ceph/osd/{ctx.cluster}-{daemon_id}'
f67539c2 3301 _write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate')
1911f103
TL
3302 elif daemon_type == CephIscsi.daemon_type:
3303 f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
f67539c2 3304 ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
f6b5b4d7 3305 tcmu_container = ceph_iscsi.get_tcmu_runner_container()
a4b75251 3306 _write_container_cmd_to_bash(ctx, f, tcmu_container, 'iscsi tcmu-runner container', background=True)
f67539c2
TL
3307
3308 _write_container_cmd_to_bash(ctx, f, c, '%s.%s' % (daemon_type, str(daemon_id)))
3309
3310 # some metadata about the deploy
3311 meta: Dict[str, Any] = {}
3312 if 'meta_json' in ctx and ctx.meta_json:
3313 meta = json.loads(ctx.meta_json) or {}
3314 meta.update({
3315 'memory_request': int(ctx.memory_request) if ctx.memory_request else None,
3316 'memory_limit': int(ctx.memory_limit) if ctx.memory_limit else None,
3317 })
3318 if not meta.get('ports'):
3319 meta['ports'] = ports
3320 metaf.write(json.dumps(meta, indent=4) + '\n')
1911f103 3321
9f95a23c 3322 os.fchmod(f.fileno(), 0o600)
f67539c2 3323 os.fchmod(metaf.fileno(), 0o600)
9f95a23c
TL
3324 os.rename(data_dir + '/unit.run.new',
3325 data_dir + '/unit.run')
f67539c2
TL
3326 os.rename(data_dir + '/unit.meta.new',
3327 data_dir + '/unit.meta')
9f95a23c
TL
3328
3329 # post-stop command(s)
3330 with open(data_dir + '/unit.poststop.new', 'w') as f:
3331 if daemon_type == 'osd':
3332 assert osd_fsid
20effc67 3333 poststop = get_ceph_volume_container(
f67539c2 3334 ctx,
9f95a23c
TL
3335 args=[
3336 'lvm', 'deactivate',
3337 str(daemon_id), osd_fsid,
3338 ],
f67539c2
TL
3339 volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
3340 bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
9f95a23c
TL
3341 cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type,
3342 daemon_id),
3343 )
f67539c2 3344 _write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd')
1911f103 3345 elif daemon_type == CephIscsi.daemon_type:
f6b5b4d7 3346 # make sure we also stop the tcmu container
f67539c2 3347 ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
f6b5b4d7 3348 tcmu_container = ceph_iscsi.get_tcmu_runner_container()
f67539c2 3349 f.write('! ' + ' '.join(tcmu_container.stop_cmd()) + '\n')
1911f103 3350 f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
9f95a23c
TL
3351 os.fchmod(f.fileno(), 0o600)
3352 os.rename(data_dir + '/unit.poststop.new',
3353 data_dir + '/unit.poststop')
3354
522d829b
TL
3355 # stop command(s)
3356 with open(data_dir + '/unit.stop.new', 'w') as f:
33c7a0ef
TL
3357 # the following generated script checks whether the container exists
3358 # before stopping it. The exit code is success either if the container
3359 # doesn't exist or if it exists and is stopped successfully.
3360 container_exists = f'{ctx.container_engine.path} inspect %s &>/dev/null'
3361 f.write(f'! {container_exists % c.old_cname} || {" ".join(c.stop_cmd(old_cname=True))} \n')
3362 f.write(f'! {container_exists % c.cname} || {" ".join(c.stop_cmd())} \n')
522d829b
TL
3363
3364 os.fchmod(f.fileno(), 0o600)
3365 os.rename(data_dir + '/unit.stop.new',
3366 data_dir + '/unit.stop')
3367
f67539c2
TL
3368 if c:
3369 with open(data_dir + '/unit.image.new', 'w') as f:
3370 f.write(c.image + '\n')
3371 os.fchmod(f.fileno(), 0o600)
3372 os.rename(data_dir + '/unit.image.new',
3373 data_dir + '/unit.image')
9f95a23c 3374
b3b6e05e
TL
3375 # sysctl
3376 install_sysctl(ctx, fsid, daemon_type)
3377
9f95a23c 3378 # systemd
f67539c2
TL
3379 install_base_units(ctx, fsid)
3380 unit = get_unit_file(ctx, fsid)
9f95a23c 3381 unit_file = 'ceph-%s@.service' % (fsid)
f67539c2 3382 with open(ctx.unit_dir + '/' + unit_file + '.new', 'w') as f:
9f95a23c 3383 f.write(unit)
f67539c2
TL
3384 os.rename(ctx.unit_dir + '/' + unit_file + '.new',
3385 ctx.unit_dir + '/' + unit_file)
3386 call_throws(ctx, ['systemctl', 'daemon-reload'])
9f95a23c
TL
3387
3388 unit_name = get_unit_name(fsid, daemon_type, daemon_id)
f67539c2 3389 call(ctx, ['systemctl', 'stop', unit_name],
adb31ebb 3390 verbosity=CallVerbosity.DEBUG)
f67539c2 3391 call(ctx, ['systemctl', 'reset-failed', unit_name],
adb31ebb 3392 verbosity=CallVerbosity.DEBUG)
9f95a23c 3393 if enable:
f67539c2 3394 call_throws(ctx, ['systemctl', 'enable', unit_name])
9f95a23c 3395 if start:
522d829b 3396 clean_cgroup(ctx, fsid, unit_name)
f67539c2 3397 call_throws(ctx, ['systemctl', 'start', unit_name])
9f95a23c 3398
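# Editor's summary (hedged): after deploy_daemon_units() the daemon's data dir
# (e.g. /var/lib/ceph/<fsid>/<type>.<id>/, names illustrative) holds the files
# consumed by the ceph-<fsid>@.service template: unit.run (ExecStart body),
# unit.stop (ExecStop), unit.poststop (ExecStopPost), unit.meta (deploy metadata
# as JSON) and unit.image (container image in use). A small helper to read the
# metadata back might look like this:
def _example_read_unit_meta(data_dir: str, fsid: str, daemon: str) -> 'Dict[str, Any]':
    import json
    import os
    with open(os.path.join(data_dir, fsid, daemon, 'unit.meta')) as f:
        return json.load(f)   # e.g. {'ports': [...], 'memory_limit': ...}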
f6b5b4d7
TL
3399
3400class Firewalld(object):
f67539c2
TL
3401 def __init__(self, ctx):
3402 # type: (CephadmContext) -> None
3403 self.ctx = ctx
f6b5b4d7
TL
3404 self.available = self.check()
3405
3406 def check(self):
3407 # type: () -> bool
3408 self.cmd = find_executable('firewall-cmd')
3409 if not self.cmd:
3410 logger.debug('firewalld does not appear to be present')
3411 return False
f67539c2 3412 (enabled, state, _) = check_unit(self.ctx, 'firewalld.service')
f6b5b4d7
TL
3413 if not enabled:
3414 logger.debug('firewalld.service is not enabled')
3415 return False
f67539c2 3416 if state != 'running':
f6b5b4d7
TL
3417 logger.debug('firewalld.service is not running')
3418 return False
3419
f67539c2 3420 logger.info('firewalld ready')
f6b5b4d7
TL
3421 return True
3422
3423 def enable_service_for(self, daemon_type):
3424 # type: (str) -> None
3425 if not self.available:
3426 logger.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type)
3427 return
3428
3429 if daemon_type == 'mon':
3430 svc = 'ceph-mon'
3431 elif daemon_type in ['mgr', 'mds', 'osd']:
3432 svc = 'ceph'
3433 elif daemon_type == NFSGanesha.daemon_type:
3434 svc = 'nfs'
3435 else:
3436 return
3437
f67539c2
TL
3438 if not self.cmd:
3439 raise RuntimeError('command not defined')
3440
3441 out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG)
9f95a23c
TL
3442 if ret:
3443 logger.info('Enabling firewalld service %s in current zone...' % svc)
f67539c2 3444 out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-service', svc])
9f95a23c
TL
3445 if ret:
3446 raise RuntimeError(
3447 'unable to add service %s to current zone: %s' % (svc, err))
3448 else:
3449 logger.debug('firewalld service %s is enabled in current zone' % svc)
f6b5b4d7
TL
3450
3451 def open_ports(self, fw_ports):
3452 # type: (List[int]) -> None
3453 if not self.available:
3454 logger.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports)
3455 return
3456
f67539c2
TL
3457 if not self.cmd:
3458 raise RuntimeError('command not defined')
3459
f6b5b4d7
TL
3460 for port in fw_ports:
3461 tcp_port = str(port) + '/tcp'
f67539c2 3462 out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
9f95a23c 3463 if ret:
f6b5b4d7 3464 logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
f67539c2 3465 out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-port', tcp_port])
f6b5b4d7
TL
3466 if ret:
3467 raise RuntimeError('unable to add port %s to current zone: %s' %
f67539c2 3468 (tcp_port, err))
f6b5b4d7
TL
3469 else:
3470 logger.debug('firewalld port %s is enabled in current zone' % tcp_port)
3471
f67539c2
TL
3472 def close_ports(self, fw_ports):
3473 # type: (List[int]) -> None
3474 if not self.available:
3475 logger.debug('Not possible to close ports <%s>. firewalld.service is not available' % fw_ports)
3476 return
3477
3478 if not self.cmd:
3479 raise RuntimeError('command not defined')
3480
3481 for port in fw_ports:
3482 tcp_port = str(port) + '/tcp'
3483 out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
3484 if not ret:
3485 logger.info('Disabling port %s in current zone...' % tcp_port)
3486 out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--remove-port', tcp_port])
3487 if ret:
3488 raise RuntimeError('unable to remove port %s from current zone: %s' %
3489 (tcp_port, err))
3490 else:
3491 logger.info(f'Port {tcp_port} disabled')
3492 else:
3493 logger.info(f'firewalld port {tcp_port} already closed')
3494
f6b5b4d7
TL
3495 def apply_rules(self):
3496 # type: () -> None
3497 if not self.available:
3498 return
3499
f67539c2
TL
3500 if not self.cmd:
3501 raise RuntimeError('command not defined')
f6b5b4d7 3502
f67539c2 3503 call_throws(self.ctx, [self.cmd, '--reload'])
f6b5b4d7 3504
f67539c2
TL
3505
3506def update_firewalld(ctx, daemon_type):
3507 # type: (CephadmContext, str) -> None
33c7a0ef
TL
3508 if not ('skip_firewalld' in ctx and ctx.skip_firewalld):
3509 firewall = Firewalld(ctx)
3510 firewall.enable_service_for(daemon_type)
3511 firewall.apply_rules()
f6b5b4d7 3512
f6b5b4d7 3513
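# Editor's usage sketch (hedged illustration): how deploy_daemon() drives the
# Firewalld helper above when a daemon declares ports. The daemon type and port
# list are made-up examples.
def _example_open_daemon_ports(ctx: 'CephadmContext') -> None:
    fw = Firewalld(ctx)           # no-op if firewall-cmd is absent or inactive
    fw.enable_service_for('mon')  # add the ceph-mon service to the current zone
    fw.open_ports([3300, 6789])   # add 3300/tcp and 6789/tcp permanently
    fw.apply_rules()              # firewall-cmd --reload to make it effective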
b3b6e05e
TL
3514def install_sysctl(ctx: CephadmContext, fsid: str, daemon_type: str) -> None:
3515 """
3516 Set up sysctl settings
3517 """
3518 def _write(conf: Path, lines: List[str]) -> None:
3519 lines = [
3520 '# created by cephadm',
3521 '',
3522 *lines,
3523 '',
3524 ]
3525 with open(conf, 'w') as f:
3526 f.write('\n'.join(lines))
f6b5b4d7 3527
b3b6e05e
TL
3528 conf = Path(ctx.sysctl_dir).joinpath(f'90-ceph-{fsid}-{daemon_type}.conf')
3529 lines: Optional[List] = None
3530
3531 if daemon_type == 'osd':
3532 lines = OSD.get_sysctl_settings()
3533 elif daemon_type == 'haproxy':
3534 lines = HAproxy.get_sysctl_settings()
3535 elif daemon_type == 'keepalived':
3536 lines = Keepalived.get_sysctl_settings()
3537
3538 # apply the sysctl settings
3539 if lines:
522d829b 3540 Path(ctx.sysctl_dir).mkdir(mode=0o755, exist_ok=True)
b3b6e05e
TL
3541 _write(conf, lines)
3542 call_throws(ctx, ['sysctl', '--system'])
9f95a23c 3543
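# Editor's illustration (hedged): for an OSD host install_sysctl() ends up
# writing /etc/sysctl.d/90-ceph-<fsid>-osd.conf (path illustrative) containing a
# '# created by cephadm' header followed by the lines returned by
# OSD.get_sysctl_settings(), then applies them via 'sysctl --system'. The
# rendering mirrors the nested _write() helper above:
def _example_render_sysctl_conf(lines: 'List[str]') -> str:
    return '\n'.join(['# created by cephadm', '', *lines, ''])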
f67539c2 3544
33c7a0ef
TL
3545def migrate_sysctl_dir(ctx: CephadmContext, fsid: str) -> None:
3546 """
3547 Cephadm once used '/usr/lib/sysctl.d' for storing sysctl configuration.
3548 This moves it to '/etc/sysctl.d'.
3549 """
3550 deprecated_location: str = '/usr/lib/sysctl.d'
3551 deprecated_confs: List[str] = glob(f'{deprecated_location}/90-ceph-{fsid}-*.conf')
3552 if not deprecated_confs:
3553 return
3554
3555 file_count: int = len(deprecated_confs)
3556 logger.info(f'Found {file_count} sysctl files in deprecated location {deprecated_location}. Starting migration.')
3557 for conf in deprecated_confs:
3558 try:
3559 shutil.move(conf, ctx.sysctl_dir)
3560 file_count -= 1
3561 except shutil.Error as err:
3562 if str(err).endswith('already exists'):
3563 logger.warning(f'Destination file already exists. Deleting {conf}.')
3564 try:
3565 os.unlink(conf)
3566 file_count -= 1
3567 except OSError as del_err:
3568 logger.warning(f'Could not remove {conf}: {del_err}.')
3569 else:
3570 logger.warning(f'Could not move {conf} from {deprecated_location} to {ctx.sysctl_dir}: {err}')
3571
3572 # Log successful migration
3573 if file_count == 0:
3574 logger.info(f'Successfully migrated sysctl config to {ctx.sysctl_dir}.')
3575 return
3576
3577 # Log partially successful / unsuccessful migration
3578 files_processed: int = len(deprecated_confs)
3579 if file_count < files_processed:
3580 status: str = f'partially successful (failed {file_count}/{files_processed})'
3581 elif file_count == files_processed:
3582 status = 'unsuccessful'
3583 logger.warning(f'Migration of sysctl configuration {status}. You may want to perform a migration manually.')
3584
3585
f67539c2
TL
3586def install_base_units(ctx, fsid):
3587 # type: (CephadmContext, str) -> None
9f95a23c
TL
3588 """
3589 Set up ceph.target and ceph-$fsid.target units.
3590 """
3591 # global unit
f67539c2
TL
3592 existed = os.path.exists(ctx.unit_dir + '/ceph.target')
3593 with open(ctx.unit_dir + '/ceph.target.new', 'w') as f:
9f95a23c
TL
3594 f.write('[Unit]\n'
3595 'Description=All Ceph clusters and services\n'
3596 '\n'
3597 '[Install]\n'
3598 'WantedBy=multi-user.target\n')
f67539c2
TL
3599 os.rename(ctx.unit_dir + '/ceph.target.new',
3600 ctx.unit_dir + '/ceph.target')
9f95a23c
TL
3601 if not existed:
3602 # we disable before enable in case a different ceph.target
3603 # (from the traditional package) is present; while newer
3604 # systemd is smart enough to disable the old
3605 # (/lib/systemd/...) and enable the new (/etc/systemd/...),
3606 # some older versions of systemd error out with EEXIST.
f67539c2
TL
3607 call_throws(ctx, ['systemctl', 'disable', 'ceph.target'])
3608 call_throws(ctx, ['systemctl', 'enable', 'ceph.target'])
3609 call_throws(ctx, ['systemctl', 'start', 'ceph.target'])
9f95a23c
TL
3610
3611 # cluster unit
f67539c2
TL
3612 existed = os.path.exists(ctx.unit_dir + '/ceph-%s.target' % fsid)
3613 with open(ctx.unit_dir + '/ceph-%s.target.new' % fsid, 'w') as f:
3614 f.write(
3615 '[Unit]\n'
3616 'Description=Ceph cluster {fsid}\n'
3617 'PartOf=ceph.target\n'
3618 'Before=ceph.target\n'
3619 '\n'
3620 '[Install]\n'
3621 'WantedBy=multi-user.target ceph.target\n'.format(
3622 fsid=fsid)
9f95a23c 3623 )
f67539c2
TL
3624 os.rename(ctx.unit_dir + '/ceph-%s.target.new' % fsid,
3625 ctx.unit_dir + '/ceph-%s.target' % fsid)
9f95a23c 3626 if not existed:
f67539c2
TL
3627 call_throws(ctx, ['systemctl', 'enable', 'ceph-%s.target' % fsid])
3628 call_throws(ctx, ['systemctl', 'start', 'ceph-%s.target' % fsid])
9f95a23c
TL
3629
3630 # logrotate for the cluster
f67539c2 3631 with open(ctx.logrotate_dir + '/ceph-%s' % fsid, 'w') as f:
9f95a23c
TL
3632 """
3633 This is a bit sloppy in that the killall/pkill will touch all ceph daemons
3634 in all containers, but I don't see an elegant way to send SIGHUP *just* to
3635 the daemons for this cluster. (1) systemd kill -s will get the signal to
3636 podman, but podman will exit. (2) podman kill will get the signal to the
3637 first child (bash), but that isn't the ceph daemon. This is simpler and
3638 should be harmless.
3639 """
3640 f.write("""# created by cephadm
3641/var/log/ceph/%s/*.log {
3642 rotate 7
3643 daily
3644 compress
3645 sharedscripts
3646 postrotate
f67539c2 3647 killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror cephfs-mirror || pkill -1 -x 'ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror|cephfs-mirror' || true
9f95a23c
TL
3648 endscript
3649 missingok
3650 notifempty
3651 su root root
3652}
3653""" % fsid)
3654
f6b5b4d7 3655
f67539c2
TL
3656def get_unit_file(ctx, fsid):
3657 # type: (CephadmContext, str) -> str
f91f0fd5 3658 extra_args = ''
f67539c2
TL
3659 if isinstance(ctx.container_engine, Podman):
3660 extra_args = ('ExecStartPre=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
3661 'ExecStopPost=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
3662 'Type=forking\n'
3663 'PIDFile=%t/%n-pid\n')
3664 if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION:
3665 extra_args += 'Delegate=yes\n'
3666
3667 docker = isinstance(ctx.container_engine, Docker)
9f95a23c
TL
3668 u = """# generated by cephadm
3669[Unit]
3670Description=Ceph %i for {fsid}
3671
3672# According to:
3673# http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
3674# these can be removed once ceph-mon will dynamically change network
3675# configuration.
f67539c2 3676After=network-online.target local-fs.target time-sync.target{docker_after}
9f95a23c 3677Wants=network-online.target local-fs.target time-sync.target
f67539c2 3678{docker_requires}
9f95a23c
TL
3679
3680PartOf=ceph-{fsid}.target
3681Before=ceph-{fsid}.target
3682
3683[Service]
3684LimitNOFILE=1048576
3685LimitNPROC=1048576
3686EnvironmentFile=-/etc/environment
9f95a23c 3687ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
33c7a0ef 3688ExecStop=-/bin/bash -c 'bash {data_dir}/{fsid}/%i/unit.stop'
9f95a23c
TL
3689ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
3690KillMode=none
3691Restart=on-failure
3692RestartSec=10s
3693TimeoutStartSec=120
e306af50 3694TimeoutStopSec=120
9f95a23c
TL
3695StartLimitInterval=30min
3696StartLimitBurst=5
f91f0fd5 3697{extra_args}
9f95a23c
TL
3698[Install]
3699WantedBy=ceph-{fsid}.target
33c7a0ef 3700""".format(fsid=fsid,
f67539c2
TL
3701 data_dir=ctx.data_dir,
3702 extra_args=extra_args,
3703 # if docker, we depend on docker.service
3704 docker_after=' docker.service' if docker else '',
3705 docker_requires='Requires=docker.service\n' if docker else '')
f91f0fd5 3706
9f95a23c
TL
3707 return u
3708
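# Editor's sketch (hedged) of the engine-specific parts of the unit file built
# above: with podman the service is a forking unit tracked through a conmon pid
# file (plus Delegate=yes on podman >= 2.1 for cgroups=split), while with docker
# the unit only gains After=/Requires=docker.service.
def _example_podman_extra_args(cgroups_split: bool) -> str:
    extra = ('ExecStartPre=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
             'ExecStopPost=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
             'Type=forking\n'
             'PIDFile=%t/%n-pid\n')
    if cgroups_split:
        extra += 'Delegate=yes\n'
    return extra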
3709##################################
3710
f6b5b4d7 3711
9f95a23c
TL
3712class CephContainer:
3713 def __init__(self,
f67539c2 3714 ctx: CephadmContext,
f91f0fd5
TL
3715 image: str,
3716 entrypoint: str,
3717 args: List[str] = [],
3718 volume_mounts: Dict[str, str] = {},
3719 cname: str = '',
3720 container_args: List[str] = [],
3721 envs: Optional[List[str]] = None,
3722 privileged: bool = False,
3723 ptrace: bool = False,
3724 bind_mounts: Optional[List[List[str]]] = None,
f67539c2 3725 init: Optional[bool] = None,
f91f0fd5 3726 host_network: bool = True,
f67539c2
TL
3727 memory_request: Optional[str] = None,
3728 memory_limit: Optional[str] = None,
f91f0fd5 3729 ) -> None:
f67539c2 3730 self.ctx = ctx
9f95a23c
TL
3731 self.image = image
3732 self.entrypoint = entrypoint
3733 self.args = args
3734 self.volume_mounts = volume_mounts
522d829b 3735 self._cname = cname
9f95a23c
TL
3736 self.container_args = container_args
3737 self.envs = envs
3738 self.privileged = privileged
3739 self.ptrace = ptrace
f6b5b4d7 3740 self.bind_mounts = bind_mounts if bind_mounts else []
f67539c2 3741 self.init = init if init else ctx.container_init
f91f0fd5 3742 self.host_network = host_network
f67539c2
TL
3743 self.memory_request = memory_request
3744 self.memory_limit = memory_limit
9f95a23c 3745
522d829b
TL
3746 @classmethod
3747 def for_daemon(cls,
3748 ctx: CephadmContext,
3749 fsid: str,
3750 daemon_type: str,
3751 daemon_id: str,
3752 entrypoint: str,
3753 args: List[str] = [],
3754 volume_mounts: Dict[str, str] = {},
3755 container_args: List[str] = [],
3756 envs: Optional[List[str]] = None,
3757 privileged: bool = False,
3758 ptrace: bool = False,
3759 bind_mounts: Optional[List[List[str]]] = None,
3760 init: Optional[bool] = None,
3761 host_network: bool = True,
3762 memory_request: Optional[str] = None,
3763 memory_limit: Optional[str] = None,
3764 ) -> 'CephContainer':
3765 return cls(
3766 ctx,
3767 image=ctx.image,
3768 entrypoint=entrypoint,
3769 args=args,
3770 volume_mounts=volume_mounts,
3771 cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id),
3772 container_args=container_args,
3773 envs=envs,
3774 privileged=privileged,
3775 ptrace=ptrace,
3776 bind_mounts=bind_mounts,
3777 init=init,
3778 host_network=host_network,
3779 memory_request=memory_request,
3780 memory_limit=memory_limit,
3781 )
3782
3783 @property
3784 def cname(self) -> str:
3785 """
3786 podman adds the current container name to the /etc/hosts
3787 file. It turns out that python's `socket.getfqdn()` differs from
3788 `hostname -f` when the container name contains dots:
3790
3791 # podman run --name foo.bar.baz.com ceph/ceph /bin/bash
3792 [root@sebastians-laptop /]# cat /etc/hosts
3793 127.0.0.1 localhost
3794 ::1 localhost
3795 127.0.1.1 sebastians-laptop foo.bar.baz.com
3796 [root@sebastians-laptop /]# hostname -f
3797 sebastians-laptop
3798 [root@sebastians-laptop /]# python3 -c 'import socket; print(socket.getfqdn())'
3799 foo.bar.baz.com
3800
3801 Fascinatingly, this doesn't happen when using dashes.
3802 """
3803 return self._cname.replace('.', '-')
3804
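    # Editor's illustration (hedged): the mapping above is a plain dot-to-dash
    # substitution, e.g. 'ceph-FSID-rgw.myhost.front'.replace('.', '-') gives
    # 'ceph-FSID-rgw-myhost-front'; this is why stop_cmd()/rm_cmd() below try
    # both cname and old_cname.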
3805 @cname.setter
3806 def cname(self, val: str) -> None:
3807 self._cname = val
3808
3809 @property
3810 def old_cname(self) -> str:
3811 return self._cname
3812
f91f0fd5
TL
3813 def run_cmd(self) -> List[str]:
3814 cmd_args: List[str] = [
f67539c2 3815 str(self.ctx.container_engine.path),
f91f0fd5
TL
3816 'run',
3817 '--rm',
3818 '--ipc=host',
b3b6e05e
TL
3819 # some containers (ahem, haproxy) override this, but we want a fast
3820 # shutdown always (and, more importantly, a successful exit even if we
3821 # fall back to SIGKILL).
3822 '--stop-signal=SIGTERM',
f91f0fd5 3823 ]
f67539c2
TL
3824
3825 if isinstance(self.ctx.container_engine, Podman):
f67539c2
TL
3826 if os.path.exists('/etc/ceph/podman-auth.json'):
3827 cmd_args.append('--authfile=/etc/ceph/podman-auth.json')
3828
f91f0fd5
TL
3829 envs: List[str] = [
3830 '-e', 'CONTAINER_IMAGE=%s' % self.image,
3831 '-e', 'NODE_NAME=%s' % get_hostname(),
3832 ]
3833 vols: List[str] = []
3834 binds: List[str] = []
9f95a23c 3835
f67539c2
TL
3836 if self.memory_request:
3837 cmd_args.extend(['-e', 'POD_MEMORY_REQUEST', str(self.memory_request)])
3838 if self.memory_limit:
3839 cmd_args.extend(['-e', 'POD_MEMORY_LIMIT', str(self.memory_limit)])
3840 cmd_args.extend(['--memory', str(self.memory_limit)])
3841
f91f0fd5
TL
3842 if self.host_network:
3843 cmd_args.append('--net=host')
3844 if self.entrypoint:
3845 cmd_args.extend(['--entrypoint', self.entrypoint])
9f95a23c 3846 if self.privileged:
f91f0fd5
TL
3847 cmd_args.extend([
3848 '--privileged',
3849 # let OSD etc read block devs that haven't been chowned
3850 '--group-add=disk'])
3851 if self.ptrace and not self.privileged:
3852 # if privileged, the SYS_PTRACE cap is already added
3853 # in addition, --cap-add and --privileged are mutually
3854 # exclusive since podman >= 2.0
3855 cmd_args.append('--cap-add=SYS_PTRACE')
3856 if self.init:
3857 cmd_args.append('--init')
f67539c2 3858 envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
f91f0fd5
TL
3859 if self.cname:
3860 cmd_args.extend(['--name', self.cname])
3861 if self.envs:
3862 for env in self.envs:
3863 envs.extend(['-e', env])
3864
9f95a23c
TL
3865 vols = sum(
3866 [['-v', '%s:%s' % (host_dir, container_dir)]
3867 for host_dir, container_dir in self.volume_mounts.items()], [])
f6b5b4d7 3868 binds = sum([['--mount', '{}'.format(','.join(bind))]
f91f0fd5
TL
3869 for bind in self.bind_mounts], [])
3870
f67539c2
TL
3871 return \
3872 cmd_args + self.container_args + \
3873 envs + vols + binds + \
3874 [self.image] + self.args # type: ignore
f91f0fd5
TL
3875
3876 def shell_cmd(self, cmd: List[str]) -> List[str]:
3877 cmd_args: List[str] = [
f67539c2 3878 str(self.ctx.container_engine.path),
9f95a23c
TL
3879 'run',
3880 '--rm',
e306af50 3881 '--ipc=host',
f91f0fd5
TL
3882 ]
3883 envs: List[str] = [
3884 '-e', 'CONTAINER_IMAGE=%s' % self.image,
3885 '-e', 'NODE_NAME=%s' % get_hostname(),
3886 ]
3887 vols: List[str] = []
3888 binds: List[str] = []
9f95a23c 3889
f91f0fd5
TL
3890 if self.host_network:
3891 cmd_args.append('--net=host')
b3b6e05e
TL
3892 if self.ctx.no_hosts:
3893 cmd_args.append('--no-hosts')
9f95a23c 3894 if self.privileged:
f91f0fd5
TL
3895 cmd_args.extend([
3896 '--privileged',
3897 # let OSD etc read block devs that haven't been chowned
3898 '--group-add=disk',
3899 ])
f67539c2
TL
3900 if self.init:
3901 cmd_args.append('--init')
3902 envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
f91f0fd5
TL
3903 if self.envs:
3904 for env in self.envs:
3905 envs.extend(['-e', env])
3906
9f95a23c
TL
3907 vols = sum(
3908 [['-v', '%s:%s' % (host_dir, container_dir)]
3909 for host_dir, container_dir in self.volume_mounts.items()], [])
f6b5b4d7
TL
3910 binds = sum([['--mount', '{}'.format(','.join(bind))]
3911 for bind in self.bind_mounts], [])
f91f0fd5
TL
3912
3913 return cmd_args + self.container_args + envs + vols + binds + [
9f95a23c 3914 '--entrypoint', cmd[0],
f91f0fd5 3915 self.image,
9f95a23c
TL
3916 ] + cmd[1:]
3917
3918 def exec_cmd(self, cmd):
3919 # type: (List[str]) -> List[str]
522d829b
TL
3920 cname = get_running_container_name(self.ctx, self)
3921 if not cname:
3922 raise Error('unable to find container "{}"'.format(self.cname))
9f95a23c 3923 return [
f67539c2 3924 str(self.ctx.container_engine.path),
9f95a23c
TL
3925 'exec',
3926 ] + self.container_args + [
3927 self.cname,
3928 ] + cmd
3929
522d829b 3930 def rm_cmd(self, old_cname: bool = False, storage: bool = False) -> List[str]:
f6b5b4d7 3931 ret = [
f67539c2 3932 str(self.ctx.container_engine.path),
f6b5b4d7
TL
3933 'rm', '-f',
3934 ]
3935 if storage:
3936 ret.append('--storage')
522d829b
TL
3937 if old_cname:
3938 ret.append(self.old_cname)
3939 else:
3940 ret.append(self.cname)
f6b5b4d7
TL
3941 return ret
3942
522d829b 3943 def stop_cmd(self, old_cname: bool = False) -> List[str]:
f6b5b4d7 3944 ret = [
f67539c2 3945 str(self.ctx.container_engine.path),
522d829b 3946 'stop', self.old_cname if old_cname else self.cname,
f6b5b4d7
TL
3947 ]
3948 return ret
3949
9f95a23c
TL
3950 def run(self, timeout=DEFAULT_TIMEOUT):
3951 # type: (Optional[int]) -> str
f67539c2
TL
3952 out, _, _ = call_throws(self.ctx, self.run_cmd(),
3953 desc=self.entrypoint, timeout=timeout)
9f95a23c
TL
3954 return out
3955
20effc67
TL
3956
3957#####################################
3958
3959class MgrListener(Thread):
3960 def __init__(self, agent: 'CephadmAgent') -> None:
3961 self.agent = agent
3962 self.stop = False
3963 super(MgrListener, self).__init__(target=self.run)
3964
3965 def run(self) -> None:
3966 listenSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
3967 listenSocket.bind(('0.0.0.0', int(self.agent.listener_port)))
3968 listenSocket.settimeout(60)
3969 listenSocket.listen(1)
3970 ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
3971 ssl_ctx.verify_mode = ssl.CERT_REQUIRED
3972 ssl_ctx.load_cert_chain(self.agent.listener_cert_path, self.agent.listener_key_path)
3973 ssl_ctx.load_verify_locations(self.agent.ca_path)
3974 secureListenSocket = ssl_ctx.wrap_socket(listenSocket, server_side=True)
3975 while not self.stop:
3976 try:
3977 try:
3978 conn, _ = secureListenSocket.accept()
3979 except socket.timeout:
3980 continue
3981 try:
3982 length: int = int(conn.recv(10).decode())
3983 except Exception as e:
3984 err_str = f'Failed to extract length of payload from message: {e}'
3985 conn.send(err_str.encode())
3986 logger.error(err_str)
3987 while True:
3988 payload = conn.recv(length).decode()
3989 if not payload:
3990 break
3991 try:
3992 data: Dict[Any, Any] = json.loads(payload)
3993 self.handle_json_payload(data)
3994 except Exception as e:
3995 err_str = f'Failed to extract json payload from message: {e}'
3996 conn.send(err_str.encode())
3997 logger.error(err_str)
3998 else:
3999 conn.send(b'ACK')
4000 if 'config' in data:
4001 self.agent.wakeup()
4002 self.agent.ls_gatherer.wakeup()
4003 self.agent.volume_gatherer.wakeup()
4004 logger.debug(f'Got mgr message {data}')
4005 except Exception as e:
4006 logger.error(f'Mgr Listener encountered exception: {e}')
4007
4008 def shutdown(self) -> None:
4009 self.stop = True
4010
4011 def handle_json_payload(self, data: Dict[Any, Any]) -> None:
4012 self.agent.ack = int(data['counter'])
4013 if 'config' in data:
4014 logger.info('Received new config from mgr')
4015 config = data['config']
4016 for filename in config:
4017 if filename in self.agent.required_files:
4018 file_path = os.path.join(self.agent.daemon_dir, filename)
4019 with open(os.open(file_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
4020 f.write(config[filename])
4021 os.rename(file_path + '.new', file_path)
4022 self.agent.pull_conf_settings()
4023 self.agent.wakeup()
4024
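# Editor's sketch (hedged): the mgr-to-agent messages handled by MgrListener
# above are a length prefix (read with conn.recv(10)) followed by a JSON body
# (read with conn.recv(length)) over the TLS socket. A minimal sender over an
# already-connected TLS socket could look like this; it assumes the two writes
# arrive as separate reads, which is a simplification of what the mgr does.
def _example_send_mgr_message(sock: 'socket.socket', payload: 'Dict[str, Any]') -> None:
    import json
    body = json.dumps(payload).encode()
    sock.sendall(str(len(body)).encode())   # length digits, at most 10 bytes
    sock.sendall(body)                      # JSON payload of exactly that length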
4025
4026class CephadmAgent():
4027
4028 daemon_type = 'agent'
4029 default_port = 8498
4030 loop_interval = 30
4031 stop = False
4032
4033 required_files = [
4034 'agent.json',
4035 'keyring',
4036 'root_cert.pem',
4037 'listener.crt',
4038 'listener.key',
4039 ]
4040
4041 def __init__(self, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str] = ''):
4042 self.ctx = ctx
4043 self.fsid = fsid
4044 self.daemon_id = daemon_id
4045 self.starting_port = 14873
4046 self.target_ip = ''
4047 self.target_port = ''
4048 self.host = ''
4049 self.daemon_dir = os.path.join(ctx.data_dir, self.fsid, f'{self.daemon_type}.{self.daemon_id}')
4050 self.config_path = os.path.join(self.daemon_dir, 'agent.json')
4051 self.keyring_path = os.path.join(self.daemon_dir, 'keyring')
4052 self.ca_path = os.path.join(self.daemon_dir, 'root_cert.pem')
4053 self.listener_cert_path = os.path.join(self.daemon_dir, 'listener.crt')
4054 self.listener_key_path = os.path.join(self.daemon_dir, 'listener.key')
4055 self.listener_port = ''
4056 self.ack = 1
4057 self.event = Event()
4058 self.mgr_listener = MgrListener(self)
4059 self.ls_gatherer = AgentGatherer(self, lambda: self._get_ls(), 'Ls')
4060 self.volume_gatherer = AgentGatherer(self, lambda: self._ceph_volume(enhanced=False), 'Volume')
4061 self.device_enhanced_scan = False
4062 self.recent_iteration_run_times: List[float] = [0.0, 0.0, 0.0]
4063 self.recent_iteration_index: int = 0
4064 self.cached_ls_values: Dict[str, Dict[str, str]] = {}
4065
4066 def validate(self, config: Dict[str, str] = {}) -> None:
4067 # check for the required files
4068 for fname in self.required_files:
4069 if fname not in config:
4070 raise Error('required file missing from config: %s' % fname)
4071
4072 def deploy_daemon_unit(self, config: Dict[str, str] = {}) -> None:
4073 if not config:
4074 raise Error('Agent needs a config')
4075 assert isinstance(config, dict)
4076 self.validate(config)
4077
4078 # Create the required config files in the daemons dir, with restricted permissions
4079 for filename in config:
4080 if filename in self.required_files:
4081 file_path = os.path.join(self.daemon_dir, filename)
4082 with open(os.open(file_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
4083 f.write(config[filename])
4084 os.rename(file_path + '.new', file_path)
4085
4086 unit_run_path = os.path.join(self.daemon_dir, 'unit.run')
4087 with open(os.open(unit_run_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
4088 f.write(self.unit_run())
4089 os.rename(unit_run_path + '.new', unit_run_path)
4090
4091 meta: Dict[str, Any] = {}
4092 meta_file_path = os.path.join(self.daemon_dir, 'unit.meta')
4093 if 'meta_json' in self.ctx and self.ctx.meta_json:
4094 meta = json.loads(self.ctx.meta_json) or {}
4095 with open(os.open(meta_file_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
4096 f.write(json.dumps(meta, indent=4) + '\n')
4097 os.rename(meta_file_path + '.new', meta_file_path)
4098
4099 unit_file_path = os.path.join(self.ctx.unit_dir, self.unit_name())
4100 with open(os.open(unit_file_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
4101 f.write(self.unit_file())
4102 os.rename(unit_file_path + '.new', unit_file_path)
4103
4104 call_throws(self.ctx, ['systemctl', 'daemon-reload'])
4105 call(self.ctx, ['systemctl', 'stop', self.unit_name()],
4106 verbosity=CallVerbosity.DEBUG)
4107 call(self.ctx, ['systemctl', 'reset-failed', self.unit_name()],
4108 verbosity=CallVerbosity.DEBUG)
4109 call_throws(self.ctx, ['systemctl', 'enable', '--now', self.unit_name()])
4110
4111 def unit_name(self) -> str:
4112 return '{}.service'.format(get_unit_name(self.fsid, self.daemon_type, self.daemon_id))
4113
4114 def unit_run(self) -> str:
4115 py3 = shutil.which('python3')
4116 binary_path = os.path.realpath(sys.argv[0])
4117 return ('set -e\n' + f'{py3} {binary_path} agent --fsid {self.fsid} --daemon-id {self.daemon_id} &\n')
4118
4119 def unit_file(self) -> str:
4120 return """#generated by cephadm
4121[Unit]
4122Description=cephadm agent for cluster {fsid}
4123
4124PartOf=ceph-{fsid}.target
4125Before=ceph-{fsid}.target
4126
4127[Service]
4128Type=forking
4129ExecStart=/bin/bash {data_dir}/unit.run
4130Restart=on-failure
4131RestartSec=10s
4132
4133[Install]
4134WantedBy=ceph-{fsid}.target
4135""".format(
4136 fsid=self.fsid,
4137 data_dir=self.daemon_dir
4138 )
4139
4140 def shutdown(self) -> None:
4141 self.stop = True
4142 if self.mgr_listener.is_alive():
4143 self.mgr_listener.shutdown()
4144
4145 def wakeup(self) -> None:
4146 self.event.set()
4147
4148 def pull_conf_settings(self) -> None:
4149 try:
4150 with open(self.config_path, 'r') as f:
4151 config = json.load(f)
4152 self.target_ip = config['target_ip']
4153 self.target_port = config['target_port']
4154 self.loop_interval = int(config['refresh_period'])
4155 self.starting_port = int(config['listener_port'])
4156 self.host = config['host']
4157 use_lsm = config['device_enhanced_scan']
4158 except Exception as e:
4159 self.shutdown()
4160 raise Error(f'Failed to get agent target ip and port from config: {e}')
4161
4162 try:
4163 with open(self.keyring_path, 'r') as f:
4164 self.keyring = f.read()
4165 except Exception as e:
4166 self.shutdown()
4167 raise Error(f'Failed to get agent keyring: {e}')
4168
4169 assert self.target_ip and self.target_port
4170
4171 self.device_enhanced_scan = False
4172 if use_lsm.lower() == 'true':
4173 self.device_enhanced_scan = True
4174 self.volume_gatherer.update_func(lambda: self._ceph_volume(enhanced=self.device_enhanced_scan))
4175
4176 def run(self) -> None:
4177 self.pull_conf_settings()
4178
4179 try:
4180 for _ in range(1001):
4181 if not port_in_use(self.ctx, self.starting_port):
4182 self.listener_port = str(self.starting_port)
4183 break
4184 self.starting_port += 1
4185 if not self.listener_port:
4186 raise Error(f'All 1001 ports starting at {str(self.starting_port - 1001)} taken.')
4187 except Exception as e:
4188 raise Error(f'Failed to pick port for agent to listen on: {e}')
4189
4190 if not self.mgr_listener.is_alive():
4191 self.mgr_listener.start()
4192
4193 if not self.ls_gatherer.is_alive():
4194 self.ls_gatherer.start()
4195
4196 if not self.volume_gatherer.is_alive():
4197 self.volume_gatherer.start()
4198
4199 ssl_ctx = ssl.create_default_context()
4200 ssl_ctx.check_hostname = True
4201 ssl_ctx.verify_mode = ssl.CERT_REQUIRED
4202 ssl_ctx.load_verify_locations(self.ca_path)
4203
4204 while not self.stop:
4205 start_time = time.monotonic()
4206 ack = self.ack
4207
4208 # part of the networks info is returned as a set which is not JSON
4209 # serializable. The set must be converted to a list
4210 networks = list_networks(self.ctx)
4211 networks_list = {}
4212 for key in networks.keys():
4213 for k, v in networks[key].items():
4214 networks_list[key] = {k: list(v)}
4215
4216 data = json.dumps({'host': self.host,
4217 'ls': (self.ls_gatherer.data if self.ack == self.ls_gatherer.ack
4218 and self.ls_gatherer.data is not None else []),
4219 'networks': networks_list,
4220 'facts': HostFacts(self.ctx).dump(),
4221 'volume': (self.volume_gatherer.data if self.ack == self.volume_gatherer.ack
4222 and self.volume_gatherer.data is not None else ''),
4223 'ack': str(ack),
4224 'keyring': self.keyring,
4225 'port': self.listener_port})
4226 data = data.encode('ascii')
4227
4228 url = f'https://{self.target_ip}:{self.target_port}/data'
4229 try:
4230 req = Request(url, data, {'Content-Type': 'application/json'})
4231 send_time = time.monotonic()
4232 with urlopen(req, context=ssl_ctx) as response:
4233 response_str = response.read()
4234 response_json = json.loads(response_str)
4235 total_request_time = datetime.timedelta(seconds=(time.monotonic() - send_time)).total_seconds()
4236 logger.info(f'Received mgr response: "{response_json["result"]}" {total_request_time} seconds after sending request.')
4237 except Exception as e:
4238 logger.error(f'Failed to send metadata to mgr: {e}')
4239
4240 end_time = time.monotonic()
4241 run_time = datetime.timedelta(seconds=(end_time - start_time))
4242 self.recent_iteration_run_times[self.recent_iteration_index] = run_time.total_seconds()
4243 self.recent_iteration_index = (self.recent_iteration_index + 1) % 3
4244 run_time_average = sum(self.recent_iteration_run_times, 0.0) / len([t for t in self.recent_iteration_run_times if t])
4245
4246 self.event.wait(max(self.loop_interval - int(run_time_average), 0))
4247 self.event.clear()
4248
4249 def _ceph_volume(self, enhanced: bool = False) -> Tuple[str, bool]:
4250 self.ctx.command = 'inventory --format=json'.split()
4251 if enhanced:
4252 self.ctx.command.append('--with-lsm')
4253 self.ctx.fsid = self.fsid
4254
4255 stream = io.StringIO()
4256 with redirect_stdout(stream):
4257 command_ceph_volume(self.ctx)
4258
4259 stdout = stream.getvalue()
4260
4261 if stdout:
4262 return (stdout, False)
4263 else:
4264 raise Exception('ceph-volume returned empty value')
4265
4266 def _daemon_ls_subset(self) -> Dict[str, Dict[str, Any]]:
4267 # gets a subset of ls info quickly. The results of this will tell us if our
4268 # cached info is still good or if we need to run the full ls again.
4269 # for legacy containers, we just grab the full info. For cephadmv1 containers,
4270 # we only grab enabled, state, mem_usage and container id. If container id has
4271 # not changed for any daemon, we assume our cached info is good.
4272 daemons: Dict[str, Dict[str, Any]] = {}
4273 data_dir = self.ctx.data_dir
4274 seen_memusage = {} # type: Dict[str, int]
4275 out, err, code = call(
4276 self.ctx,
4277 [self.ctx.container_engine.path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
4278 verbosity=CallVerbosity.DEBUG
4279 )
4280 seen_memusage_cid_len, seen_memusage = _parse_mem_usage(code, out)
4281 # we need a mapping from container names to ids. Later we will convert daemon
4282 # names to container names to get daemons container id to see if it has changed
4283 out, err, code = call(
4284 self.ctx,
4285 [self.ctx.container_engine.path, 'ps', '--format', '{{.ID}},{{.Names}}', '--no-trunc'],
4286 verbosity=CallVerbosity.DEBUG
4287 )
4288 name_id_mapping: Dict[str, str] = self._parse_container_id_name(code, out)
4289 for i in os.listdir(data_dir):
4290 if i in ['mon', 'osd', 'mds', 'mgr']:
4291 daemon_type = i
4292 for j in os.listdir(os.path.join(data_dir, i)):
4293 if '-' not in j:
4294 continue
4295 (cluster, daemon_id) = j.split('-', 1)
4296 legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
4297 (enabled, state, _) = check_unit(self.ctx, legacy_unit_name)
4298 daemons[f'{daemon_type}.{daemon_id}'] = {
4299 'style': 'legacy',
4300 'name': '%s.%s' % (daemon_type, daemon_id),
4301 'fsid': self.ctx.fsid if self.ctx.fsid is not None else 'unknown',
4302 'systemd_unit': legacy_unit_name,
4303 'enabled': 'true' if enabled else 'false',
4304 'state': state,
4305 }
4306 elif is_fsid(i):
4307 fsid = str(i) # convince mypy that fsid is a str here
4308 for j in os.listdir(os.path.join(data_dir, i)):
4309 if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)):
4310 (daemon_type, daemon_id) = j.split('.', 1)
4311 unit_name = get_unit_name(fsid, daemon_type, daemon_id)
4312 (enabled, state, _) = check_unit(self.ctx, unit_name)
4313 daemons[j] = {
4314 'style': 'cephadm:v1',
4315 'systemd_unit': unit_name,
4316 'enabled': 'true' if enabled else 'false',
4317 'state': state,
4318 }
4319 c = CephContainer.for_daemon(self.ctx, self.ctx.fsid, daemon_type, daemon_id, 'bash')
4320 container_id: Optional[str] = None
4321 for name in (c.cname, c.old_cname):
4322 if name in name_id_mapping:
4323 container_id = name_id_mapping[name]
4324 break
4325 daemons[j]['container_id'] = container_id
4326 if container_id:
4327 daemons[j]['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len])
4328 return daemons
4329
4330 def _parse_container_id_name(self, code: int, out: str) -> Dict[str, str]:
4331 # map container names to ids from ps output
4332 name_id_mapping = {} # type: Dict[str, str]
4333 if not code:
4334 for line in out.splitlines():
4335 id, name = line.split(',')
4336 name_id_mapping[name] = id
4337 return name_id_mapping
4338
4339 def _get_ls(self) -> Tuple[List[Dict[str, str]], bool]:
4340 if not self.cached_ls_values:
4341 logger.info('No cached ls output. Running full daemon ls')
4342 ls = list_daemons(self.ctx)
4343 for d in ls:
4344 self.cached_ls_values[d['name']] = d
4345 return (ls, True)
4346 else:
4347 ls_subset = self._daemon_ls_subset()
4348 need_full_ls = False
4349 state_change = False
4350 if set(self.cached_ls_values.keys()) != set(ls_subset.keys()):
4351 # case for a new daemon in ls or an old daemon no longer appearing.
4352 # If that happens we need a full ls
4353 logger.info('Change detected in state of daemons. Running full daemon ls')
4354 ls = list_daemons(self.ctx)
4355 for d in ls:
4356 self.cached_ls_values[d['name']] = d
4357 return (ls, True)
4358 for daemon, info in self.cached_ls_values.items():
4359 if info['style'] == 'legacy':
4360 # for legacy containers, ls_subset just grabs all the info
4361 self.cached_ls_values[daemon] = ls_subset[daemon]
4362 else:
4363 if info['container_id'] != ls_subset[daemon]['container_id']:
4364 # case for container id having changed. We need full ls as
4365 # info we didn't grab like version and start time could have changed
4366 need_full_ls = True
4367 break
4368
4369 # we want to know if a daemon's state changed because in those cases we want
4370 # to report back quicker
4371 if (
4372 self.cached_ls_values[daemon]['enabled'] != ls_subset[daemon]['enabled']
4373 or self.cached_ls_values[daemon]['state'] != ls_subset[daemon]['state']
4374 ):
4375 state_change = True
4376 # if we reach here, container id matched. Update the few values we do track
4377 # from ls subset: state, enabled, memory_usage.
4378 self.cached_ls_values[daemon]['enabled'] = ls_subset[daemon]['enabled']
4379 self.cached_ls_values[daemon]['state'] = ls_subset[daemon]['state']
4380 if 'memory_usage' in ls_subset[daemon]:
4381 self.cached_ls_values[daemon]['memory_usage'] = ls_subset[daemon]['memory_usage']
4382 if need_full_ls:
4383 logger.info('Change detected in state of daemons. Running full daemon ls')
4384 ls = list_daemons(self.ctx)
4385 for d in ls:
4386 self.cached_ls_values[d['name']] = d
4387 return (ls, True)
4388 else:
4389 ls = [info for daemon, info in self.cached_ls_values.items()]
4390 return (ls, state_change)
4391
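# Editor's summary (hedged) of the caching strategy above: _daemon_ls_subset()
# is cheap (systemd state, container ids, memory usage), so the expensive
# list_daemons() pass is repeated only when the daemon set changes or a
# cephadm:v1 daemon's container id changes. Roughly:
def _example_needs_full_ls(cached: 'Dict[str, Dict[str, Any]]',
                           subset: 'Dict[str, Dict[str, Any]]') -> bool:
    if set(cached) != set(subset):
        return True                      # daemon added or removed
    return any(info.get('style') != 'legacy'
               and info.get('container_id') != subset[name].get('container_id')
               for name, info in cached.items())   # container was recreated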
4392
4393class AgentGatherer(Thread):
4394 def __init__(self, agent: 'CephadmAgent', func: Callable, gatherer_type: str = 'Unnamed', initial_ack: int = 0) -> None:
4395 self.agent = agent
4396 self.func = func
4397 self.gatherer_type = gatherer_type
4398 self.ack = initial_ack
4399 self.event = Event()
4400 self.data: Any = None
4401 self.stop = False
4402 self.recent_iteration_run_times: List[float] = [0.0, 0.0, 0.0]
4403 self.recent_iteration_index: int = 0
4404 super(AgentGatherer, self).__init__(target=self.run)
4405
4406 def run(self) -> None:
4407 while not self.stop:
4408 try:
4409 start_time = time.monotonic()
4410
4411 ack = self.agent.ack
4412 change = False
4413 try:
4414 self.data, change = self.func()
4415 except Exception as e:
4416 logger.error(f'{self.gatherer_type} Gatherer encountered exception gathering data: {e}')
4417 self.data = None
4418 if ack != self.ack or change:
4419 self.ack = ack
4420 self.agent.wakeup()
4421
4422 end_time = time.monotonic()
4423 run_time = datetime.timedelta(seconds=(end_time - start_time))
4424 self.recent_iteration_run_times[self.recent_iteration_index] = run_time.total_seconds()
4425 self.recent_iteration_index = (self.recent_iteration_index + 1) % 3
4426 run_time_average = sum(self.recent_iteration_run_times, 0.0) / len([t for t in self.recent_iteration_run_times if t])
4427
4428 self.event.wait(max(self.agent.loop_interval - int(run_time_average), 0))
4429 self.event.clear()
4430 except Exception as e:
4431 logger.error(f'{self.gatherer_type} Gatherer encountered exception: {e}')
4432
4433 def shutdown(self) -> None:
4434 self.stop = True
4435
4436 def wakeup(self) -> None:
4437 self.event.set()
4438
4439 def update_func(self, func: Callable) -> None:
4440 self.func = func
4441
4442
4443def command_agent(ctx: CephadmContext) -> None:
4444 agent = CephadmAgent(ctx, ctx.fsid, ctx.daemon_id)
4445
4446 if not os.path.isdir(agent.daemon_dir):
4447 raise Error(f'Agent daemon directory {agent.daemon_dir} does not exist. Perhaps agent was never deployed?')
4448
4449 agent.run()
4450
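# Editor's illustration (hedged): the agent and gatherer loops above subtract a
# rolling average of the last three iteration times from loop_interval, so a
# slow iteration does not stretch the effective reporting period. For example,
# with loop_interval=30 and recent run times of 2, 4 and 3 seconds the next
# event.wait() is 27 seconds:
def _example_sleep_for(loop_interval: int, recent: 'List[float]') -> int:
    nonzero = [t for t in recent if t] or [0.0]
    avg = sum(recent, 0.0) / len(nonzero)
    return max(loop_interval - int(avg), 0)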
4451
9f95a23c
TL
4452##################################
4453
f6b5b4d7 4454
9f95a23c 4455@infer_image
f67539c2
TL
4456def command_version(ctx):
4457 # type: (CephadmContext) -> int
4458 c = CephContainer(ctx, ctx.image, 'ceph', ['--version'])
4459 out, err, ret = call(ctx, c.run_cmd(), desc=c.entrypoint)
4460 if not ret:
4461 print(out.strip())
4462 return ret
9f95a23c
TL
4463
4464##################################
4465
f6b5b4d7 4466
33c7a0ef 4467@default_image
f67539c2
TL
4468def command_pull(ctx):
4469 # type: (CephadmContext) -> int
f6b5b4d7 4470
33c7a0ef
TL
4471 try:
4472 _pull_image(ctx, ctx.image, ctx.insecure)
4473 except UnauthorizedRegistryError:
4474 err_str = 'Failed to pull container image. Check that host(s) are logged into the registry'
4475 logger.debug(f'Pulling image for `command_pull` failed: {err_str}')
4476 raise Error(err_str)
f67539c2 4477 return command_inspect_image(ctx)
9f95a23c 4478
f6b5b4d7 4479
a4b75251
TL
4480def _pull_image(ctx, image, insecure=False):
4481 # type: (CephadmContext, str, bool) -> None
f6b5b4d7
TL
4482 logger.info('Pulling container image %s...' % image)
4483
4484 ignorelist = [
f67539c2
TL
4485 'error creating read-write layer with ID',
4486 'net/http: TLS handshake timeout',
4487 'Digest did not match, expected',
f6b5b4d7
TL
4488 ]
4489
f67539c2 4490 cmd = [ctx.container_engine.path, 'pull', image]
a4b75251
TL
4491 if isinstance(ctx.container_engine, Podman):
4492 if insecure:
4493 cmd.append('--tls-verify=false')
4494
4495 if os.path.exists('/etc/ceph/podman-auth.json'):
4496 cmd.append('--authfile=/etc/ceph/podman-auth.json')
f6b5b4d7
TL
4497 cmd_str = ' '.join(cmd)
4498
4499 for sleep_secs in [1, 4, 25]:
f67539c2 4500 out, err, ret = call(ctx, cmd)
f6b5b4d7
TL
4501 if not ret:
4502 return
4503
33c7a0ef
TL
4504 if 'unauthorized' in err:
4505 raise UnauthorizedRegistryError()
4506
f6b5b4d7 4507 if not any(pattern in err for pattern in ignorelist):
a4b75251 4508 raise Error('Failed command: %s' % cmd_str)
f6b5b4d7 4509
f67539c2 4510 logger.info('`%s` failed transiently. Retrying. Waiting %s seconds...' % (cmd_str, sleep_secs))
f6b5b4d7
TL
4511 time.sleep(sleep_secs)
4512
a4b75251 4513 raise Error('Failed command: %s: maximum retries reached' % cmd_str)
f67539c2 4514
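# Editor's note (hedged): the loop above retries only errors matching the
# ignorelist (and raises immediately on 'unauthorized' or anything else),
# sleeping 1, 4 and then 25 seconds between attempts. A generic equivalent of
# the retry shape:
def _example_retry_pull(attempt: 'Callable[[], bool]') -> None:
    import time
    for sleep_secs in [1, 4, 25]:
        if attempt():
            return
        time.sleep(sleep_secs)
    raise RuntimeError('maximum retries reached')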
9f95a23c
TL
4515##################################
4516
f6b5b4d7 4517
9f95a23c 4518@infer_image
f67539c2
TL
4519def command_inspect_image(ctx):
4520 # type: (CephadmContext) -> int
4521 out, err, ret = call_throws(ctx, [
4522 ctx.container_engine.path, 'inspect',
cd265ab1 4523 '--format', '{{.ID}},{{.RepoDigests}}',
f67539c2 4524 ctx.image])
9f95a23c
TL
4525 if ret:
4526 return errno.ENOENT
f67539c2 4527 info_from = get_image_info_from_inspect(out.strip(), ctx.image)
f91f0fd5 4528
f67539c2 4529 ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
f91f0fd5
TL
4530 info_from['ceph_version'] = ver
4531
4532 print(json.dumps(info_from, indent=4, sort_keys=True))
4533 return 0
4534
4535
522d829b 4536def normalize_image_digest(digest: str) -> str:
20effc67
TL
4537 """
4538 Normal case:
4539 >>> normalize_image_digest('ceph/ceph')
4540 'docker.io/ceph/ceph'
4541
4542 No change:
4543 >>> normalize_image_digest('quay.ceph.io/ceph/ceph')
4544 'quay.ceph.io/ceph/ceph'
4545
4546 >>> normalize_image_digest('docker.io/ubuntu')
4547 'docker.io/ubuntu'
4548
4549 >>> normalize_image_digest('localhost/ceph')
4550 'localhost/ceph'
4551 """
4552 known_shortnames = [
4553 'ceph/ceph',
4554 'ceph/daemon',
4555 'ceph/daemon-base',
4556 ]
4557 for image in known_shortnames:
4558 if digest.startswith(image):
4559 return f'{DEFAULT_REGISTRY}/{digest}'
f67539c2
TL
4560 return digest
4561
4562
f91f0fd5 4563def get_image_info_from_inspect(out, image):
f67539c2 4564 # type: (str, str) -> Dict[str, Union[str,List[str]]]
f91f0fd5
TL
4565 image_id, digests = out.split(',', 1)
4566 if not out:
4567 raise Error('inspect {}: empty result'.format(image))
9f95a23c 4568 r = {
f91f0fd5 4569 'image_id': normalize_container_id(image_id)
f67539c2 4570 } # type: Dict[str, Union[str,List[str]]]
f91f0fd5 4571 if digests:
20effc67 4572 r['repo_digests'] = list(map(normalize_image_digest, digests[1: -1].split(' ')))
f91f0fd5
TL
4573 return r
4574
9f95a23c
TL
4575##################################
4576
f91f0fd5 4577
f67539c2
TL
4578def check_subnet(subnets: str) -> Tuple[int, List[int], str]:
4579 """Determine whether the given string is a valid subnet
4580
4581 :param subnets: subnet string, a single definition or comma separated list of CIDR subnets
4582 :returns: return code, IP version list of the subnets and a msg describing any validation errors
4583 """
4584
4585 rc = 0
4586 versions = set()
4587 errors = []
4588 subnet_list = subnets.split(',')
4589 for subnet in subnet_list:
4590 # ensure the format of the string is as expected address/netmask
33c7a0ef 4591 subnet = subnet.strip()
f67539c2
TL
4592 if not re.search(r'\/\d+$', subnet):
4593 rc = 1
4594 errors.append(f'{subnet} is not in CIDR format (address/netmask)')
4595 continue
4596 try:
4597 v = ipaddress.ip_network(subnet).version
4598 versions.add(v)
4599 except ValueError as e:
4600 rc = 1
4601 errors.append(f'{subnet} invalid: {str(e)}')
4602
4603 return rc, list(versions), ', '.join(errors)
4604
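# Editor's examples (hedged), in the doctest style used elsewhere in this file;
# exact error wording may differ:
#
#     >>> check_subnet('10.90.90.0/24')
#     (0, [4], '')
#     >>> check_subnet('10.90.90.0')[0]
#     1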
4605
f6b5b4d7
TL
4606def unwrap_ipv6(address):
4607 # type: (str) -> str
4608 if address.startswith('[') and address.endswith(']'):
20effc67 4609 return address[1: -1]
f6b5b4d7
TL
4610 return address
4611
4612
f91f0fd5
TL
4613def wrap_ipv6(address):
4614 # type: (str) -> str
4615
4616 # We cannot assume the address is already wrapped or even an IPv6 address;
4617 # if it is already wrapped (or is a hostname) ip_address() will not parse it
4618 # and will trigger the ValueError
4619 try:
f67539c2
TL
4620 if ipaddress.ip_address(address).version == 6:
4621 return f'[{address}]'
f91f0fd5
TL
4622 except ValueError:
4623 pass
4624
4625 return address
4626
4627
f6b5b4d7
TL
4628def is_ipv6(address):
4629 # type: (str) -> bool
4630 address = unwrap_ipv6(address)
4631 try:
f67539c2 4632 return ipaddress.ip_address(address).version == 6
f6b5b4d7 4633 except ValueError:
f67539c2 4634 logger.warning('Address: {} is not a valid IP address'.format(address))
f6b5b4d7
TL
4635 return False
4636
4637
33c7a0ef
TL
4638def ip_in_subnets(ip_addr: str, subnets: str) -> bool:
4639 """Determine if the ip_addr belongs to any of the subnets list."""
4640 subnet_list = [x.strip() for x in subnets.split(',')]
4641 for subnet in subnet_list:
4642 ip_address = unwrap_ipv6(ip_addr) if is_ipv6(ip_addr) else ip_addr
4643 if ipaddress.ip_address(ip_address) in ipaddress.ip_network(subnet):
4644 return True
4645 return False
4646
4647
4648def parse_mon_addrv(addrv_arg: str) -> List[EndPoint]:
4649 """Parse mon-addrv param into a list of mon end points."""
9f95a23c 4650 r = re.compile(r':(\d+)$')
33c7a0ef
TL
4651 addrv_args = []
4652 addr_arg = addrv_arg
4653 if addr_arg[0] != '[' or addr_arg[-1] != ']':
4654 raise Error(f'--mon-addrv value {addr_arg} must use square brackets')
4655
4656 for addr in addr_arg[1: -1].split(','):
4657 hasport = r.findall(addr)
4658 if not hasport:
4659 raise Error(f'--mon-addrv value {addr_arg} must include port number')
4660 port_str = hasport[0]
4661 addr = re.sub(r'^v\d+:', '', addr) # strip off v1: or v2: prefix
4662 base_ip = addr[0:-(len(port_str)) - 1]
4663 addrv_args.append(EndPoint(base_ip, int(port_str)))
4664
4665 return addrv_args
4666
4667
4668def parse_mon_ip(mon_ip: str) -> List[EndPoint]:
4669 """Parse mon-ip param into a list of mon end points."""
4670 r = re.compile(r':(\d+)$')
4671 addrv_args = []
4672 hasport = r.findall(mon_ip)
4673 if hasport:
4674 port_str = hasport[0]
4675 base_ip = mon_ip[0:-(len(port_str)) - 1]
4676 addrv_args.append(EndPoint(base_ip, int(port_str)))
4677 else:
4678 # No port provided: use fixed ports for ceph monitor
4679 addrv_args.append(EndPoint(mon_ip, 3300))
4680 addrv_args.append(EndPoint(mon_ip, 6789))
4681
4682 return addrv_args
4683
4684
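# Illustrative sketch of parse_mon_ip() (comments only, not executed; example
# addresses). When no port is given, both fixed mon ports are assumed
# (3300 for msgr2, 6789 for msgr1):
#
#   >>> [(ep.ip, ep.port) for ep in parse_mon_ip('10.1.2.3')]
#   [('10.1.2.3', 3300), ('10.1.2.3', 6789)]
#   >>> [(ep.ip, ep.port) for ep in parse_mon_ip('[2001:db8::1]:6789')]
#   [('[2001:db8::1]', 6789)]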
4685def build_addrv_params(addrv: List[EndPoint]) -> str:
4686    """Convert mon end-points (ip:port) into the format: [v[1|2]:ip:port,...]"""
4687 if len(addrv) > 2:
4688 raise Error('Detected a local mon-addrv list with more than 2 entries.')
4689 port_to_ver: Dict[int, str] = {6789: 'v1', 3300: 'v2'}
4690 addr_arg_list: List[str] = []
4691 for ep in addrv:
4692 if ep.port in port_to_ver:
4693 ver = port_to_ver[ep.port]
4694 else:
4695 ver = 'v2' # default mon protocol version if port is not provided
4696 logger.warning(f'Using msgr2 protocol for unrecognized port {ep}')
4697 addr_arg_list.append(f'{ver}:{ep.ip}:{ep.port}')
4698
4699 addr_arg = '[{0}]'.format(','.join(addr_arg_list))
4700 return addr_arg
4701
4702
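# Illustrative round-trip for build_addrv_params() (comments only, not
# executed; example address). The two fixed mon ports map back to their
# protocol versions:
#
#   >>> build_addrv_params([EndPoint('10.1.2.3', 3300), EndPoint('10.1.2.3', 6789)])
#   '[v2:10.1.2.3:3300,v1:10.1.2.3:6789]'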
4703def get_public_net_from_cfg(ctx: CephadmContext) -> Optional[str]:
4704 """Get mon public network from configuration file."""
4705 cp = read_config(ctx.config)
4706 if not cp.has_option('global', 'public_network'):
4707 return None
4708
4709 # Ensure all public CIDR networks are valid
4710 public_network = cp.get('global', 'public_network')
4711 rc, _, err_msg = check_subnet(public_network)
4712 if rc:
4713 raise Error(f'Invalid public_network {public_network} parameter: {err_msg}')
4714
4715 # Ensure all public CIDR networks are configured locally
4716 configured_subnets = set([x.strip() for x in public_network.split(',')])
4717 local_subnets = set([x[0] for x in list_networks(ctx).items()])
4718 valid_public_net = False
4719 for net in configured_subnets:
4720 if net in local_subnets:
4721 valid_public_net = True
4722 else:
4723 logger.warning(f'The public CIDR network {net} (from -c conf file) is not configured locally.')
4724 if not valid_public_net:
4725 raise Error(f'None of the public CIDR network(s) {configured_subnets} (from -c conf file) is configured locally.')
4726
4727 # Ensure public_network is compatible with the provided mon-ip (or mon-addrv)
4728 if ctx.mon_ip:
4729 if not ip_in_subnets(ctx.mon_ip, public_network):
4730 raise Error(f'The provided --mon-ip {ctx.mon_ip} does not belong to any public_network(s) {public_network}')
4731 elif ctx.mon_addrv:
4732 addrv_args = parse_mon_addrv(ctx.mon_addrv)
4733 for addrv in addrv_args:
4734 if not ip_in_subnets(addrv.ip, public_network):
4735 raise Error(f'The provided --mon-addrv {addrv.ip} ip does not belong to any public_network(s) {public_network}')
4736
4737 logger.debug(f'Using mon public network from configuration file {public_network}')
4738 return public_network
4739
4740
4741def infer_mon_network(ctx: CephadmContext, mon_eps: List[EndPoint]) -> Optional[str]:
4742 """Infer mon public network from local network."""
4743 # Make sure IP is configured locally, and then figure out the CIDR network
4744 mon_networks = []
4745 for net, ifaces in list_networks(ctx).items():
4746 # build local_ips list for the specified network
4747 local_ips: List[str] = []
4748 for _, ls in ifaces.items():
4749 local_ips.extend([ipaddress.ip_address(ip) for ip in ls])
4750
4751 # check if any of mon ips belong to this net
4752 for mon_ep in mon_eps:
4753 try:
4754 if ipaddress.ip_address(unwrap_ipv6(mon_ep.ip)) in local_ips:
4755 mon_networks.append(net)
4756 logger.info(f'Mon IP `{mon_ep.ip}` is in CIDR network `{net}`')
4757 except ValueError as e:
4758 logger.warning(f'Cannot infer CIDR network for mon IP `{mon_ep.ip}` : {e}')
4759
4760 if not mon_networks:
4761 raise Error('Cannot infer CIDR network. Pass --skip-mon-network to configure it later')
4762 else:
4763 logger.debug(f'Inferred mon public CIDR from local network configuration {mon_networks}')
4764
4765 mon_networks = list(set(mon_networks)) # remove duplicates
4766 return ','.join(mon_networks)
4767
4768
4769def prepare_mon_addresses(ctx: CephadmContext) -> Tuple[str, bool, Optional[str]]:
4770 """Get mon public network configuration."""
f67539c2 4771 ipv6 = False
33c7a0ef
TL
4772 addrv_args: List[EndPoint] = []
4773 mon_addrv: str = '' # i.e: [v2:192.168.100.1:3300,v1:192.168.100.1:6789]
f67539c2
TL
4774
4775 if ctx.mon_ip:
4776 ipv6 = is_ipv6(ctx.mon_ip)
f91f0fd5 4777 if ipv6:
f67539c2 4778 ctx.mon_ip = wrap_ipv6(ctx.mon_ip)
33c7a0ef
TL
4779 addrv_args = parse_mon_ip(ctx.mon_ip)
4780 mon_addrv = build_addrv_params(addrv_args)
f67539c2 4781 elif ctx.mon_addrv:
33c7a0ef
TL
4782 ipv6 = ctx.mon_addrv.count('[') > 1
4783 addrv_args = parse_mon_addrv(ctx.mon_addrv)
4784 mon_addrv = ctx.mon_addrv
9f95a23c
TL
4785 else:
4786 raise Error('must specify --mon-ip or --mon-addrv')
9f95a23c 4787
33c7a0ef
TL
4788 if addrv_args:
4789 for end_point in addrv_args:
4790 check_ip_port(ctx, end_point)
4791
4792 logger.debug(f'Base mon IP(s) is {addrv_args}, mon addrv is {mon_addrv}')
9f95a23c 4793 mon_network = None
f67539c2 4794 if not ctx.skip_mon_network:
33c7a0ef 4795 mon_network = get_public_net_from_cfg(ctx) or infer_mon_network(ctx, addrv_args)
9f95a23c 4796
33c7a0ef 4797 return (mon_addrv, ipv6, mon_network)
9f95a23c 4798
f6b5b4d7 4799
f67539c2 4800def prepare_cluster_network(ctx: CephadmContext) -> Tuple[str, bool]:
f67539c2
TL
4801 # the cluster network may not exist on this node, so all we can do is
4802 # validate that the address given is valid ipv4 or ipv6 subnet
33c7a0ef
TL
4803 ipv6_cluster_network = False
4804 cp = read_config(ctx.config)
4805 cluster_network = ctx.cluster_network
4806 if cluster_network is None and cp.has_option('global', 'cluster_network'):
4807 cluster_network = cp.get('global', 'cluster_network')
4808
4809 if cluster_network:
4810        cluster_nets = set([x.strip() for x in cluster_network.split(',')])
4811        local_subnets = set([x[0] for x in list_networks(ctx).items()])
4812        for net in cluster_nets:
4813 if net not in local_subnets:
4814 logger.warning(f'The cluster CIDR network {net} is not configured locally.')
4815
4816 rc, versions, err_msg = check_subnet(cluster_network)
f67539c2
TL
4817 if rc:
4818 raise Error(f'Invalid --cluster-network parameter: {err_msg}')
f67539c2
TL
4819 ipv6_cluster_network = True if 6 in versions else False
4820 else:
33c7a0ef 4821 logger.info('Internal network (--cluster-network) has not '
f67539c2
TL
4822                    'been provided; OSD replication will default to '
4823                    'the public_network')
9f95a23c 4824
f67539c2
TL
4825 return cluster_network, ipv6_cluster_network
4826
4827
4828def create_initial_keys(
4829 ctx: CephadmContext,
4830 uid: int, gid: int,
4831 mgr_id: str
4832) -> Tuple[str, str, str, Any, Any]: # type: ignore
4833
4834 _image = ctx.image
9f95a23c
TL
4835
4836 # create some initial keys
4837 logger.info('Creating initial keys...')
4838 mon_key = CephContainer(
f67539c2
TL
4839 ctx,
4840 image=_image,
9f95a23c
TL
4841 entrypoint='/usr/bin/ceph-authtool',
4842 args=['--gen-print-key'],
4843 ).run().strip()
4844 admin_key = CephContainer(
f67539c2
TL
4845 ctx,
4846 image=_image,
9f95a23c
TL
4847 entrypoint='/usr/bin/ceph-authtool',
4848 args=['--gen-print-key'],
4849 ).run().strip()
4850 mgr_key = CephContainer(
f67539c2
TL
4851 ctx,
4852 image=_image,
9f95a23c
TL
4853 entrypoint='/usr/bin/ceph-authtool',
4854 args=['--gen-print-key'],
4855 ).run().strip()
4856
4857 keyring = ('[mon.]\n'
4858 '\tkey = %s\n'
4859 '\tcaps mon = allow *\n'
4860 '[client.admin]\n'
4861 '\tkey = %s\n'
4862 '\tcaps mon = allow *\n'
4863 '\tcaps mds = allow *\n'
4864 '\tcaps mgr = allow *\n'
4865 '\tcaps osd = allow *\n'
4866 '[mgr.%s]\n'
4867 '\tkey = %s\n'
4868 '\tcaps mon = profile mgr\n'
4869 '\tcaps mds = allow *\n'
4870 '\tcaps osd = allow *\n'
4871 % (mon_key, admin_key, mgr_id, mgr_key))
4872
f67539c2
TL
4873 admin_keyring = write_tmp('[client.admin]\n'
4874 '\tkey = ' + admin_key + '\n',
4875 uid, gid)
4876
9f95a23c 4877 # tmp keyring file
f67539c2
TL
4878 bootstrap_keyring = write_tmp(keyring, uid, gid)
4879 return (mon_key, mgr_key, admin_key,
4880 bootstrap_keyring, admin_keyring)
4881
9f95a23c 4882
f67539c2
TL
4883def create_initial_monmap(
4884 ctx: CephadmContext,
4885 uid: int, gid: int,
4886 fsid: str,
4887 mon_id: str, mon_addr: str
4888) -> Any:
9f95a23c 4889 logger.info('Creating initial monmap...')
f67539c2 4890 monmap = write_tmp('', 0, 0)
9f95a23c 4891 out = CephContainer(
f67539c2
TL
4892 ctx,
4893 image=ctx.image,
9f95a23c 4894 entrypoint='/usr/bin/monmaptool',
f67539c2
TL
4895 args=[
4896 '--create',
4897 '--clobber',
4898 '--fsid', fsid,
4899 '--addv', mon_id, mon_addr,
4900 '/tmp/monmap'
9f95a23c
TL
4901 ],
4902 volume_mounts={
f67539c2 4903 monmap.name: '/tmp/monmap:z',
9f95a23c
TL
4904 },
4905 ).run()
f67539c2 4906 logger.debug(f'monmaptool for {mon_id} {mon_addr} on {out}')
9f95a23c
TL
4907
4908 # pass monmap file to ceph user for use by ceph-mon --mkfs below
f67539c2
TL
4909 os.fchown(monmap.fileno(), uid, gid)
4910 return monmap
9f95a23c 4911
f67539c2
TL
4912
4913def prepare_create_mon(
4914 ctx: CephadmContext,
4915 uid: int, gid: int,
4916 fsid: str, mon_id: str,
4917 bootstrap_keyring_path: str,
4918 monmap_path: str
522d829b 4919) -> Tuple[str, str]:
9f95a23c 4920 logger.info('Creating mon...')
f67539c2
TL
4921 create_daemon_dirs(ctx, fsid, 'mon', mon_id, uid, gid)
4922 mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', mon_id)
4923 log_dir = get_log_dir(fsid, ctx.log_dir)
9f95a23c 4924 out = CephContainer(
f67539c2
TL
4925 ctx,
4926 image=ctx.image,
9f95a23c 4927 entrypoint='/usr/bin/ceph-mon',
f67539c2
TL
4928 args=[
4929 '--mkfs',
4930 '-i', mon_id,
4931 '--fsid', fsid,
4932 '-c', '/dev/null',
4933 '--monmap', '/tmp/monmap',
4934 '--keyring', '/tmp/keyring',
4935 ] + get_daemon_args(ctx, fsid, 'mon', mon_id),
9f95a23c
TL
4936 volume_mounts={
4937 log_dir: '/var/log/ceph:z',
4938 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
f67539c2
TL
4939 bootstrap_keyring_path: '/tmp/keyring:z',
4940 monmap_path: '/tmp/monmap:z',
9f95a23c
TL
4941 },
4942 ).run()
f67539c2
TL
4943 logger.debug(f'create mon.{mon_id} on {out}')
4944 return (mon_dir, log_dir)
4945
4946
4947def create_mon(
4948 ctx: CephadmContext,
4949 uid: int, gid: int,
4950 fsid: str, mon_id: str
4951) -> None:
4952 mon_c = get_container(ctx, fsid, 'mon', mon_id)
4953 ctx.meta_json = json.dumps({'service_name': 'mon'})
4954 deploy_daemon(ctx, fsid, 'mon', mon_id, mon_c, uid, gid,
9f95a23c
TL
4955 config=None, keyring=None)
4956
9f95a23c 4957
f67539c2
TL
4958def wait_for_mon(
4959 ctx: CephadmContext,
4960 mon_id: str, mon_dir: str,
4961 admin_keyring_path: str, config_path: str
522d829b 4962) -> None:
9f95a23c
TL
4963 logger.info('Waiting for mon to start...')
4964 c = CephContainer(
f67539c2
TL
4965 ctx,
4966 image=ctx.image,
9f95a23c
TL
4967 entrypoint='/usr/bin/ceph',
4968 args=[
4969 'status'],
4970 volume_mounts={
4971 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
f67539c2
TL
4972 admin_keyring_path: '/etc/ceph/ceph.client.admin.keyring:z',
4973 config_path: '/etc/ceph/ceph.conf:z',
9f95a23c
TL
4974 },
4975 )
4976
4977 # wait for the service to become available
4978 def is_mon_available():
4979 # type: () -> bool
f67539c2
TL
4980 timeout = ctx.timeout if ctx.timeout else 60 # seconds
4981 out, err, ret = call(ctx, c.run_cmd(),
9f95a23c
TL
4982 desc=c.entrypoint,
4983 timeout=timeout)
4984 return ret == 0
9f95a23c 4985
f67539c2
TL
4986 is_available(ctx, 'mon', is_mon_available)
4987
4988
4989def create_mgr(
4990 ctx: CephadmContext,
4991 uid: int, gid: int,
4992 fsid: str, mgr_id: str, mgr_key: str,
4993 config: str, clifunc: Callable
4994) -> None:
4995 logger.info('Creating mgr...')
4996 mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key)
4997 mgr_c = get_container(ctx, fsid, 'mgr', mgr_id)
4998    # Note: the default port (9283) of the mgr Prometheus exporter module is opened in the firewall
4999 ctx.meta_json = json.dumps({'service_name': 'mgr'})
5000 deploy_daemon(ctx, fsid, 'mgr', mgr_id, mgr_c, uid, gid,
5001 config=config, keyring=mgr_keyring, ports=[9283])
5002
5003 # wait for the service to become available
5004 logger.info('Waiting for mgr to start...')
5005
5006 def is_mgr_available():
5007 # type: () -> bool
5008 timeout = ctx.timeout if ctx.timeout else 60 # seconds
5009 try:
5010 out = clifunc(['status', '-f', 'json-pretty'], timeout=timeout)
5011 j = json.loads(out)
5012 return j.get('mgrmap', {}).get('available', False)
5013 except Exception as e:
5014 logger.debug('status failed: %s' % e)
5015 return False
5016 is_available(ctx, 'mgr', is_mgr_available)
5017
5018
5019def prepare_ssh(
5020 ctx: CephadmContext,
5021 cli: Callable, wait_for_mgr_restart: Callable
5022) -> None:
5023
5024 cli(['cephadm', 'set-user', ctx.ssh_user])
5025
5026 if ctx.ssh_config:
5027 logger.info('Using provided ssh config...')
5028 mounts = {
5029 pathify(ctx.ssh_config.name): '/tmp/cephadm-ssh-config:z',
5030 }
5031 cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts=mounts)
5032
5033 if ctx.ssh_private_key and ctx.ssh_public_key:
5034 logger.info('Using provided ssh keys...')
5035 mounts = {
5036 pathify(ctx.ssh_private_key.name): '/tmp/cephadm-ssh-key:z',
5037 pathify(ctx.ssh_public_key.name): '/tmp/cephadm-ssh-key.pub:z'
5038 }
5039 cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts)
5040 cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts=mounts)
33c7a0ef 5041 ssh_pub = cli(['cephadm', 'get-pub-key'])
f67539c2
TL
5042 else:
5043 logger.info('Generating ssh key...')
5044 cli(['cephadm', 'generate-key'])
5045 ssh_pub = cli(['cephadm', 'get-pub-key'])
f67539c2
TL
5046 with open(ctx.output_pub_ssh_key, 'w') as f:
5047 f.write(ssh_pub)
5048 logger.info('Wrote public SSH key to %s' % ctx.output_pub_ssh_key)
5049
33c7a0ef 5050 authorize_ssh_key(ssh_pub, ctx.ssh_user)
f67539c2
TL
5051
5052 host = get_hostname()
5053 logger.info('Adding host %s...' % host)
5054 try:
5055 args = ['orch', 'host', 'add', host]
5056 if ctx.mon_ip:
522d829b 5057 args.append(unwrap_ipv6(ctx.mon_ip))
33c7a0ef
TL
5058 elif ctx.mon_addrv:
5059 addrv_args = parse_mon_addrv(ctx.mon_addrv)
5060 args.append(unwrap_ipv6(addrv_args[0].ip))
f67539c2
TL
5061 cli(args)
5062 except RuntimeError as e:
5063 raise Error('Failed to add host <%s>: %s' % (host, e))
5064
5065 for t in ['mon', 'mgr']:
5066 if not ctx.orphan_initial_daemons:
5067 logger.info('Deploying %s service with default placement...' % t)
5068 cli(['orch', 'apply', t])
5069 else:
5070 logger.info('Deploying unmanaged %s service...' % t)
5071 cli(['orch', 'apply', t, '--unmanaged'])
5072
5073 if not ctx.orphan_initial_daemons:
5074 logger.info('Deploying crash service with default placement...')
5075 cli(['orch', 'apply', 'crash'])
5076
5077 if not ctx.skip_monitoring_stack:
f67539c2
TL
5078 for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
5079 logger.info('Deploying %s service with default placement...' % t)
5080 cli(['orch', 'apply', t])
5081
5082
5083def enable_cephadm_mgr_module(
5084 cli: Callable, wait_for_mgr_restart: Callable
5085) -> None:
5086
5087 logger.info('Enabling cephadm module...')
5088 cli(['mgr', 'module', 'enable', 'cephadm'])
5089 wait_for_mgr_restart()
5090 logger.info('Setting orchestrator backend to cephadm...')
5091 cli(['orch', 'set', 'backend', 'cephadm'])
5092
5093
5094def prepare_dashboard(
5095 ctx: CephadmContext,
5096 uid: int, gid: int,
5097 cli: Callable, wait_for_mgr_restart: Callable
5098) -> None:
5099
5100    # Configure SSL port (cephadm only allows configuring the dashboard SSL port);
5101    # if the user does not want to use SSL, this setting can be changed once the cluster is up
5102 cli(['config', 'set', 'mgr', 'mgr/dashboard/ssl_server_port', str(ctx.ssl_dashboard_port)])
5103
5104 # configuring dashboard parameters
5105 logger.info('Enabling the dashboard module...')
5106 cli(['mgr', 'module', 'enable', 'dashboard'])
5107 wait_for_mgr_restart()
5108
5109 # dashboard crt and key
5110 if ctx.dashboard_key and ctx.dashboard_crt:
5111 logger.info('Using provided dashboard certificate...')
5112 mounts = {
5113 pathify(ctx.dashboard_crt.name): '/tmp/dashboard.crt:z',
5114 pathify(ctx.dashboard_key.name): '/tmp/dashboard.key:z'
5115 }
5116 cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts)
5117 cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts)
5118 else:
5119 logger.info('Generating a dashboard self-signed certificate...')
5120 cli(['dashboard', 'create-self-signed-cert'])
5121
5122 logger.info('Creating initial admin user...')
5123 password = ctx.initial_dashboard_password or generate_password()
5124 tmp_password_file = write_tmp(password, uid, gid)
5125 cmd = ['dashboard', 'ac-user-create', ctx.initial_dashboard_user, '-i', '/tmp/dashboard.pw', 'administrator', '--force-password']
5126 if not ctx.dashboard_password_noupdate:
5127 cmd.append('--pwd-update-required')
5128 cli(cmd, extra_mounts={pathify(tmp_password_file.name): '/tmp/dashboard.pw:z'})
5129 logger.info('Fetching dashboard port number...')
5130 out = cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
5131 port = int(out)
5132
5133 # Open dashboard port
33c7a0ef
TL
5134 if not ('skip_firewalld' in ctx and ctx.skip_firewalld):
5135 fw = Firewalld(ctx)
5136 fw.open_ports([port])
5137 fw.apply_rules()
f67539c2
TL
5138
5139 logger.info('Ceph Dashboard is now available at:\n\n'
5140 '\t URL: https://%s:%s/\n'
5141 '\t User: %s\n'
5142 '\tPassword: %s\n' % (
5143 get_fqdn(), port,
5144 ctx.initial_dashboard_user,
5145 password))
5146
5147
5148def prepare_bootstrap_config(
5149 ctx: CephadmContext,
5150 fsid: str, mon_addr: str, image: str
5151
5152) -> str:
5153
5154 cp = read_config(ctx.config)
5155 if not cp.has_section('global'):
5156 cp.add_section('global')
5157 cp.set('global', 'fsid', fsid)
5158 cp.set('global', 'mon_host', mon_addr)
5159 cp.set('global', 'container_image', image)
b3b6e05e 5160
f67539c2
TL
5161 if not cp.has_section('mon'):
5162 cp.add_section('mon')
5163 if (
5164 not cp.has_option('mon', 'auth_allow_insecure_global_id_reclaim')
5165 and not cp.has_option('mon', 'auth allow insecure global id reclaim')
5166 ):
5167 cp.set('mon', 'auth_allow_insecure_global_id_reclaim', 'false')
b3b6e05e
TL
5168
5169 if ctx.single_host_defaults:
5170 logger.info('Adjusting default settings to suit single-host cluster...')
5171 # replicate across osds, not hosts
5172 if (
a4b75251
TL
5173 not cp.has_option('global', 'osd_crush_chooseleaf_type')
5174 and not cp.has_option('global', 'osd crush chooseleaf type')
b3b6e05e 5175 ):
a4b75251 5176 cp.set('global', 'osd_crush_chooseleaf_type', '0')
b3b6e05e
TL
5177 # replica 2x
5178 if (
5179 not cp.has_option('global', 'osd_pool_default_size')
5180 and not cp.has_option('global', 'osd pool default size')
5181 ):
5182 cp.set('global', 'osd_pool_default_size', '2')
5183 # disable mgr standby modules (so we can colocate multiple mgrs on one host)
5184 if not cp.has_section('mgr'):
5185 cp.add_section('mgr')
5186 if (
5187 not cp.has_option('mgr', 'mgr_standby_modules')
5188 and not cp.has_option('mgr', 'mgr standby modules')
5189 ):
5190 cp.set('mgr', 'mgr_standby_modules', 'false')
522d829b
TL
5191 if ctx.log_to_file:
5192 cp.set('global', 'log_to_file', 'true')
5193 cp.set('global', 'log_to_stderr', 'false')
5194 cp.set('global', 'log_to_journald', 'false')
5195 cp.set('global', 'mon_cluster_log_to_file', 'true')
5196 cp.set('global', 'mon_cluster_log_to_stderr', 'false')
5197 cp.set('global', 'mon_cluster_log_to_journald', 'false')
b3b6e05e 5198
f67539c2
TL
5199 cpf = StringIO()
5200 cp.write(cpf)
5201 config = cpf.getvalue()
5202
5203 if ctx.registry_json or ctx.registry_url:
5204 command_registry_login(ctx)
5205
5206 return config
5207
5208
5209def finish_bootstrap_config(
5210 ctx: CephadmContext,
5211 fsid: str,
5212 config: str,
5213 mon_id: str, mon_dir: str,
5214 mon_network: Optional[str], ipv6: bool,
5215 cli: Callable,
5216 cluster_network: Optional[str], ipv6_cluster_network: bool
5217
5218) -> None:
5219 if not ctx.no_minimize_config:
9f95a23c
TL
5220 logger.info('Assimilating anything we can from ceph.conf...')
5221 cli([
5222 'config', 'assimilate-conf',
5223 '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
5224 ], {
5225 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
5226 })
5227 logger.info('Generating new minimal ceph.conf...')
5228 cli([
5229 'config', 'generate-minimal-conf',
5230 '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
5231 ], {
5232 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
5233 })
5234 # re-read our minimized config
5235 with open(mon_dir + '/config', 'r') as f:
5236 config = f.read()
5237 logger.info('Restarting the monitor...')
f67539c2 5238 call_throws(ctx, [
9f95a23c
TL
5239 'systemctl',
5240 'restart',
5241 get_unit_name(fsid, 'mon', mon_id)
5242 ])
33c7a0ef
TL
5243 elif 'image' in ctx and ctx.image:
5244 # we still want to assimilate the given container image if provided
5245 cli(['config', 'set', 'global', 'container_image', f'{ctx.image}'])
9f95a23c
TL
5246
5247 if mon_network:
f67539c2 5248 logger.info(f'Setting mon public_network to {mon_network}')
9f95a23c
TL
5249 cli(['config', 'set', 'mon', 'public_network', mon_network])
5250
f67539c2
TL
5251 if cluster_network:
5252 logger.info(f'Setting cluster_network to {cluster_network}')
5253 cli(['config', 'set', 'global', 'cluster_network', cluster_network])
5254
5255 if ipv6 or ipv6_cluster_network:
5256 logger.info('Enabling IPv6 (ms_bind_ipv6) binding')
f6b5b4d7
TL
5257 cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])
5258
f67539c2
TL
5259 with open(ctx.output_config, 'w') as f:
5260 f.write(config)
5261 logger.info('Wrote config to %s' % ctx.output_config)
5262 pass
5263
5264
a4b75251
TL
5265# funcs to process spec file for apply spec
5266def _parse_yaml_docs(f: Iterable[str]) -> List[List[str]]:
5267 docs = []
5268 current_doc = [] # type: List[str]
5269 for line in f:
33c7a0ef 5270 if re.search(r'^---\s+', line):
a4b75251
TL
5271 if current_doc:
5272 docs.append(current_doc)
5273 current_doc = []
5274 else:
5275 current_doc.append(line.rstrip())
5276 if current_doc:
5277 docs.append(current_doc)
5278 return docs
5279
5280
5281def _parse_yaml_obj(doc: List[str]) -> Dict[str, str]:
5282 # note: this only parses the first layer of yaml
5283 obj = {} # type: Dict[str, str]
5284 current_key = ''
5285 for line in doc:
5286 if line.startswith(' '):
5287 obj[current_key] += line.strip()
5288 elif line.endswith(':'):
5289 current_key = line.strip(':')
5290 obj[current_key] = ''
5291 else:
5292            current_key, val = line.split(':', 1)  # split on the first colon only; values may contain colons
5293 obj[current_key] = val.strip()
5294 return obj
5295
5296
5297def parse_yaml_objs(f: Iterable[str]) -> List[Dict[str, str]]:
5298 objs = []
5299 for d in _parse_yaml_docs(f):
5300 objs.append(_parse_yaml_obj(d))
5301 return objs
5302
5303
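# Illustrative usage of the minimal YAML parsing above (comments only, not
# executed; the hostname and address are example values). Only the first
# layer of keys is recovered, which is enough for host specs passed via
# --apply-spec:
#
#   >>> import io
#   >>> spec = io.StringIO('---\nservice_type: host\nhostname: node1\naddr: 10.1.2.3\n')
#   >>> parse_yaml_objs(spec)
#   [{'service_type': 'host', 'hostname': 'node1', 'addr': '10.1.2.3'}]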
5304def _distribute_ssh_keys(ctx: CephadmContext, host_spec: Dict[str, str], bootstrap_hostname: str) -> int:
5305 # copy ssh key to hosts in host spec (used for apply spec)
33c7a0ef 5306 ssh_key = CEPH_DEFAULT_PUBKEY
a4b75251
TL
5307 if ctx.ssh_public_key:
5308 ssh_key = ctx.ssh_public_key.name
5309
5310 if bootstrap_hostname != host_spec['hostname']:
5311 if 'addr' in host_spec:
5312 addr = host_spec['addr']
5313 else:
5314 addr = host_spec['hostname']
5315 out, err, code = call(ctx, ['sudo', '-u', ctx.ssh_user, 'ssh-copy-id', '-f', '-i', ssh_key, '-o StrictHostKeyChecking=no', '%s@%s' % (ctx.ssh_user, addr)])
5316 if code:
5317 logger.info('\nCopying ssh key to host %s at address %s failed!\n' % (host_spec['hostname'], addr))
5318 return 1
5319 else:
5320 logger.info('Added ssh key to host %s at address %s\n' % (host_spec['hostname'], addr))
5321 return 0
5322
5323
33c7a0ef
TL
5324def save_cluster_config(ctx: CephadmContext, uid: int, gid: int, fsid: str) -> None:
5325    """Save the cluster configuration to the per-fsid directory."""
5326 def copy_file(src: str, dst: str) -> None:
5327 if src:
5328 shutil.copyfile(src, dst)
5329
5330 conf_dir = f'{ctx.data_dir}/{fsid}/{CEPH_CONF_DIR}'
5331 makedirs(conf_dir, uid, gid, DATA_DIR_MODE)
5332 if os.path.exists(conf_dir):
5333 logger.info(f'Saving cluster configuration to {conf_dir} directory')
5334 copy_file(ctx.output_config, os.path.join(conf_dir, CEPH_CONF))
5335 copy_file(ctx.output_keyring, os.path.join(conf_dir, CEPH_KEYRING))
5336 # ctx.output_pub_ssh_key may not exist if user has provided custom ssh keys
5337 if (os.path.exists(ctx.output_pub_ssh_key)):
5338 copy_file(ctx.output_pub_ssh_key, os.path.join(conf_dir, CEPH_PUBKEY))
5339 else:
5340 logger.warning(f'Cannot create cluster configuration directory {conf_dir}')
5341
5342
f67539c2
TL
5343@default_image
5344def command_bootstrap(ctx):
5345 # type: (CephadmContext) -> int
5346
5347 if not ctx.output_config:
33c7a0ef 5348 ctx.output_config = os.path.join(ctx.output_dir, CEPH_CONF)
f67539c2 5349 if not ctx.output_keyring:
33c7a0ef 5350 ctx.output_keyring = os.path.join(ctx.output_dir, CEPH_KEYRING)
f67539c2 5351 if not ctx.output_pub_ssh_key:
33c7a0ef
TL
5352 ctx.output_pub_ssh_key = os.path.join(ctx.output_dir, CEPH_PUBKEY)
5353
5354 if bool(ctx.ssh_private_key) is not bool(ctx.ssh_public_key):
5355 raise Error('--ssh-private-key and --ssh-public-key must be provided together or not at all.')
5356
5357 if ctx.fsid:
5358 data_dir_base = os.path.join(ctx.data_dir, ctx.fsid)
5359 if os.path.exists(data_dir_base):
5360 raise Error(f"A cluster with the same fsid '{ctx.fsid}' already exists.")
5361 else:
5362 logger.warning('Specifying an fsid for your cluster offers no advantages and may increase the likelihood of fsid conflicts.')
f67539c2
TL
5363
5364 # verify output files
5365 for f in [ctx.output_config, ctx.output_keyring,
5366 ctx.output_pub_ssh_key]:
5367 if not ctx.allow_overwrite:
5368 if os.path.exists(f):
5369 raise Error('%s already exists; delete or pass '
5370 '--allow-overwrite to overwrite' % f)
5371 dirname = os.path.dirname(f)
5372 if dirname and not os.path.exists(dirname):
5373 fname = os.path.basename(f)
5374 logger.info(f'Creating directory {dirname} for {fname}')
5375 try:
5376 # use makedirs to create intermediate missing dirs
5377 os.makedirs(dirname, 0o755)
5378 except PermissionError:
5379 raise Error(f'Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.')
5380
b3b6e05e
TL
5381 (user_conf, _) = get_config_and_keyring(ctx)
5382
33c7a0ef
TL
5383 if ctx.ssh_user != 'root':
5384 check_ssh_connectivity(ctx)
5385
f67539c2
TL
5386 if not ctx.skip_prepare_host:
5387 command_prepare_host(ctx)
5388 else:
5389 logger.info('Skip prepare_host')
5390
5391 # initial vars
5392 fsid = ctx.fsid or make_fsid()
b3b6e05e
TL
5393 if not is_fsid(fsid):
5394 raise Error('not an fsid: %s' % fsid)
5395 logger.info('Cluster fsid: %s' % fsid)
5396
f67539c2
TL
5397 hostname = get_hostname()
5398 if '.' in hostname and not ctx.allow_fqdn_hostname:
5399 raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0]))
5400 mon_id = ctx.mon_id or hostname
5401 mgr_id = ctx.mgr_id or generate_service_id()
f67539c2
TL
5402
5403 lock = FileLock(ctx, fsid)
5404 lock.acquire()
5405
5406 (addr_arg, ipv6, mon_network) = prepare_mon_addresses(ctx)
5407 cluster_network, ipv6_cluster_network = prepare_cluster_network(ctx)
5408
5409 config = prepare_bootstrap_config(ctx, fsid, addr_arg, ctx.image)
5410
5411 if not ctx.skip_pull:
33c7a0ef
TL
5412 try:
5413 _pull_image(ctx, ctx.image)
5414 except UnauthorizedRegistryError:
5415 err_str = 'Failed to pull container image. Check that correct registry credentials are provided in bootstrap by --registry-url, --registry-username, --registry-password, or supply --registry-json with credentials'
5416 logger.debug(f'Pulling image for bootstrap on {hostname} failed: {err_str}')
5417 raise Error(err_str)
f67539c2
TL
5418
5419 image_ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
5420 logger.info(f'Ceph version: {image_ver}')
b3b6e05e
TL
5421
5422 if not ctx.allow_mismatched_release:
5423 image_release = image_ver.split()[4]
5424 if image_release not in \
5425 [DEFAULT_IMAGE_RELEASE, LATEST_STABLE_RELEASE]:
5426 raise Error(
5427 f'Container release {image_release} != cephadm release {DEFAULT_IMAGE_RELEASE};'
5428 ' please use matching version of cephadm (pass --allow-mismatched-release to continue anyway)'
5429 )
f67539c2
TL
5430
5431 logger.info('Extracting ceph user uid/gid from container image...')
5432 (uid, gid) = extract_uid_gid(ctx)
5433
5434 # create some initial keys
20effc67 5435 (mon_key, mgr_key, admin_key, bootstrap_keyring, admin_keyring) = create_initial_keys(ctx, uid, gid, mgr_id)
f67539c2
TL
5436
5437 monmap = create_initial_monmap(ctx, uid, gid, fsid, mon_id, addr_arg)
20effc67
TL
5438 (mon_dir, log_dir) = prepare_create_mon(ctx, uid, gid, fsid, mon_id,
5439 bootstrap_keyring.name, monmap.name)
f67539c2
TL
5440
5441 with open(mon_dir + '/config', 'w') as f:
5442 os.fchown(f.fileno(), uid, gid)
5443 os.fchmod(f.fileno(), 0o600)
5444 f.write(config)
5445
5446 make_var_run(ctx, fsid, uid, gid)
5447 create_mon(ctx, uid, gid, fsid, mon_id)
5448
5449 # config to issue various CLI commands
5450 tmp_config = write_tmp(config, uid, gid)
5451
5452 # a CLI helper to reduce our typing
5453 def cli(cmd, extra_mounts={}, timeout=DEFAULT_TIMEOUT):
5454 # type: (List[str], Dict[str, str], Optional[int]) -> str
5455 mounts = {
5456 log_dir: '/var/log/ceph:z',
5457 admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
5458 tmp_config.name: '/etc/ceph/ceph.conf:z',
5459 }
5460 for k, v in extra_mounts.items():
5461 mounts[k] = v
5462 timeout = timeout or ctx.timeout
5463 return CephContainer(
5464 ctx,
5465 image=ctx.image,
5466 entrypoint='/usr/bin/ceph',
5467 args=cmd,
5468 volume_mounts=mounts,
5469 ).run(timeout=timeout)
5470
5471 wait_for_mon(ctx, mon_id, mon_dir, admin_keyring.name, tmp_config.name)
5472
5473 finish_bootstrap_config(ctx, fsid, config, mon_id, mon_dir,
5474 mon_network, ipv6, cli,
5475 cluster_network, ipv6_cluster_network)
9f95a23c
TL
5476
5477 # output files
f67539c2 5478 with open(ctx.output_keyring, 'w') as f:
9f95a23c
TL
5479 os.fchmod(f.fileno(), 0o600)
5480 f.write('[client.admin]\n'
5481 '\tkey = ' + admin_key + '\n')
f67539c2 5482 logger.info('Wrote keyring to %s' % ctx.output_keyring)
9f95a23c 5483
f67539c2
TL
5484 # create mgr
5485 create_mgr(ctx, uid, gid, fsid, mgr_id, mgr_key, config, cli)
9f95a23c 5486
b3b6e05e
TL
5487 if user_conf:
5488 # user given config settings were already assimilated earlier
5489 # but if the given settings contained any attributes in
5490 # the mgr (e.g. mgr/cephadm/container_image_prometheus)
5491 # they don't seem to be stored if there isn't a mgr yet.
5492 # Since re-assimilating the same conf settings should be
5493 # idempotent we can just do it again here.
5494 with tempfile.NamedTemporaryFile(buffering=0) as tmp:
5495 tmp.write(user_conf.encode('utf-8'))
5496 cli(['config', 'assimilate-conf',
5497 '-i', '/var/lib/ceph/user.conf'],
5498 {tmp.name: '/var/lib/ceph/user.conf:z'})
9f95a23c
TL
5499
5500 # wait for mgr to restart (after enabling a module)
522d829b 5501 def wait_for_mgr_restart() -> None:
f67539c2
TL
5502 # first get latest mgrmap epoch from the mon. try newer 'mgr
5503 # stat' command first, then fall back to 'mgr dump' if
5504 # necessary
5505 try:
5506 j = json_loads_retry(lambda: cli(['mgr', 'stat']))
5507 except Exception:
5508 j = json_loads_retry(lambda: cli(['mgr', 'dump']))
9f95a23c 5509 epoch = j['epoch']
f67539c2 5510
9f95a23c
TL
5511 # wait for mgr to have it
5512 logger.info('Waiting for the mgr to restart...')
f67539c2 5513
9f95a23c
TL
5514 def mgr_has_latest_epoch():
5515 # type: () -> bool
5516 try:
5517 out = cli(['tell', 'mgr', 'mgr_status'])
5518 j = json.loads(out)
5519 return j['mgrmap_epoch'] >= epoch
5520 except Exception as e:
5521 logger.debug('tell mgr mgr_status failed: %s' % e)
5522 return False
f67539c2 5523 is_available(ctx, 'mgr epoch %d' % epoch, mgr_has_latest_epoch)
e306af50 5524
f67539c2 5525 enable_cephadm_mgr_module(cli, wait_for_mgr_restart)
e306af50 5526
f67539c2
TL
5527 # ssh
5528 if not ctx.skip_ssh:
5529 prepare_ssh(ctx, cli, wait_for_mgr_restart)
5530
5531 if ctx.registry_url and ctx.registry_username and ctx.registry_password:
20effc67
TL
5532 registry_credentials = {'url': ctx.registry_url, 'username': ctx.registry_username, 'password': ctx.registry_password}
5533 cli(['config-key', 'set', 'mgr/cephadm/registry_credentials', json.dumps(registry_credentials)])
f67539c2
TL
5534
5535 cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(ctx.container_init), '--force'])
5536
f67539c2
TL
5537 if not ctx.skip_dashboard:
5538 prepare_dashboard(ctx, uid, gid, cli, wait_for_mgr_restart)
f6b5b4d7 5539
33c7a0ef 5540 if ctx.output_config == CEPH_DEFAULT_CONF and not ctx.skip_admin_label and not ctx.no_minimize_config:
b3b6e05e
TL
5541        logger.info('Enabling client.admin keyring and conf on hosts with "_admin" label')
5542 try:
5543 cli(['orch', 'client-keyring', 'set', 'client.admin', 'label:_admin'])
5544 cli(['orch', 'host', 'label', 'add', get_hostname(), '_admin'])
5545 except Exception:
5546            logger.info('Unable to set up "_admin" label; assuming older version of Ceph')
5547
f67539c2
TL
5548 if ctx.apply_spec:
5549 logger.info('Applying %s to cluster' % ctx.apply_spec)
a4b75251 5550 # copy ssh key to hosts in spec file
f67539c2 5551 with open(ctx.apply_spec) as f:
a4b75251
TL
5552 try:
5553 for spec in parse_yaml_objs(f):
5554 if spec.get('service_type') == 'host':
5555 _distribute_ssh_keys(ctx, spec, hostname)
5556 except ValueError:
5557                logger.info('Unable to parse %s successfully' % ctx.apply_spec)
e306af50
TL
5558
5559 mounts = {}
a4b75251
TL
5560 mounts[pathify(ctx.apply_spec)] = '/tmp/spec.yml:ro'
5561 try:
5562 out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
5563 logger.info(out)
5564 except Exception:
5565 logger.info('\nApplying %s to cluster failed!\n' % ctx.apply_spec)
9f95a23c 5566
33c7a0ef
TL
5567 save_cluster_config(ctx, uid, gid, fsid)
5568
20effc67
TL
5569 # enable autotune for osd_memory_target
5570 logger.info('Enabling autotune for osd_memory_target')
5571 cli(['config', 'set', 'osd', 'osd_memory_target_autotune', 'true'])
5572
5573 # Notify the Dashboard to show the 'Expand cluster' page on first log in.
5574 cli(['config-key', 'set', 'mgr/dashboard/cluster/status', 'INSTALLED'])
5575
33c7a0ef 5576    logger.info('You can access the Ceph CLI as follows in case of multi-cluster or non-default config:\n\n'
9f95a23c
TL
5577 '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
5578 sys.argv[0],
5579 fsid,
f67539c2
TL
5580 ctx.output_config,
5581 ctx.output_keyring))
33c7a0ef
TL
5582
5583 logger.info('Or, if you are only running a single cluster on this host:\n\n\tsudo %s shell \n' % (sys.argv[0]))
5584
9f95a23c
TL
5585 logger.info('Please consider enabling telemetry to help improve Ceph:\n\n'
5586 '\tceph telemetry on\n\n'
5587 'For more information see:\n\n'
20effc67 5588 '\thttps://docs.ceph.com/docs/master/mgr/telemetry/\n')
9f95a23c
TL
5589 logger.info('Bootstrap complete.')
5590 return 0
5591
5592##################################
5593
f67539c2 5594
522d829b 5595def command_registry_login(ctx: CephadmContext) -> int:
f67539c2
TL
5596 if ctx.registry_json:
5597 logger.info('Pulling custom registry login info from %s.' % ctx.registry_json)
5598 d = get_parm(ctx.registry_json)
f6b5b4d7 5599 if d.get('url') and d.get('username') and d.get('password'):
f67539c2
TL
5600 ctx.registry_url = d.get('url')
5601 ctx.registry_username = d.get('username')
5602 ctx.registry_password = d.get('password')
5603 registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
f6b5b4d7 5604 else:
f67539c2
TL
5605 raise Error('json provided for custom registry login did not include all necessary fields. '
5606 'Please setup json file as\n'
5607 '{\n'
5608 ' "url": "REGISTRY_URL",\n'
5609 ' "username": "REGISTRY_USERNAME",\n'
5610 ' "password": "REGISTRY_PASSWORD"\n'
5611 '}\n')
5612 elif ctx.registry_url and ctx.registry_username and ctx.registry_password:
5613 registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
f6b5b4d7 5614 else:
f67539c2
TL
5615 raise Error('Invalid custom registry arguments received. To login to a custom registry include '
5616 '--registry-url, --registry-username and --registry-password '
5617 'options or --registry-json option')
f6b5b4d7
TL
5618 return 0
5619
f67539c2 5620
522d829b 5621def registry_login(ctx: CephadmContext, url: Optional[str], username: Optional[str], password: Optional[str]) -> None:
f67539c2 5622 logger.info('Logging into custom registry.')
f6b5b4d7 5623 try:
f67539c2
TL
5624 engine = ctx.container_engine
5625 cmd = [engine.path, 'login',
5626 '-u', username, '-p', password,
5627 url]
5628 if isinstance(engine, Podman):
5629 cmd.append('--authfile=/etc/ceph/podman-auth.json')
5630 out, _, _ = call_throws(ctx, cmd)
5631 if isinstance(engine, Podman):
5632 os.chmod('/etc/ceph/podman-auth.json', 0o600)
5633 except Exception:
5634 raise Error('Failed to login to custom registry @ %s as %s with given password' % (ctx.registry_url, ctx.registry_username))
f6b5b4d7
TL
5635
5636##################################
5637
5638
f67539c2
TL
5639def extract_uid_gid_monitoring(ctx, daemon_type):
5640 # type: (CephadmContext, str) -> Tuple[int, int]
9f95a23c
TL
5641
5642 if daemon_type == 'prometheus':
f67539c2 5643 uid, gid = extract_uid_gid(ctx, file_path='/etc/prometheus')
9f95a23c
TL
5644 elif daemon_type == 'node-exporter':
5645 uid, gid = 65534, 65534
5646 elif daemon_type == 'grafana':
f67539c2 5647 uid, gid = extract_uid_gid(ctx, file_path='/var/lib/grafana')
33c7a0ef
TL
5648 elif daemon_type == 'loki':
5649 uid, gid = extract_uid_gid(ctx, file_path='/etc/loki')
5650 elif daemon_type == 'promtail':
5651 uid, gid = extract_uid_gid(ctx, file_path='/etc/promtail')
9f95a23c 5652 elif daemon_type == 'alertmanager':
f67539c2 5653 uid, gid = extract_uid_gid(ctx, file_path=['/etc/alertmanager', '/etc/prometheus'])
9f95a23c 5654 else:
f67539c2 5655 raise Error('{} not implemented yet'.format(daemon_type))
9f95a23c
TL
5656 return uid, gid
5657
5658
20effc67
TL
5659def get_container_with_extra_args(ctx: CephadmContext,
5660 fsid: str, daemon_type: str, daemon_id: Union[int, str],
5661 privileged: bool = False,
5662 ptrace: bool = False,
5663 container_args: Optional[List[str]] = None) -> 'CephContainer':
5664 # wrapper for get_container that additionally adds extra_container_args if present
5665 # used for deploying daemons with additional podman/docker container arguments
5666 c = get_container(ctx, fsid, daemon_type, daemon_id, privileged, ptrace, container_args)
5667 if 'extra_container_args' in ctx and ctx.extra_container_args:
5668 c.container_args.extend(ctx.extra_container_args)
5669 return c
5670
5671
9f95a23c 5672@default_image
f67539c2
TL
5673def command_deploy(ctx):
5674 # type: (CephadmContext) -> None
5675 daemon_type, daemon_id = ctx.name.split('.', 1)
9f95a23c 5676
f67539c2
TL
5677 lock = FileLock(ctx, ctx.fsid)
5678 lock.acquire()
9f95a23c
TL
5679
5680 if daemon_type not in get_supported_daemons():
5681 raise Error('daemon type %s not recognized' % daemon_type)
5682
e306af50 5683 redeploy = False
f67539c2 5684 unit_name = get_unit_name(ctx.fsid, daemon_type, daemon_id)
f67539c2 5685 (_, state, _) = check_unit(ctx, unit_name)
522d829b 5686 if state == 'running' or is_container_running(ctx, CephContainer.for_daemon(ctx, ctx.fsid, daemon_type, daemon_id, 'bash')):
e306af50
TL
5687 redeploy = True
5688
f67539c2
TL
5689 if ctx.reconfig:
5690 logger.info('%s daemon %s ...' % ('Reconfig', ctx.name))
e306af50 5691 elif redeploy:
f67539c2 5692 logger.info('%s daemon %s ...' % ('Redeploy', ctx.name))
e306af50 5693 else:
f67539c2 5694 logger.info('%s daemon %s ...' % ('Deploy', ctx.name))
9f95a23c 5695
33c7a0ef
TL
5696 # Migrate sysctl conf files from /usr/lib to /etc
5697 migrate_sysctl_dir(ctx, ctx.fsid)
5698
f6b5b4d7 5699 # Get and check ports explicitly required to be opened
f67539c2
TL
5700 daemon_ports = [] # type: List[int]
5701
5702 # only check port in use if not reconfig or redeploy since service
5703 # we are redeploying/reconfiguring will already be using the port
5704 if not ctx.reconfig and not redeploy:
5705 if ctx.tcp_ports:
5706 daemon_ports = list(map(int, ctx.tcp_ports.split()))
f6b5b4d7 5707
9f95a23c 5708 if daemon_type in Ceph.daemons:
f67539c2
TL
5709 config, keyring = get_config_and_keyring(ctx)
5710 uid, gid = extract_uid_gid(ctx)
5711 make_var_run(ctx, ctx.fsid, uid, gid)
f6b5b4d7 5712
20effc67
TL
5713 c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id,
5714 ptrace=ctx.allow_ptrace)
f67539c2 5715 deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
9f95a23c 5716 config=config, keyring=keyring,
f67539c2
TL
5717 osd_fsid=ctx.osd_fsid,
5718 reconfig=ctx.reconfig,
f6b5b4d7 5719 ports=daemon_ports)
9f95a23c
TL
5720
5721 elif daemon_type in Monitoring.components:
5722 # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
9f95a23c 5723 # Default Checks
9f95a23c 5724 # make sure provided config-json is sufficient
f67539c2 5725 config = get_parm(ctx.config_json) # type: ignore
9f95a23c
TL
5726 required_files = Monitoring.components[daemon_type].get('config-json-files', list())
5727 required_args = Monitoring.components[daemon_type].get('config-json-args', list())
5728 if required_files:
5729 if not config or not all(c in config.get('files', {}).keys() for c in required_files): # type: ignore
f67539c2
TL
5730 raise Error('{} deployment requires config-json which must '
5731 'contain file content for {}'.format(daemon_type.capitalize(), ', '.join(required_files)))
9f95a23c
TL
5732 if required_args:
5733 if not config or not all(c in config.keys() for c in required_args): # type: ignore
f67539c2
TL
5734 raise Error('{} deployment requires config-json which must '
5735 'contain arg for {}'.format(daemon_type.capitalize(), ', '.join(required_args)))
9f95a23c 5736
f67539c2 5737 uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
20effc67 5738 c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id)
f67539c2
TL
5739 deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
5740 reconfig=ctx.reconfig,
f6b5b4d7 5741 ports=daemon_ports)
9f95a23c
TL
5742
5743 elif daemon_type == NFSGanesha.daemon_type:
b3b6e05e
TL
5744 if not ctx.reconfig and not redeploy and not daemon_ports:
5745 daemon_ports = list(NFSGanesha.port_map.values())
f6b5b4d7 5746
f67539c2 5747 config, keyring = get_config_and_keyring(ctx)
9f95a23c 5748 # TODO: extract ganesha uid/gid (997, 994) ?
f67539c2 5749 uid, gid = extract_uid_gid(ctx)
20effc67 5750 c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id)
f67539c2 5751 deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
9f95a23c 5752 config=config, keyring=keyring,
f67539c2 5753 reconfig=ctx.reconfig,
f6b5b4d7 5754 ports=daemon_ports)
e306af50 5755
1911f103 5756 elif daemon_type == CephIscsi.daemon_type:
f67539c2
TL
5757 config, keyring = get_config_and_keyring(ctx)
5758 uid, gid = extract_uid_gid(ctx)
20effc67 5759 c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id)
f67539c2 5760 deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
1911f103 5761 config=config, keyring=keyring,
f67539c2
TL
5762 reconfig=ctx.reconfig,
5763 ports=daemon_ports)
5764
5765 elif daemon_type == HAproxy.daemon_type:
5766 haproxy = HAproxy.init(ctx, ctx.fsid, daemon_id)
5767 uid, gid = haproxy.extract_uid_gid_haproxy()
20effc67 5768 c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id)
f67539c2
TL
5769 deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
5770 reconfig=ctx.reconfig,
5771 ports=daemon_ports)
5772
5773 elif daemon_type == Keepalived.daemon_type:
5774 keepalived = Keepalived.init(ctx, ctx.fsid, daemon_id)
5775 uid, gid = keepalived.extract_uid_gid_keepalived()
20effc67 5776 c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id)
f67539c2
TL
5777 deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
5778 reconfig=ctx.reconfig,
f6b5b4d7 5779 ports=daemon_ports)
f91f0fd5
TL
5780
5781 elif daemon_type == CustomContainer.daemon_type:
f67539c2
TL
5782 cc = CustomContainer.init(ctx, ctx.fsid, daemon_id)
5783 if not ctx.reconfig and not redeploy:
f91f0fd5 5784 daemon_ports.extend(cc.ports)
20effc67
TL
5785 c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id,
5786 privileged=cc.privileged,
5787 ptrace=ctx.allow_ptrace)
f67539c2 5788 deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c,
f91f0fd5 5789 uid=cc.uid, gid=cc.gid, config=None,
f67539c2 5790 keyring=None, reconfig=ctx.reconfig,
f91f0fd5
TL
5791 ports=daemon_ports)
5792
20effc67 5793 elif daemon_type == CephadmAgent.daemon_type:
f67539c2
TL
5794 # get current user gid and uid
5795 uid = os.getuid()
5796 gid = os.getgid()
f67539c2
TL
5797 deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, None,
5798 uid, gid, ports=daemon_ports)
5799
20effc67
TL
5800 elif daemon_type == SNMPGateway.daemon_type:
5801 sc = SNMPGateway.init(ctx, ctx.fsid, daemon_id)
5802 c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id)
5803 deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c,
5804 sc.uid, sc.gid,
5805 ports=daemon_ports)
5806
9f95a23c 5807 else:
f91f0fd5
TL
5808 raise Error('daemon type {} not implemented in command_deploy function'
5809 .format(daemon_type))
9f95a23c
TL
5810
5811##################################
5812
f6b5b4d7 5813
9f95a23c 5814@infer_image
f67539c2
TL
5815def command_run(ctx):
5816 # type: (CephadmContext) -> int
5817 (daemon_type, daemon_id) = ctx.name.split('.', 1)
5818 c = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
9f95a23c 5819 command = c.run_cmd()
f67539c2 5820 return call_timeout(ctx, command, ctx.timeout)
9f95a23c
TL
5821
5822##################################
5823
f6b5b4d7 5824
9f95a23c 5825@infer_fsid
e306af50 5826@infer_config
9f95a23c 5827@infer_image
522d829b 5828@validate_fsid
f67539c2
TL
5829def command_shell(ctx):
5830 # type: (CephadmContext) -> int
522d829b
TL
5831 cp = read_config(ctx.config)
5832 if cp.has_option('global', 'fsid') and \
5833 cp.get('global', 'fsid') != ctx.fsid:
5834 raise Error('fsid does not match ceph.conf')
f67539c2 5835
f67539c2
TL
5836 if ctx.name:
5837 if '.' in ctx.name:
5838 (daemon_type, daemon_id) = ctx.name.split('.', 1)
9f95a23c 5839 else:
f67539c2 5840 daemon_type = ctx.name
9f95a23c
TL
5841 daemon_id = None
5842 else:
5843 daemon_type = 'osd' # get the most mounts
5844 daemon_id = None
5845
20effc67
TL
5846 if ctx.fsid and daemon_type in Ceph.daemons:
5847 make_log_dir(ctx, ctx.fsid)
5848
f67539c2 5849 if daemon_id and not ctx.fsid:
9f95a23c
TL
5850 raise Error('must pass --fsid to specify cluster')
5851
33c7a0ef
TL
5852    # in case a dedicated keyring for the specified fsid is found we use it.
5853 # Otherwise, use /etc/ceph files by default, if present. We do this instead of
9f95a23c
TL
5854 # making these defaults in the arg parser because we don't want an error
5855 # if they don't exist.
33c7a0ef
TL
5856 if not ctx.keyring:
5857 keyring_file = f'{ctx.data_dir}/{ctx.fsid}/{CEPH_CONF_DIR}/{CEPH_KEYRING}'
5858 if os.path.exists(keyring_file):
5859 ctx.keyring = keyring_file
5860 elif os.path.exists(CEPH_DEFAULT_KEYRING):
5861 ctx.keyring = CEPH_DEFAULT_KEYRING
f67539c2
TL
5862
5863 container_args: List[str] = ['-i']
5864 mounts = get_container_mounts(ctx, ctx.fsid, daemon_type, daemon_id,
5865 no_config=True if ctx.config else False)
5866 binds = get_container_binds(ctx, ctx.fsid, daemon_type, daemon_id)
5867 if ctx.config:
5868 mounts[pathify(ctx.config)] = '/etc/ceph/ceph.conf:z'
5869 if ctx.keyring:
5870 mounts[pathify(ctx.keyring)] = '/etc/ceph/ceph.keyring:z'
5871 if ctx.mount:
5872 for _mount in ctx.mount:
f91f0fd5
TL
5873 split_src_dst = _mount.split(':')
5874 mount = pathify(split_src_dst[0])
5875 filename = os.path.basename(split_src_dst[0])
5876 if len(split_src_dst) > 1:
a4b75251
TL
5877 dst = split_src_dst[1]
5878 if len(split_src_dst) == 3:
5879 dst = '{}:{}'.format(dst, split_src_dst[2])
f91f0fd5
TL
5880 mounts[mount] = dst
5881 else:
a4b75251 5882 mounts[mount] = '/mnt/{}'.format(filename)
f67539c2
TL
5883 if ctx.command:
5884 command = ctx.command
9f95a23c
TL
5885 else:
5886 command = ['bash']
5887 container_args += [
f67539c2 5888 '-t',
9f95a23c 5889 '-e', 'LANG=C',
f67539c2 5890 '-e', 'PS1=%s' % CUSTOM_PS1,
9f95a23c 5891 ]
f67539c2
TL
5892 if ctx.fsid:
5893 home = os.path.join(ctx.data_dir, ctx.fsid, 'home')
9f95a23c
TL
5894 if not os.path.exists(home):
5895 logger.debug('Creating root home at %s' % home)
5896 makedirs(home, 0, 0, 0o660)
5897 if os.path.exists('/etc/skel'):
5898 for f in os.listdir('/etc/skel'):
5899 if f.startswith('.bash'):
5900 shutil.copyfile(os.path.join('/etc/skel', f),
5901 os.path.join(home, f))
5902 mounts[home] = '/root'
5903
b3b6e05e
TL
5904 for i in ctx.volume:
5905 a, b = i.split(':', 1)
5906 mounts[a] = b
5907
9f95a23c 5908 c = CephContainer(
f67539c2
TL
5909 ctx,
5910 image=ctx.image,
9f95a23c
TL
5911 entrypoint='doesnotmatter',
5912 args=[],
5913 container_args=container_args,
5914 volume_mounts=mounts,
f6b5b4d7 5915 bind_mounts=binds,
f67539c2 5916 envs=ctx.env,
9f95a23c
TL
5917 privileged=True)
5918 command = c.shell_cmd(command)
5919
f67539c2 5920 return call_timeout(ctx, command, ctx.timeout)
9f95a23c
TL
5921
5922##################################
5923
f6b5b4d7 5924
9f95a23c 5925@infer_fsid
f67539c2
TL
5926def command_enter(ctx):
5927 # type: (CephadmContext) -> int
5928 if not ctx.fsid:
9f95a23c 5929 raise Error('must pass --fsid to specify cluster')
f67539c2
TL
5930 (daemon_type, daemon_id) = ctx.name.split('.', 1)
5931 container_args = ['-i'] # type: List[str]
5932 if ctx.command:
5933 command = ctx.command
9f95a23c
TL
5934 else:
5935 command = ['sh']
5936 container_args += [
f67539c2 5937 '-t',
9f95a23c 5938 '-e', 'LANG=C',
f67539c2 5939 '-e', 'PS1=%s' % CUSTOM_PS1,
9f95a23c 5940 ]
1911f103 5941 c = CephContainer(
f67539c2
TL
5942 ctx,
5943 image=ctx.image,
1911f103
TL
5944 entrypoint='doesnotmatter',
5945 container_args=container_args,
f67539c2 5946 cname='ceph-%s-%s.%s' % (ctx.fsid, daemon_type, daemon_id),
1911f103 5947 )
9f95a23c 5948 command = c.exec_cmd(command)
f67539c2 5949 return call_timeout(ctx, command, ctx.timeout)
9f95a23c
TL
5950
5951##################################
5952
f6b5b4d7 5953
9f95a23c
TL
5954@infer_fsid
5955@infer_image
522d829b 5956@validate_fsid
f67539c2
TL
5957def command_ceph_volume(ctx):
5958 # type: (CephadmContext) -> None
522d829b
TL
5959 cp = read_config(ctx.config)
5960 if cp.has_option('global', 'fsid') and \
5961 cp.get('global', 'fsid') != ctx.fsid:
5962 raise Error('fsid does not match ceph.conf')
5963
f67539c2
TL
5964 if ctx.fsid:
5965 make_log_dir(ctx, ctx.fsid)
9f95a23c 5966
f67539c2
TL
5967 lock = FileLock(ctx, ctx.fsid)
5968 lock.acquire()
1911f103 5969
f67539c2
TL
5970 (uid, gid) = (0, 0) # ceph-volume runs as root
5971 mounts = get_container_mounts(ctx, ctx.fsid, 'osd', None)
9f95a23c
TL
5972
5973 tmp_config = None
5974 tmp_keyring = None
5975
f67539c2 5976 (config, keyring) = get_config_and_keyring(ctx)
9f95a23c 5977
801d1391 5978 if config:
9f95a23c
TL
5979 # tmp config file
5980 tmp_config = write_tmp(config, uid, gid)
9f95a23c 5981 mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'
801d1391
TL
5982
5983 if keyring:
5984 # tmp keyring file
5985 tmp_keyring = write_tmp(keyring, uid, gid)
9f95a23c
TL
5986 mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'
5987
20effc67 5988 c = get_ceph_volume_container(
f67539c2 5989 ctx,
f67539c2
TL
5990 envs=ctx.env,
5991 args=ctx.command,
9f95a23c
TL
5992 volume_mounts=mounts,
5993 )
b3b6e05e
TL
5994
5995 out, err, code = call_throws(ctx, c.run_cmd())
9f95a23c
TL
5996 if not code:
5997 print(out)
5998
5999##################################
6000
f6b5b4d7 6001
9f95a23c 6002@infer_fsid
f67539c2 6003def command_unit(ctx):
33c7a0ef 6004 # type: (CephadmContext) -> int
f67539c2 6005 if not ctx.fsid:
9f95a23c 6006 raise Error('must pass --fsid to specify cluster')
e306af50 6007
f67539c2 6008 unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)
e306af50 6009
33c7a0ef
TL
6010 _, _, code = call(
6011 ctx,
6012 ['systemctl', ctx.command, unit_name],
adb31ebb
TL
6013 verbosity=CallVerbosity.VERBOSE,
6014 desc=''
6015 )
33c7a0ef 6016 return code
9f95a23c
TL
6017
6018##################################
6019
f6b5b4d7 6020
9f95a23c 6021@infer_fsid
f67539c2
TL
6022def command_logs(ctx):
6023 # type: (CephadmContext) -> None
6024 if not ctx.fsid:
9f95a23c
TL
6025 raise Error('must pass --fsid to specify cluster')
6026
f67539c2 6027 unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)
9f95a23c
TL
6028
6029 cmd = [find_program('journalctl')]
6030 cmd.extend(['-u', unit_name])
f67539c2
TL
6031 if ctx.command:
6032 cmd.extend(ctx.command)
9f95a23c
TL
6033
6034 # call this directly, without our wrapper, so that we get an unmolested
6035    # stdout without logger prefixing.
f67539c2 6036 logger.debug('Running command: %s' % ' '.join(cmd))
522d829b 6037 subprocess.call(cmd, env=os.environ.copy()) # type: ignore
9f95a23c
TL
6038
6039##################################
6040
f6b5b4d7 6041
f67539c2 6042def list_networks(ctx):
522d829b 6043 # type: (CephadmContext) -> Dict[str,Dict[str, Set[str]]]
9f95a23c 6044
f67539c2
TL
6045 # sadly, 18.04's iproute2 4.15.0-2ubun doesn't support the -j flag,
6046 # so we'll need to use a regex to parse 'ip' command output.
6047 #
6048 # out, _, _ = call_throws(['ip', '-j', 'route', 'ls'])
6049 # j = json.loads(out)
6050 # for x in j:
f67539c2
TL
6051 res = _list_ipv4_networks(ctx)
6052 res.update(_list_ipv6_networks(ctx))
f6b5b4d7
TL
6053 return res
6054
6055
522d829b 6056def _list_ipv4_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]:
f67539c2
TL
6057 execstr: Optional[str] = find_executable('ip')
6058 if not execstr:
6059 raise FileNotFoundError("unable to find 'ip' command")
6060 out, _, _ = call_throws(ctx, [execstr, 'route', 'ls'])
f6b5b4d7
TL
6061 return _parse_ipv4_route(out)
6062
9f95a23c 6063
522d829b
TL
6064def _parse_ipv4_route(out: str) -> Dict[str, Dict[str, Set[str]]]:
6065 r = {} # type: Dict[str, Dict[str, Set[str]]]
33c7a0ef 6066 p = re.compile(r'^(\S+) (?:via \S+)? ?dev (\S+) (.*)scope link (.*)src (\S+)')
9f95a23c
TL
6067 for line in out.splitlines():
6068 m = p.findall(line)
6069 if not m:
6070 continue
6071 net = m[0][0]
33c7a0ef
TL
6072 if '/' not in net: # aggregate /32 mask for single host sub-networks
6073 net += '/32'
f67539c2
TL
6074 iface = m[0][1]
6075 ip = m[0][4]
9f95a23c 6076 if net not in r:
f67539c2
TL
6077 r[net] = {}
6078 if iface not in r[net]:
522d829b
TL
6079 r[net][iface] = set()
6080 r[net][iface].add(ip)
9f95a23c
TL
6081 return r
6082
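# Illustrative sketch of _parse_ipv4_route() on typical `ip route ls` output
# (comments only, not executed; addresses and interface name are example
# values). The default route carries no 'scope link' and is therefore skipped:
#
#   >>> out = ('default via 192.168.1.1 dev eth0 proto dhcp src 192.168.1.10\n'
#   ...        '192.168.1.0/24 dev eth0 proto kernel scope link src 192.168.1.10\n')
#   >>> _parse_ipv4_route(out)
#   {'192.168.1.0/24': {'eth0': {'192.168.1.10'}}}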
f6b5b4d7 6083
522d829b 6084def _list_ipv6_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]:
f67539c2
TL
6085 execstr: Optional[str] = find_executable('ip')
6086 if not execstr:
6087 raise FileNotFoundError("unable to find 'ip' command")
6088 routes, _, _ = call_throws(ctx, [execstr, '-6', 'route', 'ls'])
6089 ips, _, _ = call_throws(ctx, [execstr, '-6', 'addr', 'ls'])
f6b5b4d7
TL
6090 return _parse_ipv6_route(routes, ips)
6091
6092
522d829b
TL
6093def _parse_ipv6_route(routes: str, ips: str) -> Dict[str, Dict[str, Set[str]]]:
6094 r = {} # type: Dict[str, Dict[str, Set[str]]]
f6b5b4d7
TL
6095 route_p = re.compile(r'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$')
6096 ip_p = re.compile(r'^\s+inet6 (\S+)/(.*)scope (.*)$')
f67539c2 6097 iface_p = re.compile(r'^(\d+): (\S+): (.*)$')
f6b5b4d7
TL
6098 for line in routes.splitlines():
6099 m = route_p.findall(line)
6100 if not m or m[0][0].lower() == 'default':
6101 continue
6102 net = m[0][0]
33c7a0ef
TL
6103 if '/' not in net: # aggregate /128 mask for single host sub-networks
6104 net += '/128'
f67539c2 6105 iface = m[0][1]
33c7a0ef
TL
6106 if iface == 'lo': # skip loopback devices
6107 continue
f6b5b4d7 6108 if net not in r:
f67539c2
TL
6109 r[net] = {}
6110 if iface not in r[net]:
522d829b 6111 r[net][iface] = set()
f6b5b4d7 6112
f67539c2 6113 iface = None
f6b5b4d7
TL
6114 for line in ips.splitlines():
6115 m = ip_p.findall(line)
6116 if not m:
f67539c2
TL
6117 m = iface_p.findall(line)
6118 if m:
6119 # drop @... suffix, if present
6120 iface = m[0][1].split('@')[0]
f6b5b4d7
TL
6121 continue
6122 ip = m[0][0]
6123 # find the network it belongs to
6124 net = [n for n in r.keys()
f67539c2 6125 if ipaddress.ip_address(ip) in ipaddress.ip_network(n)]
20effc67 6126 if net and iface in r[net[0]]:
f67539c2 6127 assert(iface)
522d829b 6128 r[net[0]][iface].add(ip)
f6b5b4d7
TL
6129
6130 return r
6131
6132
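# Illustrative sketch of _parse_ipv6_route() (comments only, not executed;
# addresses and interface name are example values), fed with trimmed-down
# `ip -6 route ls` and `ip -6 addr ls` output:
#
#   >>> routes = 'fe80::/64 dev eth0 proto kernel metric 256 pref medium\n'
#   >>> ips = ('2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP\n'
#   ...        '    inet6 fe80::5054:ff:fe12:3456/64 scope link\n')
#   >>> _parse_ipv6_route(routes, ips)
#   {'fe80::/64': {'eth0': {'fe80::5054:ff:fe12:3456'}}}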
f67539c2
TL
6133def command_list_networks(ctx):
6134 # type: (CephadmContext) -> None
6135 r = list_networks(ctx)
522d829b
TL
6136
6137 def serialize_sets(obj: Any) -> Any:
6138 return list(obj) if isinstance(obj, set) else obj
6139
6140 print(json.dumps(r, indent=4, default=serialize_sets))
9f95a23c
TL
6141
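# Illustrative example (not part of upstream cephadm; values are
# hypothetical): because the per-interface addresses are sets, they are
# serialized as JSON lists, e.g.
#
#   {
#       "10.1.2.0/24": {
#           "eth0": [
#               "10.1.2.3"
#           ]
#       }
#   }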
6142##################################
6143
f6b5b4d7 6144
f67539c2
TL
6145def command_ls(ctx):
6146 # type: (CephadmContext) -> None
6147 ls = list_daemons(ctx, detail=not ctx.no_detail,
6148 legacy_dir=ctx.legacy_dir)
9f95a23c
TL
6149 print(json.dumps(ls, indent=4))
6150
f6b5b4d7 6151
f67539c2
TL
6152def with_units_to_int(v: str) -> int:
6153 if v.endswith('iB'):
6154 v = v[:-2]
6155 elif v.endswith('B'):
6156 v = v[:-1]
6157 mult = 1
6158 if v[-1].upper() == 'K':
6159 mult = 1024
6160 v = v[:-1]
6161 elif v[-1].upper() == 'M':
6162 mult = 1024 * 1024
6163 v = v[:-1]
6164 elif v[-1].upper() == 'G':
6165 mult = 1024 * 1024 * 1024
6166 v = v[:-1]
6167 elif v[-1].upper() == 'T':
6168 mult = 1024 * 1024 * 1024 * 1024
6169 v = v[:-1]
6170 return int(float(v) * mult)
6171
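# Illustrative examples (not part of upstream cephadm): the size strings
# reported by `podman stats` / `docker stats` normalize to bytes; note that
# decimal suffixes (kB, MB, ...) are treated as binary multiples here.
#
#   >>> with_units_to_int('1.5GiB')
#   1610612736
#   >>> with_units_to_int('512MB')
#   536870912
#   >>> with_units_to_int('813kB')
#   832512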
6172
6173def list_daemons(ctx, detail=True, legacy_dir=None):
6174 # type: (CephadmContext, bool, Optional[str]) -> List[Dict[str, str]]
6175 host_version: Optional[str] = None
9f95a23c 6176 ls = []
f67539c2 6177 container_path = ctx.container_engine.path
9f95a23c 6178
f67539c2 6179 data_dir = ctx.data_dir
9f95a23c
TL
6180 if legacy_dir is not None:
6181 data_dir = os.path.abspath(legacy_dir + data_dir)
6182
6183 # keep track of ceph versions we see
6184 seen_versions = {} # type: Dict[str, Optional[str]]
6185
f67539c2
TL
6186 # keep track of image digests
6187 seen_digests = {} # type: Dict[str, List[str]]
6188
33c7a0ef 6189 # keep track of memory and cpu usage we've seen
f67539c2 6190 seen_memusage = {} # type: Dict[str, int]
33c7a0ef 6191 seen_cpuperc = {} # type: Dict[str, str]
f67539c2
TL
6192 out, err, code = call(
6193 ctx,
6194 [container_path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
6195 verbosity=CallVerbosity.DEBUG
6196 )
522d829b 6197 seen_memusage_cid_len, seen_memusage = _parse_mem_usage(code, out)
f67539c2 6198
33c7a0ef
TL
6199 out, err, code = call(
6200 ctx,
6201 [container_path, 'stats', '--format', '{{.ID}},{{.CPUPerc}}', '--no-stream'],
6202 verbosity=CallVerbosity.DEBUG
6203 )
6204 seen_cpuperc_cid_len, seen_cpuperc = _parse_cpu_perc(code, out)
6205
9f95a23c
TL
6206 # /var/lib/ceph
6207 if os.path.exists(data_dir):
6208 for i in os.listdir(data_dir):
6209 if i in ['mon', 'osd', 'mds', 'mgr']:
6210 daemon_type = i
6211 for j in os.listdir(os.path.join(data_dir, i)):
6212 if '-' not in j:
6213 continue
6214 (cluster, daemon_id) = j.split('-', 1)
f67539c2
TL
6215 fsid = get_legacy_daemon_fsid(ctx,
6216 cluster, daemon_type, daemon_id,
6217 legacy_dir=legacy_dir)
e306af50 6218 legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
f67539c2 6219 val: Dict[str, Any] = {
9f95a23c
TL
6220 'style': 'legacy',
6221 'name': '%s.%s' % (daemon_type, daemon_id),
6222 'fsid': fsid if fsid is not None else 'unknown',
e306af50 6223 'systemd_unit': legacy_unit_name,
9f95a23c
TL
6224 }
6225 if detail:
20effc67 6226 (val['enabled'], val['state'], _) = check_unit(ctx, legacy_unit_name)
9f95a23c
TL
6227 if not host_version:
6228 try:
f67539c2
TL
6229 out, err, code = call(ctx,
6230 ['ceph', '-v'],
6231 verbosity=CallVerbosity.DEBUG)
9f95a23c
TL
6232 if not code and out.startswith('ceph version '):
6233 host_version = out.split(' ')[2]
6234 except Exception:
6235 pass
f67539c2
TL
6236 val['host_version'] = host_version
6237 ls.append(val)
9f95a23c
TL
6238 elif is_fsid(i):
6239 fsid = str(i) # convince mypy that fsid is a str here
6240 for j in os.listdir(os.path.join(data_dir, i)):
f67539c2 6241 if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)):
9f95a23c
TL
6242 name = j
6243 (daemon_type, daemon_id) = j.split('.', 1)
6244 unit_name = get_unit_name(fsid,
6245 daemon_type,
6246 daemon_id)
6247 else:
6248 continue
f67539c2 6249 val = {
9f95a23c
TL
6250 'style': 'cephadm:v1',
6251 'name': name,
6252 'fsid': fsid,
e306af50 6253 'systemd_unit': unit_name,
9f95a23c
TL
6254 }
6255 if detail:
6256 # get container id
20effc67 6257 (val['enabled'], val['state'], _) = check_unit(ctx, unit_name)
9f95a23c
TL
6258 container_id = None
6259 image_name = None
6260 image_id = None
f67539c2 6261 image_digests = None
9f95a23c
TL
6262 version = None
6263 start_stamp = None
6264
522d829b 6265 out, err, code = get_container_stats(ctx, container_path, fsid, daemon_type, daemon_id)
9f95a23c
TL
6266 if not code:
6267 (container_id, image_name, image_id, start,
6268 version) = out.strip().split(',')
6269 image_id = normalize_container_id(image_id)
6270 daemon_type = name.split('.', 1)[0]
6271 start_stamp = try_convert_datetime(start)
f67539c2
TL
6272
6273 # collect digests for this image id
6274 image_digests = seen_digests.get(image_id)
6275 if not image_digests:
6276 out, err, code = call(
6277 ctx,
6278 [
6279 container_path, 'image', 'inspect', image_id,
6280 '--format', '{{.RepoDigests}}',
6281 ],
6282 verbosity=CallVerbosity.DEBUG)
6283 if not code:
18d92ca7
TL
6284 image_digests = list(set(map(
6285 normalize_image_digest,
6286 out.strip()[1:-1].split(' '))))
f67539c2
TL
6287 seen_digests[image_id] = image_digests
6288
6289 # identify software version inside the container (if we can)
9f95a23c
TL
6290 if not version or '.' not in version:
6291 version = seen_versions.get(image_id, None)
6292 if daemon_type == NFSGanesha.daemon_type:
f67539c2 6293 version = NFSGanesha.get_version(ctx, container_id)
1911f103 6294 if daemon_type == CephIscsi.daemon_type:
f67539c2 6295 version = CephIscsi.get_version(ctx, container_id)
9f95a23c
TL
6296 elif not version:
6297 if daemon_type in Ceph.daemons:
f67539c2
TL
6298 out, err, code = call(ctx,
6299 [container_path, 'exec', container_id,
6300 'ceph', '-v'],
6301 verbosity=CallVerbosity.DEBUG)
9f95a23c
TL
6302 if not code and \
6303 out.startswith('ceph version '):
6304 version = out.split(' ')[2]
6305 seen_versions[image_id] = version
6306 elif daemon_type == 'grafana':
f67539c2
TL
6307 out, err, code = call(ctx,
6308 [container_path, 'exec', container_id,
6309 'grafana-server', '-v'],
6310 verbosity=CallVerbosity.DEBUG)
9f95a23c
TL
6311 if not code and \
6312 out.startswith('Version '):
6313 version = out.split(' ')[1]
6314 seen_versions[image_id] = version
6315 elif daemon_type in ['prometheus',
6316 'alertmanager',
33c7a0ef
TL
6317 'node-exporter',
6318 'loki',
6319 'promtail']:
f67539c2
TL
6320 version = Monitoring.get_version(ctx, container_id, daemon_type)
6321 seen_versions[image_id] = version
6322 elif daemon_type == 'haproxy':
6323 out, err, code = call(ctx,
6324 [container_path, 'exec', container_id,
6325 'haproxy', '-v'],
6326 verbosity=CallVerbosity.DEBUG)
6327 if not code and \
6328 out.startswith('HA-Proxy version '):
6329 version = out.split(' ')[2]
6330 seen_versions[image_id] = version
6331 elif daemon_type == 'keepalived':
6332 out, err, code = call(ctx,
6333 [container_path, 'exec', container_id,
6334 'keepalived', '--version'],
6335 verbosity=CallVerbosity.DEBUG)
9f95a23c 6336 if not code and \
f67539c2
TL
6337 err.startswith('Keepalived '):
6338 version = err.split(' ')[1]
6339 if version[0] == 'v':
6340 version = version[1:]
9f95a23c 6341 seen_versions[image_id] = version
f91f0fd5
TL
6342 elif daemon_type == CustomContainer.daemon_type:
6343 # Because a custom container can contain
6344 # everything, we do not know which command
6345 # to execute to get the version.
6346 pass
20effc67
TL
6347 elif daemon_type == SNMPGateway.daemon_type:
6348 version = SNMPGateway.get_version(ctx, fsid, daemon_id)
6349 seen_versions[image_id] = version
9f95a23c 6350 else:
f91f0fd5 6351 logger.warning('version for unknown daemon type %s' % daemon_type)
9f95a23c 6352 else:
f67539c2 6353 vfile = os.path.join(data_dir, fsid, j, 'unit.image') # type: ignore
9f95a23c
TL
6354 try:
6355 with open(vfile, 'r') as f:
6356 image_name = f.read().strip() or None
6357 except IOError:
6358 pass
f67539c2
TL
6359
6360 # unit.meta?
6361 mfile = os.path.join(data_dir, fsid, j, 'unit.meta') # type: ignore
6362 try:
6363 with open(mfile, 'r') as f:
6364 meta = json.loads(f.read())
6365 val.update(meta)
6366 except IOError:
6367 pass
6368
6369 val['container_id'] = container_id
6370 val['container_image_name'] = image_name
6371 val['container_image_id'] = image_id
6372 val['container_image_digests'] = image_digests
6373 if container_id:
6374 val['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len])
33c7a0ef 6375 val['cpu_percentage'] = seen_cpuperc.get(container_id[0:seen_cpuperc_cid_len])
f67539c2
TL
6376 val['version'] = version
6377 val['started'] = start_stamp
6378 val['created'] = get_file_timestamp(
9f95a23c
TL
6379 os.path.join(data_dir, fsid, j, 'unit.created')
6380 )
f67539c2 6381 val['deployed'] = get_file_timestamp(
9f95a23c 6382 os.path.join(data_dir, fsid, j, 'unit.image'))
f67539c2 6383 val['configured'] = get_file_timestamp(
9f95a23c 6384 os.path.join(data_dir, fsid, j, 'unit.configured'))
f67539c2 6385 ls.append(val)
9f95a23c 6386
9f95a23c
TL
6387 return ls
6388
6389
522d829b
TL
6390def _parse_mem_usage(code: int, out: str) -> Tuple[int, Dict[str, int]]:
6391 # keep track of memory usage we've seen
6392 seen_memusage = {} # type: Dict[str, int]
6393 seen_memusage_cid_len = 0
6394 if not code:
6395 for line in out.splitlines():
6396 (cid, usage) = line.split(',')
6397 (used, limit) = usage.split(' / ')
6398 try:
6399 seen_memusage[cid] = with_units_to_int(used)
6400 if not seen_memusage_cid_len:
6401 seen_memusage_cid_len = len(cid)
6402 except ValueError:
6403 logger.info('unable to parse memory usage line\n>{}'.format(line))
6404 pass
6405 return seen_memusage_cid_len, seen_memusage
6406
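# Illustrative example (not part of upstream cephadm; the container id is
# made up): each `{{.ID}},{{.MemUsage}}` line becomes one entry keyed by
# the (possibly truncated) container id, with the usage converted to bytes.
#
#   >>> _parse_mem_usage(0, 'd5f5a0d0c1a8,1.5GiB / 4GiB')
#   (12, {'d5f5a0d0c1a8': 1610612736})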
6407
33c7a0ef
TL
6408def _parse_cpu_perc(code: int, out: str) -> Tuple[int, Dict[str, str]]:
6409 seen_cpuperc = {}
6410 seen_cpuperc_cid_len = 0
6411 if not code:
6412 for line in out.splitlines():
6413 (cid, cpuperc) = line.split(',')
6414 try:
6415 seen_cpuperc[cid] = cpuperc
6416 if not seen_cpuperc_cid_len:
6417 seen_cpuperc_cid_len = len(cid)
6418 except ValueError:
6419 logger.info('unable to parse cpu percentage line\n>{}'.format(line))
6420 pass
6421 return seen_cpuperc_cid_len, seen_cpuperc
6422
6423
f67539c2
TL
6424def get_daemon_description(ctx, fsid, name, detail=False, legacy_dir=None):
6425 # type: (CephadmContext, str, str, bool, Optional[str]) -> Dict[str, str]
e306af50 6426
f67539c2 6427 for d in list_daemons(ctx, detail=detail, legacy_dir=legacy_dir):
e306af50
TL
6428 if d['fsid'] != fsid:
6429 continue
6430 if d['name'] != name:
6431 continue
6432 return d
6433 raise Error('Daemon not found: {}. See `cephadm ls`'.format(name))
6434
522d829b
TL
6435
6436def get_container_stats(ctx: CephadmContext, container_path: str, fsid: str, daemon_type: str, daemon_id: str) -> Tuple[str, str, int]:
6437 c = CephContainer.for_daemon(ctx, fsid, daemon_type, daemon_id, 'bash')
6438 out, err, code = '', '', -1
6439 for name in (c.cname, c.old_cname):
6440 cmd = [
6441 container_path, 'inspect',
6442 '--format', '{{.Id}},{{.Config.Image}},{{.Image}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}',
6443 name
6444 ]
6445 out, err, code = call(ctx, cmd, verbosity=CallVerbosity.DEBUG)
6446 if not code:
6447 break
6448 return out, err, code
6449
9f95a23c
TL
6450##################################
6451
f67539c2 6452
9f95a23c 6453@default_image
f67539c2
TL
6454def command_adopt(ctx):
6455 # type: (CephadmContext) -> None
9f95a23c 6456
f67539c2 6457 if not ctx.skip_pull:
33c7a0ef
TL
6458 try:
6459 _pull_image(ctx, ctx.image)
6460 except UnauthorizedRegistryError:
6461 err_str = 'Failed to pull container image. Host may not be logged into container registry. Try `cephadm registry-login --registry-url <url> --registry-username <username> --registry-password <password>` or supply login info via a json file with `cephadm registry-login --registry-json <file>`'
6462 logger.debug(f'Pulling image for `command_adopt` failed: {err_str}')
6463 raise Error(err_str)
9f95a23c 6464
f67539c2 6465 (daemon_type, daemon_id) = ctx.name.split('.', 1)
9f95a23c
TL
6466
6467 # legacy check
f67539c2
TL
6468 if ctx.style != 'legacy':
6469 raise Error('adoption of style %s not implemented' % ctx.style)
9f95a23c
TL
6470
6471 # lock
f67539c2
TL
6472 fsid = get_legacy_daemon_fsid(ctx,
6473 ctx.cluster,
9f95a23c
TL
6474 daemon_type,
6475 daemon_id,
f67539c2 6476 legacy_dir=ctx.legacy_dir)
9f95a23c
TL
6477 if not fsid:
6478 raise Error('could not detect legacy fsid; set fsid in ceph.conf')
f67539c2
TL
6479 lock = FileLock(ctx, fsid)
6480 lock.acquire()
9f95a23c
TL
6481
6482 # call correct adoption
6483 if daemon_type in Ceph.daemons:
f67539c2 6484 command_adopt_ceph(ctx, daemon_type, daemon_id, fsid)
9f95a23c 6485 elif daemon_type == 'prometheus':
f67539c2 6486 command_adopt_prometheus(ctx, daemon_id, fsid)
9f95a23c 6487 elif daemon_type == 'grafana':
f67539c2 6488 command_adopt_grafana(ctx, daemon_id, fsid)
9f95a23c
TL
6489 elif daemon_type == 'node-exporter':
6490 raise Error('adoption of node-exporter not implemented')
6491 elif daemon_type == 'alertmanager':
f67539c2 6492 command_adopt_alertmanager(ctx, daemon_id, fsid)
9f95a23c
TL
6493 else:
6494 raise Error('daemon type %s not recognized' % daemon_type)
6495
6496
1911f103 6497class AdoptOsd(object):
f67539c2
TL
6498 def __init__(self, ctx, osd_data_dir, osd_id):
6499 # type: (CephadmContext, str, str) -> None
6500 self.ctx = ctx
1911f103
TL
6501 self.osd_data_dir = osd_data_dir
6502 self.osd_id = osd_id
6503
6504 def check_online_osd(self):
6505 # type: () -> Tuple[Optional[str], Optional[str]]
6506
6507 osd_fsid, osd_type = None, None
6508
6509 path = os.path.join(self.osd_data_dir, 'fsid')
6510 try:
6511 with open(path, 'r') as f:
6512 osd_fsid = f.read().strip()
f67539c2 6513 logger.info('Found online OSD at %s' % path)
1911f103
TL
6514 except IOError:
6515 logger.info('Unable to read OSD fsid from %s' % path)
e306af50
TL
6516 if os.path.exists(os.path.join(self.osd_data_dir, 'type')):
6517 with open(os.path.join(self.osd_data_dir, 'type')) as f:
6518 osd_type = f.read().strip()
6519 else:
6520 logger.info('"type" file missing for OSD data dir')
1911f103
TL
6521
6522 return osd_fsid, osd_type
6523
6524 def check_offline_lvm_osd(self):
6525 # type: () -> Tuple[Optional[str], Optional[str]]
1911f103
TL
6526 osd_fsid, osd_type = None, None
6527
20effc67 6528 c = get_ceph_volume_container(
f67539c2 6529 self.ctx,
1911f103 6530 args=['lvm', 'list', '--format=json'],
1911f103 6531 )
f67539c2 6532 out, err, code = call_throws(self.ctx, c.run_cmd())
1911f103
TL
6533 if not code:
6534 try:
6535 js = json.loads(out)
6536 if self.osd_id in js:
f67539c2 6537 logger.info('Found offline LVM OSD {}'.format(self.osd_id))
1911f103
TL
6538 osd_fsid = js[self.osd_id][0]['tags']['ceph.osd_fsid']
6539 for device in js[self.osd_id]:
6540 if device['tags']['ceph.type'] == 'block':
6541 osd_type = 'bluestore'
6542 break
6543 if device['tags']['ceph.type'] == 'data':
6544 osd_type = 'filestore'
6545 break
6546 except ValueError as e:
f67539c2 6547 logger.info('Invalid JSON in ceph-volume lvm list: {}'.format(e))
1911f103
TL
6548
6549 return osd_fsid, osd_type
6550
6551 def check_offline_simple_osd(self):
6552 # type: () -> Tuple[Optional[str], Optional[str]]
1911f103
TL
6553 osd_fsid, osd_type = None, None
6554
f67539c2 6555 osd_file = glob('/etc/ceph/osd/{}-[a-f0-9-]*.json'.format(self.osd_id))
1911f103
TL
6556 if len(osd_file) == 1:
6557 with open(osd_file[0], 'r') as f:
6558 try:
6559 js = json.loads(f.read())
f67539c2
TL
6560 logger.info('Found offline simple OSD {}'.format(self.osd_id))
6561 osd_fsid = js['fsid']
6562 osd_type = js['type']
6563 if osd_type != 'filestore':
1911f103
TL
6564 # need this to be mounted for the adopt to work, as it
6565 # needs to move files from this directory
f67539c2 6566 call_throws(self.ctx, ['mount', js['data']['path'], self.osd_data_dir])
1911f103 6567 except ValueError as e:
f67539c2 6568 logger.info('Invalid JSON in {}: {}'.format(osd_file, e))
1911f103
TL
6569
6570 return osd_fsid, osd_type
6571
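# Illustrative sketch (not part of upstream cephadm; all values are
# hypothetical): the minimal shape of an /etc/ceph/osd/{id}-{fsid}.json
# file that check_offline_simple_osd() relies on. Only the keys read above
# are shown; a real ceph-volume 'simple' scan file carries more fields.
#
#   {
#       "fsid": "a1b2c3d4-...",
#       "type": "bluestore",
#       "data": {"path": "/dev/sdb1"}
#   }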
9f95a23c 6572
f67539c2
TL
6573def command_adopt_ceph(ctx, daemon_type, daemon_id, fsid):
6574 # type: (CephadmContext, str, str, str) -> None
9f95a23c 6575
f67539c2 6576 (uid, gid) = extract_uid_gid(ctx)
9f95a23c
TL
6577
6578 data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
f67539c2
TL
6579 (daemon_type, ctx.cluster, daemon_id))
6580 data_dir_src = os.path.abspath(ctx.legacy_dir + data_dir_src)
9f95a23c 6581
1911f103
TL
6582 if not os.path.exists(data_dir_src):
6583 raise Error("{}.{} data directory '{}' does not exist. "
f67539c2
TL
6584 'Incorrect ID specified, or daemon already adopted?'.format(
6585 daemon_type, daemon_id, data_dir_src))
1911f103 6586
9f95a23c
TL
6587 osd_fsid = None
6588 if daemon_type == 'osd':
f67539c2 6589 adopt_osd = AdoptOsd(ctx, data_dir_src, daemon_id)
1911f103
TL
6590 osd_fsid, osd_type = adopt_osd.check_online_osd()
6591 if not osd_fsid:
6592 osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
6593 if not osd_fsid:
6594 osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
6595 if not osd_fsid:
6596 raise Error('Unable to find OSD {}'.format(daemon_id))
6597 logger.info('objectstore_type is %s' % osd_type)
e306af50 6598 assert osd_type
1911f103 6599 if osd_type == 'filestore':
9f95a23c
TL
6600 raise Error('FileStore is not supported by cephadm')
6601
6602 # NOTE: implicit assumption here that the units correspond to the
6603 # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
6604 # CLUSTER field.
6605 unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
f67539c2 6606 (enabled, state, _) = check_unit(ctx, unit_name)
9f95a23c
TL
6607 if state == 'running':
6608 logger.info('Stopping old systemd unit %s...' % unit_name)
f67539c2 6609 call_throws(ctx, ['systemctl', 'stop', unit_name])
9f95a23c
TL
6610 if enabled:
6611 logger.info('Disabling old systemd unit %s...' % unit_name)
f67539c2 6612 call_throws(ctx, ['systemctl', 'disable', unit_name])
9f95a23c
TL
6613
6614 # data
6615 logger.info('Moving data...')
f67539c2 6616 data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
9f95a23c 6617 uid=uid, gid=gid)
f67539c2 6618 move_files(ctx, glob(os.path.join(data_dir_src, '*')),
9f95a23c
TL
6619 data_dir_dst,
6620 uid=uid, gid=gid)
f67539c2 6621 logger.debug('Remove dir `%s`' % (data_dir_src))
9f95a23c 6622 if os.path.ismount(data_dir_src):
f67539c2 6623 call_throws(ctx, ['umount', data_dir_src])
9f95a23c
TL
6624 os.rmdir(data_dir_src)
6625
6626 logger.info('Chowning content...')
f67539c2 6627 call_throws(ctx, ['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])
9f95a23c
TL
6628
6629 if daemon_type == 'mon':
6630 # rename *.ldb -> *.sst, in case they are coming from ubuntu
6631 store = os.path.join(data_dir_dst, 'store.db')
6632 num_renamed = 0
6633 if os.path.exists(store):
6634 for oldf in os.listdir(store):
6635 if oldf.endswith('.ldb'):
6636 newf = oldf.replace('.ldb', '.sst')
6637 oldp = os.path.join(store, oldf)
6638 newp = os.path.join(store, newf)
6639 logger.debug('Renaming %s -> %s' % (oldp, newp))
6640 os.rename(oldp, newp)
num_renamed += 1  # count the rename so the summary below is emitted
6641 if num_renamed:
6642 logger.info('Renamed %d leveldb *.ldb files to *.sst',
6643 num_renamed)
6644 if daemon_type == 'osd':
6645 for n in ['block', 'block.db', 'block.wal']:
6646 p = os.path.join(data_dir_dst, n)
6647 if os.path.exists(p):
6648 logger.info('Chowning %s...' % p)
6649 os.chown(p, uid, gid)
6650 # disable the ceph-volume 'simple' mode files on the host
6651 simple_fn = os.path.join('/etc/ceph/osd',
6652 '%s-%s.json' % (daemon_id, osd_fsid))
6653 if os.path.exists(simple_fn):
6654 new_fn = simple_fn + '.adopted-by-cephadm'
6655 logger.info('Renaming %s -> %s', simple_fn, new_fn)
6656 os.rename(simple_fn, new_fn)
6657 logger.info('Disabling host unit ceph-volume@ simple unit...')
f67539c2
TL
6658 call(ctx, ['systemctl', 'disable',
6659 'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
9f95a23c
TL
6660 else:
6661 # assume this is an 'lvm' c-v for now, but don't error
6662 # out if it's not.
6663 logger.info('Disabling host unit ceph-volume@ lvm unit...')
f67539c2
TL
6664 call(ctx, ['systemctl', 'disable',
6665 'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])
9f95a23c
TL
6666
6667 # config
f67539c2
TL
6668 config_src = '/etc/ceph/%s.conf' % (ctx.cluster)
6669 config_src = os.path.abspath(ctx.legacy_dir + config_src)
9f95a23c 6670 config_dst = os.path.join(data_dir_dst, 'config')
f67539c2 6671 copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
9f95a23c
TL
6672
6673 # logs
6674 logger.info('Moving logs...')
6675 log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
f67539c2
TL
6676 (ctx.cluster, daemon_type, daemon_id))
6677 log_dir_src = os.path.abspath(ctx.legacy_dir + log_dir_src)
6678 log_dir_dst = make_log_dir(ctx, fsid, uid=uid, gid=gid)
6679 move_files(ctx, glob(log_dir_src),
9f95a23c
TL
6680 log_dir_dst,
6681 uid=uid, gid=gid)
6682
6683 logger.info('Creating new units...')
f67539c2
TL
6684 make_var_run(ctx, fsid, uid, gid)
6685 c = get_container(ctx, fsid, daemon_type, daemon_id)
6686 deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, c,
9f95a23c 6687 enable=True, # unconditionally enable the new unit
f67539c2 6688 start=(state == 'running' or ctx.force_start),
9f95a23c 6689 osd_fsid=osd_fsid)
f67539c2 6690 update_firewalld(ctx, daemon_type)
9f95a23c
TL
6691
6692
f67539c2
TL
6693def command_adopt_prometheus(ctx, daemon_id, fsid):
6694 # type: (CephadmContext, str, str) -> None
9f95a23c 6695 daemon_type = 'prometheus'
f67539c2 6696 (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
9f95a23c 6697
f67539c2 6698 _stop_and_disable(ctx, 'prometheus')
9f95a23c 6699
f67539c2
TL
6700 data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
6701 uid=uid, gid=gid)
9f95a23c
TL
6702
6703 # config
6704 config_src = '/etc/prometheus/prometheus.yml'
f67539c2 6705 config_src = os.path.abspath(ctx.legacy_dir + config_src)
9f95a23c 6706 config_dst = os.path.join(data_dir_dst, 'etc/prometheus')
1911f103 6707 makedirs(config_dst, uid, gid, 0o755)
f67539c2 6708 copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
9f95a23c
TL
6709
6710 # data
6711 data_src = '/var/lib/prometheus/metrics/'
f67539c2 6712 data_src = os.path.abspath(ctx.legacy_dir + data_src)
9f95a23c 6713 data_dst = os.path.join(data_dir_dst, 'data')
f67539c2 6714 copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)
9f95a23c 6715
f67539c2
TL
6716 make_var_run(ctx, fsid, uid, gid)
6717 c = get_container(ctx, fsid, daemon_type, daemon_id)
6718 deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
6719 update_firewalld(ctx, daemon_type)
9f95a23c 6720
f6b5b4d7 6721
f67539c2
TL
6722def command_adopt_grafana(ctx, daemon_id, fsid):
6723 # type: (CephadmContext, str, str) -> None
9f95a23c
TL
6724
6725 daemon_type = 'grafana'
f67539c2 6726 (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
9f95a23c 6727
f67539c2 6728 _stop_and_disable(ctx, 'grafana-server')
9f95a23c 6729
f67539c2
TL
6730 data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
6731 uid=uid, gid=gid)
9f95a23c
TL
6732
6733 # config
6734 config_src = '/etc/grafana/grafana.ini'
f67539c2 6735 config_src = os.path.abspath(ctx.legacy_dir + config_src)
9f95a23c
TL
6736 config_dst = os.path.join(data_dir_dst, 'etc/grafana')
6737 makedirs(config_dst, uid, gid, 0o755)
f67539c2 6738 copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
9f95a23c
TL
6739
6740 prov_src = '/etc/grafana/provisioning/'
f67539c2 6741 prov_src = os.path.abspath(ctx.legacy_dir + prov_src)
9f95a23c 6742 prov_dst = os.path.join(data_dir_dst, 'etc/grafana')
f67539c2 6743 copy_tree(ctx, [prov_src], prov_dst, uid=uid, gid=gid)
9f95a23c
TL
6744
6745 # cert
6746 cert = '/etc/grafana/grafana.crt'
6747 key = '/etc/grafana/grafana.key'
6748 if os.path.exists(cert) and os.path.exists(key):
6749 cert_src = '/etc/grafana/grafana.crt'
f67539c2 6750 cert_src = os.path.abspath(ctx.legacy_dir + cert_src)
9f95a23c
TL
6751 makedirs(os.path.join(data_dir_dst, 'etc/grafana/certs'), uid, gid, 0o755)
6752 cert_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_file')
f67539c2 6753 copy_files(ctx, [cert_src], cert_dst, uid=uid, gid=gid)
9f95a23c
TL
6754
6755 key_src = '/etc/grafana/grafana.key'
f67539c2 6756 key_src = os.path.abspath(ctx.legacy_dir + key_src)
9f95a23c 6757 key_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_key')
f67539c2 6758 copy_files(ctx, [key_src], key_dst, uid=uid, gid=gid)
9f95a23c
TL
6759
6760 _adjust_grafana_ini(os.path.join(config_dst, 'grafana.ini'))
6761 else:
f67539c2 6762 logger.debug('Skipping ssl, missing cert {} or key {}'.format(cert, key))
9f95a23c 6763
9f95a23c
TL
6764 # data - possible custom dashboards/plugins
6765 data_src = '/var/lib/grafana/'
f67539c2 6766 data_src = os.path.abspath(ctx.legacy_dir + data_src)
9f95a23c 6767 data_dst = os.path.join(data_dir_dst, 'data')
f67539c2 6768 copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)
9f95a23c 6769
f67539c2
TL
6770 make_var_run(ctx, fsid, uid, gid)
6771 c = get_container(ctx, fsid, daemon_type, daemon_id)
6772 deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
6773 update_firewalld(ctx, daemon_type)
9f95a23c 6774
f6b5b4d7 6775
f67539c2
TL
6776def command_adopt_alertmanager(ctx, daemon_id, fsid):
6777 # type: (CephadmContext, str, str) -> None
801d1391
TL
6778
6779 daemon_type = 'alertmanager'
f67539c2 6780 (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
801d1391 6781
f67539c2 6782 _stop_and_disable(ctx, 'prometheus-alertmanager')
801d1391 6783
f67539c2
TL
6784 data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
6785 uid=uid, gid=gid)
801d1391
TL
6786
6787 # config
6788 config_src = '/etc/prometheus/alertmanager.yml'
f67539c2 6789 config_src = os.path.abspath(ctx.legacy_dir + config_src)
801d1391
TL
6790 config_dst = os.path.join(data_dir_dst, 'etc/alertmanager')
6791 makedirs(config_dst, uid, gid, 0o755)
f67539c2 6792 copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
801d1391
TL
6793
6794 # data
6795 data_src = '/var/lib/prometheus/alertmanager/'
f67539c2 6796 data_src = os.path.abspath(ctx.legacy_dir + data_src)
801d1391 6797 data_dst = os.path.join(data_dir_dst, 'etc/alertmanager/data')
f67539c2 6798 copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)
801d1391 6799
f67539c2
TL
6800 make_var_run(ctx, fsid, uid, gid)
6801 c = get_container(ctx, fsid, daemon_type, daemon_id)
6802 deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
6803 update_firewalld(ctx, daemon_type)
801d1391 6804
f6b5b4d7 6805
9f95a23c
TL
6806def _adjust_grafana_ini(filename):
6807 # type: (str) -> None
6808
6809 # Update cert_file, cert_key pathnames in server section
6810 # ConfigParser does not preserve comments
6811 try:
f67539c2 6812 with open(filename, 'r') as grafana_ini:
9f95a23c 6813 lines = grafana_ini.readlines()
f67539c2
TL
6814 with open('{}.new'.format(filename), 'w') as grafana_ini:
6815 server_section = False
9f95a23c
TL
6816 for line in lines:
6817 if line.startswith('['):
f67539c2 6818 server_section = False
9f95a23c 6819 if line.startswith('[server]'):
f67539c2 6820 server_section = True
9f95a23c
TL
6821 if server_section:
6822 line = re.sub(r'^cert_file.*',
f67539c2 6823 'cert_file = /etc/grafana/certs/cert_file', line)
9f95a23c 6824 line = re.sub(r'^cert_key.*',
f67539c2 6825 'cert_key = /etc/grafana/certs/cert_key', line)
9f95a23c 6826 grafana_ini.write(line)
f67539c2 6827 os.rename('{}.new'.format(filename), filename)
9f95a23c 6828 except OSError as err:
f67539c2 6829 raise Error('Cannot update {}: {}'.format(filename, err))
9f95a23c
TL
6830
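# Illustrative sketch (not part of upstream cephadm): only the [server]
# section is rewritten, so a legacy fragment such as
#
#   [server]
#   cert_file = /etc/grafana/grafana.crt
#   cert_key = /etc/grafana/grafana.key
#
# ends up pointing at the paths used inside the adopted container:
#
#   [server]
#   cert_file = /etc/grafana/certs/cert_file
#   cert_key = /etc/grafana/certs/cert_key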
6831
f67539c2
TL
6832def _stop_and_disable(ctx, unit_name):
6833 # type: (CephadmContext, str) -> None
9f95a23c 6834
f67539c2 6835 (enabled, state, _) = check_unit(ctx, unit_name)
9f95a23c
TL
6836 if state == 'running':
6837 logger.info('Stopping old systemd unit %s...' % unit_name)
f67539c2 6838 call_throws(ctx, ['systemctl', 'stop', unit_name])
9f95a23c
TL
6839 if enabled:
6840 logger.info('Disabling old systemd unit %s...' % unit_name)
f67539c2 6841 call_throws(ctx, ['systemctl', 'disable', unit_name])
9f95a23c
TL
6842
6843##################################
6844
9f95a23c 6845
f67539c2
TL
6846def command_rm_daemon(ctx):
6847 # type: (CephadmContext) -> None
6848 lock = FileLock(ctx, ctx.fsid)
6849 lock.acquire()
9f95a23c 6850
f67539c2
TL
6851 (daemon_type, daemon_id) = ctx.name.split('.', 1)
6852 unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)
e306af50 6853
f67539c2 6854 if daemon_type in ['mon', 'osd'] and not ctx.force:
9f95a23c 6855 raise Error('must pass --force to proceed: '
f67539c2 6856 'this command may destroy precious data!')
e306af50 6857
f67539c2 6858 call(ctx, ['systemctl', 'stop', unit_name],
adb31ebb 6859 verbosity=CallVerbosity.DEBUG)
f67539c2 6860 call(ctx, ['systemctl', 'reset-failed', unit_name],
adb31ebb 6861 verbosity=CallVerbosity.DEBUG)
f67539c2 6862 call(ctx, ['systemctl', 'disable', unit_name],
adb31ebb 6863 verbosity=CallVerbosity.DEBUG)
f67539c2 6864 data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_id)
9f95a23c 6865 if daemon_type in ['mon', 'osd', 'prometheus'] and \
f67539c2 6866 not ctx.force_delete_data:
9f95a23c 6867 # rename it out of the way -- do not delete
f67539c2 6868 backup_dir = os.path.join(ctx.data_dir, ctx.fsid, 'removed')
9f95a23c
TL
6869 if not os.path.exists(backup_dir):
6870 makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
6871 dirname = '%s.%s_%s' % (daemon_type, daemon_id,
6872 datetime.datetime.utcnow().strftime(DATEFMT))
6873 os.rename(data_dir,
6874 os.path.join(backup_dir, dirname))
6875 else:
f67539c2 6876 call_throws(ctx, ['rm', '-rf', data_dir])
9f95a23c 6877
33c7a0ef
TL
6878 if 'tcp_ports' in ctx and ctx.tcp_ports is not None:
6879 ports: List[int] = [int(p) for p in ctx.tcp_ports.split()]
6880 try:
6881 fw = Firewalld(ctx)
6882 fw.close_ports(ports)
6883 fw.apply_rules()
6884 except RuntimeError as e:
6885 # in case we cannot close the ports we will remove
6886 # the daemon but keep them open.
6887 logger.warning(f' Error when trying to close ports: {e}')
6888
6889
9f95a23c
TL
6890##################################
6891
f6b5b4d7 6892
522d829b 6893def _zap(ctx: CephadmContext, what: str) -> None:
b3b6e05e 6894 mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
20effc67
TL
6895 c = get_ceph_volume_container(ctx,
6896 args=['lvm', 'zap', '--destroy', what],
6897 volume_mounts=mounts,
6898 envs=ctx.env)
b3b6e05e
TL
6899 logger.info(f'Zapping {what}...')
6900 out, err, code = call_throws(ctx, c.run_cmd())
6901
6902
6903@infer_image
522d829b 6904def _zap_osds(ctx: CephadmContext) -> None:
b3b6e05e
TL
6905 # assume fsid lock already held
6906
6907 # list
6908 mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
20effc67
TL
6909 c = get_ceph_volume_container(ctx,
6910 args=['inventory', '--format', 'json'],
6911 volume_mounts=mounts,
6912 envs=ctx.env)
b3b6e05e
TL
6913 out, err, code = call_throws(ctx, c.run_cmd())
6914 if code:
6915 raise Error('failed to list osd inventory')
6916 try:
6917 ls = json.loads(out)
6918 except ValueError as e:
6919 raise Error(f'Invalid JSON in ceph-volume inventory: {e}')
6920
6921 for i in ls:
6922 matches = [lv.get('cluster_fsid') == ctx.fsid for lv in i.get('lvs', [])]
6923 if any(matches) and all(matches):
6924 _zap(ctx, i.get('path'))
6925 elif any(matches):
6926 lv_names = [lv['name'] for lv in i.get('lvs', [])]
6927 # TODO: we need to map the lv_names back to device paths (the vg
6928 # id isn't part of the output here!)
6929 logger.warning(f'Not zapping LVs (not implemented): {lv_names}')
6930
6931
522d829b 6932def command_zap_osds(ctx: CephadmContext) -> None:
b3b6e05e
TL
6933 if not ctx.force:
6934 raise Error('must pass --force to proceed: '
6935 'this command may destroy precious data!')
6936
6937 lock = FileLock(ctx, ctx.fsid)
6938 lock.acquire()
6939
6940 _zap_osds(ctx)
6941
6942##################################
6943
6944
33c7a0ef
TL
6945def get_ceph_cluster_count(ctx: CephadmContext) -> int:
6946 return len([c for c in os.listdir(ctx.data_dir) if is_fsid(c)])
6947
6948
f67539c2
TL
6949def command_rm_cluster(ctx):
6950 # type: (CephadmContext) -> None
6951 if not ctx.force:
9f95a23c 6952 raise Error('must pass --force to proceed: '
f67539c2 6953 'this command may destroy precious data!')
9f95a23c 6954
f67539c2
TL
6955 lock = FileLock(ctx, ctx.fsid)
6956 lock.acquire()
9f95a23c 6957
33c7a0ef 6958 def disable_systemd_service(unit_name: str) -> None:
f67539c2 6959 call(ctx, ['systemctl', 'stop', unit_name],
adb31ebb 6960 verbosity=CallVerbosity.DEBUG)
f67539c2 6961 call(ctx, ['systemctl', 'reset-failed', unit_name],
adb31ebb 6962 verbosity=CallVerbosity.DEBUG)
f67539c2 6963 call(ctx, ['systemctl', 'disable', unit_name],
adb31ebb 6964 verbosity=CallVerbosity.DEBUG)
9f95a23c 6965
33c7a0ef
TL
6966 # stop + disable individual daemon units
6967 for d in list_daemons(ctx, detail=False):
6968 if d['fsid'] != ctx.fsid:
6969 continue
6970 if d['style'] != 'cephadm:v1':
6971 continue
6972 disable_systemd_service(get_unit_name(ctx.fsid, d['name']))
6973
9f95a23c 6974 # cluster units
f67539c2 6975 for unit_name in ['ceph-%s.target' % ctx.fsid]:
33c7a0ef 6976 disable_systemd_service(unit_name)
9f95a23c 6977
522d829b 6978 slice_name = 'system-ceph\\x2d{}.slice'.format(ctx.fsid.replace('-', '\\x2d'))
f67539c2 6979 call(ctx, ['systemctl', 'stop', slice_name],
adb31ebb 6980 verbosity=CallVerbosity.DEBUG)
9f95a23c 6981
b3b6e05e
TL
6982 # osds?
6983 if ctx.zap_osds:
6984 _zap_osds(ctx)
6985
9f95a23c 6986 # rm units
b3b6e05e
TL
6987 call_throws(ctx, ['rm', '-f', ctx.unit_dir
6988 + '/ceph-%s@.service' % ctx.fsid])
6989 call_throws(ctx, ['rm', '-f', ctx.unit_dir
6990 + '/ceph-%s.target' % ctx.fsid])
f67539c2
TL
6991 call_throws(ctx, ['rm', '-rf',
6992 ctx.unit_dir + '/ceph-%s.target.wants' % ctx.fsid])
9f95a23c 6993 # rm data
f67539c2
TL
6994 call_throws(ctx, ['rm', '-rf', ctx.data_dir + '/' + ctx.fsid])
6995
6996 if not ctx.keep_logs:
6997 # rm logs
6998 call_throws(ctx, ['rm', '-rf', ctx.log_dir + '/' + ctx.fsid])
b3b6e05e
TL
6999 call_throws(ctx, ['rm', '-rf', ctx.log_dir
7000 + '/*.wants/ceph-%s@*' % ctx.fsid])
f67539c2 7001
9f95a23c 7002 # rm logrotate config
f67539c2 7003 call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/ceph-%s' % ctx.fsid])
9f95a23c 7004
33c7a0ef
TL
7005 # if last cluster on host remove shared files
7006 if get_ceph_cluster_count(ctx) == 0:
7007 disable_systemd_service('ceph.target')
7008
7009 # rm shared ceph target files
7010 call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/multi-user.target.wants/ceph.target'])
7011 call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/ceph.target'])
7012
7013 # rm cephadm logrotate config
b3b6e05e
TL
7014 call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/cephadm'])
7015
33c7a0ef
TL
7016 if not ctx.keep_logs:
7017 # remove all cephadm logs
7018 for fname in glob(f'{ctx.log_dir}/cephadm.log*'):
7019 os.remove(fname)
7020
b3b6e05e 7021 # rm sysctl settings
33c7a0ef 7022 sysctl_dirs: List[Path] = [Path(ctx.sysctl_dir), Path('/usr/lib/sysctl.d')]
b3b6e05e 7023
33c7a0ef
TL
7024 for sysctl_dir in sysctl_dirs:
7025 for p in sysctl_dir.glob(f'90-ceph-{ctx.fsid}-*.conf'):
7026 p.unlink()
1911f103 7027
33c7a0ef
TL
7028 # cleanup remaining ceph directories
7029 ceph_dirs = [f'/run/ceph/{ctx.fsid}', f'/tmp/var/lib/ceph/{ctx.fsid}', f'/var/run/ceph/{ctx.fsid}']
7030 for dd in ceph_dirs:
7031 shutil.rmtree(dd, ignore_errors=True)
7032
7033 # clean up config, keyring, and pub key files
7034 files = [CEPH_DEFAULT_CONF, CEPH_DEFAULT_PUBKEY, CEPH_DEFAULT_KEYRING]
1911f103
TL
7035 if os.path.exists(files[0]):
7036 valid_fsid = False
7037 with open(files[0]) as f:
f67539c2 7038 if ctx.fsid in f.read():
1911f103
TL
7039 valid_fsid = True
7040 if valid_fsid:
33c7a0ef 7041 # rm configuration files in /etc/ceph
1911f103
TL
7042 for fname in files:
7043 if os.path.exists(fname):
7044 os.remove(fname)
7045
9f95a23c
TL
7046##################################
7047
f67539c2
TL
7048
7049def check_time_sync(ctx, enabler=None):
7050 # type: (CephadmContext, Optional[Packager]) -> bool
9f95a23c
TL
7051 units = [
7052 'chrony.service', # 18.04 (at least)
f67539c2 7053 'chronyd.service', # el / opensuse
9f95a23c 7054 'systemd-timesyncd.service',
f67539c2 7055 'ntpd.service', # el7 (at least)
9f95a23c 7056 'ntp.service', # 18.04 (at least)
f91f0fd5 7057 'ntpsec.service', # 20.04 (at least) / buster
522d829b 7058 'openntpd.service', # ubuntu / debian
9f95a23c 7059 ]
f67539c2 7060 if not check_units(ctx, units, enabler):
9f95a23c
TL
7061 logger.warning('No time sync service is running; checked for %s' % units)
7062 return False
7063 return True
7064
f6b5b4d7 7065
f67539c2 7066def command_check_host(ctx: CephadmContext) -> None:
1911f103 7067 errors = []
9f95a23c
TL
7068 commands = ['systemctl', 'lvcreate']
7069
f67539c2 7070 try:
a4b75251
TL
7071 engine = check_container_engine(ctx)
7072 logger.info(f'{engine} is present')
f67539c2
TL
7073 except Error as e:
7074 errors.append(str(e))
1911f103 7075
9f95a23c
TL
7076 for command in commands:
7077 try:
7078 find_program(command)
7079 logger.info('%s is present' % command)
7080 except ValueError:
1911f103 7081 errors.append('%s binary does not appear to be installed' % command)
9f95a23c
TL
7082
7083 # check for configured+running chronyd or ntp
f67539c2 7084 if not check_time_sync(ctx):
1911f103 7085 errors.append('No time synchronization is active')
9f95a23c 7086
f67539c2
TL
7087 if 'expect_hostname' in ctx and ctx.expect_hostname:
7088 if get_hostname().lower() != ctx.expect_hostname.lower():
1911f103 7089 errors.append('hostname "%s" does not match expected hostname "%s"' % (
f67539c2 7090 get_hostname(), ctx.expect_hostname))
20effc67
TL
7091 else:
7092 logger.info('Hostname "%s" matches what is expected.',
7093 ctx.expect_hostname)
9f95a23c 7094
1911f103 7095 if errors:
f67539c2 7096 raise Error('\nERROR: '.join(errors))
1911f103 7097
9f95a23c
TL
7098 logger.info('Host looks OK')
7099
7100##################################
7101
f6b5b4d7 7102
33c7a0ef
TL
7103def get_ssh_vars(ssh_user: str) -> Tuple[int, int, str]:
7104 try:
7105 s_pwd = pwd.getpwnam(ssh_user)
7106 except KeyError:
7107 raise Error('Cannot find uid/gid for ssh-user: %s' % (ssh_user))
7108
7109 ssh_uid = s_pwd.pw_uid
7110 ssh_gid = s_pwd.pw_gid
7111 ssh_dir = os.path.join(s_pwd.pw_dir, '.ssh')
7112 return ssh_uid, ssh_gid, ssh_dir
7113
7114
7115def authorize_ssh_key(ssh_pub_key: str, ssh_user: str) -> bool:
7116 """Authorize the public key for the provided ssh user"""
7117
7118 def key_in_file(path: str, key: str) -> bool:
7119 if not os.path.exists(path):
7120 return False
7121 with open(path) as f:
7122 lines = f.readlines()
7123 for line in lines:
7124 if line.strip() == key.strip():
7125 return True
7126 return False
7127
7128 logger.info(f'Adding key to {ssh_user}@localhost authorized_keys...')
7129 if ssh_pub_key is None or ssh_pub_key.isspace():
7130 raise Error('Trying to authorize an empty ssh key')
7131
7132 ssh_pub_key = ssh_pub_key.strip()
7133 ssh_uid, ssh_gid, ssh_dir = get_ssh_vars(ssh_user)
7134 if not os.path.exists(ssh_dir):
7135 makedirs(ssh_dir, ssh_uid, ssh_gid, 0o700)
7136
7137 auth_keys_file = '%s/authorized_keys' % ssh_dir
7138 if key_in_file(auth_keys_file, ssh_pub_key):
7139 logger.info(f'key already in {ssh_user}@localhost authorized_keys...')
7140 return False
7141
7142 add_newline = False
7143 if os.path.exists(auth_keys_file):
7144 with open(auth_keys_file, 'r') as f:
7145 f.seek(0, os.SEEK_END)
7146 if f.tell() > 0:
7147 f.seek(f.tell() - 1, os.SEEK_SET) # go to last char
7148 if f.read() != '\n':
7149 add_newline = True
7150
7151 with open(auth_keys_file, 'a') as f:
7152 os.fchown(f.fileno(), ssh_uid, ssh_gid) # just in case we created it
7153 os.fchmod(f.fileno(), 0o600) # just in case we created it
7154 if add_newline:
7155 f.write('\n')
7156 f.write(ssh_pub_key + '\n')
7157
7158 return True
7159
7160
7161def revoke_ssh_key(key: str, ssh_user: str) -> None:
7162 """Revoke the public key authorization for the ssh user"""
7163 ssh_uid, ssh_gid, ssh_dir = get_ssh_vars(ssh_user)
7164 auth_keys_file = '%s/authorized_keys' % ssh_dir
7165 deleted = False
7166 if os.path.exists(auth_keys_file):
7167 with open(auth_keys_file, 'r') as f:
7168 lines = f.readlines()
7169 _, filename = tempfile.mkstemp()
7170 with open(filename, 'w') as f:
7171 os.fchown(f.fileno(), ssh_uid, ssh_gid)
7172 os.fchmod(f.fileno(), 0o600) # secure access to the keys file
7173 for line in lines:
7174 if line.strip() == key.strip():
7175 deleted = True
7176 else:
7177 f.write(line)
7178
7179 if deleted:
7180 shutil.move(filename, auth_keys_file)
7181 else:
7182 logger.warning('Cannot find the ssh key to be deleted')
7183
7184
7185def check_ssh_connectivity(ctx: CephadmContext) -> None:
7186
7187 def cmd_is_available(cmd: str) -> bool:
7188 if shutil.which(cmd) is None:
7189 logger.warning(f'Command not found: {cmd}')
7190 return False
7191 return True
7192
7193 if not cmd_is_available('ssh') or not cmd_is_available('ssh-keygen'):
7194 logger.warning('Cannot check ssh connectivity. Skipping...')
7195 return
7196
7197 logger.info('Verifying ssh connectivity ...')
7198 if ctx.ssh_private_key and ctx.ssh_public_key:
7199 # let's use the keys provided by the user
7200 ssh_priv_key_path = pathify(ctx.ssh_private_key.name)
7201 ssh_pub_key_path = pathify(ctx.ssh_public_key.name)
7202 else:
7203 # no custom keys, let's generate some random keys just for this check
7204 ssh_priv_key_path = f'/tmp/ssh_key_{uuid.uuid1()}'
7205 ssh_pub_key_path = f'{ssh_priv_key_path}.pub'
7206 ssh_key_gen_cmd = ['ssh-keygen', '-q', '-t', 'rsa', '-N', '', '-C', '', '-f', ssh_priv_key_path]
7207 _, _, code = call(ctx, ssh_key_gen_cmd)
7208 if code != 0:
7209 logger.warning('Cannot generate keys to check ssh connectivity.')
7210 return
7211
7212 with open(ssh_pub_key_path, 'r') as f:
7213 key = f.read().strip()
7214 new_key = authorize_ssh_key(key, ctx.ssh_user)
7215 ssh_cfg_file_arg = ['-F', pathify(ctx.ssh_config.name)] if ctx.ssh_config else []
7216 _, _, code = call(ctx, ['ssh', '-o StrictHostKeyChecking=no',
7217 *ssh_cfg_file_arg, '-i', ssh_priv_key_path,
7218 '-o PasswordAuthentication=no',
7219 f'{ctx.ssh_user}@{get_hostname()}',
7220 'sudo echo'])
7221
7222 # we only remove the key if it's a new one. In case the user has provided
7223 # some already existing key then we don't alter authorized_keys file
7224 if new_key:
7225 revoke_ssh_key(key, ctx.ssh_user)
7226
7227 pub_key_msg = '- The public key file configured by --ssh-public-key is valid\n' if ctx.ssh_public_key else ''
7228 prv_key_msg = '- The private key file configured by --ssh-private-key is valid\n' if ctx.ssh_private_key else ''
7229 ssh_cfg_msg = '- The ssh configuration file configured by --ssh-config is valid\n' if ctx.ssh_config else ''
7230 err_msg = f"""
7231** Please verify your user's ssh configuration and make sure:
7232- User {ctx.ssh_user} must have passwordless sudo access
7233{pub_key_msg}{prv_key_msg}{ssh_cfg_msg}
7234"""
7235 if code != 0:
7236 raise Error(err_msg)
7237
7238
f67539c2 7239def command_prepare_host(ctx: CephadmContext) -> None:
9f95a23c
TL
7240 logger.info('Verifying podman|docker is present...')
7241 pkg = None
f67539c2
TL
7242 try:
7243 check_container_engine(ctx)
7244 except Error as e:
7245 logger.warning(str(e))
9f95a23c 7246 if not pkg:
f67539c2 7247 pkg = create_packager(ctx)
9f95a23c
TL
7248 pkg.install_podman()
7249
7250 logger.info('Verifying lvm2 is present...')
7251 if not find_executable('lvcreate'):
7252 if not pkg:
f67539c2 7253 pkg = create_packager(ctx)
9f95a23c
TL
7254 pkg.install(['lvm2'])
7255
7256 logger.info('Verifying time synchronization is in place...')
f67539c2 7257 if not check_time_sync(ctx):
9f95a23c 7258 if not pkg:
f67539c2 7259 pkg = create_packager(ctx)
9f95a23c
TL
7260 pkg.install(['chrony'])
7261 # check again, and this time try to enable
7262 # the service
f67539c2 7263 check_time_sync(ctx, enabler=pkg)
9f95a23c 7264
f67539c2
TL
7265 if 'expect_hostname' in ctx and ctx.expect_hostname and ctx.expect_hostname != get_hostname():
7266 logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), ctx.expect_hostname))
7267 call_throws(ctx, ['hostname', ctx.expect_hostname])
9f95a23c 7268 with open('/etc/hostname', 'w') as f:
f67539c2 7269 f.write(ctx.expect_hostname + '\n')
9f95a23c
TL
7270
7271 logger.info('Repeating the final host check...')
f67539c2 7272 command_check_host(ctx)
9f95a23c
TL
7273
7274##################################
7275
f6b5b4d7 7276
9f95a23c
TL
7277class CustomValidation(argparse.Action):
7278
522d829b 7279 def _check_name(self, values: str) -> None:
9f95a23c
TL
7280 try:
7281 (daemon_type, daemon_id) = values.split('.', 1)
7282 except ValueError:
7283 raise argparse.ArgumentError(self,
f67539c2 7284 'must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com')
9f95a23c
TL
7285
7286 daemons = get_supported_daemons()
7287 if daemon_type not in daemons:
7288 raise argparse.ArgumentError(self,
f67539c2
TL
7289 'name must declare the type of daemon e.g. '
7290 '{}'.format(', '.join(daemons)))
9f95a23c 7291
522d829b
TL
7292 def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace, values: Union[str, Sequence[Any], None],
7293 option_string: Optional[str] = None) -> None:
7294 assert isinstance(values, str)
f67539c2 7295 if self.dest == 'name':
9f95a23c
TL
7296 self._check_name(values)
7297 setattr(namespace, self.dest, values)
7298
7299##################################
7300
f6b5b4d7 7301
9f95a23c 7302def get_distro():
e306af50 7303 # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
9f95a23c
TL
7304 distro = None
7305 distro_version = None
7306 distro_codename = None
7307 with open('/etc/os-release', 'r') as f:
7308 for line in f.readlines():
7309 line = line.strip()
7310 if '=' not in line or line.startswith('#'):
7311 continue
7312 (var, val) = line.split('=', 1)
7313 if val[0] == '"' and val[-1] == '"':
7314 val = val[1:-1]
7315 if var == 'ID':
7316 distro = val.lower()
7317 elif var == 'VERSION_ID':
7318 distro_version = val.lower()
7319 elif var == 'VERSION_CODENAME':
7320 distro_codename = val.lower()
7321 return distro, distro_version, distro_codename
7322
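# Illustrative example (not part of upstream cephadm): given an
# /etc/os-release containing
#
#   ID=ubuntu
#   VERSION_ID="20.04"
#   VERSION_CODENAME=focal
#
# get_distro() returns ('ubuntu', '20.04', 'focal').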
f6b5b4d7 7323
9f95a23c 7324class Packager(object):
f67539c2 7325 def __init__(self, ctx: CephadmContext,
522d829b
TL
7326 stable: Optional[str] = None, version: Optional[str] = None,
7327 branch: Optional[str] = None, commit: Optional[str] = None):
9f95a23c
TL
7328 assert \
7329 (stable and not version and not branch and not commit) or \
7330 (not stable and version and not branch and not commit) or \
7331 (not stable and not version and branch) or \
7332 (not stable and not version and not branch and not commit)
f67539c2 7333 self.ctx = ctx
9f95a23c
TL
7334 self.stable = stable
7335 self.version = version
7336 self.branch = branch
7337 self.commit = commit
7338
20effc67
TL
7339 def validate(self) -> None:
7340 """Validate parameters before writing any state to disk."""
7341 pass
7342
522d829b
TL
7343 def add_repo(self) -> None:
7344 raise NotImplementedError
7345
7346 def rm_repo(self) -> None:
7347 raise NotImplementedError
7348
7349 def install(self, ls: List[str]) -> None:
9f95a23c
TL
7350 raise NotImplementedError
7351
522d829b 7352 def install_podman(self) -> None:
9f95a23c
TL
7353 raise NotImplementedError
7354
522d829b 7355 def query_shaman(self, distro: str, distro_version: Any, branch: Optional[str], commit: Optional[str]) -> str:
9f95a23c 7356 # query shaman
f91f0fd5 7357 logger.info('Fetching repo metadata from shaman and chacra...')
9f95a23c
TL
7358 shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
7359 distro=distro,
7360 distro_version=distro_version,
7361 branch=branch,
7362 sha1=commit or 'latest',
7363 arch=get_arch()
7364 )
7365 try:
7366 shaman_response = urlopen(shaman_url)
7367 except HTTPError as err:
f91f0fd5 7368 logger.error('repository not found in shaman (might not be available yet)')
9f95a23c 7369 raise Error('%s, failed to fetch %s' % (err, shaman_url))
f67539c2 7370 chacra_url = ''
9f95a23c
TL
7371 try:
7372 chacra_url = shaman_response.geturl()
7373 chacra_response = urlopen(chacra_url)
7374 except HTTPError as err:
f91f0fd5 7375 logger.error('repository not found in chacra (might not be available yet)')
9f95a23c
TL
7376 raise Error('%s, failed to fetch %s' % (err, chacra_url))
7377 return chacra_response.read().decode('utf-8')
7378
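# Illustrative example (not part of upstream cephadm; branch, codename and
# arch are hypothetical): for a dev build the shaman URL built above looks
# like
#
#   https://shaman.ceph.com/api/repos/ceph/main/latest/ubuntu/focal/repo/?arch=x86_64
#
# Shaman redirects to the matching chacra repo file, whose body is returned
# verbatim for the caller to write out as the package repo definition.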
522d829b 7379 def repo_gpgkey(self) -> Tuple[str, str]:
f67539c2
TL
7380 if self.ctx.gpg_url:
7381 return self.ctx.gpg_url, 'manual'  # keep the (url, name) shape that callers unpack
9f95a23c 7382 if self.stable or self.version:
b3b6e05e 7383 return 'https://download.ceph.com/keys/release.gpg', 'release'
9f95a23c 7384 else:
b3b6e05e 7385 return 'https://download.ceph.com/keys/autobuild.gpg', 'autobuild'
9f95a23c 7386
522d829b 7387 def enable_service(self, service: str) -> None:
9f95a23c
TL
7388 """
7389 Start and enable the service (typically using systemd).
7390 """
f67539c2 7391 call_throws(self.ctx, ['systemctl', 'enable', '--now', service])
9f95a23c
TL
7392
7393
7394class Apt(Packager):
7395 DISTRO_NAMES = {
7396 'ubuntu': 'ubuntu',
7397 'debian': 'debian',
7398 }
7399
f67539c2 7400 def __init__(self, ctx: CephadmContext,
522d829b
TL
7401 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
7402 distro: Optional[str], distro_version: Optional[str], distro_codename: Optional[str]) -> None:
f67539c2 7403 super(Apt, self).__init__(ctx, stable=stable, version=version,
9f95a23c 7404 branch=branch, commit=commit)
522d829b 7405 assert distro
f67539c2 7406 self.ctx = ctx
9f95a23c
TL
7407 self.distro = self.DISTRO_NAMES[distro]
7408 self.distro_codename = distro_codename
f91f0fd5 7409 self.distro_version = distro_version
9f95a23c 7410
522d829b 7411 def repo_path(self) -> str:
9f95a23c
TL
7412 return '/etc/apt/sources.list.d/ceph.list'
7413
522d829b 7414 def add_repo(self) -> None:
f67539c2 7415
9f95a23c 7416 url, name = self.repo_gpgkey()
f91f0fd5 7417 logger.info('Installing repo GPG key from %s...' % url)
9f95a23c
TL
7418 try:
7419 response = urlopen(url)
7420 except HTTPError as err:
f91f0fd5 7421 logger.error('failed to fetch GPG repo key from %s: %s' % (
9f95a23c
TL
7422 url, err))
7423 raise Error('failed to fetch GPG key')
b3b6e05e
TL
7424 key = response.read()
7425 with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'wb') as f:
9f95a23c
TL
7426 f.write(key)
7427
7428 if self.version:
7429 content = 'deb %s/debian-%s/ %s main\n' % (
f67539c2 7430 self.ctx.repo_url, self.version, self.distro_codename)
9f95a23c
TL
7431 elif self.stable:
7432 content = 'deb %s/debian-%s/ %s main\n' % (
f67539c2 7433 self.ctx.repo_url, self.stable, self.distro_codename)
9f95a23c
TL
7434 else:
7435 content = self.query_shaman(self.distro, self.distro_codename, self.branch,
7436 self.commit)
7437
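# Illustrative example (not part of upstream cephadm; assumes the default
# repo url is https://download.ceph.com): a stable 'quincy' repo on focal
# would be written as
#
#   deb https://download.ceph.com/debian-quincy/ focal main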
f91f0fd5 7438 logger.info('Installing repo file at %s...' % self.repo_path())
9f95a23c
TL
7439 with open(self.repo_path(), 'w') as f:
7440 f.write(content)
7441
b3b6e05e
TL
7442 self.update()
7443
522d829b 7444 def rm_repo(self) -> None:
9f95a23c
TL
7445 for name in ['autobuild', 'release']:
7446 p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
7447 if os.path.exists(p):
f91f0fd5 7448 logger.info('Removing repo GPG key %s...' % p)
9f95a23c
TL
7449 os.unlink(p)
7450 if os.path.exists(self.repo_path()):
f91f0fd5 7451 logger.info('Removing repo at %s...' % self.repo_path())
9f95a23c
TL
7452 os.unlink(self.repo_path())
7453
f91f0fd5
TL
7454 if self.distro == 'ubuntu':
7455 self.rm_kubic_repo()
7456
522d829b 7457 def install(self, ls: List[str]) -> None:
f91f0fd5 7458 logger.info('Installing packages %s...' % ls)
f67539c2 7459 call_throws(self.ctx, ['apt-get', 'install', '-y'] + ls)
9f95a23c 7460
522d829b 7461 def update(self) -> None:
b3b6e05e
TL
7462 logger.info('Updating package list...')
7463 call_throws(self.ctx, ['apt-get', 'update'])
7464
522d829b 7465 def install_podman(self) -> None:
9f95a23c 7466 if self.distro == 'ubuntu':
f91f0fd5
TL
7467 logger.info('Setting up repo for podman...')
7468 self.add_kubic_repo()
b3b6e05e 7469 self.update()
9f95a23c 7470
f91f0fd5 7471 logger.info('Attempting podman install...')
9f95a23c
TL
7472 try:
7473 self.install(['podman'])
f67539c2 7474 except Error:
f91f0fd5 7475 logger.info('Podman did not work. Falling back to docker...')
9f95a23c
TL
7476 self.install(['docker.io'])
7477
522d829b 7478 def kubic_repo_url(self) -> str:
f91f0fd5
TL
7479 return 'https://download.opensuse.org/repositories/devel:/kubic:/' \
7480 'libcontainers:/stable/xUbuntu_%s/' % self.distro_version
7481
522d829b 7482 def kubic_repo_path(self) -> str:
f91f0fd5
TL
7483 return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list'
7484
522d829b 7485 def kubric_repo_gpgkey_url(self) -> str:
f91f0fd5
TL
7486 return '%s/Release.key' % self.kubic_repo_url()
7487
522d829b 7488 def kubric_repo_gpgkey_path(self) -> str:
f91f0fd5
TL
7489 return '/etc/apt/trusted.gpg.d/kubic.release.gpg'
7490
522d829b 7491 def add_kubic_repo(self) -> None:
f91f0fd5
TL
7492 url = self.kubric_repo_gpgkey_url()
7493 logger.info('Installing repo GPG key from %s...' % url)
7494 try:
7495 response = urlopen(url)
7496 except HTTPError as err:
7497 logger.error('failed to fetch GPG repo key from %s: %s' % (
7498 url, err))
7499 raise Error('failed to fetch GPG key')
7500 key = response.read().decode('utf-8')
7501 tmp_key = write_tmp(key, 0, 0)
7502 keyring = self.kubric_repo_gpgkey_path()
f67539c2 7503 call_throws(self.ctx, ['apt-key', '--keyring', keyring, 'add', tmp_key.name])
f91f0fd5
TL
7504
7505 logger.info('Installing repo file at %s...' % self.kubic_repo_path())
7506 content = 'deb %s /\n' % self.kubic_repo_url()
7507 with open(self.kubic_repo_path(), 'w') as f:
7508 f.write(content)
7509
522d829b 7510 def rm_kubic_repo(self) -> None:
f91f0fd5
TL
7511 keyring = self.kubric_repo_gpgkey_path()
7512 if os.path.exists(keyring):
7513 logger.info('Removing repo GPG key %s...' % keyring)
7514 os.unlink(keyring)
7515
7516 p = self.kubic_repo_path()
7517 if os.path.exists(p):
7518 logger.info('Removing repo at %s...' % p)
7519 os.unlink(p)
7520
f6b5b4d7 7521
9f95a23c
TL
7522class YumDnf(Packager):
7523 DISTRO_NAMES = {
7524 'centos': ('centos', 'el'),
7525 'rhel': ('centos', 'el'),
7526 'scientific': ('centos', 'el'),
b3b6e05e 7527 'rocky': ('centos', 'el'),
522d829b 7528 'almalinux': ('centos', 'el'),
9f95a23c 7529 'fedora': ('fedora', 'fc'),
a4b75251 7530 'mariner': ('mariner', 'cm'),
9f95a23c
TL
7531 }
7532
f67539c2 7533 def __init__(self, ctx: CephadmContext,
522d829b
TL
7534 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
7535 distro: Optional[str], distro_version: Optional[str]) -> None:
f67539c2 7536 super(YumDnf, self).__init__(ctx, stable=stable, version=version,
9f95a23c 7537 branch=branch, commit=commit)
522d829b
TL
7538 assert distro
7539 assert distro_version
f67539c2 7540 self.ctx = ctx
9f95a23c
TL
7541 self.major = int(distro_version.split('.')[0])
7542 self.distro_normalized = self.DISTRO_NAMES[distro][0]
7543 self.distro_code = self.DISTRO_NAMES[distro][1] + str(self.major)
7544 if (self.distro_code == 'fc' and self.major >= 30) or \
7545 (self.distro_code == 'el' and self.major >= 8):
7546 self.tool = 'dnf'
a4b75251
TL
7547 elif (self.distro_code == 'cm'):
7548 self.tool = 'tdnf'
9f95a23c
TL
7549 else:
7550 self.tool = 'yum'
7551
522d829b 7552 def custom_repo(self, **kw: Any) -> str:
9f95a23c
TL
7553 """
7554 Repo files need special care in that a whole line should not be present
7555 if there is no value for it. Because we were using `format()`, we could
7556 not conditionally add a line to the repo file, so the end result would
7557 contain a key with a missing value (say, if we were passing `None`).
7558
7559 For example, it could look like::
7560
7561 [ceph repo]
7562 name= ceph repo
7563 proxy=
7564 gpgcheck=
7565
7566 Which breaks. This function allows us to conditionally add lines,
7567 preserving their order, and to be more careful.
7568
7569 Previously, and for historical purposes, this is how the template used
7570 to look::
7571
7572 custom_repo =
7573 [{repo_name}]
7574 name={name}
7575 baseurl={baseurl}
7576 enabled={enabled}
7577 gpgcheck={gpgcheck}
7578 type={_type}
7579 gpgkey={gpgkey}
7580 proxy={proxy}
7581
7582 """
7583 lines = []
7584
7585 # by using tuples (vs a dict) we preserve the order of what we want to
7586 # return, like starting with a [repo name]
7587 tmpl = (
7588 ('reponame', '[%s]'),
7589 ('name', 'name=%s'),
7590 ('baseurl', 'baseurl=%s'),
7591 ('enabled', 'enabled=%s'),
7592 ('gpgcheck', 'gpgcheck=%s'),
7593 ('_type', 'type=%s'),
7594 ('gpgkey', 'gpgkey=%s'),
7595 ('proxy', 'proxy=%s'),
7596 ('priority', 'priority=%s'),
7597 )
7598
7599 for line in tmpl:
7600 tmpl_key, tmpl_value = line # key values from tmpl
7601
7602 # ensure that there is an actual value (not None nor empty string)
7603 if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
7604 lines.append(tmpl_value % kw.get(tmpl_key))
7605
7606 return '\n'.join(lines)
7607
522d829b 7608 def repo_path(self) -> str:
9f95a23c
TL
7609 return '/etc/yum.repos.d/ceph.repo'
7610
522d829b 7611 def repo_baseurl(self) -> str:
9f95a23c
TL
7612 assert self.stable or self.version
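# e.g. with the default --repo-url this evaluates to something like
# https://download.ceph.com/rpm-quincy/el8 (named release) or
# https://download.ceph.com/rpm-17.2.0/el8 (explicit version); 'el8' is illustrative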
7613 if self.version:
f67539c2 7614 return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.version,
9f95a23c
TL
7615 self.distro_code)
7616 else:
f67539c2 7617 return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.stable,
9f95a23c
TL
7618 self.distro_code)
7619
20effc67 7620 def validate(self) -> None:
b3b6e05e
TL
7621 if self.distro_code.startswith('fc'):
7622 raise Error('Ceph team does not build Fedora specific packages and therefore cannot add repos for this distro')
7623 if self.distro_code == 'el7':
7624 if self.stable and self.stable >= 'pacific':
7625 raise Error('Ceph does not support pacific or later for this version of this linux distro and therefore cannot add a repo for it')
7626 if self.version and self.version.split('.')[0] >= '16':
7627 raise Error('Ceph does not support 16.y.z or later for this version of this linux distro and therefore cannot add a repo for it')
20effc67
TL
7628
7629 if self.stable or self.version:
7630 # we know that yum & dnf require there to be a
7631 # $base_url/$arch/repodata/repomd.xml so we can test if this URL
7632 # is gettable in order to validate the inputs
7633 test_url = self.repo_baseurl() + '/noarch/repodata/repomd.xml'
7634 try:
7635 urlopen(test_url)
7636 except HTTPError as err:
7637 logger.error('unable to fetch repo metadata: %r', err)
7638 raise Error('failed to fetch repository metadata. please check'
7639 ' the provided parameters are correct and try again')
7640
7641 def add_repo(self) -> None:
9f95a23c
TL
7642 if self.stable or self.version:
7643 content = ''
7644 for n, t in {
7645 'Ceph': '$basearch',
7646 'Ceph-noarch': 'noarch',
7647 'Ceph-source': 'SRPMS'}.items():
7648 content += '[%s]\n' % (n)
7649 content += self.custom_repo(
7650 name='Ceph %s' % t,
7651 baseurl=self.repo_baseurl() + '/' + t,
7652 enabled=1,
7653 gpgcheck=1,
7654 gpgkey=self.repo_gpgkey()[0],
7655 )
7656 content += '\n\n'
7657 else:
7658 content = self.query_shaman(self.distro_normalized, self.major,
7659 self.branch,
7660 self.commit)
7661
f91f0fd5 7662 logger.info('Writing repo to %s...' % self.repo_path())
9f95a23c
TL
7663 with open(self.repo_path(), 'w') as f:
7664 f.write(content)
7665
7666 if self.distro_code.startswith('el'):
7667 logger.info('Enabling EPEL...')
f67539c2 7668 call_throws(self.ctx, [self.tool, 'install', '-y', 'epel-release'])
9f95a23c 7669
522d829b 7670 def rm_repo(self) -> None:
9f95a23c
TL
7671 if os.path.exists(self.repo_path()):
7672 os.unlink(self.repo_path())
9f95a23c 7673
522d829b 7674 def install(self, ls: List[str]) -> None:
9f95a23c 7675 logger.info('Installing packages %s...' % ls)
f67539c2 7676 call_throws(self.ctx, [self.tool, 'install', '-y'] + ls)
9f95a23c 7677
522d829b 7678 def install_podman(self) -> None:
9f95a23c
TL
7679 self.install(['podman'])
7680
7681
7682class Zypper(Packager):
7683 DISTRO_NAMES = [
7684 'sles',
7685 'opensuse-tumbleweed',
7686 'opensuse-leap'
7687 ]
7688
f67539c2 7689 def __init__(self, ctx: CephadmContext,
522d829b
TL
7690 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
7691 distro: Optional[str], distro_version: Optional[str]) -> None:
f67539c2 7692 super(Zypper, self).__init__(ctx, stable=stable, version=version,
9f95a23c 7693 branch=branch, commit=commit)
522d829b 7694 assert distro is not None
f67539c2 7695 self.ctx = ctx
9f95a23c
TL
7696 self.tool = 'zypper'
7697 self.distro = 'opensuse'
7698 self.distro_version = '15.1'
7699 if 'tumbleweed' not in distro and distro_version is not None:
7700 self.distro_version = distro_version
7701
522d829b 7702 def custom_repo(self, **kw: Any) -> str:
9f95a23c
TL
7703 """
7704 See YumDnf for format explanation.
7705 """
7706 lines = []
7707
7708 # by using tuples (vs a dict) we preserve the order of what we want to
7709 # return, like starting with a [repo name]
7710 tmpl = (
7711 ('reponame', '[%s]'),
7712 ('name', 'name=%s'),
7713 ('baseurl', 'baseurl=%s'),
7714 ('enabled', 'enabled=%s'),
7715 ('gpgcheck', 'gpgcheck=%s'),
7716 ('_type', 'type=%s'),
7717 ('gpgkey', 'gpgkey=%s'),
7718 ('proxy', 'proxy=%s'),
7719 ('priority', 'priority=%s'),
7720 )
7721
7722 for line in tmpl:
7723 tmpl_key, tmpl_value = line # key values from tmpl
7724
7725 # ensure that there is an actual value (not None nor empty string)
7726 if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
7727 lines.append(tmpl_value % kw.get(tmpl_key))
7728
7729 return '\n'.join(lines)
7730
522d829b 7731 def repo_path(self) -> str:
9f95a23c
TL
7732 return '/etc/zypp/repos.d/ceph.repo'
7733
522d829b 7734 def repo_baseurl(self) -> str:
9f95a23c
TL
7735 assert self.stable or self.version
7736 if self.version:
f67539c2
TL
7737 return '%s/rpm-%s/%s' % (self.ctx.repo_url,
7738 self.version, self.distro)
9f95a23c 7739 else:
f67539c2
TL
7740 return '%s/rpm-%s/%s' % (self.ctx.repo_url,
7741 self.stable, self.distro)
9f95a23c 7742
522d829b 7743 def add_repo(self) -> None:
9f95a23c
TL
7744 if self.stable or self.version:
7745 content = ''
7746 for n, t in {
7747 'Ceph': '$basearch',
7748 'Ceph-noarch': 'noarch',
7749 'Ceph-source': 'SRPMS'}.items():
7750 content += '[%s]\n' % (n)
7751 content += self.custom_repo(
7752 name='Ceph %s' % t,
7753 baseurl=self.repo_baseurl() + '/' + t,
7754 enabled=1,
7755 gpgcheck=1,
7756 gpgkey=self.repo_gpgkey()[0],
7757 )
7758 content += '\n\n'
7759 else:
7760 content = self.query_shaman(self.distro, self.distro_version,
7761 self.branch,
7762 self.commit)
7763
f91f0fd5 7764 logger.info('Writing repo to %s...' % self.repo_path())
9f95a23c
TL
7765 with open(self.repo_path(), 'w') as f:
7766 f.write(content)
7767
522d829b 7768 def rm_repo(self) -> None:
9f95a23c
TL
7769 if os.path.exists(self.repo_path()):
7770 os.unlink(self.repo_path())
7771
522d829b 7772 def install(self, ls: List[str]) -> None:
9f95a23c 7773 logger.info('Installing packages %s...' % ls)
f67539c2 7774 call_throws(self.ctx, [self.tool, 'in', '-y'] + ls)
9f95a23c 7775
522d829b 7776 def install_podman(self) -> None:
9f95a23c
TL
7777 self.install(['podman'])
7778
7779
f67539c2 7780def create_packager(ctx: CephadmContext,
522d829b
TL
7781 stable: Optional[str] = None, version: Optional[str] = None,
7782 branch: Optional[str] = None, commit: Optional[str] = None) -> Packager:
9f95a23c
TL
7783 distro, distro_version, distro_codename = get_distro()
7784 if distro in YumDnf.DISTRO_NAMES:
f67539c2 7785 return YumDnf(ctx, stable=stable, version=version,
9f95a23c 7786 branch=branch, commit=commit,
f67539c2 7787 distro=distro, distro_version=distro_version)
9f95a23c 7788 elif distro in Apt.DISTRO_NAMES:
f67539c2 7789 return Apt(ctx, stable=stable, version=version,
9f95a23c
TL
7790 branch=branch, commit=commit,
7791 distro=distro, distro_version=distro_version,
7792 distro_codename=distro_codename)
7793 elif distro in Zypper.DISTRO_NAMES:
f67539c2 7794 return Zypper(ctx, stable=stable, version=version,
9f95a23c
TL
7795 branch=branch, commit=commit,
7796 distro=distro, distro_version=distro_version)
7797 raise Error('Distro %s version %s not supported' % (distro, distro_version))
7798
7799
522d829b 7800def command_add_repo(ctx: CephadmContext) -> None:
f67539c2 7801 if ctx.version and ctx.release:
9f95a23c 7802 raise Error('you can specify either --release or --version but not both')
f67539c2 7803 if not ctx.version and not ctx.release and not ctx.dev and not ctx.dev_commit:
1911f103 7804 raise Error('please supply a --release, --version, --dev or --dev-commit argument')
f67539c2 7805 if ctx.version:
9f95a23c 7806 try:
f67539c2
TL
7807 (x, y, z) = ctx.version.split('.')
7808 except Exception:
9f95a23c 7809 raise Error('version must be in the form x.y.z (e.g., 15.2.0)')
b3b6e05e
TL
7810 if ctx.release:
7811 # Pacific =/= pacific in this case; normalize to lowercase to avoid confusion
7812 ctx.release = ctx.release.lower()
9f95a23c 7813
f67539c2
TL
7814 pkg = create_packager(ctx, stable=ctx.release,
7815 version=ctx.version,
7816 branch=ctx.dev,
7817 commit=ctx.dev_commit)
20effc67 7818 pkg.validate()
9f95a23c 7819 pkg.add_repo()
b3b6e05e 7820 logger.info('Completed adding repo.')
9f95a23c 7821
f6b5b4d7 7822
522d829b 7823def command_rm_repo(ctx: CephadmContext) -> None:
f67539c2 7824 pkg = create_packager(ctx)
9f95a23c
TL
7825 pkg.rm_repo()
7826
f6b5b4d7 7827
522d829b 7828def command_install(ctx: CephadmContext) -> None:
f67539c2
TL
7829 pkg = create_packager(ctx)
7830 pkg.install(ctx.packages)
9f95a23c
TL
7831
7832##################################
7833
f67539c2 7834
f91f0fd5
TL
7835def get_ipv4_address(ifname):
7836 # type: (str) -> str
522d829b 7837 def _extract(sock: socket.socket, offset: int) -> str:
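# the ioctl fills a struct ifreq; bytes 20:24 are the IPv4 address inside the
# embedded sockaddr_in (16-byte ifr_name, then 2-byte family and 2-byte port)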
f91f0fd5 7838 return socket.inet_ntop(
f67539c2
TL
7839 socket.AF_INET,
7840 fcntl.ioctl(
7841 sock.fileno(),
7842 offset,
7843 struct.pack('256s', bytes(ifname[:15], 'utf-8'))
7844 )[20:24])
f91f0fd5
TL
7845
7846 s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
7847 try:
7848 addr = _extract(s, 35093) # 0x8915 = SIOCGIFADDR
7849 dq_mask = _extract(s, 35099) # 0x891b = SIOCGIFNETMASK
7850 except OSError:
7851 # interface does not have an ipv4 address
7852 return ''
7853
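# convert the dotted-quad netmask to a CIDR prefix length by counting set bits,
# e.g. 255.255.255.0 -> 24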
7854 dec_mask = sum([bin(int(i)).count('1')
7855 for i in dq_mask.split('.')])
7856 return '{}/{}'.format(addr, dec_mask)
7857
7858
7859def get_ipv6_address(ifname):
7860 # type: (str) -> str
7861 if not os.path.exists('/proc/net/if_inet6'):
7862 return ''
7863
7864 raw = read_file(['/proc/net/if_inet6'])
7865 data = raw.splitlines()
7866 # based on docs @ https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/ch11s04.html
7867 # field 0 is the ipv6 address, field 2 is the prefix length
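# e.g. 'fe800000000000000042acfffe110002 02 40 20 80 eth0'
# (address, ifindex, prefix length, scope, flags, device name)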
7868 for iface_setting in data:
7869 field = iface_setting.split()
7870 if field[-1] == ifname:
7871 ipv6_raw = field[0]
f67539c2 7872 ipv6_fmtd = ':'.join([ipv6_raw[_p:_p + 4] for _p in range(0, len(field[0]), 4)])
f91f0fd5
TL
7873 # apply naming rules using ipaddress module
7874 ipv6 = ipaddress.ip_address(ipv6_fmtd)
f67539c2 7875 return '{}/{}'.format(str(ipv6), int('0x{}'.format(field[2]), 16))
f91f0fd5
TL
7876 return ''
7877
7878
7879def bytes_to_human(num, mode='decimal'):
7880 # type: (float, str) -> str
7881 """Convert a bytes value into it's human-readable form.
7882
7883 :param num: number, in bytes, to convert
7884 :param mode: Either decimal (default) or binary to determine divisor
7885 :returns: string representing the bytes value in a more readable format
7886 """
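# e.g. bytes_to_human(1234567) -> '1.2MB'; bytes_to_human(1048576, mode='binary') -> '1.0MiB'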
7887 unit_list = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
7888 divisor = 1000.0
f67539c2 7889 yotta = 'YB'
f91f0fd5
TL
7890
7891 if mode == 'binary':
7892 unit_list = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
7893 divisor = 1024.0
f67539c2 7894 yotta = 'YiB'
f91f0fd5
TL
7895
7896 for unit in unit_list:
7897 if abs(num) < divisor:
f67539c2 7898 return '%3.1f%s' % (num, unit)
f91f0fd5 7899 num /= divisor
f67539c2 7900 return '%.1f%s' % (num, yotta)
f91f0fd5
TL
7901
7902
7903def read_file(path_list, file_name=''):
7904 # type: (List[str], str) -> str
7905 """Returns the content of the first file found within the `path_list`
7906
7907 :param path_list: list of file paths to search
7908 :param file_name: optional file_name to be applied to a file path
7909 :returns: content of the file or 'Unknown'
7910 """
7911 for path in path_list:
7912 if file_name:
7913 file_path = os.path.join(path, file_name)
7914 else:
7915 file_path = path
7916 if os.path.exists(file_path):
7917 with open(file_path, 'r') as f:
7918 try:
7919 content = f.read().strip()
7920 except OSError:
7921 # sysfs may populate the file, but for devices like
7922 # virtio reads can fail
f67539c2 7923 return 'Unknown'
f91f0fd5
TL
7924 else:
7925 return content
f67539c2 7926 return 'Unknown'
f91f0fd5
TL
7927
7928##################################
f67539c2
TL
7929
7930
f91f0fd5
TL
7931class HostFacts():
7932 _dmi_path_list = ['/sys/class/dmi/id']
7933 _nic_path_list = ['/sys/class/net']
f91f0fd5
TL
7934 _apparmor_path_list = ['/etc/apparmor']
7935 _disk_vendor_workarounds = {
f67539c2 7936 '0x1af4': 'Virtio Block Device'
f91f0fd5 7937 }
a4b75251 7938 _excluded_block_devices = ('sr', 'zram', 'dm-')
f91f0fd5 7939
f67539c2
TL
7940 def __init__(self, ctx: CephadmContext):
7941 self.ctx: CephadmContext = ctx
7942 self.cpu_model: str = 'Unknown'
7943 self.cpu_count: int = 0
7944 self.cpu_cores: int = 0
7945 self.cpu_threads: int = 0
7946 self.interfaces: Dict[str, Any] = {}
f91f0fd5 7947
f67539c2 7948 self._meminfo: List[str] = read_file(['/proc/meminfo']).splitlines()
f91f0fd5
TL
7949 self._get_cpuinfo()
7950 self._process_nics()
f67539c2
TL
7951 self.arch: str = platform.processor()
7952 self.kernel: str = platform.release()
f91f0fd5
TL
7953
7954 def _get_cpuinfo(self):
7955 # type: () -> None
7956 """Determine cpu information via /proc/cpuinfo"""
7957 raw = read_file(['/proc/cpuinfo'])
7958 output = raw.splitlines()
7959 cpu_set = set()
7960
7961 for line in output:
f67539c2
TL
7962 field = [f.strip() for f in line.split(':')]
7963 if 'model name' in line:
f91f0fd5 7964 self.cpu_model = field[1]
f67539c2 7965 if 'physical id' in line:
f91f0fd5 7966 cpu_set.add(field[1])
f67539c2 7967 if 'siblings' in line:
f91f0fd5 7968 self.cpu_threads = int(field[1].strip())
f67539c2 7969 if 'cpu cores' in line:
f91f0fd5
TL
7970 self.cpu_cores = int(field[1].strip())
7971 pass
7972 self.cpu_count = len(cpu_set)
7973
7974 def _get_block_devs(self):
7975 # type: () -> List[str]
7976 """Determine the list of block devices by looking at /sys/block"""
7977 return [dev for dev in os.listdir('/sys/block')
a4b75251 7978 if not dev.startswith(HostFacts._excluded_block_devices)]
f91f0fd5
TL
7979
7980 def _get_devs_by_type(self, rota='0'):
7981 # type: (str) -> List[str]
7982 """Filter block devices by a given rotational attribute (0=flash, 1=spinner)"""
7983 devs = list()
7984 for blk_dev in self._get_block_devs():
7985 rot_path = '/sys/block/{}/queue/rotational'.format(blk_dev)
7986 rot_value = read_file([rot_path])
7987 if rot_value == rota:
7988 devs.append(blk_dev)
7989 return devs
7990
7991 @property
7992 def operating_system(self):
7993 # type: () -> str
7994 """Determine OS version"""
7995 raw_info = read_file(['/etc/os-release'])
7996 os_release = raw_info.splitlines()
7997 rel_str = 'Unknown'
7998 rel_dict = dict()
7999
8000 for line in os_release:
f67539c2 8001 if '=' in line:
f91f0fd5
TL
8002 var_name, var_value = line.split('=')
8003 rel_dict[var_name] = var_value.strip('"')
8004
8005 # Would normally use PRETTY_NAME, but NAME and VERSION are more
8006 # consistent
f67539c2
TL
8007 if all(_v in rel_dict for _v in ['NAME', 'VERSION']):
8008 rel_str = '{} {}'.format(rel_dict['NAME'], rel_dict['VERSION'])
f91f0fd5
TL
8009 return rel_str
8010
8011 @property
8012 def hostname(self):
8013 # type: () -> str
8014 """Return the hostname"""
8015 return platform.node()
8016
8017 @property
8018 def subscribed(self):
8019 # type: () -> str
8020 """Highlevel check to see if the host is subscribed to receive updates/support"""
8021 def _red_hat():
8022 # type: () -> str
8023 # RHEL 7 and RHEL 8
8024 entitlements_dir = '/etc/pki/entitlement'
8025 if os.path.exists(entitlements_dir):
8026 pems = glob('{}/*.pem'.format(entitlements_dir))
8027 if len(pems) >= 2:
f67539c2 8028 return 'Yes'
f91f0fd5 8029
f67539c2 8030 return 'No'
f91f0fd5
TL
8031
8032 os_name = self.operating_system
f67539c2 8033 if os_name.upper().startswith('RED HAT'):
f91f0fd5
TL
8034 return _red_hat()
8035
f67539c2 8036 return 'Unknown'
f91f0fd5
TL
8037
8038 @property
8039 def hdd_count(self):
8040 # type: () -> int
8041 """Return a count of HDDs (spinners)"""
8042 return len(self._get_devs_by_type(rota='1'))
8043
8044 def _get_capacity(self, dev):
8045 # type: (str) -> int
8046 """Determine the size of a given device"""
8047 size_path = os.path.join('/sys/block', dev, 'size')
8048 size_blocks = int(read_file([size_path]))
8049 blk_path = os.path.join('/sys/block', dev, 'queue', 'logical_block_size')
8050 blk_count = int(read_file([blk_path]))
8051 return size_blocks * blk_count
8052
8053 def _get_capacity_by_type(self, rota='0'):
8054 # type: (str) -> int
8055 """Return the total capacity of a category of device (flash or hdd)"""
8056 devs = self._get_devs_by_type(rota=rota)
8057 capacity = 0
8058 for dev in devs:
8059 capacity += self._get_capacity(dev)
8060 return capacity
8061
8062 def _dev_list(self, dev_list):
8063 # type: (List[str]) -> List[Dict[str, object]]
8064 """Return a 'pretty' name list for each device in the `dev_list`"""
8065 disk_list = list()
8066
8067 for dev in dev_list:
8068 disk_model = read_file(['/sys/block/{}/device/model'.format(dev)]).strip()
8069 disk_rev = read_file(['/sys/block/{}/device/rev'.format(dev)]).strip()
8070 disk_wwid = read_file(['/sys/block/{}/device/wwid'.format(dev)]).strip()
8071 vendor = read_file(['/sys/block/{}/device/vendor'.format(dev)]).strip()
8072 disk_vendor = HostFacts._disk_vendor_workarounds.get(vendor, vendor)
8073 disk_size_bytes = self._get_capacity(dev)
8074 disk_list.append({
f67539c2
TL
8075 'description': '{} {} ({})'.format(disk_vendor, disk_model, bytes_to_human(disk_size_bytes)),
8076 'vendor': disk_vendor,
8077 'model': disk_model,
8078 'rev': disk_rev,
8079 'wwid': disk_wwid,
8080 'dev_name': dev,
8081 'disk_size_bytes': disk_size_bytes,
8082 })
f91f0fd5
TL
8083 return disk_list
8084
8085 @property
8086 def hdd_list(self):
8087 # type: () -> List[Dict[str, object]]
8088 """Return a list of devices that are HDDs (spinners)"""
8089 devs = self._get_devs_by_type(rota='1')
8090 return self._dev_list(devs)
8091
8092 @property
8093 def flash_list(self):
8094 # type: () -> List[Dict[str, object]]
8095 """Return a list of devices that are flash based (SSD, NVMe)"""
8096 devs = self._get_devs_by_type(rota='0')
8097 return self._dev_list(devs)
8098
8099 @property
8100 def hdd_capacity_bytes(self):
8101 # type: () -> int
8102 """Return the total capacity for all HDD devices (bytes)"""
8103 return self._get_capacity_by_type(rota='1')
8104
8105 @property
8106 def hdd_capacity(self):
8107 # type: () -> str
8108 """Return the total capacity for all HDD devices (human readable format)"""
8109 return bytes_to_human(self.hdd_capacity_bytes)
8110
8111 @property
8112 def cpu_load(self):
8113 # type: () -> Dict[str, float]
8114 """Return the cpu load average data for the host"""
8115 raw = read_file(['/proc/loadavg']).strip()
8116 data = raw.split()
8117 return {
f67539c2
TL
8118 '1min': float(data[0]),
8119 '5min': float(data[1]),
8120 '15min': float(data[2]),
f91f0fd5
TL
8121 }
8122
8123 @property
8124 def flash_count(self):
8125 # type: () -> int
8126 """Return the number of flash devices in the system (SSD, NVMe)"""
8127 return len(self._get_devs_by_type(rota='0'))
8128
8129 @property
8130 def flash_capacity_bytes(self):
8131 # type: () -> int
8132 """Return the total capacity for all flash devices (bytes)"""
8133 return self._get_capacity_by_type(rota='0')
8134
8135 @property
8136 def flash_capacity(self):
8137 # type: () -> str
8138 """Return the total capacity for all Flash devices (human readable format)"""
8139 return bytes_to_human(self.flash_capacity_bytes)
8140
8141 def _process_nics(self):
8142 # type: () -> None
8143 """Look at the NIC devices and extract network related metadata"""
8144 # from https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
8145 hw_lookup = {
f67539c2
TL
8146 '1': 'ethernet',
8147 '32': 'infiniband',
8148 '772': 'loopback',
f91f0fd5
TL
8149 }
8150
8151 for nic_path in HostFacts._nic_path_list:
8152 if not os.path.exists(nic_path):
8153 continue
8154 for iface in os.listdir(nic_path):
8155
33c7a0ef
TL
8156 if os.path.exists(os.path.join(nic_path, iface, 'bridge')):
8157 nic_type = 'bridge'
8158 elif os.path.exists(os.path.join(nic_path, iface, 'bonding')):
8159 nic_type = 'bonding'
8160 else:
8161 nic_type = hw_lookup.get(read_file([os.path.join(nic_path, iface, 'type')]), 'Unknown')
8162
8163 if nic_type == 'loopback': # skip loopback devices
8164 continue
8165
f67539c2
TL
8166 lower_devs_list = [os.path.basename(link.replace('lower_', '')) for link in glob(os.path.join(nic_path, iface, 'lower_*'))]
8167 upper_devs_list = [os.path.basename(link.replace('upper_', '')) for link in glob(os.path.join(nic_path, iface, 'upper_*'))]
f91f0fd5
TL
8168
8169 try:
8170 mtu = int(read_file([os.path.join(nic_path, iface, 'mtu')]))
8171 except ValueError:
8172 mtu = 0
8173
8174 operstate = read_file([os.path.join(nic_path, iface, 'operstate')])
8175 try:
8176 speed = int(read_file([os.path.join(nic_path, iface, 'speed')]))
8177 except (OSError, ValueError):
8178 # OSError : device doesn't support the ethtool get_link_ksettings
8179 # ValueError : raised when the read fails, and returns Unknown
8180 #
8181 # Either way, we show a -1 when speed isn't available
8182 speed = -1
8183
f91f0fd5
TL
8184 dev_link = os.path.join(nic_path, iface, 'device')
8185 if os.path.exists(dev_link):
8186 iftype = 'physical'
8187 driver_path = os.path.join(dev_link, 'driver')
8188 if os.path.exists(driver_path):
f67539c2 8189 driver = os.path.basename(os.path.realpath(driver_path))
f91f0fd5
TL
8190 else:
8191 driver = 'Unknown'
8192
8193 else:
8194 iftype = 'logical'
8195 driver = ''
8196
8197 self.interfaces[iface] = {
f67539c2
TL
8198 'mtu': mtu,
8199 'upper_devs_list': upper_devs_list,
8200 'lower_devs_list': lower_devs_list,
8201 'operstate': operstate,
8202 'iftype': iftype,
8203 'nic_type': nic_type,
8204 'driver': driver,
8205 'speed': speed,
8206 'ipv4_address': get_ipv4_address(iface),
8207 'ipv6_address': get_ipv6_address(iface),
f91f0fd5
TL
8208 }
8209
8210 @property
8211 def nic_count(self):
8212 # type: () -> int
8213 """Return a total count of all physical NICs detected in the host"""
8214 phys_devs = []
8215 for iface in self.interfaces:
f67539c2 8216 if self.interfaces[iface]['iftype'] == 'physical':
f91f0fd5
TL
8217 phys_devs.append(iface)
8218 return len(phys_devs)
8219
f91f0fd5
TL
8220 def _get_mem_data(self, field_name):
8221 # type: (str) -> int
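# /proc/meminfo lines look like 'MemTotal:       32594924 kB';
# the second whitespace-separated field is the value in kB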
8222 for line in self._meminfo:
8223 if line.startswith(field_name):
8224 _d = line.split()
8225 return int(_d[1])
8226 return 0
8227
8228 @property
8229 def memory_total_kb(self):
8230 # type: () -> int
8231 """Determine the memory installed (kb)"""
8232 return self._get_mem_data('MemTotal')
8233
8234 @property
8235 def memory_free_kb(self):
8236 # type: () -> int
8237 """Determine the memory free (not cache, immediately usable)"""
8238 return self._get_mem_data('MemFree')
8239
8240 @property
8241 def memory_available_kb(self):
8242 # type: () -> int
8243 """Determine the memory available to new applications without swapping"""
8244 return self._get_mem_data('MemAvailable')
8245
8246 @property
8247 def vendor(self):
8248 # type: () -> str
8249 """Determine server vendor from DMI data in sysfs"""
f67539c2 8250 return read_file(HostFacts._dmi_path_list, 'sys_vendor')
f91f0fd5
TL
8251
8252 @property
8253 def model(self):
8254 # type: () -> str
8255 """Determine server model information from DMI data in sysfs"""
f67539c2
TL
8256 family = read_file(HostFacts._dmi_path_list, 'product_family')
8257 product = read_file(HostFacts._dmi_path_list, 'product_name')
f91f0fd5 8258 if family == 'Unknown' and product:
f67539c2 8259 return '{}'.format(product)
f91f0fd5 8260
f67539c2 8261 return '{} ({})'.format(family, product)
f91f0fd5
TL
8262
8263 @property
8264 def bios_version(self):
8265 # type: () -> str
8266 """Determine server BIOS version from DMI data in sysfs"""
f67539c2 8267 return read_file(HostFacts._dmi_path_list, 'bios_version')
f91f0fd5
TL
8268
8269 @property
8270 def bios_date(self):
8271 # type: () -> str
8272 """Determine server BIOS date from DMI data in sysfs"""
f67539c2 8273 return read_file(HostFacts._dmi_path_list, 'bios_date')
f91f0fd5
TL
8274
8275 @property
8276 def timestamp(self):
8277 # type: () -> float
8278 """Return the current time as Epoch seconds"""
8279 return time.time()
8280
8281 @property
8282 def system_uptime(self):
8283 # type: () -> float
8284 """Return the system uptime (in secs)"""
8285 raw_time = read_file(['/proc/uptime'])
8286 up_secs, _ = raw_time.split()
8287 return float(up_secs)
8288
f67539c2 8289 @property
f91f0fd5
TL
8290 def kernel_security(self):
8291 # type: () -> Dict[str, str]
8292 """Determine the security features enabled in the kernel - SELinux, AppArmor"""
f67539c2 8293 def _fetch_selinux() -> Dict[str, str]:
522d829b 8294 """Get the selinux status"""
f91f0fd5 8295 security = {}
522d829b
TL
8296 try:
8297 out, err, code = call(self.ctx, ['sestatus'],
8298 verbosity=CallVerbosity.DEBUG)
8299 security['type'] = 'SELinux'
8300 status, mode, policy = '', '', ''
8301 for line in out.split('\n'):
8302 if line.startswith('SELinux status:'):
8303 k, v = line.split(':')
8304 status = v.strip()
8305 elif line.startswith('Current mode:'):
8306 k, v = line.split(':')
8307 mode = v.strip()
8308 elif line.startswith('Loaded policy name:'):
8309 k, v = line.split(':')
8310 policy = v.strip()
8311 if status == 'disabled':
8312 security['description'] = 'SELinux: Disabled'
8313 else:
8314 security['description'] = 'SELinux: Enabled({}, {})'.format(mode, policy)
8315 except Exception as e:
8316 logger.info('unable to get selinux status: %s' % e)
8317 return security
f91f0fd5 8318
f67539c2 8319 def _fetch_apparmor() -> Dict[str, str]:
f91f0fd5
TL
8320 """Read the apparmor profiles directly, returning an overview of AppArmor status"""
8321 security = {}
8322 for apparmor_path in HostFacts._apparmor_path_list:
8323 if os.path.exists(apparmor_path):
f67539c2
TL
8324 security['type'] = 'AppArmor'
8325 security['description'] = 'AppArmor: Enabled'
f91f0fd5
TL
8326 try:
8327 profiles = read_file(['/sys/kernel/security/apparmor/profiles'])
b3b6e05e
TL
8328 if len(profiles) == 0:
8329 return {}
f91f0fd5
TL
8330 except OSError:
8331 pass
8332 else:
8333 summary = {} # type: Dict[str, int]
8334 for line in profiles.split('\n'):
8335 item, mode = line.split(' ')
f67539c2 8336 mode = mode.strip('()')
f91f0fd5
TL
8337 if mode in summary:
8338 summary[mode] += 1
8339 else:
8340 summary[mode] = 1  # first profile seen in this mode
f67539c2
TL
8341 summary_str = ','.join(['{} {}'.format(v, k) for k, v in summary.items()])
8342 security = {**security, **summary} # type: ignore
8343 security['description'] += '({})'.format(summary_str)
f91f0fd5
TL
8344
8345 return security
f67539c2 8346 return {}
f91f0fd5 8347
f67539c2 8348 ret = {}
f91f0fd5
TL
8349 if os.path.exists('/sys/kernel/security/lsm'):
8350 lsm = read_file(['/sys/kernel/security/lsm']).strip()
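# the file holds a comma-separated list of active LSMs, e.g. 'lockdown,capability,yama,selinux'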
8351 if 'selinux' in lsm:
f67539c2 8352 ret = _fetch_selinux()
f91f0fd5 8353 elif 'apparmor' in lsm:
f67539c2 8354 ret = _fetch_apparmor()
f91f0fd5
TL
8355 else:
8356 return {
f67539c2
TL
8357 'type': 'Unknown',
8358 'description': 'Linux Security Module framework is active, but is not using SELinux or AppArmor'
f91f0fd5
TL
8359 }
8360
f67539c2
TL
8361 if ret:
8362 return ret
8363
f91f0fd5 8364 return {
f67539c2
TL
8365 'type': 'None',
8366 'description': 'Linux Security Module framework is not available'
f91f0fd5
TL
8367 }
8368
f67539c2 8369 @property
522d829b 8370 def selinux_enabled(self) -> bool:
f67539c2
TL
8371 return (self.kernel_security['type'] == 'SELinux') and \
8372 (self.kernel_security['description'] != 'SELinux: Disabled')
8373
adb31ebb
TL
8374 @property
8375 def kernel_parameters(self):
8376 # type: () -> Dict[str, str]
8377 """Get kernel parameters required/used in Ceph clusters"""
8378
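# `sysctl -a` emits lines like 'net.ipv4.ip_nonlocal_bind = 0';
# split on ' = ' to build a name -> value lookup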
8379 k_param = {}
f67539c2 8380 out, _, _ = call_throws(self.ctx, ['sysctl', '-a'], verbosity=CallVerbosity.SILENT)
adb31ebb
TL
8381 if out:
8382 param_list = out.split('\n')
f67539c2 8383 param_dict = {param.split(' = ')[0]: param.split(' = ')[-1] for param in param_list}
adb31ebb
TL
8384
8385 # return only desired parameters
8386 if 'net.ipv4.ip_nonlocal_bind' in param_dict:
8387 k_param['net.ipv4.ip_nonlocal_bind'] = param_dict['net.ipv4.ip_nonlocal_bind']
8388
8389 return k_param
8390
522d829b
TL
8391 @staticmethod
8392 def _process_net_data(tcp_file: str, protocol: str = 'tcp') -> List[int]:
8393 listening_ports = []
8394 # Connections state documentation
8395 # tcp - https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/net/tcp_states.h
8396 # udp - uses 07 (TCP_CLOSE or UNCONN, since udp is stateless. test with netcat -ul <port>)
8397 listening_state = {
8398 'tcp': '0A',
8399 'udp': '07'
8400 }
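# a data line in these files looks roughly like
#   '0: 0100007F:1A85 00000000:0000 0A ...'
# field 1 is the local address:port in hex (0x1A85 == 6789), field 3 is the state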
8401
8402 if protocol not in listening_state.keys():
8403 return []
8404
8405 if os.path.exists(tcp_file):
8406 with open(tcp_file) as f:
8407 tcp_data = f.readlines()[1:]
8408
8409 for con in tcp_data:
8410 con_info = con.strip().split()
8411 if con_info[3] == listening_state[protocol]:
8412 local_port = int(con_info[1].split(':')[1], 16)
8413 listening_ports.append(local_port)
8414
8415 return listening_ports
8416
8417 @property
8418 def tcp_ports_used(self) -> List[int]:
8419 return HostFacts._process_net_data('/proc/net/tcp')
8420
8421 @property
8422 def tcp6_ports_used(self) -> List[int]:
8423 return HostFacts._process_net_data('/proc/net/tcp6')
8424
8425 @property
8426 def udp_ports_used(self) -> List[int]:
8427 return HostFacts._process_net_data('/proc/net/udp', 'udp')
8428
8429 @property
8430 def udp6_ports_used(self) -> List[int]:
8431 return HostFacts._process_net_data('/proc/net/udp6', 'udp')
8432
f91f0fd5
TL
8433 def dump(self):
8434 # type: () -> str
8435 """Return the attributes of this HostFacts object as json"""
f67539c2
TL
8436 data = {
8437 k: getattr(self, k) for k in dir(self)
8438 if not k.startswith('_')
8439 and isinstance(getattr(self, k), (float, int, str, list, dict, tuple))
f91f0fd5
TL
8440 }
8441 return json.dumps(data, indent=2, sort_keys=True)
8442
8443##################################
8444
f67539c2 8445
522d829b 8446def command_gather_facts(ctx: CephadmContext) -> None:
f91f0fd5 8447 """gather_facts is intended to provide host related metadata to the caller"""
f67539c2 8448 host = HostFacts(ctx)
f91f0fd5
TL
8449 print(host.dump())
8450
f67539c2
TL
8451
8452##################################
8453
8454
a4b75251 8455def systemd_target_state(ctx: CephadmContext, target_name: str, subsystem: str = 'ceph') -> bool:
f67539c2
TL
8456 # TODO: UNITTEST
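# e.g. with the default unit dir this checks for
# /etc/systemd/system/ceph.target.wants/ceph-<fsid>.target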
8457 return os.path.exists(
8458 os.path.join(
a4b75251 8459 ctx.unit_dir,
f67539c2
TL
8460 f'{subsystem}.target.wants',
8461 target_name
8462 )
8463 )
8464
8465
a4b75251
TL
8466def target_exists(ctx: CephadmContext) -> bool:
8467 return os.path.exists(ctx.unit_dir + '/ceph.target')
8468
8469
f67539c2 8470@infer_fsid
522d829b 8471def command_maintenance(ctx: CephadmContext) -> str:
f67539c2 8472 if not ctx.fsid:
a4b75251 8473 raise Error('failed - must pass --fsid to specify cluster')
f67539c2
TL
8474
8475 target = f'ceph-{ctx.fsid}.target'
8476
8477 if ctx.maintenance_action.lower() == 'enter':
8478 logger.info('Requested to place host into maintenance')
a4b75251 8479 if systemd_target_state(ctx, target):
f67539c2
TL
8480 _out, _err, code = call(ctx,
8481 ['systemctl', 'disable', target],
8482 verbosity=CallVerbosity.DEBUG)
8483 if code:
8484 logger.error(f'Failed to disable the {target} target')
8485 return 'failed - unable to disable the target'
8486 else:
8487 # stopping a target waits by default
8488 _out, _err, code = call(ctx,
8489 ['systemctl', 'stop', target],
8490 verbosity=CallVerbosity.DEBUG)
8491 if code:
8492 logger.error(f'Failed to stop the {target} target')
8493 return 'failed - unable to stop the target'
8494 else:
8495 return f'success - systemd target {target} disabled'
8496
8497 else:
8498 return 'skipped - target already disabled'
8499
8500 else:
8501 logger.info('Requested to exit maintenance state')
a4b75251
TL
8502 # if we've never deployed a daemon on this host there will be no systemd
8503 # target to enable, so attempting to enable it would fail. We still need to
8504 # return success here or the host will be permanently stuck in maintenance mode,
8505 # as no daemons can be deployed and so no systemd target will ever exist to enable.
8506 if not target_exists(ctx):
8507 return 'skipped - systemd target not present on this host. Host removed from maintenance mode.'
f67539c2 8508 # exit maintenance request
a4b75251 8509 if not systemd_target_state(ctx, target):
f67539c2
TL
8510 _out, _err, code = call(ctx,
8511 ['systemctl', 'enable', target],
8512 verbosity=CallVerbosity.DEBUG)
8513 if code:
8514 logger.error(f'Failed to enable the {target} target')
8515 return 'failed - unable to enable the target'
8516 else:
8517 # starting a target waits by default
8518 _out, _err, code = call(ctx,
8519 ['systemctl', 'start', target],
8520 verbosity=CallVerbosity.DEBUG)
8521 if code:
8522 logger.error(f'Failed to start the {target} target')
8523 return 'failed - unable to start the target'
8524 else:
8525 return f'success - systemd target {target} enabled and started'
522d829b 8526 return f'success - systemd target {target} enabled and started'
f91f0fd5
TL
8527
8528##################################
8529
f6b5b4d7 8530
9f95a23c
TL
8531def _get_parser():
8532 # type: () -> argparse.ArgumentParser
8533 parser = argparse.ArgumentParser(
8534 description='Bootstrap Ceph daemons with systemd and containers.',
8535 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
8536 parser.add_argument(
8537 '--image',
8538 help='container image. Can also be set via the "CEPHADM_IMAGE" '
8539 'env var')
8540 parser.add_argument(
8541 '--docker',
8542 action='store_true',
8543 help='use docker instead of podman')
8544 parser.add_argument(
8545 '--data-dir',
8546 default=DATA_DIR,
8547 help='base directory for daemon data')
8548 parser.add_argument(
8549 '--log-dir',
8550 default=LOG_DIR,
8551 help='base directory for daemon logs')
8552 parser.add_argument(
8553 '--logrotate-dir',
8554 default=LOGROTATE_DIR,
8555 help='location of logrotate configuration files')
b3b6e05e
TL
8556 parser.add_argument(
8557 '--sysctl-dir',
8558 default=SYSCTL_DIR,
8559 help='location of sysctl configuration files')
9f95a23c
TL
8560 parser.add_argument(
8561 '--unit-dir',
8562 default=UNIT_DIR,
8563 help='base directory for systemd units')
8564 parser.add_argument(
8565 '--verbose', '-v',
8566 action='store_true',
8567 help='Show debug-level log messages')
8568 parser.add_argument(
8569 '--timeout',
8570 type=int,
8571 default=DEFAULT_TIMEOUT,
8572 help='timeout in seconds')
8573 parser.add_argument(
8574 '--retry',
8575 type=int,
8576 default=DEFAULT_RETRY,
8577 help='max number of retries')
e306af50
TL
8578 parser.add_argument(
8579 '--env', '-e',
8580 action='append',
8581 default=[],
8582 help='set environment variable')
f67539c2
TL
8583 parser.add_argument(
8584 '--no-container-init',
8585 action='store_true',
8586 default=not CONTAINER_INIT,
8587 help='Do not run podman/docker with `--init`')
9f95a23c
TL
8588
8589 subparsers = parser.add_subparsers(help='sub-command')
8590
8591 parser_version = subparsers.add_parser(
8592 'version', help='get ceph version from container')
8593 parser_version.set_defaults(func=command_version)
8594
8595 parser_pull = subparsers.add_parser(
33c7a0ef 8596 'pull', help='pull the default container image')
9f95a23c 8597 parser_pull.set_defaults(func=command_pull)
a4b75251
TL
8598 parser_pull.add_argument(
8599 '--insecure',
8600 action='store_true',
8601 help=argparse.SUPPRESS,
8602 )
9f95a23c
TL
8603
8604 parser_inspect_image = subparsers.add_parser(
8605 'inspect-image', help='inspect local container image')
8606 parser_inspect_image.set_defaults(func=command_inspect_image)
8607
8608 parser_ls = subparsers.add_parser(
8609 'ls', help='list daemon instances on this host')
8610 parser_ls.set_defaults(func=command_ls)
8611 parser_ls.add_argument(
8612 '--no-detail',
8613 action='store_true',
8614 help='Do not include daemon status')
8615 parser_ls.add_argument(
8616 '--legacy-dir',
8617 default='/',
8618 help='base directory for legacy daemon data')
8619
8620 parser_list_networks = subparsers.add_parser(
8621 'list-networks', help='list IP networks')
8622 parser_list_networks.set_defaults(func=command_list_networks)
8623
8624 parser_adopt = subparsers.add_parser(
8625 'adopt', help='adopt daemon deployed with a different tool')
8626 parser_adopt.set_defaults(func=command_adopt)
8627 parser_adopt.add_argument(
8628 '--name', '-n',
8629 required=True,
8630 help='daemon name (type.id)')
8631 parser_adopt.add_argument(
8632 '--style',
8633 required=True,
8634 help='deployment style (legacy, ...)')
8635 parser_adopt.add_argument(
8636 '--cluster',
8637 default='ceph',
8638 help='cluster name')
8639 parser_adopt.add_argument(
8640 '--legacy-dir',
8641 default='/',
8642 help='base directory for legacy daemon data')
8643 parser_adopt.add_argument(
8644 '--config-json',
8645 help='Additional configuration information in JSON format')
8646 parser_adopt.add_argument(
8647 '--skip-firewalld',
8648 action='store_true',
8649 help='Do not configure firewalld')
8650 parser_adopt.add_argument(
8651 '--skip-pull',
8652 action='store_true',
33c7a0ef 8653 help='do not pull the default image before adopting')
1911f103
TL
8654 parser_adopt.add_argument(
8655 '--force-start',
8656 action='store_true',
f67539c2 8657 help='start newly adopted daemon, even if it was not running previously')
f91f0fd5
TL
8658 parser_adopt.add_argument(
8659 '--container-init',
8660 action='store_true',
f67539c2
TL
8661 default=CONTAINER_INIT,
8662 help=argparse.SUPPRESS)
9f95a23c
TL
8663
8664 parser_rm_daemon = subparsers.add_parser(
8665 'rm-daemon', help='remove daemon instance')
8666 parser_rm_daemon.set_defaults(func=command_rm_daemon)
8667 parser_rm_daemon.add_argument(
8668 '--name', '-n',
8669 required=True,
8670 action=CustomValidation,
8671 help='daemon name (type.id)')
33c7a0ef
TL
8672 parser_rm_daemon.add_argument(
8673 '--tcp-ports',
8674 help='List of tcp ports to close in the host firewall')
9f95a23c
TL
8675 parser_rm_daemon.add_argument(
8676 '--fsid',
8677 required=True,
8678 help='cluster FSID')
8679 parser_rm_daemon.add_argument(
8680 '--force',
8681 action='store_true',
8682 help='proceed, even though this may destroy valuable data')
8683 parser_rm_daemon.add_argument(
8684 '--force-delete-data',
8685 action='store_true',
8686 help='delete valuable daemon data instead of making a backup')
8687
8688 parser_rm_cluster = subparsers.add_parser(
8689 'rm-cluster', help='remove all daemons for a cluster')
8690 parser_rm_cluster.set_defaults(func=command_rm_cluster)
8691 parser_rm_cluster.add_argument(
8692 '--fsid',
8693 required=True,
8694 help='cluster FSID')
8695 parser_rm_cluster.add_argument(
8696 '--force',
8697 action='store_true',
8698 help='proceed, even though this may destroy valuable data')
f67539c2
TL
8699 parser_rm_cluster.add_argument(
8700 '--keep-logs',
8701 action='store_true',
8702 help='do not remove log files')
b3b6e05e
TL
8703 parser_rm_cluster.add_argument(
8704 '--zap-osds',
8705 action='store_true',
8706 help='zap OSD devices for this cluster')
9f95a23c
TL
8707
8708 parser_run = subparsers.add_parser(
8709 'run', help='run a ceph daemon, in a container, in the foreground')
8710 parser_run.set_defaults(func=command_run)
8711 parser_run.add_argument(
8712 '--name', '-n',
8713 required=True,
8714 help='daemon name (type.id)')
8715 parser_run.add_argument(
8716 '--fsid',
8717 required=True,
8718 help='cluster FSID')
8719
8720 parser_shell = subparsers.add_parser(
8721 'shell', help='run an interactive shell inside a daemon container')
8722 parser_shell.set_defaults(func=command_shell)
20effc67
TL
8723 parser_shell.add_argument(
8724 '--shared_ceph_folder',
8725 metavar='CEPH_SOURCE_FOLDER',
8726 help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
9f95a23c
TL
8727 parser_shell.add_argument(
8728 '--fsid',
8729 help='cluster FSID')
8730 parser_shell.add_argument(
8731 '--name', '-n',
8732 help='daemon name (type.id)')
8733 parser_shell.add_argument(
8734 '--config', '-c',
8735 help='ceph.conf to pass through to the container')
8736 parser_shell.add_argument(
8737 '--keyring', '-k',
8738 help='ceph.keyring to pass through to the container')
e306af50
TL
8739 parser_shell.add_argument(
8740 '--mount', '-m',
f67539c2
TL
8741 help=('mount a file or directory in the container. '
8742 'Supports multiple mounts, '
8743 'e.g.: `--mount /foo /bar:/bar`. '
8744 'When no destination is passed, the default is /mnt'),
8745 nargs='+')
9f95a23c
TL
8746 parser_shell.add_argument(
8747 '--env', '-e',
8748 action='append',
8749 default=[],
8750 help='set environment variable')
b3b6e05e
TL
8751 parser_shell.add_argument(
8752 '--volume', '-v',
8753 action='append',
8754 default=[],
8755 help='mount a volume')
9f95a23c 8756 parser_shell.add_argument(
e306af50 8757 'command', nargs=argparse.REMAINDER,
9f95a23c 8758 help='command (optional)')
b3b6e05e
TL
8759 parser_shell.add_argument(
8760 '--no-hosts',
8761 action='store_true',
8762 help='do not pass /etc/hosts through to the container')
9f95a23c
TL
8763
8764 parser_enter = subparsers.add_parser(
8765 'enter', help='run an interactive shell inside a running daemon container')
8766 parser_enter.set_defaults(func=command_enter)
8767 parser_enter.add_argument(
8768 '--fsid',
8769 help='cluster FSID')
8770 parser_enter.add_argument(
8771 '--name', '-n',
8772 required=True,
8773 help='daemon name (type.id)')
8774 parser_enter.add_argument(
e306af50 8775 'command', nargs=argparse.REMAINDER,
9f95a23c
TL
8776 help='command')
8777
8778 parser_ceph_volume = subparsers.add_parser(
8779 'ceph-volume', help='run ceph-volume inside a container')
8780 parser_ceph_volume.set_defaults(func=command_ceph_volume)
20effc67
TL
8781 parser_ceph_volume.add_argument(
8782 '--shared_ceph_folder',
8783 metavar='CEPH_SOURCE_FOLDER',
8784 help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
9f95a23c
TL
8785 parser_ceph_volume.add_argument(
8786 '--fsid',
8787 help='cluster FSID')
8788 parser_ceph_volume.add_argument(
8789 '--config-json',
20effc67 8790 help='JSON file with config and (client.bootstrap-osd) key')
801d1391
TL
8791 parser_ceph_volume.add_argument(
8792 '--config', '-c',
8793 help='ceph conf file')
8794 parser_ceph_volume.add_argument(
8795 '--keyring', '-k',
8796 help='ceph.keyring to pass through to the container')
9f95a23c 8797 parser_ceph_volume.add_argument(
e306af50 8798 'command', nargs=argparse.REMAINDER,
9f95a23c
TL
8799 help='command')
8800
b3b6e05e
TL
8801 parser_zap_osds = subparsers.add_parser(
8802 'zap-osds', help='zap all OSDs associated with a particular fsid')
8803 parser_zap_osds.set_defaults(func=command_zap_osds)
8804 parser_zap_osds.add_argument(
8805 '--fsid',
8806 required=True,
8807 help='cluster FSID')
8808 parser_zap_osds.add_argument(
8809 '--force',
8810 action='store_true',
8811 help='proceed, even though this may destroy valuable data')
8812
9f95a23c 8813 parser_unit = subparsers.add_parser(
f67539c2 8814 'unit', help="operate on the daemon's systemd unit")
9f95a23c
TL
8815 parser_unit.set_defaults(func=command_unit)
8816 parser_unit.add_argument(
8817 'command',
8818 help='systemd command (start, stop, restart, enable, disable, ...)')
8819 parser_unit.add_argument(
8820 '--fsid',
8821 help='cluster FSID')
8822 parser_unit.add_argument(
8823 '--name', '-n',
8824 required=True,
8825 help='daemon name (type.id)')
8826
8827 parser_logs = subparsers.add_parser(
8828 'logs', help='print journald logs for a daemon container')
8829 parser_logs.set_defaults(func=command_logs)
8830 parser_logs.add_argument(
8831 '--fsid',
8832 help='cluster FSID')
8833 parser_logs.add_argument(
8834 '--name', '-n',
8835 required=True,
8836 help='daemon name (type.id)')
8837 parser_logs.add_argument(
8838 'command', nargs='*',
8839 help='additional journalctl args')
8840
8841 parser_bootstrap = subparsers.add_parser(
8842 'bootstrap', help='bootstrap a cluster (mon + mgr daemons)')
8843 parser_bootstrap.set_defaults(func=command_bootstrap)
8844 parser_bootstrap.add_argument(
8845 '--config', '-c',
8846 help='ceph conf file to incorporate')
8847 parser_bootstrap.add_argument(
8848 '--mon-id',
8849 required=False,
8850 help='mon id (default: local hostname)')
33c7a0ef
TL
8851 group = parser_bootstrap.add_mutually_exclusive_group()
8852 group.add_argument(
9f95a23c
TL
8853 '--mon-addrv',
8854 help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])')
33c7a0ef 8855 group.add_argument(
9f95a23c
TL
8856 '--mon-ip',
8857 help='mon IP')
8858 parser_bootstrap.add_argument(
8859 '--mgr-id',
8860 required=False,
8861 help='mgr id (default: randomly generated)')
8862 parser_bootstrap.add_argument(
8863 '--fsid',
8864 help='cluster FSID')
8865 parser_bootstrap.add_argument(
8866 '--output-dir',
8867 default='/etc/ceph',
8868 help='directory to write config, keyring, and pub key files')
8869 parser_bootstrap.add_argument(
8870 '--output-keyring',
8871 help='location to write keyring file with new cluster admin and mon keys')
8872 parser_bootstrap.add_argument(
8873 '--output-config',
8874 help='location to write conf file to connect to new cluster')
8875 parser_bootstrap.add_argument(
8876 '--output-pub-ssh-key',
f67539c2 8877 help="location to write the cluster's public SSH key")
b3b6e05e
TL
8878 parser_bootstrap.add_argument(
8879 '--skip-admin-label',
8880 action='store_true',
8881 help='do not create admin label for ceph.conf and client.admin keyring distribution')
9f95a23c
TL
8882 parser_bootstrap.add_argument(
8883 '--skip-ssh',
8884 action='store_true',
8885 help='skip setup of ssh key on local host')
8886 parser_bootstrap.add_argument(
8887 '--initial-dashboard-user',
8888 default='admin',
8889 help='Initial user for the dashboard')
8890 parser_bootstrap.add_argument(
8891 '--initial-dashboard-password',
8892 help='Initial password for the initial dashboard user')
f6b5b4d7
TL
8893 parser_bootstrap.add_argument(
8894 '--ssl-dashboard-port',
8895 type=int,
f67539c2 8896 default=8443,
f6b5b4d7 8897 help='Port number used to connect with dashboard using SSL')
9f95a23c
TL
8898 parser_bootstrap.add_argument(
8899 '--dashboard-key',
e306af50 8900 type=argparse.FileType('r'),
9f95a23c
TL
8901 help='Dashboard key')
8902 parser_bootstrap.add_argument(
8903 '--dashboard-crt',
e306af50 8904 type=argparse.FileType('r'),
9f95a23c
TL
8905 help='Dashboard certificate')
8906
e306af50
TL
8907 parser_bootstrap.add_argument(
8908 '--ssh-config',
8909 type=argparse.FileType('r'),
8910 help='SSH config')
8911 parser_bootstrap.add_argument(
8912 '--ssh-private-key',
8913 type=argparse.FileType('r'),
8914 help='SSH private key')
8915 parser_bootstrap.add_argument(
8916 '--ssh-public-key',
8917 type=argparse.FileType('r'),
8918 help='SSH public key')
f6b5b4d7
TL
8919 parser_bootstrap.add_argument(
8920 '--ssh-user',
8921 default='root',
8922 help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users')
9f95a23c
TL
8923 parser_bootstrap.add_argument(
8924 '--skip-mon-network',
8925 action='store_true',
8926 help='set mon public_network based on bootstrap mon ip')
8927 parser_bootstrap.add_argument(
8928 '--skip-dashboard',
8929 action='store_true',
8930 help='do not enable the Ceph Dashboard')
8931 parser_bootstrap.add_argument(
8932 '--dashboard-password-noupdate',
8933 action='store_true',
8934 help='stop forced dashboard password change')
8935 parser_bootstrap.add_argument(
8936 '--no-minimize-config',
8937 action='store_true',
8938 help='do not assimilate and minimize the config file')
8939 parser_bootstrap.add_argument(
8940 '--skip-ping-check',
8941 action='store_true',
8942 help='do not verify that mon IP is pingable')
8943 parser_bootstrap.add_argument(
8944 '--skip-pull',
8945 action='store_true',
33c7a0ef 8946 help='do not pull the default image before bootstrapping')
9f95a23c
TL
8947 parser_bootstrap.add_argument(
8948 '--skip-firewalld',
8949 action='store_true',
8950 help='Do not configure firewalld')
8951 parser_bootstrap.add_argument(
8952 '--allow-overwrite',
8953 action='store_true',
8954 help='allow overwrite of existing --output-* config/keyring/ssh files')
8955 parser_bootstrap.add_argument(
8956 '--allow-fqdn-hostname',
8957 action='store_true',
8958 help='allow hostname that is fully-qualified (contains ".")')
f67539c2
TL
8959 parser_bootstrap.add_argument(
8960 '--allow-mismatched-release',
8961 action='store_true',
8962 help="allow bootstrap of ceph that doesn't match this version of cephadm")
9f95a23c
TL
8963 parser_bootstrap.add_argument(
8964 '--skip-prepare-host',
8965 action='store_true',
8966 help='Do not prepare host')
8967 parser_bootstrap.add_argument(
8968 '--orphan-initial-daemons',
8969 action='store_true',
f67539c2 8970 help='Set mon and mgr service to `unmanaged`; do not create the crash service')
9f95a23c
TL
8971 parser_bootstrap.add_argument(
8972 '--skip-monitoring-stack',
8973 action='store_true',
8974 help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
e306af50
TL
8975 parser_bootstrap.add_argument(
8976 '--apply-spec',
8977 help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
e306af50
TL
8978 parser_bootstrap.add_argument(
8979 '--shared_ceph_folder',
8980 metavar='CEPH_SOURCE_FOLDER',
8981 help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder')
9f95a23c 8982
f6b5b4d7
TL
8983 parser_bootstrap.add_argument(
8984 '--registry-url',
8985 help='url for custom registry')
8986 parser_bootstrap.add_argument(
8987 '--registry-username',
8988 help='username for custom registry')
8989 parser_bootstrap.add_argument(
8990 '--registry-password',
8991 help='password for custom registry')
8992 parser_bootstrap.add_argument(
8993 '--registry-json',
8994 help='json file with custom registry login info (URL, Username, Password)')
f91f0fd5
TL
8995 parser_bootstrap.add_argument(
8996 '--container-init',
8997 action='store_true',
f67539c2
TL
8998 default=CONTAINER_INIT,
8999 help=argparse.SUPPRESS)
f67539c2
TL
9000 parser_bootstrap.add_argument(
9001 '--cluster-network',
9002 help='subnet to use for cluster replication, recovery and heartbeats (in CIDR notation network/mask)')
b3b6e05e
TL
9003 parser_bootstrap.add_argument(
9004 '--single-host-defaults',
9005 action='store_true',
9006 help='adjust configuration defaults to suit a single-host cluster')
522d829b
TL
9007 parser_bootstrap.add_argument(
9008 '--log-to-file',
9009 action='store_true',
9010 help='configure cluster to log to traditional log files in /var/log/ceph/$fsid')
f6b5b4d7 9011
9f95a23c
TL
9012 parser_deploy = subparsers.add_parser(
9013 'deploy', help='deploy a daemon')
9014 parser_deploy.set_defaults(func=command_deploy)
9015 parser_deploy.add_argument(
9016 '--name',
9017 required=True,
9018 action=CustomValidation,
9019 help='daemon name (type.id)')
9020 parser_deploy.add_argument(
9021 '--fsid',
9022 required=True,
9023 help='cluster FSID')
9024 parser_deploy.add_argument(
9025 '--config', '-c',
9026 help='config file for new daemon')
9027 parser_deploy.add_argument(
9028 '--config-json',
9029 help='Additional configuration information in JSON format')
9030 parser_deploy.add_argument(
9031 '--keyring',
9032 help='keyring for new daemon')
9033 parser_deploy.add_argument(
9034 '--key',
9035 help='key for new daemon')
9036 parser_deploy.add_argument(
9037 '--osd-fsid',
9038 help='OSD uuid, if creating an OSD container')
9039 parser_deploy.add_argument(
9040 '--skip-firewalld',
9041 action='store_true',
9042 help='Do not configure firewalld')
f6b5b4d7
TL
9043 parser_deploy.add_argument(
9044 '--tcp-ports',
9045 help='List of tcp ports to open in the host firewall')
9f95a23c
TL
9046 parser_deploy.add_argument(
9047 '--reconfig',
9048 action='store_true',
9049 help='Reconfigure a previously deployed daemon')
9050 parser_deploy.add_argument(
9051 '--allow-ptrace',
9052 action='store_true',
9053 help='Allow SYS_PTRACE on daemon container')
f91f0fd5
TL
9054 parser_deploy.add_argument(
9055 '--container-init',
9056 action='store_true',
f67539c2
TL
9057 default=CONTAINER_INIT,
9058 help=argparse.SUPPRESS)
9059 parser_deploy.add_argument(
9060 '--memory-request',
9061 help='Container memory request/target'
9062 )
9063 parser_deploy.add_argument(
9064 '--memory-limit',
9065 help='Container memory hard limit'
9066 )
9067 parser_deploy.add_argument(
9068 '--meta-json',
9069 help='JSON dict of additional metadata'
9070 )
20effc67
TL
9071 parser_deploy.add_argument(
9072 '--extra-container-args',
9073 action='append',
9074 default=[],
9075 help='Additional container arguments to apply to daemon'
9076 )
9f95a23c
TL
9077
9078 parser_check_host = subparsers.add_parser(
9079 'check-host', help='check host configuration')
9080 parser_check_host.set_defaults(func=command_check_host)
9081 parser_check_host.add_argument(
9082 '--expect-hostname',
9083 help='Check that hostname matches an expected value')
9084
9085 parser_prepare_host = subparsers.add_parser(
9086 'prepare-host', help='prepare a host for cephadm use')
9087 parser_prepare_host.set_defaults(func=command_prepare_host)
9088 parser_prepare_host.add_argument(
9089 '--expect-hostname',
9090 help='Set hostname')
9091
9092 parser_add_repo = subparsers.add_parser(
9093 'add-repo', help='configure package repository')
9094 parser_add_repo.set_defaults(func=command_add_repo)
9095 parser_add_repo.add_argument(
9096 '--release',
1911f103 9097 help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE))
9f95a23c
TL
9098 parser_add_repo.add_argument(
9099 '--version',
9100 help='use specific upstream version (x.y.z)')
9101 parser_add_repo.add_argument(
9102 '--dev',
9103 help='use specified bleeding edge build from git branch or tag')
9104 parser_add_repo.add_argument(
9105 '--dev-commit',
9106 help='use specified bleeding edge build from git commit')
9107 parser_add_repo.add_argument(
9108 '--gpg-url',
9109 help='specify alternative GPG key location')
9110 parser_add_repo.add_argument(
9111 '--repo-url',
9112 default='https://download.ceph.com',
9113 help='specify alternative repo location')
9114 # TODO: proxy?
9115
9116 parser_rm_repo = subparsers.add_parser(
9117 'rm-repo', help='remove package repository configuration')
9118 parser_rm_repo.set_defaults(func=command_rm_repo)
9119
9120 parser_install = subparsers.add_parser(
9121 'install', help='install ceph package(s)')
9122 parser_install.set_defaults(func=command_install)
9123 parser_install.add_argument(
9124 'packages', nargs='*',
9125 default=['cephadm'],
9126 help='packages')
9127
f6b5b4d7
TL
9128 parser_registry_login = subparsers.add_parser(
9129 'registry-login', help='log host into authenticated registry')
9130 parser_registry_login.set_defaults(func=command_registry_login)
9131 parser_registry_login.add_argument(
9132 '--registry-url',
9133 help='url for custom registry')
9134 parser_registry_login.add_argument(
9135 '--registry-username',
9136 help='username for custom registry')
9137 parser_registry_login.add_argument(
9138 '--registry-password',
9139 help='password for custom registry')
9140 parser_registry_login.add_argument(
9141 '--registry-json',
9142 help='json file with custom registry login info (URL, Username, Password)')
9143 parser_registry_login.add_argument(
9144 '--fsid',
9145 help='cluster FSID')
9146
f91f0fd5
TL
9147 parser_gather_facts = subparsers.add_parser(
9148 'gather-facts', help='gather and return host related information (JSON format)')
9149 parser_gather_facts.set_defaults(func=command_gather_facts)
9150
f67539c2
TL
9151 parser_maintenance = subparsers.add_parser(
9152 'host-maintenance', help='Manage the maintenance state of a host')
9153 parser_maintenance.add_argument(
9154 '--fsid',
9155 help='cluster FSID')
9156 parser_maintenance.add_argument(
9157 'maintenance_action',
9158 type=str,
9159 choices=['enter', 'exit'],
9160 help='Maintenance action - enter maintenance, or exit maintenance')
9161 parser_maintenance.set_defaults(func=command_maintenance)
9162
20effc67
TL
9163 parser_agent = subparsers.add_parser(
9164 'agent', help='start cephadm agent')
9165 parser_agent.set_defaults(func=command_agent)
9166 parser_agent.add_argument(
9167 '--fsid',
9168 required=True,
9169 help='cluster FSID')
9170 parser_agent.add_argument(
9171 '--daemon-id',
9172 help='daemon id for agent')
9173
9f95a23c
TL
9174 return parser
9175
f6b5b4d7 9176
522d829b 9177def _parse_args(av: List[str]) -> argparse.Namespace:
9f95a23c 9178 parser = _get_parser()
f67539c2 9179
e306af50 9180 args = parser.parse_args(av)
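# argparse.REMAINDER keeps a leading '--' separator in the captured command; drop it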
f67539c2 9181 if 'command' in args and args.command and args.command[0] == '--':
e306af50 9182 args.command.pop(0)
f67539c2
TL
9183
9184 # workaround argparse to deprecate the subparser `--container-init` flag
9185 # container_init and no_container_init must always be mutually exclusive
9186 container_init_args = ('--container-init', '--no-container-init')
9187 if set(container_init_args).issubset(av):
9188 parser.error('argument %s: not allowed with argument %s' % (container_init_args))
9189 elif '--container-init' in av:
9190 args.no_container_init = not args.container_init
9191 else:
9192 args.container_init = not args.no_container_init
9193 assert args.container_init is not args.no_container_init
9194
e306af50 9195 return args
9f95a23c 9196
f6b5b4d7 9197
b3b6e05e 9198def cephadm_init_ctx(args: List[str]) -> CephadmContext:
f67539c2
TL
9199 ctx = CephadmContext()
9200 ctx.set_args(_parse_args(args))
9201 return ctx
9202
9203
20effc67
TL
9204def cephadm_init_logging(ctx: CephadmContext, args: List[str]) -> None:
9205 """Configure the logging for cephadm as well as updating the system
9206 to have the expected log dir and logrotate configuration.
9207 """
f67539c2 9208 global logger
f91f0fd5
TL
9209 if not os.path.exists(LOG_DIR):
9210 os.makedirs(LOG_DIR)
33c7a0ef
TL
9211 operations = ['bootstrap', 'rm-cluster']
9212 if any(op in args for op in operations):
9213 dictConfig(interactive_logging_config)
9214 else:
9215 dictConfig(logging_config)
9216
f91f0fd5
TL
9217 logger = logging.getLogger()
9218
b3b6e05e
TL
9219 if not os.path.exists(ctx.logrotate_dir + '/cephadm'):
9220 with open(ctx.logrotate_dir + '/cephadm', 'w') as f:
9221 f.write("""# created by cephadm
9222/var/log/ceph/cephadm.log {
9223 rotate 7
9224 daily
9225 compress
9226 missingok
9227 notifempty
9228}
9229""")
9230
f67539c2 9231 if ctx.verbose:
f91f0fd5 9232 for handler in logger.handlers:
f67539c2
TL
9233 if handler.name == 'console':
9234 handler.setLevel(logging.DEBUG)
a4b75251 9235 logger.debug('%s\ncephadm %s' % ('-' * 80, args))
f67539c2
TL
9236
9237
20effc67
TL
9238def cephadm_require_root() -> None:
9239 """Exit if the process is not running as root."""
f67539c2
TL
9240 if os.geteuid() != 0:
9241 sys.stderr.write('ERROR: cephadm should be run as root\n')
9f95a23c
TL
9242 sys.exit(1)
9243
20effc67
TL
9244
9245def main() -> None:
f67539c2
TL
9246 av: List[str] = []
9247 av = sys.argv[1:]
9248
20effc67 9249 ctx = cephadm_init_ctx(av)
b3b6e05e
TL
9250 if not ctx.has_function():
9251 sys.stderr.write('No command specified; pass -h or --help for usage\n')
f67539c2 9252 sys.exit(1)
1911f103 9253
20effc67
TL
9254 cephadm_require_root()
9255 cephadm_init_logging(ctx, av)
9f95a23c 9256 try:
f67539c2
TL
9257 # podman or docker?
9258 ctx.container_engine = find_container_engine(ctx)
9259 if ctx.func not in \
a4b75251
TL
9260 [
9261 command_check_host,
9262 command_prepare_host,
9263 command_add_repo,
9264 command_rm_repo,
9265 command_install
9266 ]:
f67539c2
TL
9267 check_container_engine(ctx)
9268 # command handler
9269 r = ctx.func(ctx)
9f95a23c 9270 except Error as e:
f67539c2 9271 if ctx.verbose:
9f95a23c 9272 raise
f67539c2 9273 logger.error('ERROR: %s' % e)
9f95a23c
TL
9274 sys.exit(1)
9275 if not r:
9276 r = 0
9277 sys.exit(r)
f67539c2
TL
9278
9279
9280if __name__ == '__main__':
9281 main()