#!/usr/bin/python3

import asyncio
import asyncio.subprocess
import argparse
import datetime
import fcntl
import ipaddress
import io
import json
import logging
from logging.config import dictConfig
import os
import platform
import pwd
import random
import shlex
import shutil
import socket
import string
import subprocess
import sys
import tempfile
import time
import errno
import struct
import ssl
from enum import Enum
from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, Callable, IO, Sequence, TypeVar, cast, Set, Iterable

import re
import uuid

from configparser import ConfigParser
from contextlib import redirect_stdout
from functools import wraps
from glob import glob
from io import StringIO
from threading import Thread, Event
from urllib.error import HTTPError, URLError
from urllib.request import urlopen, Request
from pathlib import Path

FuncT = TypeVar('FuncT', bound=Callable)

# Default container images -----------------------------------------------------
DEFAULT_IMAGE = 'quay.io/ceph/ceph:v17'
DEFAULT_IMAGE_IS_MASTER = False
DEFAULT_IMAGE_RELEASE = 'quincy'
DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.33.4'
DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.4.0'
DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.4.0'
DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.3.1'
DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.23.0'
DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/ceph-grafana:8.3.5'
DEFAULT_HAPROXY_IMAGE = 'quay.io/ceph/haproxy:2.3'
DEFAULT_KEEPALIVED_IMAGE = 'quay.io/ceph/keepalived:2.1.5'
DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1'
DEFAULT_REGISTRY = 'docker.io'  # normalize unqualified digests to this
# ------------------------------------------------------------------------------

LATEST_STABLE_RELEASE = 'quincy'
DATA_DIR = '/var/lib/ceph'
LOG_DIR = '/var/log/ceph'
LOCK_DIR = '/run/cephadm'
LOGROTATE_DIR = '/etc/logrotate.d'
SYSCTL_DIR = '/etc/sysctl.d'
UNIT_DIR = '/etc/systemd/system'
CEPH_CONF_DIR = 'config'
CEPH_CONF = 'ceph.conf'
CEPH_PUBKEY = 'ceph.pub'
CEPH_KEYRING = 'ceph.client.admin.keyring'
CEPH_DEFAULT_CONF = f'/etc/ceph/{CEPH_CONF}'
CEPH_DEFAULT_KEYRING = f'/etc/ceph/{CEPH_KEYRING}'
CEPH_DEFAULT_PUBKEY = f'/etc/ceph/{CEPH_PUBKEY}'
LOG_DIR_MODE = 0o770
DATA_DIR_MODE = 0o700
CONTAINER_INIT = True
MIN_PODMAN_VERSION = (2, 0, 2)
CGROUPS_SPLIT_PODMAN_VERSION = (2, 1, 0)
CUSTOM_PS1 = r'[ceph: \u@\h \W]\$ '
DEFAULT_TIMEOUT = None  # in seconds
DEFAULT_RETRY = 15
DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ'
QUIET_LOG_LEVEL = 9  # DEBUG is 10, so using 9 to be lower level than DEBUG

logger: logging.Logger = None  # type: ignore

"""
You can invoke cephadm in two ways:

1. The normal way, at the command line.

2. By piping the script to the python3 binary. In this latter case, you should
   prepend one or more lines to the beginning of the script.

   For arguments,

     injected_argv = [...]

   e.g.,

     injected_argv = ['ls']

   For reading stdin from the '--config-json -' argument,

     injected_stdin = '...'
"""
cached_stdin = None


##################################


async def run_func(func: Callable, cmd: str) -> subprocess.CompletedProcess:
    logger.debug(f'running function {func.__name__}, with params: {cmd}')
    response = func(cmd)
    return response


async def concurrent_tasks(func: Callable, cmd_list: List[str]) -> List[Any]:
    tasks = []
    for cmd in cmd_list:
        tasks.append(run_func(func, cmd))

    data = await asyncio.gather(*tasks)

    return data

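# Illustrative only (an assumption, not invoked at import time): the coroutine
# above fans a blocking callable out over a list of arguments, e.g.
#
#   results = async_run(concurrent_tasks(some_callable, ['a', 'b', 'c']))
#
# where async_run is the asyncio.run wrapper defined later in this file and
# some_callable is a hypothetical function taking one string argument.
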
class EndPoint:
    """EndPoint representing an ip:port format"""

    def __init__(self, ip: str, port: int) -> None:
        self.ip = ip
        self.port = port

    def __str__(self) -> str:
        return f'{self.ip}:{self.port}'

    def __repr__(self) -> str:
        return f'{self.ip}:{self.port}'


class ContainerInfo:
    def __init__(self, container_id: str,
                 image_name: str,
                 image_id: str,
                 start: str,
                 version: str) -> None:
        self.container_id = container_id
        self.image_name = image_name
        self.image_id = image_id
        self.start = start
        self.version = version

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, ContainerInfo):
            return NotImplemented
        return (self.container_id == other.container_id
                and self.image_name == other.image_name
                and self.image_id == other.image_id
                and self.start == other.start
                and self.version == other.version)


class BaseConfig:

    def __init__(self) -> None:
        self.image: str = ''
        self.docker: bool = False
        self.data_dir: str = DATA_DIR
        self.log_dir: str = LOG_DIR
        self.logrotate_dir: str = LOGROTATE_DIR
        self.sysctl_dir: str = SYSCTL_DIR
        self.unit_dir: str = UNIT_DIR
        self.verbose: bool = False
        self.timeout: Optional[int] = DEFAULT_TIMEOUT
        self.retry: int = DEFAULT_RETRY
        self.env: List[str] = []
        self.memory_request: Optional[int] = None
        self.memory_limit: Optional[int] = None
        self.log_to_journald: Optional[bool] = None

        self.container_init: bool = CONTAINER_INIT
        self.container_engine: Optional[ContainerEngine] = None

    def set_from_args(self, args: argparse.Namespace) -> None:
        argdict: Dict[str, Any] = vars(args)
        for k, v in argdict.items():
            if hasattr(self, k):
                setattr(self, k, v)


class CephadmContext:

    def __init__(self) -> None:
        self.__dict__['_args'] = None
        self.__dict__['_conf'] = BaseConfig()

    def set_args(self, args: argparse.Namespace) -> None:
        self._conf.set_from_args(args)
        self._args = args

    def has_function(self) -> bool:
        return 'func' in self._args

    def __contains__(self, name: str) -> bool:
        return hasattr(self, name)

    def __getattr__(self, name: str) -> Any:
        if '_conf' in self.__dict__ and hasattr(self._conf, name):
            return getattr(self._conf, name)
        elif '_args' in self.__dict__ and hasattr(self._args, name):
            return getattr(self._args, name)
        else:
            return super().__getattribute__(name)

    def __setattr__(self, name: str, value: Any) -> None:
        if hasattr(self._conf, name):
            setattr(self._conf, name, value)
        elif hasattr(self._args, name):
            setattr(self._args, name, value)
        else:
            super().__setattr__(name, value)

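# Attribute lookups on CephadmContext fall through to the BaseConfig first and
# then to the parsed argparse namespace. A sketch (not executed here):
#
#   ctx = CephadmContext()
#   ctx.set_args(args)   # args: an argparse.Namespace
#   ctx.image            # read from BaseConfig; set_from_args() copied any
#                        # matching 'image' argument into it
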
class ContainerEngine:
    def __init__(self) -> None:
        self.path = find_program(self.EXE)

    @classmethod
    @property
    def EXE(cls) -> str:
        raise NotImplementedError()

    def __str__(self) -> str:
        return f'{self.EXE} ({self.path})'


class Podman(ContainerEngine):
    EXE = 'podman'

    def __init__(self) -> None:
        super().__init__()
        self._version: Optional[Tuple[int, ...]] = None

    @property
    def version(self) -> Tuple[int, ...]:
        if self._version is None:
            raise RuntimeError('Please call `get_version` first')
        return self._version

    def get_version(self, ctx: CephadmContext) -> None:
        out, _, _ = call_throws(ctx, [self.path, 'version', '--format', '{{.Client.Version}}'], verbosity=CallVerbosity.QUIET)
        self._version = _parse_podman_version(out)

    def __str__(self) -> str:
        version = '.'.join(map(str, self.version))
        return f'{self.EXE} ({self.path}) version {version}'


class Docker(ContainerEngine):
    EXE = 'docker'


CONTAINER_PREFERENCE = (Podman, Docker)  # prefer podman to docker


# During normal cephadm operations (cephadm ls, gather-facts, etc.) we use:
# stdout: for JSON output only
# stderr: for error, debug, info, etc
logging_config = {
    'version': 1,
    'disable_existing_loggers': True,
    'formatters': {
        'cephadm': {
            'format': '%(asctime)s %(thread)x %(levelname)s %(message)s'
        },
    },
    'handlers': {
        'console': {
            'level': 'INFO',
            'class': 'logging.StreamHandler',
        },
        'log_file': {
            'level': 'DEBUG',
            'class': 'logging.handlers.WatchedFileHandler',
            'formatter': 'cephadm',
            'filename': '%s/cephadm.log' % LOG_DIR,
        }
    },
    'loggers': {
        '': {
            'level': 'DEBUG',
            'handlers': ['console', 'log_file'],
        }
    }
}


class ExcludeErrorsFilter(logging.Filter):
    def filter(self, record: logging.LogRecord) -> bool:
        """Only lets through log messages with log level below WARNING."""
        return record.levelno < logging.WARNING


# When cephadm is used as standard binary (bootstrap, rm-cluster, etc) we use:
# stdout: for debug and info
# stderr: for errors and warnings
interactive_logging_config = {
    'version': 1,
    'filters': {
        'exclude_errors': {
            '()': ExcludeErrorsFilter
        }
    },
    'disable_existing_loggers': True,
    'formatters': {
        'cephadm': {
            'format': '%(asctime)s %(thread)x %(levelname)s %(message)s'
        },
    },
    'handlers': {
        'console_stdout': {
            'level': 'INFO',
            'class': 'logging.StreamHandler',
            'filters': ['exclude_errors'],
            'stream': sys.stdout
        },
        'console_stderr': {
            'level': 'WARNING',
            'class': 'logging.StreamHandler',
            'stream': sys.stderr
        },
        'log_file': {
            'level': 'DEBUG',
            'class': 'logging.handlers.WatchedFileHandler',
            'formatter': 'cephadm',
            'filename': '%s/cephadm.log' % LOG_DIR,
        }
    },
    'loggers': {
        '': {
            'level': 'DEBUG',
            'handlers': ['console_stdout', 'console_stderr', 'log_file'],
        }
    }
}


class termcolor:
    yellow = '\033[93m'
    red = '\033[31m'
    end = '\033[0m'


class Error(Exception):
    pass


class TimeoutExpired(Error):
    pass


class UnauthorizedRegistryError(Error):
    pass

##################################


class Ceph(object):
    daemons = ('mon', 'mgr', 'osd', 'mds', 'rgw', 'rbd-mirror',
               'crash', 'cephfs-mirror')

##################################


class OSD(object):
    @staticmethod
    def get_sysctl_settings() -> List[str]:
        return [
            '# allow a large number of OSDs',
            'fs.aio-max-nr = 1048576',
            'kernel.pid_max = 4194304',
        ]


##################################


class SNMPGateway:
    """Defines an SNMP gateway between Prometheus and SNMP monitoring Frameworks"""
    daemon_type = 'snmp-gateway'
    SUPPORTED_VERSIONS = ['V2c', 'V3']
    default_image = DEFAULT_SNMP_GATEWAY_IMAGE
    DEFAULT_PORT = 9464
    env_filename = 'snmp-gateway.conf'

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str,
                 daemon_id: Union[int, str],
                 config_json: Dict[str, Any],
                 image: Optional[str] = None) -> None:
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image or SNMPGateway.default_image

        self.uid = config_json.get('uid', 0)
        self.gid = config_json.get('gid', 0)

        self.destination = config_json.get('destination', '')
        self.snmp_version = config_json.get('snmp_version', 'V2c')
        self.snmp_community = config_json.get('snmp_community', 'public')
        self.log_level = config_json.get('log_level', 'info')
        self.snmp_v3_auth_username = config_json.get('snmp_v3_auth_username', '')
        self.snmp_v3_auth_password = config_json.get('snmp_v3_auth_password', '')
        self.snmp_v3_auth_protocol = config_json.get('snmp_v3_auth_protocol', '')
        self.snmp_v3_priv_protocol = config_json.get('snmp_v3_priv_protocol', '')
        self.snmp_v3_priv_password = config_json.get('snmp_v3_priv_password', '')
        self.snmp_v3_engine_id = config_json.get('snmp_v3_engine_id', '')

        self.validate()

    @classmethod
    def init(cls, ctx: CephadmContext, fsid: str,
             daemon_id: Union[int, str]) -> 'SNMPGateway':
        assert ctx.config_json
        return cls(ctx, fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    @staticmethod
    def get_version(ctx: CephadmContext, fsid: str, daemon_id: str) -> Optional[str]:
        """Return the version of the notifier from its HTTP endpoint"""
        path = os.path.join(ctx.data_dir, fsid, f'snmp-gateway.{daemon_id}', 'unit.meta')
        try:
            with open(path, 'r') as env:
                metadata = json.loads(env.read())
        except (OSError, json.JSONDecodeError):
            return None

        ports = metadata.get('ports', [])
        if not ports:
            return None

        try:
            with urlopen(f'http://127.0.0.1:{ports[0]}/') as r:
                html = r.read().decode('utf-8').split('\n')
        except (HTTPError, URLError):
            return None

        for h in html:
            stripped = h.strip()
            if stripped.startswith(('<pre>', '<PRE>')) and \
               stripped.endswith(('</pre>', '</PRE>')):
                # <pre>(version=1.2.1, branch=HEAD, revision=7...
                return stripped.split(',')[0].split('version=')[1]

        return None

    @property
    def port(self) -> int:
        if not self.ctx.tcp_ports:
            return self.DEFAULT_PORT
        else:
            if len(self.ctx.tcp_ports) > 0:
                return int(self.ctx.tcp_ports.split()[0])
            else:
                return self.DEFAULT_PORT

    def get_daemon_args(self) -> List[str]:
        v3_args = []
        base_args = [
            f'--web.listen-address=:{self.port}',
            f'--snmp.destination={self.destination}',
            f'--snmp.version={self.snmp_version}',
            f'--log.level={self.log_level}',
            '--snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl'
        ]

        if self.snmp_version == 'V3':
            # common auth settings
            v3_args.extend([
                '--snmp.authentication-enabled',
                f'--snmp.authentication-protocol={self.snmp_v3_auth_protocol}',
                f'--snmp.security-engine-id={self.snmp_v3_engine_id}'
            ])
            # authPriv setting is applied if we have a privacy protocol setting
            if self.snmp_v3_priv_protocol:
                v3_args.extend([
                    '--snmp.private-enabled',
                    f'--snmp.private-protocol={self.snmp_v3_priv_protocol}'
                ])

        return base_args + v3_args

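    # For illustration only (an assumed V2c configuration, not executed): with
    # destination '192.168.122.1:162' and the defaults above, get_daemon_args()
    # yields roughly
    #
    #   ['--web.listen-address=:9464',
    #    '--snmp.destination=192.168.122.1:162',
    #    '--snmp.version=V2c',
    #    '--log.level=info',
    #    '--snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl']
    #
    # while the community string is written to the environment file instead.
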
    @property
    def data_dir(self) -> str:
        return os.path.join(self.ctx.data_dir, self.ctx.fsid, f'{self.daemon_type}.{self.daemon_id}')

    @property
    def conf_file_path(self) -> str:
        return os.path.join(self.data_dir, self.env_filename)

    def create_daemon_conf(self) -> None:
        """Creates the environment file holding 'secrets' passed to the snmp-notifier daemon"""
        with open(os.open(self.conf_file_path, os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
            if self.snmp_version == 'V2c':
                f.write(f'SNMP_NOTIFIER_COMMUNITY={self.snmp_community}\n')
            else:
                f.write(f'SNMP_NOTIFIER_AUTH_USERNAME={self.snmp_v3_auth_username}\n')
                f.write(f'SNMP_NOTIFIER_AUTH_PASSWORD={self.snmp_v3_auth_password}\n')
                if self.snmp_v3_priv_password:
                    f.write(f'SNMP_NOTIFIER_PRIV_PASSWORD={self.snmp_v3_priv_password}\n')

    def validate(self) -> None:
        """Validate the settings

        Raises:
            Error: if the fsid doesn't look like an fsid
            Error: if the snmp version is not supported
            Error: destination IP and port address missing
        """
        if not is_fsid(self.fsid):
            raise Error(f'not a valid fsid: {self.fsid}')

        if self.snmp_version not in SNMPGateway.SUPPORTED_VERSIONS:
            raise Error(f'not a valid snmp version: {self.snmp_version}')

        if not self.destination:
            raise Error('config is missing destination attribute(<ip>:<port>) of the target SNMP listener')


##################################
class Monitoring(object):
    """Define the configs for the monitoring containers"""

    port_map = {
        'prometheus': [9095],  # Avoid default 9090, due to conflict with cockpit UI
        'node-exporter': [9100],
        'grafana': [3000],
        'alertmanager': [9093, 9094],
        'loki': [3100],
        'promtail': [9080]
    }

    components = {
        'prometheus': {
            'image': DEFAULT_PROMETHEUS_IMAGE,
            'cpus': '2',
            'memory': '4GB',
            'args': [
                '--config.file=/etc/prometheus/prometheus.yml',
                '--storage.tsdb.path=/prometheus',
            ],
            'config-json-files': [
                'prometheus.yml',
            ],
        },
        'loki': {
            'image': DEFAULT_LOKI_IMAGE,
            'cpus': '1',
            'memory': '1GB',
            'args': [
                '--config.file=/etc/loki/loki.yml',
            ],
            'config-json-files': [
                'loki.yml'
            ],
        },
        'promtail': {
            'image': DEFAULT_PROMTAIL_IMAGE,
            'cpus': '1',
            'memory': '1GB',
            'args': [
                '--config.file=/etc/promtail/promtail.yml',
            ],
            'config-json-files': [
                'promtail.yml',
            ],
        },
        'node-exporter': {
            'image': DEFAULT_NODE_EXPORTER_IMAGE,
            'cpus': '1',
            'memory': '1GB',
            'args': [
                '--no-collector.timex',
            ],
        },
        'grafana': {
            'image': DEFAULT_GRAFANA_IMAGE,
            'cpus': '2',
            'memory': '4GB',
            'args': [],
            'config-json-files': [
                'grafana.ini',
                'provisioning/datasources/ceph-dashboard.yml',
                'certs/cert_file',
                'certs/cert_key',
            ],
        },
        'alertmanager': {
            'image': DEFAULT_ALERT_MANAGER_IMAGE,
            'cpus': '2',
            'memory': '2GB',
            'args': [
                '--cluster.listen-address=:{}'.format(port_map['alertmanager'][1]),
            ],
            'config-json-files': [
                'alertmanager.yml',
            ],
            'config-json-args': [
                'peers',
            ],
        },
    }  # type: ignore

    @staticmethod
    def get_version(ctx, container_id, daemon_type):
        # type: (CephadmContext, str, str) -> str
        """
        :param daemon_type: Either "prometheus", "alertmanager", "loki", "promtail" or "node-exporter"
        """
        assert daemon_type in ('prometheus', 'alertmanager', 'node-exporter', 'loki', 'promtail')
        cmd = daemon_type.replace('-', '_')
        code = -1
        err = ''
        version = ''
        if daemon_type == 'alertmanager':
            for cmd in ['alertmanager', 'prometheus-alertmanager']:
                _, err, code = call(ctx, [
                    ctx.container_engine.path, 'exec', container_id, cmd,
                    '--version'
                ], verbosity=CallVerbosity.QUIET)
                if code == 0:
                    break
            cmd = 'alertmanager'  # reset cmd for version extraction
        else:
            _, err, code = call(ctx, [
                ctx.container_engine.path, 'exec', container_id, cmd, '--version'
            ], verbosity=CallVerbosity.QUIET)
        if code == 0 and \
           err.startswith('%s, version ' % cmd):
            version = err.split(' ')[2]
        return version

##################################


def populate_files(config_dir, config_files, uid, gid):
    # type: (str, Dict, int, int) -> None
    """create config files for different services"""
    for fname in config_files:
        config_file = os.path.join(config_dir, fname)
        config_content = dict_get_join(config_files, fname)
        logger.info('Write file: %s' % (config_file))
        with open(config_file, 'w', encoding='utf-8') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config_content)


class NFSGanesha(object):
    """Defines an NFS-Ganesha container"""

    daemon_type = 'nfs'
    entrypoint = '/usr/bin/ganesha.nfsd'
    daemon_args = ['-F', '-L', 'STDERR']

    required_files = ['ganesha.conf']

    port_map = {
        'nfs': 2049,
    }

    def __init__(self,
                 ctx,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.pool = dict_get(config_json, 'pool', require=True)
        self.namespace = dict_get(config_json, 'namespace')
        self.userid = dict_get(config_json, 'userid')
        self.extra_args = dict_get(config_json, 'extra_args', [])
        self.files = dict_get(config_json, 'files', {})
        self.rgw = dict_get(config_json, 'rgw', {})

        # validate the supplied args
        self.validate()

    @classmethod
    def init(cls, ctx, fsid, daemon_id):
        # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha
        return cls(ctx, fsid, daemon_id, get_parm(ctx.config_json), ctx.image)

    def get_container_mounts(self, data_dir):
        # type: (str) -> Dict[str, str]
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z'
        if self.rgw:
            cluster = self.rgw.get('cluster', 'ceph')
            rgw_user = self.rgw.get('user', 'admin')
            mounts[os.path.join(data_dir, 'keyring.rgw')] = \
                '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user)
        return mounts

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        envs = [
            'CEPH_CONF=%s' % (CEPH_DEFAULT_CONF)
        ]
        return envs

    @staticmethod
    def get_version(ctx, container_id):
        # type: (CephadmContext, str) -> Optional[str]
        version = None
        out, err, code = call(ctx,
                              [ctx.container_engine.path, 'exec', container_id,
                               NFSGanesha.entrypoint, '-v'],
                              verbosity=CallVerbosity.QUIET)
        if code == 0:
            match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
            if match:
                version = match.group(1)
        return version

    def validate(self):
        # type: () -> None
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

        # check for an RGW config
        if self.rgw:
            if not self.rgw.get('keyring'):
                raise Error('RGW keyring is missing')
            if not self.rgw.get('user'):
                raise Error('RGW user is missing')

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def get_daemon_args(self):
        # type: () -> List[str]
        return self.daemon_args + self.extra_args

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ganesha config...')

        # create the ganesha conf dir
        config_dir = os.path.join(data_dir, 'etc/ganesha')
        makedirs(config_dir, uid, gid, 0o755)

        # populate files from the config-json
        populate_files(config_dir, self.files, uid, gid)

        # write the RGW keyring
        if self.rgw:
            keyring_path = os.path.join(data_dir, 'keyring.rgw')
            with open(keyring_path, 'w') as f:
                os.fchmod(f.fileno(), 0o600)
                os.fchown(f.fileno(), uid, gid)
                f.write(self.rgw.get('keyring', ''))

##################################


class CephIscsi(object):
    """Defines a Ceph-Iscsi container"""

    daemon_type = 'iscsi'
    entrypoint = '/usr/bin/rbd-target-api'

    required_files = ['iscsi-gateway.cfg']

    def __init__(self,
                 ctx,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        # validate the supplied args
        self.validate()

    @classmethod
    def init(cls, ctx, fsid, daemon_id):
        # type: (CephadmContext, str, Union[int, str]) -> CephIscsi
        return cls(ctx, fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    @staticmethod
    def get_container_mounts(data_dir, log_dir):
        # type: (str, str) -> Dict[str, str]
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
        mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
        mounts[log_dir] = '/var/log:z'
        mounts['/dev'] = '/dev'
        return mounts

    @staticmethod
    def get_container_binds():
        # type: () -> List[List[str]]
        binds = []
        lib_modules = ['type=bind',
                       'source=/lib/modules',
                       'destination=/lib/modules',
                       'ro=true']
        binds.append(lib_modules)
        return binds

    @staticmethod
    def get_version(ctx, container_id):
        # type: (CephadmContext, str) -> Optional[str]
        version = None
        out, err, code = call(ctx,
                              [ctx.container_engine.path, 'exec', container_id,
                               '/usr/bin/python3', '-c', "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"],
                              verbosity=CallVerbosity.QUIET)
        if code == 0:
            version = out.strip()
        return version

    def validate(self):
        # type: () -> None
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ceph-iscsi config...')
        configfs_dir = os.path.join(data_dir, 'configfs')
        makedirs(configfs_dir, uid, gid, 0o755)

        # populate files from the config-json
        populate_files(data_dir, self.files, uid, gid)

    @staticmethod
    def configfs_mount_umount(data_dir, mount=True):
        # type: (str, bool) -> List[str]
        mount_path = os.path.join(data_dir, 'configfs')
        if mount:
            cmd = 'if ! grep -qs {0} /proc/mounts; then ' \
                  'mount -t configfs none {0}; fi'.format(mount_path)
        else:
            cmd = 'if grep -qs {0} /proc/mounts; then ' \
                  'umount {0}; fi'.format(mount_path)
        return cmd.split()

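    # Illustrative only: for a hypothetical data_dir of
    # /var/lib/ceph/<fsid>/iscsi.a, configfs_mount_umount(data_dir) builds the
    # shell snippet
    #   if ! grep -qs /var/lib/ceph/<fsid>/iscsi.a/configfs /proc/mounts; then
    #       mount -t configfs none /var/lib/ceph/<fsid>/iscsi.a/configfs; fi
    # and returns it split on whitespace for the caller to embed elsewhere.
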
    def get_tcmu_runner_container(self):
        # type: () -> CephContainer
        tcmu_container = get_container(self.ctx, self.fsid, self.daemon_type, self.daemon_id)
        tcmu_container.entrypoint = '/usr/bin/tcmu-runner'
        tcmu_container.cname = self.get_container_name(desc='tcmu')
        # remove extra container args for tcmu container.
        # extra args could cause issue with forking service type
        tcmu_container.container_args = []
        return tcmu_container

##################################


class HAproxy(object):
    """Defines an HAproxy container"""
    daemon_type = 'haproxy'
    required_files = ['haproxy.cfg']
    default_image = DEFAULT_HAPROXY_IMAGE

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        self.validate()

    @classmethod
    def init(cls, ctx: CephadmContext,
             fsid: str, daemon_id: Union[int, str]) -> 'HAproxy':
        return cls(ctx, fsid, daemon_id, get_parm(ctx.config_json),
                   ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        # create additional directories in data dir for HAproxy to use
        if not os.path.isdir(os.path.join(data_dir, 'haproxy')):
            makedirs(os.path.join(data_dir, 'haproxy'), uid, gid, DATA_DIR_MODE)

        data_dir = os.path.join(data_dir, 'haproxy')
        populate_files(data_dir, self.files, uid, gid)

    def get_daemon_args(self) -> List[str]:
        return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg']

    def validate(self):
        # type: () -> None
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def extract_uid_gid_haproxy(self) -> Tuple[int, int]:
        # better directory for this?
        return extract_uid_gid(self.ctx, file_path='/var/lib')

    @staticmethod
    def get_container_mounts(data_dir: str) -> Dict[str, str]:
        mounts = dict()
        mounts[os.path.join(data_dir, 'haproxy')] = '/var/lib/haproxy'
        return mounts

    @staticmethod
    def get_sysctl_settings() -> List[str]:
        return [
            '# IP forwarding',
            'net.ipv4.ip_forward = 1',
        ]

##################################


class Keepalived(object):
    """Defines a Keepalived container"""
    daemon_type = 'keepalived'
    required_files = ['keepalived.conf']
    default_image = DEFAULT_KEEPALIVED_IMAGE

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        self.validate()

    @classmethod
    def init(cls, ctx: CephadmContext, fsid: str,
             daemon_id: Union[int, str]) -> 'Keepalived':
        return cls(ctx, fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        # create additional directories in data dir for keepalived to use
        if not os.path.isdir(os.path.join(data_dir, 'keepalived')):
            makedirs(os.path.join(data_dir, 'keepalived'), uid, gid, DATA_DIR_MODE)

        # populate files from the config-json
        populate_files(data_dir, self.files, uid, gid)

    def validate(self):
        # type: () -> None
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        envs = [
            'KEEPALIVED_AUTOCONF=false',
            'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf',
            'KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf',
            'KEEPALIVED_DEBUG=false'
        ]
        return envs

    @staticmethod
    def get_sysctl_settings() -> List[str]:
        return [
            '# IP forwarding and non-local bind',
            'net.ipv4.ip_forward = 1',
            'net.ipv4.ip_nonlocal_bind = 1',
        ]

    def extract_uid_gid_keepalived(self) -> Tuple[int, int]:
        # better directory for this?
        return extract_uid_gid(self.ctx, file_path='/var/lib')

    @staticmethod
    def get_container_mounts(data_dir: str) -> Dict[str, str]:
        mounts = dict()
        mounts[os.path.join(data_dir, 'keepalived.conf')] = '/etc/keepalived/keepalived.conf'
        return mounts

##################################


class CustomContainer(object):
    """Defines a custom container"""
    daemon_type = 'container'

    def __init__(self,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.entrypoint = dict_get(config_json, 'entrypoint')
        self.uid = dict_get(config_json, 'uid', 65534)  # nobody
        self.gid = dict_get(config_json, 'gid', 65534)  # nobody
        self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
        self.args = dict_get(config_json, 'args', [])
        self.envs = dict_get(config_json, 'envs', [])
        self.privileged = dict_get(config_json, 'privileged', False)
        self.bind_mounts = dict_get(config_json, 'bind_mounts', [])
        self.ports = dict_get(config_json, 'ports', [])
        self.dirs = dict_get(config_json, 'dirs', [])
        self.files = dict_get(config_json, 'files', {})

    @classmethod
    def init(cls, ctx: CephadmContext,
             fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer':
        return cls(fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """
        Create dirs/files below the container data directory.
        """
        logger.info('Creating custom container configuration '
                    'dirs/files in {} ...'.format(data_dir))

        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % data_dir)

        for dir_path in self.dirs:
            logger.info('Creating directory: {}'.format(dir_path))
            dir_path = os.path.join(data_dir, dir_path.strip('/'))
            makedirs(dir_path, uid, gid, 0o755)

        for file_path in self.files:
            logger.info('Creating file: {}'.format(file_path))
            content = dict_get_join(self.files, file_path)
            file_path = os.path.join(data_dir, file_path.strip('/'))
            with open(file_path, 'w', encoding='utf-8') as f:
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)
                f.write(content)

    def get_daemon_args(self) -> List[str]:
        return []

    def get_container_args(self) -> List[str]:
        return self.args

    def get_container_envs(self) -> List[str]:
        return self.envs

    def get_container_mounts(self, data_dir: str) -> Dict[str, str]:
        """
        Get the volume mounts. Relative source paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

        Example:
        {
            /foo/conf: /conf
            foo/conf: /conf
        }
        becomes
        {
            /foo/conf: /conf
            /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
        }
        """
        mounts = {}
        for source, destination in self.volume_mounts.items():
            source = os.path.join(data_dir, source)
            mounts[source] = destination
        return mounts

    def get_container_binds(self, data_dir: str) -> List[List[str]]:
        """
        Get the bind mounts. Relative `source=...` paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

        Example:
        [
            'type=bind',
            'source=lib/modules',
            'destination=/lib/modules',
            'ro=true'
        ]
        becomes
        [
            ...
            'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
            ...
        ]
        """
        binds = self.bind_mounts.copy()
        for bind in binds:
            for index, value in enumerate(bind):
                match = re.match(r'^source=(.+)$', value)
                if match:
                    bind[index] = 'source={}'.format(os.path.join(
                        data_dir, match.group(1)))
        return binds

##################################


def touch(file_path: str, uid: Optional[int] = None, gid: Optional[int] = None) -> None:
    Path(file_path).touch()
    if uid and gid:
        os.chown(file_path, uid, gid)


##################################


def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
    """
    Helper function to get a key from a dictionary.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :param default: The default value in case the key does not
        exist. Default is `None`.
    :param require: Set to `True` if the key is required. An
        exception will be raised if the key does not exist in
        the given dictionary.
    :return: Returns the value of the given key.
    :raises: :exc:`self.Error` if the given key does not exist
        and `require` is set to `True`.
    """
    if require and key not in d.keys():
        raise Error('{} missing from dict'.format(key))
    return d.get(key, default)  # type: ignore

##################################


def dict_get_join(d: Dict, key: str) -> Any:
    """
    Helper function to get the value of a given key from a dictionary.
    `List` values will be converted to a string by joining them with a
    line break.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :return: Returns the value of the given key. If it was a `list`, it
        will be joined with a line break.
    """
    value = d.get(key)
    if isinstance(value, list):
        value = '\n'.join(map(str, value))
    return value

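# Illustrative only (not executed at import time), assuming a config-json
# fragment d = {'files': {'foo.conf': ['line1', 'line2']}}:
#
#   dict_get(d, 'files', {})              -> {'foo.conf': ['line1', 'line2']}
#   dict_get(d, 'missing', require=True)  -> raises Error
#   dict_get_join(d['files'], 'foo.conf') -> 'line1\nline2'
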
##################################


def get_supported_daemons():
    # type: () -> List[str]
    supported_daemons = list(Ceph.daemons)
    supported_daemons.extend(Monitoring.components)
    supported_daemons.append(NFSGanesha.daemon_type)
    supported_daemons.append(CephIscsi.daemon_type)
    supported_daemons.append(CustomContainer.daemon_type)
    supported_daemons.append(HAproxy.daemon_type)
    supported_daemons.append(Keepalived.daemon_type)
    supported_daemons.append(CephadmAgent.daemon_type)
    supported_daemons.append(SNMPGateway.daemon_type)
    assert len(supported_daemons) == len(set(supported_daemons))
    return supported_daemons

##################################


class PortOccupiedError(Error):
    pass


def attempt_bind(ctx, s, address, port):
    # type: (CephadmContext, socket.socket, str, int) -> None
    try:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((address, port))
    except OSError as e:
        if e.errno == errno.EADDRINUSE:
            msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
            logger.warning(msg)
            raise PortOccupiedError(msg)
        else:
            raise Error(e)
    except Exception as e:
        raise Error(e)
    finally:
        s.close()


def port_in_use(ctx, port_num):
    # type: (CephadmContext, int) -> bool
    """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
    logger.info('Verifying port %d ...' % port_num)

    def _port_in_use(af: socket.AddressFamily, address: str) -> bool:
        try:
            s = socket.socket(af, socket.SOCK_STREAM)
            attempt_bind(ctx, s, address, port_num)
        except PortOccupiedError:
            return True
        except OSError as e:
            if e.errno in (errno.EAFNOSUPPORT, errno.EADDRNOTAVAIL):
                # Ignore EAFNOSUPPORT and EADDRNOTAVAIL as two interfaces are
                # being tested here and one might intentionally be disabled.
                # In that case no error should be raised.
                return False
            else:
                raise e
        return False
    return any(_port_in_use(af, address) for af, address in (
        (socket.AF_INET, '0.0.0.0'),
        (socket.AF_INET6, '::')
    ))


def check_ip_port(ctx, ep):
    # type: (CephadmContext, EndPoint) -> None
    if not ctx.skip_ping_check:
        logger.info(f'Verifying IP {ep.ip} port {ep.port} ...')
        if is_ipv6(ep.ip):
            s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
            ip = unwrap_ipv6(ep.ip)
        else:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            ip = ep.ip
        attempt_bind(ctx, s, ip, ep.port)

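# Illustrative only (an assumed call site, not executed here): bootstrap-style
# code verifies that a prospective address is free before using it, e.g.
#
#   check_ip_port(ctx, EndPoint('10.0.0.1', 3300))
#
# which raises PortOccupiedError if something is already bound to that ip:port
# (unless --skip-ping-check was given).
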
##################################


# this is an abbreviated version of
# https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py
# that drops all of the compatibility (this is Unix/Linux only).

class Timeout(TimeoutError):
    """
    Raised when the lock could not be acquired in *timeout*
    seconds.
    """

    def __init__(self, lock_file: str) -> None:
        """
        """
        #: The path of the file lock.
        self.lock_file = lock_file
        return None

    def __str__(self) -> str:
        temp = "The file lock '{}' could not be acquired."\
               .format(self.lock_file)
        return temp


class _Acquire_ReturnProxy(object):
    def __init__(self, lock: 'FileLock') -> None:
        self.lock = lock
        return None

    def __enter__(self) -> 'FileLock':
        return self.lock

    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
        self.lock.release()
        return None


class FileLock(object):
    def __init__(self, ctx: CephadmContext, name: str, timeout: int = -1) -> None:
        if not os.path.exists(LOCK_DIR):
            os.mkdir(LOCK_DIR, 0o700)
        self._lock_file = os.path.join(LOCK_DIR, name + '.lock')
        self.ctx = ctx

        # The file descriptor for the *_lock_file* as it is returned by the
        # os.open() function.
        # This file lock is only NOT None, if the object currently holds the
        # lock.
        self._lock_file_fd: Optional[int] = None
        self.timeout = timeout
        # The lock counter is used for implementing the nested locking
        # mechanism. Whenever the lock is acquired, the counter is increased and
        # the lock is only released, when this value is 0 again.
        self._lock_counter = 0
        return None

    @property
    def is_locked(self) -> bool:
        return self._lock_file_fd is not None

    def acquire(self, timeout: Optional[int] = None, poll_intervall: float = 0.05) -> _Acquire_ReturnProxy:
        """
        Acquires the file lock or fails with a :exc:`Timeout` error.
        .. code-block:: python
            # You can use this method in the context manager (recommended)
            with lock.acquire():
                pass
            # Or use an equivalent try-finally construct:
            lock.acquire()
            try:
                pass
            finally:
                lock.release()
        :arg float timeout:
            The maximum time waited for the file lock.
            If ``timeout < 0``, there is no timeout and this method will
            block until the lock could be acquired.
            If ``timeout`` is None, the default :attr:`~timeout` is used.
        :arg float poll_intervall:
            We check once in *poll_intervall* seconds if we can acquire the
            file lock.
        :raises Timeout:
            if the lock could not be acquired in *timeout* seconds.
        .. versionchanged:: 2.0.0
            This method returns now a *proxy* object instead of *self*,
            so that it can be used in a with statement without side effects.
        """

        # Use the default timeout, if no timeout is provided.
        if timeout is None:
            timeout = self.timeout

        # Increment the number right at the beginning.
        # We can still undo it, if something fails.
        self._lock_counter += 1

        lock_id = id(self)
        lock_filename = self._lock_file
        start_time = time.time()
        try:
            while True:
                if not self.is_locked:
                    logger.log(QUIET_LOG_LEVEL, 'Acquiring lock %s on %s', lock_id,
                               lock_filename)
                    self._acquire()

                if self.is_locked:
                    logger.log(QUIET_LOG_LEVEL, 'Lock %s acquired on %s', lock_id,
                               lock_filename)
                    break
                elif timeout >= 0 and time.time() - start_time > timeout:
                    logger.warning('Timeout acquiring lock %s on %s', lock_id,
                                   lock_filename)
                    raise Timeout(self._lock_file)
                else:
                    logger.log(
                        QUIET_LOG_LEVEL,
                        'Lock %s not acquired on %s, waiting %s seconds ...',
                        lock_id, lock_filename, poll_intervall
                    )
                    time.sleep(poll_intervall)
        except Exception:
            # Something did go wrong, so decrement the counter.
            self._lock_counter = max(0, self._lock_counter - 1)

            raise
        return _Acquire_ReturnProxy(lock=self)

    def release(self, force: bool = False) -> None:
        """
        Releases the file lock.
        Please note, that the lock is only completely released, if the lock
        counter is 0.
        Also note, that the lock file itself is not automatically deleted.
        :arg bool force:
            If true, the lock counter is ignored and the lock is released in
            every case.
        """
        if self.is_locked:
            self._lock_counter -= 1

            if self._lock_counter == 0 or force:
                # lock_id = id(self)
                # lock_filename = self._lock_file

                # Can't log in shutdown:
                # File "/usr/lib64/python3.9/logging/__init__.py", line 1175, in _open
                # NameError: name 'open' is not defined
                # logger.debug('Releasing lock %s on %s', lock_id, lock_filename)
                self._release()
                self._lock_counter = 0
                # logger.debug('Lock %s released on %s', lock_id, lock_filename)

        return None

    def __enter__(self) -> 'FileLock':
        self.acquire()
        return self

    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
        self.release()
        return None

    def __del__(self) -> None:
        self.release(force=True)
        return None

    def _acquire(self) -> None:
        open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
        fd = os.open(self._lock_file, open_mode)

        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except (IOError, OSError):
            os.close(fd)
        else:
            self._lock_file_fd = fd
        return None

    def _release(self) -> None:
        # Do not remove the lockfile:
        #
        # https://github.com/benediktschmitt/py-filelock/issues/31
        # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
        fd = self._lock_file_fd
        self._lock_file_fd = None
        fcntl.flock(fd, fcntl.LOCK_UN)  # type: ignore
        os.close(fd)  # type: ignore
        return None


##################################
# Popen wrappers, lifted from ceph-volume

class CallVerbosity(Enum):
    #####
    # Format:
    # Normal Operation: <log-level-when-no-errors>, Errors: <log-level-when-error>
    #
    # NOTE: QUIET log level is custom level only used when --verbose is passed
    #####

    # Normal Operation: None, Errors: None
    SILENT = 0
    # Normal Operation: QUIET, Error: QUIET
    QUIET = 1
    # Normal Operation: DEBUG, Error: DEBUG
    DEBUG = 2
    # Normal Operation: QUIET, Error: INFO
    QUIET_UNLESS_ERROR = 3
    # Normal Operation: DEBUG, Error: INFO
    VERBOSE_ON_FAILURE = 4
    # Normal Operation: INFO, Error: INFO
    VERBOSE = 5

    def success_log_level(self) -> int:
        _verbosity_level_to_log_level = {
            self.SILENT: 0,
            self.QUIET: QUIET_LOG_LEVEL,
            self.DEBUG: logging.DEBUG,
            self.QUIET_UNLESS_ERROR: QUIET_LOG_LEVEL,
            self.VERBOSE_ON_FAILURE: logging.DEBUG,
            self.VERBOSE: logging.INFO
        }
        return _verbosity_level_to_log_level[self]  # type: ignore

    def error_log_level(self) -> int:
        _verbosity_level_to_log_level = {
            self.SILENT: 0,
            self.QUIET: QUIET_LOG_LEVEL,
            self.DEBUG: logging.DEBUG,
            self.QUIET_UNLESS_ERROR: logging.INFO,
            self.VERBOSE_ON_FAILURE: logging.INFO,
            self.VERBOSE: logging.INFO
        }
        return _verbosity_level_to_log_level[self]  # type: ignore


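# Illustrative only (assumed usage, mirroring call sites elsewhere in this
# file): a caller that wants output kept at the custom QUIET level unless the
# command fails would do
#
#   out, err, code = call(ctx, ['true'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
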
f67539c2
TL
1599if sys.version_info < (3, 8):
1600 import itertools
1601 import threading
1602 import warnings
1603 from asyncio import events
1604
1605 class ThreadedChildWatcher(asyncio.AbstractChildWatcher):
1606 """Threaded child watcher implementation.
1607 The watcher uses a thread per process
1608 for waiting for the process finish.
1609 It doesn't require subscription on POSIX signal
1610 but a thread creation is not free.
1611 The watcher has O(1) complexity, its performance doesn't depend
1612 on amount of spawn processes.
1613 """
1614
522d829b 1615 def __init__(self) -> None:
f67539c2 1616 self._pid_counter = itertools.count(0)
a4b75251 1617 self._threads: Dict[Any, Any] = {}
f67539c2 1618
a4b75251 1619 def is_active(self) -> bool:
f67539c2
TL
1620 return True
1621
a4b75251 1622 def close(self) -> None:
f67539c2
TL
1623 self._join_threads()
1624
a4b75251 1625 def _join_threads(self) -> None:
f67539c2
TL
1626 """Internal: Join all non-daemon threads"""
1627 threads = [thread for thread in list(self._threads.values())
1628 if thread.is_alive() and not thread.daemon]
1629 for thread in threads:
1630 thread.join()
1631
a4b75251 1632 def __enter__(self) -> Any:
f67539c2
TL
1633 return self
1634
a4b75251 1635 def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
f67539c2
TL
1636 pass
1637
a4b75251 1638 def __del__(self, _warn: Any = warnings.warn) -> None:
f67539c2
TL
1639 threads = [thread for thread in list(self._threads.values())
1640 if thread.is_alive()]
1641 if threads:
1642 _warn(f'{self.__class__} has registered but not finished child processes',
1643 ResourceWarning,
1644 source=self)
1645
a4b75251 1646 def add_child_handler(self, pid: Any, callback: Any, *args: Any) -> None:
f67539c2
TL
1647 loop = events.get_event_loop()
1648 thread = threading.Thread(target=self._do_waitpid,
1649 name=f'waitpid-{next(self._pid_counter)}',
1650 args=(loop, pid, callback, args),
1651 daemon=True)
1652 self._threads[pid] = thread
1653 thread.start()
1654
a4b75251 1655 def remove_child_handler(self, pid: Any) -> bool:
f67539c2
TL
1656 # asyncio never calls remove_child_handler() !!!
1657 # The method is no-op but is implemented because
1658 # abstract base classe requires it
1659 return True
1660
a4b75251 1661 def attach_loop(self, loop: Any) -> None:
f67539c2
TL
1662 pass
1663
a4b75251 1664 def _do_waitpid(self, loop: Any, expected_pid: Any, callback: Any, args: Any) -> None:
f67539c2
TL
1665 assert expected_pid > 0
1666
1667 try:
1668 pid, status = os.waitpid(expected_pid, 0)
1669 except ChildProcessError:
1670 # The child process is already reaped
1671 # (may happen if waitpid() is called elsewhere).
1672 pid = expected_pid
1673 returncode = 255
1674 logger.warning(
1675 'Unknown child process pid %d, will report returncode 255',
1676 pid)
1677 else:
1678 if os.WIFEXITED(status):
1679 returncode = os.WEXITSTATUS(status)
1680 elif os.WIFSIGNALED(status):
1681 returncode = -os.WTERMSIG(status)
1682 else:
1683 raise ValueError(f'unknown wait status {status}')
1684 if loop.get_debug():
1685 logger.debug('process %s exited with returncode %s',
1686 expected_pid, returncode)
1687
1688 if loop.is_closed():
1689 logger.warning('Loop %r that handles pid %r is closed', loop, pid)
1690 else:
1691 loop.call_soon_threadsafe(callback, pid, returncode, *args)
1692
1693 self._threads.pop(expected_pid)
1694
1695 # unlike SafeChildWatcher which handles SIGCHLD in the main thread,
1696 # ThreadedChildWatcher runs in a separated thread, hence allows us to
1697 # run create_subprocess_exec() in non-main thread, see
1698 # https://bugs.python.org/issue35621
1699 asyncio.set_child_watcher(ThreadedChildWatcher())
1700
1701
1702try:
1703 from asyncio import run as async_run # type: ignore[attr-defined]
1704except ImportError:
1705 def async_run(coro): # type: ignore
1706 loop = asyncio.new_event_loop()
1707 try:
1708 asyncio.set_event_loop(loop)
1709 return loop.run_until_complete(coro)
1710 finally:
1711 try:
1712 loop.run_until_complete(loop.shutdown_asyncgens())
1713 finally:
1714 asyncio.set_event_loop(None)
1715 loop.close()
1716
1717
1718def call(ctx: CephadmContext,
1719 command: List[str],
adb31ebb
TL
1720 desc: Optional[str] = None,
1721 verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
1722 timeout: Optional[int] = DEFAULT_TIMEOUT,
522d829b 1723 **kwargs: Any) -> Tuple[str, str, int]:
9f95a23c
TL
1724 """
1725 Wrap asyncio.create_subprocess_exec to
1726
1727 - log stdout/stderr to a logger,
1728 - decode utf-8 output,
1729 - cleanly return out, err, returncode
1730
9f95a23c
TL
1731 :param timeout: timeout in seconds
1732 """
f67539c2
TL
1733
1734 prefix = command[0] if desc is None else desc
1735 if prefix:
1736 prefix += ': '
1737 timeout = timeout or ctx.timeout
1738
f67539c2
TL
1739 async def tee(reader: asyncio.StreamReader) -> str:
1740 collected = StringIO()
1741 async for line in reader:
1742 message = line.decode('utf-8')
1743 collected.write(message)
f67539c2
TL
1744 return collected.getvalue()
1745
1746 async def run_with_timeout() -> Tuple[str, str, int]:
1747 process = await asyncio.create_subprocess_exec(
1748 *command,
1749 stdout=asyncio.subprocess.PIPE,
522d829b
TL
1750 stderr=asyncio.subprocess.PIPE,
1751 env=os.environ.copy())
f67539c2
TL
1752 assert process.stdout
1753 assert process.stderr
1754 try:
1755 stdout, stderr = await asyncio.gather(tee(process.stdout),
1756 tee(process.stderr))
1757 returncode = await asyncio.wait_for(process.wait(), timeout)
1758 except asyncio.TimeoutError:
1759 logger.info(prefix + f'timeout after {timeout} seconds')
1760 return '', '', 124
9f95a23c 1761 else:
f67539c2 1762 return stdout, stderr, returncode
9f95a23c 1763
f67539c2 1764 stdout, stderr, returncode = async_run(run_with_timeout())
2a845540
TL
1765 log_level = verbosity.success_log_level()
1766 if returncode != 0:
1767 log_level = verbosity.error_log_level()
1768 logger.log(log_level, f'Non-zero exit code {returncode} from {" ".join(command)}')
1769 for line in stdout.splitlines():
1770 logger.log(log_level, prefix + 'stdout ' + line)
1771 for line in stderr.splitlines():
1772 logger.log(log_level, prefix + 'stderr ' + line)
f67539c2
TL
1773 return stdout, stderr, returncode
1774
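# A minimal usage sketch for call() (illustrative only: the command, verbosity
# and timeout below are hypothetical, and `ctx` is assumed to be a populated
# CephadmContext):
#
#     out, err, code = call(ctx, ['systemctl', 'is-active', 'chronyd'],
#                           verbosity=CallVerbosity.QUIET, timeout=30)
#     if code != 0:
#         logger.debug('chronyd is not active: %s' % err.strip())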
1775
1776def call_throws(
1777 ctx: CephadmContext,
1778 command: List[str],
1779 desc: Optional[str] = None,
1780 verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
1781 timeout: Optional[int] = DEFAULT_TIMEOUT,
522d829b 1782 **kwargs: Any) -> Tuple[str, str, int]:
f67539c2 1783 out, err, ret = call(ctx, command, desc, verbosity, timeout, **kwargs)
9f95a23c 1784 if ret:
20effc67
TL
1785 for s in (out, err):
1786 if s.strip() and len(s.splitlines()) <= 2: # readable message?
1787 raise RuntimeError(f'Failed command: {" ".join(command)}: {s}')
9f95a23c
TL
1788 raise RuntimeError('Failed command: %s' % ' '.join(command))
1789 return out, err, ret
1790
1791
f67539c2
TL
1792def call_timeout(ctx, command, timeout):
1793 # type: (CephadmContext, List[str], int) -> int
9f95a23c 1794 logger.debug('Running command (timeout=%s): %s'
f67539c2 1795 % (timeout, ' '.join(command)))
9f95a23c
TL
1796
1797 def raise_timeout(command, timeout):
1798 # type: (List[str], int) -> NoReturn
f67539c2 1799 msg = 'Command `%s` timed out after %s seconds' % (command, timeout)
9f95a23c
TL
1800 logger.debug(msg)
1801 raise TimeoutExpired(msg)
1802
f67539c2 1803 try:
522d829b 1804 return subprocess.call(command, timeout=timeout, env=os.environ.copy())
f67539c2
TL
1805 except subprocess.TimeoutExpired:
1806 raise_timeout(command, timeout)
9f95a23c
TL
1807
1808##################################
1809
f6b5b4d7 1810
522d829b 1811def json_loads_retry(cli_func: Callable[[], str]) -> Any:
b3b6e05e
TL
1812 for sleep_secs in [1, 4, 4]:
1813 try:
1814 return json.loads(cli_func())
1815 except json.JSONDecodeError:
1816 logger.debug('Invalid JSON. Retrying in %s seconds...' % sleep_secs)
1817 time.sleep(sleep_secs)
1818 return json.loads(cli_func())
1819
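# Usage sketch (the wrapped helper is hypothetical): pass any CLI invocation
# that should return JSON, so a transiently malformed response is retried a
# few times before the final json.JSONDecodeError propagates:
#
#     status = json_loads_retry(lambda: run_ceph_status_json(ctx))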
1820
f67539c2
TL
1821def is_available(ctx, what, func):
1822 # type: (CephadmContext, str, Callable[[], bool]) -> None
9f95a23c
TL
1823 """
1824 Wait for a service to become available
1825
1826 :param what: the name of the service
1827 :param func: the callable object that determines availability
1828 """
f67539c2 1829 retry = ctx.retry
f6b5b4d7 1830 logger.info('Waiting for %s...' % what)
9f95a23c
TL
1831 num = 1
1832 while True:
1833 if func():
e306af50 1834 logger.info('%s is available'
f6b5b4d7 1835 % what)
9f95a23c
TL
1836 break
1837 elif num > retry:
1838 raise Error('%s not available after %s tries'
f67539c2 1839 % (what, retry))
9f95a23c
TL
1840
1841 logger.info('%s not available, waiting (%s/%s)...'
f67539c2 1842 % (what, num, retry))
9f95a23c
TL
1843
1844 num += 1
f67539c2 1845 time.sleep(2)
9f95a23c
TL
1846
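# Usage sketch (the probe below is hypothetical): block until a TCP port
# answers, retrying up to ctx.retry times with a 2 second sleep in between,
# and raise Error if it never becomes available:
#
#     is_available(ctx, 'mgr', lambda: port_in_use(ctx, 9283))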
1847
1848def read_config(fn):
1849 # type: (Optional[str]) -> ConfigParser
f67539c2 1850 cp = ConfigParser()
9f95a23c 1851 if fn:
f67539c2 1852 cp.read(fn)
9f95a23c
TL
1853 return cp
1854
f6b5b4d7 1855
9f95a23c
TL
1856def pathify(p):
1857 # type: (str) -> str
e306af50
TL
1858 p = os.path.expanduser(p)
1859 return os.path.abspath(p)
9f95a23c 1860
f6b5b4d7 1861
9f95a23c 1862def get_file_timestamp(fn):
e306af50 1863 # type: (str) -> Optional[str]
9f95a23c
TL
1864 try:
1865 mt = os.path.getmtime(fn)
1866 return datetime.datetime.fromtimestamp(
1867 mt, tz=datetime.timezone.utc
1868 ).strftime(DATEFMT)
adb31ebb 1869 except Exception:
9f95a23c
TL
1870 return None
1871
f6b5b4d7 1872
9f95a23c 1873def try_convert_datetime(s):
e306af50 1874 # type: (str) -> Optional[str]
9f95a23c
TL
1875 # This is super irritating because
1876 # 1) podman and docker use different formats
1877 # 2) python's strptime can't parse either one
1878 #
1879 # I've seen:
1880 # docker 18.09.7: 2020-03-03T09:21:43.636153304Z
1881 # podman 1.7.0: 2020-03-03T15:52:30.136257504-06:00
1882 # 2020-03-03 15:52:30.136257504 -0600 CST
1883 # (In the podman case, there is a different string format for
1884 # 'inspect' and 'inspect --format {{.Created}}'!!)
1885
1886 # In *all* cases, the 9 digit second precision is too much for
1887 # python's strptime. Shorten it to 6 digits.
1888 p = re.compile(r'(\.[\d]{6})[\d]*')
1889 s = p.sub(r'\1', s)
1890
adb31ebb 1891 # replace trailing Z with -0000, since (on python 3.6.8) it won't parse
9f95a23c
TL
1892 if s and s[-1] == 'Z':
1893 s = s[:-1] + '-0000'
1894
adb31ebb 1895 # cut off the redundant 'CST' part that strptime can't parse, if
9f95a23c
TL
1896 # present.
1897 v = s.split(' ')
1898 s = ' '.join(v[0:3])
1899
1900 # try parsing with several format strings
1901 fmts = [
1902 '%Y-%m-%dT%H:%M:%S.%f%z',
1903 '%Y-%m-%d %H:%M:%S.%f %z',
1904 ]
1905 for f in fmts:
1906 try:
1907 # return timestamp normalized to UTC, rendered as DATEFMT.
1908 return datetime.datetime.strptime(s, f).astimezone(tz=datetime.timezone.utc).strftime(DATEFMT)
1909 except ValueError:
1910 pass
1911 return None
1912
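# Worked examples (input values are illustrative); both engine formats
# normalize to the same UTC DATEFMT string:
#
#     try_convert_datetime('2020-03-03T09:21:43.636153304Z')
#         # -> '2020-03-03T09:21:43.636153Z'
#     try_convert_datetime('2020-03-03 15:52:30.136257504 -0600 CST')
#         # -> '2020-03-03T21:52:30.136257Z'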
f6b5b4d7 1913
f67539c2 1914def _parse_podman_version(version_str):
9f95a23c 1915 # type: (str) -> Tuple[int, ...]
522d829b 1916 def to_int(val: str, org_e: Optional[Exception] = None) -> int:
9f95a23c
TL
1917 if not val and org_e:
1918 raise org_e
1919 try:
1920 return int(val)
1921 except ValueError as e:
1922 return to_int(val[0:-1], org_e or e)
1923
1924 return tuple(map(to_int, version_str.split('.')))
1925
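# For example (illustrative inputs), trailing non-numeric suffixes on a
# version component are stripped until an integer parses:
#
#     _parse_podman_version('2.0.2')      # -> (2, 0, 2)
#     _parse_podman_version('3.4.4-dev')  # -> (3, 4, 4)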
1926
1927def get_hostname():
1928 # type: () -> str
1929 return socket.gethostname()
1930
f6b5b4d7 1931
9f95a23c
TL
1932def get_fqdn():
1933 # type: () -> str
1934 return socket.getfqdn() or socket.gethostname()
1935
f6b5b4d7 1936
9f95a23c
TL
1937def get_arch():
1938 # type: () -> str
1939 return platform.uname().machine
1940
f6b5b4d7 1941
9f95a23c
TL
1942def generate_service_id():
1943 # type: () -> str
1944 return get_hostname() + '.' + ''.join(random.choice(string.ascii_lowercase)
1945 for _ in range(6))
1946
f6b5b4d7 1947
9f95a23c
TL
1948def generate_password():
1949 # type: () -> str
1950 return ''.join(random.choice(string.ascii_lowercase + string.digits)
1951 for i in range(10))
1952
f6b5b4d7 1953
9f95a23c
TL
1954def normalize_container_id(i):
1955 # type: (str) -> str
1956 # docker adds the sha256: prefix, but AFAICS
1957 # docker (18.09.7 in bionic at least) and podman
1958 # both always use sha256, so leave off the prefix
1959 # for consistency.
1960 prefix = 'sha256:'
1961 if i.startswith(prefix):
1962 i = i[len(prefix):]
1963 return i
1964
f6b5b4d7 1965
9f95a23c
TL
1966def make_fsid():
1967 # type: () -> str
1968 return str(uuid.uuid1())
1969
f6b5b4d7 1970
9f95a23c
TL
1971def is_fsid(s):
1972 # type: (str) -> bool
1973 try:
1974 uuid.UUID(s)
1975 except ValueError:
1976 return False
1977 return True
1978
f6b5b4d7 1979
522d829b
TL
1980def validate_fsid(func: FuncT) -> FuncT:
1981 @wraps(func)
1982 def _validate_fsid(ctx: CephadmContext) -> Any:
1983 if 'fsid' in ctx and ctx.fsid:
1984 if not is_fsid(ctx.fsid):
1985 raise Error('not an fsid: %s' % ctx.fsid)
1986 return func(ctx)
1987 return cast(FuncT, _validate_fsid)
1988
1989
1990def infer_fsid(func: FuncT) -> FuncT:
9f95a23c
TL
1991 """
1992 If we only find a single fsid in /var/lib/ceph/*, use that
1993 """
522d829b 1994 @infer_config
9f95a23c 1995 @wraps(func)
522d829b
TL
1996 def _infer_fsid(ctx: CephadmContext) -> Any:
1997 if 'fsid' in ctx and ctx.fsid:
f67539c2
TL
1998 logger.debug('Using specified fsid: %s' % ctx.fsid)
1999 return func(ctx)
9f95a23c 2000
522d829b
TL
2001 fsids = set()
2002
2003 cp = read_config(ctx.config)
2004 if cp.has_option('global', 'fsid'):
2005 fsids.add(cp.get('global', 'fsid'))
2006
f67539c2 2007 daemon_list = list_daemons(ctx, detail=False)
9f95a23c 2008 for daemon in daemon_list:
f6b5b4d7
TL
2009 if not is_fsid(daemon['fsid']):
2010 # 'unknown' fsid
2011 continue
f67539c2
TL
2012 elif 'name' not in ctx or not ctx.name:
2013 # ctx.name not specified
522d829b 2014 fsids.add(daemon['fsid'])
f67539c2
TL
2015 elif daemon['name'] == ctx.name:
2016 # ctx.name is a match
522d829b
TL
2017 fsids.add(daemon['fsid'])
2018 fsids = sorted(fsids)
9f95a23c
TL
2019
2020 if not fsids:
2021 # some commands do not always require an fsid
2022 pass
2023 elif len(fsids) == 1:
2024 logger.info('Inferring fsid %s' % fsids[0])
f67539c2 2025 ctx.fsid = fsids[0]
9f95a23c 2026 else:
33c7a0ef 2027 raise Error('Cannot infer an fsid, one must be specified (using --fsid): %s' % fsids)
f67539c2 2028 return func(ctx)
9f95a23c 2029
522d829b 2030 return cast(FuncT, _infer_fsid)
9f95a23c 2031
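# Typical use (sketch; the command name is illustrative): decorate a command
# entry point so that a bare invocation can locate the single deployed
# cluster without an explicit --fsid:
#
#     @infer_fsid
#     def command_example(ctx: CephadmContext) -> int:
#         logger.info('operating on cluster %s' % ctx.fsid)
#         return 0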
f6b5b4d7 2032
522d829b 2033def infer_config(func: FuncT) -> FuncT:
e306af50 2034 """
33c7a0ef
TL
2035 Infer the cluster configuration using the following priority order:
2036 1- if the user has provided a custom conf file (-c option), use it
2037 2- otherwise, if a daemon --name has been provided, use that daemon's conf
2038 3- otherwise, find the mon daemon conf file and use it (if v1)
2039 4- otherwise, if the {ctx.data_dir}/{fsid}/{CEPH_CONF_DIR} dir exists, use it
2040 5- finally: fall back to the default file /etc/ceph/ceph.conf
e306af50
TL
2041 """
2042 @wraps(func)
522d829b 2043 def _infer_config(ctx: CephadmContext) -> Any:
33c7a0ef
TL
2044
2045 def config_path(daemon_type: str, daemon_name: str) -> str:
2046 data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_name)
2047 return os.path.join(data_dir, 'config')
2048
2049 def get_mon_daemon_name(fsid: str) -> Optional[str]:
2050 daemon_list = list_daemons(ctx, detail=False)
2051 for daemon in daemon_list:
2052 if (
2053 daemon.get('name', '').startswith('mon.')
2054 and daemon.get('fsid', '') == fsid
2055 and daemon.get('style', '') == 'cephadm:v1'
2056 and os.path.exists(config_path('mon', daemon['name'].split('.', 1)[1]))
2057 ):
2058 return daemon['name']
2059 return None
2060
522d829b 2061 ctx.config = ctx.config if 'config' in ctx else None
33c7a0ef
TL
2062 # check if user has provided conf by using -c option
2063 if ctx.config and (ctx.config != CEPH_DEFAULT_CONF):
2064 logger.debug(f'Using specified config: {ctx.config}')
f67539c2 2065 return func(ctx)
33c7a0ef 2066
522d829b 2067 if 'fsid' in ctx and ctx.fsid:
33c7a0ef
TL
2068 name = ctx.name if ('name' in ctx and ctx.name) else get_mon_daemon_name(ctx.fsid)
2069 if name is not None:
2070 # daemon name has been specified (or inferred from mon), let's use its conf
2071 ctx.config = config_path(name.split('.', 1)[0], name.split('.', 1)[1])
2072 else:
2073 # no daemon found; if the cluster has a config dir then use it
2074 ceph_conf = f'{ctx.data_dir}/{ctx.fsid}/{CEPH_CONF_DIR}/{CEPH_CONF}'
2075 if os.path.exists(ceph_conf):
2076 ctx.config = ceph_conf
2077
522d829b 2078 if ctx.config:
33c7a0ef
TL
2079 logger.info(f'Inferring config {ctx.config}')
2080 elif os.path.exists(CEPH_DEFAULT_CONF):
2081 logger.debug(f'Using default config {CEPH_DEFAULT_CONF}')
2082 ctx.config = CEPH_DEFAULT_CONF
f67539c2 2083 return func(ctx)
e306af50 2084
522d829b 2085 return cast(FuncT, _infer_config)
e306af50 2086
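# Note that @infer_fsid already applies @infer_config (see above), so most
# commands only need the former; applying @infer_config on its own (sketch,
# hypothetical command) suits commands that never need an fsid:
#
#     @infer_config
#     def command_example(ctx: CephadmContext) -> int:
#         logger.info('using config %s' % ctx.config)
#         return 0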
f6b5b4d7 2087
522d829b 2088def _get_default_image(ctx: CephadmContext) -> str:
1911f103 2089 if DEFAULT_IMAGE_IS_MASTER:
f67539c2 2090 warn = """This is a development version of cephadm.
1911f103
TL
2091For information regarding the latest stable release:
2092 https://docs.ceph.com/docs/{}/cephadm/install
f67539c2 2093""".format(LATEST_STABLE_RELEASE)
1911f103 2094 for line in warn.splitlines():
e306af50 2095 logger.warning('{}{}{}'.format(termcolor.yellow, line, termcolor.end))
1911f103
TL
2096 return DEFAULT_IMAGE
2097
f6b5b4d7 2098
522d829b 2099def infer_image(func: FuncT) -> FuncT:
9f95a23c
TL
2100 """
2101 Use the most recent ceph image
2102 """
2103 @wraps(func)
522d829b 2104 def _infer_image(ctx: CephadmContext) -> Any:
f67539c2
TL
2105 if not ctx.image:
2106 ctx.image = os.environ.get('CEPHADM_IMAGE')
2107 if not ctx.image:
33c7a0ef 2108 ctx.image = infer_local_ceph_image(ctx, ctx.container_engine.path)
f67539c2
TL
2109 if not ctx.image:
2110 ctx.image = _get_default_image(ctx)
2111 return func(ctx)
9f95a23c 2112
522d829b 2113 return cast(FuncT, _infer_image)
9f95a23c 2114
f6b5b4d7 2115
522d829b 2116def default_image(func: FuncT) -> FuncT:
9f95a23c 2117 @wraps(func)
522d829b 2118 def _default_image(ctx: CephadmContext) -> Any:
f67539c2
TL
2119 if not ctx.image:
2120 if 'name' in ctx and ctx.name:
2121 type_ = ctx.name.split('.', 1)[0]
9f95a23c 2122 if type_ in Monitoring.components:
f67539c2
TL
2123 ctx.image = Monitoring.components[type_]['image']
2124 if type_ == 'haproxy':
2125 ctx.image = HAproxy.default_image
2126 if type_ == 'keepalived':
2127 ctx.image = Keepalived.default_image
20effc67
TL
2128 if type_ == SNMPGateway.daemon_type:
2129 ctx.image = SNMPGateway.default_image
f67539c2
TL
2130 if not ctx.image:
2131 ctx.image = os.environ.get('CEPHADM_IMAGE')
2132 if not ctx.image:
2133 ctx.image = _get_default_image(ctx)
2134
2135 return func(ctx)
9f95a23c 2136
522d829b 2137 return cast(FuncT, _default_image)
9f95a23c 2138
f6b5b4d7 2139
33c7a0ef
TL
2140def get_container_info(ctx: CephadmContext, daemon_filter: str, by_name: bool) -> Optional[ContainerInfo]:
2141 """
2142 :param ctx: Cephadm context
2143 :param daemon_filter: daemon name or type
2144 :param by_name: must be set to True if daemon name is provided
2145 :return: Container information or None
9f95a23c 2146 """
33c7a0ef
TL
2147 def daemon_name_or_type(daemon: Dict[str, str]) -> str:
2148 return daemon['name'] if by_name else daemon['name'].split('.', 1)[0]
2149
2150 if by_name and '.' not in daemon_filter:
2151 logger.warning(f'Trying to get container info using invalid daemon name {daemon_filter}')
2152 return None
2153 daemons = list_daemons(ctx, detail=False)
2154 matching_daemons = [d for d in daemons if daemon_name_or_type(d) == daemon_filter and d['fsid'] == ctx.fsid]
2155 if matching_daemons:
2156 d_type, d_id = matching_daemons[0]['name'].split('.', 1)
2157 out, _, code = get_container_stats(ctx, ctx.container_engine.path, ctx.fsid, d_type, d_id)
2158 if not code:
2159 (container_id, image_name, image_id, start, version) = out.strip().split(',')
2160 return ContainerInfo(container_id, image_name, image_id, start, version)
2161 return None
2162
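# Example of the two lookup modes (daemon names are hypothetical); both return
# a ContainerInfo (container id, image name/id, start time, version) or None
# when no matching container for the current fsid is running:
#
#     get_container_info(ctx, 'mon.host1', by_name=True)   # one specific daemon
#     get_container_info(ctx, 'mgr', by_name=False)        # any daemon of that type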
2163
2164def infer_local_ceph_image(ctx: CephadmContext, container_path: str) -> Optional[str]:
2165 """
2166 Infer the local ceph image based on the following priority criteria:
2167 1- the image specified by --image arg (if provided).
2168 2- the same image as the daemon container specified by --name arg (if provided).
2169 3- image used by any ceph container running on the host. In this case we use daemon types.
2170 4- if no container is found, then we use the most recent ceph image on the host.
2171
2172 Note: any selected container must have the same fsid inferred previously.
2173
9f95a23c
TL
2174 :return: The most recent local ceph image (already pulled)
2175 """
33c7a0ef
TL
2176 # '|' special character is used to separate the output fields into:
2177 # - Repository@digest
2178 # - Image Id
2179 # - Image Tag
2180 # - Image creation date
f67539c2
TL
2181 out, _, _ = call_throws(ctx,
2182 [container_path, 'images',
2183 '--filter', 'label=ceph=True',
2184 '--filter', 'dangling=false',
33c7a0ef
TL
2185 '--format', '{{.Repository}}@{{.Digest}}|{{.ID}}|{{.Tag}}|{{.CreatedAt}}'])
2186
2187 container_info = None
2188 daemon_name = ctx.name if ('name' in ctx and ctx.name and '.' in ctx.name) else None
2189 daemons_ls = [daemon_name] if daemon_name is not None else Ceph.daemons # daemon types: 'mon', 'mgr', etc
2190 for daemon in daemons_ls:
2191 container_info = get_container_info(ctx, daemon, daemon_name is not None)
2192 if container_info is not None:
2193 logger.debug(f"Using container info for daemon '{daemon}'")
2194 break
adb31ebb 2195
adb31ebb 2196 for image in out.splitlines():
33c7a0ef
TL
2197 if image and not image.isspace():
2198 (digest, image_id, tag, created_date) = image.lstrip().split('|')
2199 if container_info is not None and image_id not in container_info.image_id:
2200 continue
2201 if digest and not digest.endswith('@'):
2202 logger.info(f"Using ceph image with id '{image_id}' and tag '{tag}' created on {created_date}\n{digest}")
2203 return digest
9f95a23c
TL
2204 return None
2205
f6b5b4d7 2206
9f95a23c 2207def write_tmp(s, uid, gid):
f67539c2 2208 # type: (str, int, int) -> IO[str]
9f95a23c
TL
2209 tmp_f = tempfile.NamedTemporaryFile(mode='w',
2210 prefix='ceph-tmp')
2211 os.fchown(tmp_f.fileno(), uid, gid)
2212 tmp_f.write(s)
2213 tmp_f.flush()
2214
2215 return tmp_f
2216
f6b5b4d7 2217
9f95a23c
TL
2218def makedirs(dir, uid, gid, mode):
2219 # type: (str, int, int, int) -> None
2220 if not os.path.exists(dir):
2221 os.makedirs(dir, mode=mode)
2222 else:
2223 os.chmod(dir, mode)
2224 os.chown(dir, uid, gid)
2225 os.chmod(dir, mode) # the above is masked by umask...
2226
f6b5b4d7 2227
f67539c2
TL
2228def get_data_dir(fsid, data_dir, t, n):
2229 # type: (str, str, str, Union[int, str]) -> str
2230 return os.path.join(data_dir, fsid, '%s.%s' % (t, n))
9f95a23c 2231
f6b5b4d7 2232
f67539c2
TL
2233def get_log_dir(fsid, log_dir):
2234 # type: (str, str) -> str
2235 return os.path.join(log_dir, fsid)
9f95a23c 2236
f6b5b4d7 2237
f67539c2
TL
2238def make_data_dir_base(fsid, data_dir, uid, gid):
2239 # type: (str, str, int, int) -> str
2240 data_dir_base = os.path.join(data_dir, fsid)
9f95a23c
TL
2241 makedirs(data_dir_base, uid, gid, DATA_DIR_MODE)
2242 makedirs(os.path.join(data_dir_base, 'crash'), uid, gid, DATA_DIR_MODE)
2243 makedirs(os.path.join(data_dir_base, 'crash', 'posted'), uid, gid,
2244 DATA_DIR_MODE)
2245 return data_dir_base
2246
f6b5b4d7 2247
f67539c2
TL
2248def make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=None, gid=None):
2249 # type: (CephadmContext, str, str, Union[int, str], Optional[int], Optional[int]) -> str
f6b5b4d7 2250 if uid is None or gid is None:
f67539c2
TL
2251 uid, gid = extract_uid_gid(ctx)
2252 make_data_dir_base(fsid, ctx.data_dir, uid, gid)
2253 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
9f95a23c
TL
2254 makedirs(data_dir, uid, gid, DATA_DIR_MODE)
2255 return data_dir
2256
f6b5b4d7 2257
f67539c2
TL
2258def make_log_dir(ctx, fsid, uid=None, gid=None):
2259 # type: (CephadmContext, str, Optional[int], Optional[int]) -> str
f6b5b4d7 2260 if uid is None or gid is None:
f67539c2
TL
2261 uid, gid = extract_uid_gid(ctx)
2262 log_dir = get_log_dir(fsid, ctx.log_dir)
9f95a23c
TL
2263 makedirs(log_dir, uid, gid, LOG_DIR_MODE)
2264 return log_dir
2265
f6b5b4d7 2266
f67539c2
TL
2267def make_var_run(ctx, fsid, uid, gid):
2268 # type: (CephadmContext, str, int, int) -> None
2269 call_throws(ctx, ['install', '-d', '-m0770', '-o', str(uid), '-g', str(gid),
2270 '/var/run/ceph/%s' % fsid])
9f95a23c 2271
f6b5b4d7 2272
f67539c2
TL
2273def copy_tree(ctx, src, dst, uid=None, gid=None):
2274 # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
9f95a23c
TL
2275 """
2276 Copy a directory tree from src to dst
2277 """
f91f0fd5 2278 if uid is None or gid is None:
f67539c2 2279 (uid, gid) = extract_uid_gid(ctx)
9f95a23c
TL
2280
2281 for src_dir in src:
2282 dst_dir = dst
2283 if os.path.isdir(dst):
2284 dst_dir = os.path.join(dst, os.path.basename(src_dir))
2285
f67539c2 2286 logger.debug('copy directory `%s` -> `%s`' % (src_dir, dst_dir))
9f95a23c 2287 shutil.rmtree(dst_dir, ignore_errors=True)
f67539c2 2288 shutil.copytree(src_dir, dst_dir) # dirs_exist_ok needs python 3.8
9f95a23c
TL
2289
2290 for dirpath, dirnames, filenames in os.walk(dst_dir):
f67539c2 2291 logger.debug('chown %s:%s `%s`' % (uid, gid, dirpath))
9f95a23c
TL
2292 os.chown(dirpath, uid, gid)
2293 for filename in filenames:
f67539c2 2294 logger.debug('chown %s:%s `%s`' % (uid, gid, filename))
9f95a23c
TL
2295 os.chown(os.path.join(dirpath, filename), uid, gid)
2296
2297
f67539c2
TL
2298def copy_files(ctx, src, dst, uid=None, gid=None):
2299 # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
9f95a23c
TL
2300 """
2301 Copy files from src to dst
2302 """
f91f0fd5 2303 if uid is None or gid is None:
f67539c2 2304 (uid, gid) = extract_uid_gid(ctx)
9f95a23c
TL
2305
2306 for src_file in src:
2307 dst_file = dst
2308 if os.path.isdir(dst):
2309 dst_file = os.path.join(dst, os.path.basename(src_file))
2310
f67539c2 2311 logger.debug('copy file `%s` -> `%s`' % (src_file, dst_file))
9f95a23c
TL
2312 shutil.copyfile(src_file, dst_file)
2313
f67539c2 2314 logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
9f95a23c
TL
2315 os.chown(dst_file, uid, gid)
2316
f6b5b4d7 2317
f67539c2
TL
2318def move_files(ctx, src, dst, uid=None, gid=None):
2319 # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
9f95a23c
TL
2320 """
2321 Move files from src to dst
2322 """
f91f0fd5 2323 if uid is None or gid is None:
f67539c2 2324 (uid, gid) = extract_uid_gid(ctx)
9f95a23c
TL
2325
2326 for src_file in src:
2327 dst_file = dst
2328 if os.path.isdir(dst):
2329 dst_file = os.path.join(dst, os.path.basename(src_file))
2330
2331 if os.path.islink(src_file):
2332 # shutil.move() in py2 does not handle symlinks correctly
2333 src_rl = os.readlink(src_file)
2334 logger.debug("symlink '%s' -> '%s'" % (dst_file, src_rl))
2335 os.symlink(src_rl, dst_file)
2336 os.unlink(src_file)
2337 else:
2338 logger.debug("move file '%s' -> '%s'" % (src_file, dst_file))
2339 shutil.move(src_file, dst_file)
f67539c2 2340 logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
9f95a23c
TL
2341 os.chown(dst_file, uid, gid)
2342
f6b5b4d7 2343
33c7a0ef
TL
2344def recursive_chown(path: str, uid: int, gid: int) -> None:
2345 for dirpath, dirnames, filenames in os.walk(path):
2346 os.chown(dirpath, uid, gid)
2347 for filename in filenames:
2348 os.chown(os.path.join(dirpath, filename), uid, gid)
2349
2350
f67539c2 2351# copied from distutils
522d829b 2352def find_executable(executable: str, path: Optional[str] = None) -> Optional[str]:
9f95a23c
TL
2353 """Tries to find 'executable' in the directories listed in 'path'.
2354 'path' is a string of directories separated by 'os.pathsep'; it defaults to
2355 os.environ['PATH']. Returns the complete filename or None if not found.
2356 """
2357 _, ext = os.path.splitext(executable)
2358 if (sys.platform == 'win32') and (ext != '.exe'):
2359 executable = executable + '.exe'
2360
2361 if os.path.isfile(executable):
2362 return executable
2363
2364 if path is None:
2365 path = os.environ.get('PATH', None)
2366 if path is None:
2367 try:
f67539c2 2368 path = os.confstr('CS_PATH')
9f95a23c
TL
2369 except (AttributeError, ValueError):
2370 # os.confstr() or CS_PATH is not available
2371 path = os.defpath
2372 # bpo-35755: Don't use os.defpath if the PATH environment variable is
2373 # set to an empty string
2374
2375 # PATH='' doesn't match, whereas PATH=':' looks in the current directory
2376 if not path:
2377 return None
2378
2379 paths = path.split(os.pathsep)
2380 for p in paths:
2381 f = os.path.join(p, executable)
2382 if os.path.isfile(f):
2383 # the file exists, we have a shot at spawn working
2384 return f
2385 return None
2386
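# For example (paths are distro-dependent and illustrative):
#
#     find_executable('systemctl')       # -> '/usr/bin/systemctl'
#     find_executable('no-such-binary')  # -> None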
f6b5b4d7 2387
9f95a23c
TL
2388def find_program(filename):
2389 # type: (str) -> str
2390 name = find_executable(filename)
2391 if name is None:
2392 raise ValueError('%s not found' % filename)
2393 return name
2394
f6b5b4d7 2395
522d829b 2396def find_container_engine(ctx: CephadmContext) -> Optional[ContainerEngine]:
f67539c2
TL
2397 if ctx.docker:
2398 return Docker()
2399 else:
2400 for i in CONTAINER_PREFERENCE:
2401 try:
2402 return i()
a4b75251
TL
2403 except Exception:
2404 pass
f67539c2
TL
2405 return None
2406
2407
a4b75251 2408def check_container_engine(ctx: CephadmContext) -> ContainerEngine:
f67539c2
TL
2409 engine = ctx.container_engine
2410 if not isinstance(engine, CONTAINER_PREFERENCE):
522d829b
TL
2411 # See https://github.com/python/mypy/issues/8993
2412 exes: List[str] = [i.EXE for i in CONTAINER_PREFERENCE] # type: ignore
2413 raise Error('No container engine binary found ({}). Try running `apt/dnf/yum/zypper install <container engine>`'.format(' or '.join(exes)))
f67539c2
TL
2414 elif isinstance(engine, Podman):
2415 engine.get_version(ctx)
2416 if engine.version < MIN_PODMAN_VERSION:
2417 raise Error('podman version %d.%d.%d or later is required' % MIN_PODMAN_VERSION)
a4b75251 2418 return engine
f67539c2
TL
2419
2420
9f95a23c
TL
2421def get_unit_name(fsid, daemon_type, daemon_id=None):
2422 # type: (str, str, Optional[Union[int, str]]) -> str
2423 # accept either name or type + id
20effc67 2424 if daemon_id is not None:
9f95a23c
TL
2425 return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
2426 else:
2427 return 'ceph-%s@%s' % (fsid, daemon_type)
2428
f6b5b4d7 2429
522d829b 2430def get_unit_name_by_daemon_name(ctx: CephadmContext, fsid: str, name: str) -> str:
f67539c2 2431 daemon = get_daemon_description(ctx, fsid, name)
e306af50
TL
2432 try:
2433 return daemon['systemd_unit']
2434 except KeyError:
2435 raise Error('Failed to get unit name for {}'.format(daemon))
2436
f6b5b4d7 2437
f67539c2
TL
2438def check_unit(ctx, unit_name):
2439 # type: (CephadmContext, str) -> Tuple[bool, str, bool]
9f95a23c
TL
2440 # NOTE: we ignore the exit code here because systemctl outputs
2441 # various exit codes based on the state of the service, but the
2442 # string result is more explicit (and sufficient).
2443 enabled = False
2444 installed = False
2445 try:
f67539c2 2446 out, err, code = call(ctx, ['systemctl', 'is-enabled', unit_name],
2a845540 2447 verbosity=CallVerbosity.QUIET)
9f95a23c
TL
2448 if code == 0:
2449 enabled = True
2450 installed = True
f67539c2 2451 elif 'disabled' in out:
9f95a23c
TL
2452 installed = True
2453 except Exception as e:
2454 logger.warning('unable to run systemctl: %s' % e)
2455 enabled = False
2456 installed = False
2457
2458 state = 'unknown'
2459 try:
f67539c2 2460 out, err, code = call(ctx, ['systemctl', 'is-active', unit_name],
2a845540 2461 verbosity=CallVerbosity.QUIET)
9f95a23c
TL
2462 out = out.strip()
2463 if out in ['active']:
2464 state = 'running'
2465 elif out in ['inactive']:
2466 state = 'stopped'
2467 elif out in ['failed', 'auto-restart']:
2468 state = 'error'
2469 else:
2470 state = 'unknown'
2471 except Exception as e:
2472 logger.warning('unable to run systemctl: %s' % e)
2473 state = 'unknown'
2474 return (enabled, state, installed)
2475
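# Interpretation sketch (the unit name is illustrative; see get_unit_name()
# above for the real naming scheme):
#
#     enabled, state, installed = check_unit(ctx, 'ceph-<fsid>@mon.host1')
#     if installed and state != 'running':
#         logger.warning('unit is installed but currently %s' % state)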
f6b5b4d7 2476
f67539c2
TL
2477def check_units(ctx, units, enabler=None):
2478 # type: (CephadmContext, List[str], Optional[Packager]) -> bool
9f95a23c 2479 for u in units:
f67539c2 2480 (enabled, state, installed) = check_unit(ctx, u)
9f95a23c
TL
2481 if enabled and state == 'running':
2482 logger.info('Unit %s is enabled and running' % u)
2483 return True
2484 if enabler is not None:
2485 if installed:
2486 logger.info('Enabling unit %s' % u)
2487 enabler.enable_service(u)
2488 return False
2489
f6b5b4d7 2490
522d829b 2491def is_container_running(ctx: CephadmContext, c: 'CephContainer') -> bool:
20effc67
TL
2492 if ctx.name.split('.', 1)[0] in ['agent', 'cephadm-exporter']:
2493 # these are non-containerized daemon types
2494 return False
522d829b
TL
2495 return bool(get_running_container_name(ctx, c))
2496
2497
2498def get_running_container_name(ctx: CephadmContext, c: 'CephContainer') -> Optional[str]:
2499 for name in [c.cname, c.old_cname]:
2500 out, err, ret = call(ctx, [
2501 ctx.container_engine.path, 'container', 'inspect',
2502 '--format', '{{.State.Status}}', name
2503 ])
2504 if out.strip() == 'running':
2505 return name
2506 return None
f67539c2
TL
2507
2508
9f95a23c 2509def get_legacy_config_fsid(cluster, legacy_dir=None):
f6b5b4d7 2510 # type: (str, Optional[str]) -> Optional[str]
9f95a23c
TL
2511 config_file = '/etc/ceph/%s.conf' % cluster
2512 if legacy_dir is not None:
2513 config_file = os.path.abspath(legacy_dir + config_file)
2514
2515 if os.path.exists(config_file):
2516 config = read_config(config_file)
2517 if config.has_section('global') and config.has_option('global', 'fsid'):
2518 return config.get('global', 'fsid')
2519 return None
2520
f6b5b4d7 2521
f67539c2
TL
2522def get_legacy_daemon_fsid(ctx, cluster,
2523 daemon_type, daemon_id, legacy_dir=None):
2524 # type: (CephadmContext, str, str, Union[int, str], Optional[str]) -> Optional[str]
9f95a23c
TL
2525 fsid = None
2526 if daemon_type == 'osd':
2527 try:
f67539c2 2528 fsid_file = os.path.join(ctx.data_dir,
9f95a23c
TL
2529 daemon_type,
2530 'ceph-%s' % daemon_id,
2531 'ceph_fsid')
2532 if legacy_dir is not None:
2533 fsid_file = os.path.abspath(legacy_dir + fsid_file)
2534 with open(fsid_file, 'r') as f:
2535 fsid = f.read().strip()
2536 except IOError:
2537 pass
2538 if not fsid:
2539 fsid = get_legacy_config_fsid(cluster, legacy_dir=legacy_dir)
2540 return fsid
2541
f6b5b4d7 2542
20effc67
TL
2543def should_log_to_journald(ctx: CephadmContext) -> bool:
2544 if ctx.log_to_journald is not None:
2545 return ctx.log_to_journald
2546 return isinstance(ctx.container_engine, Podman) and \
2547 ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION
2548
2549
f67539c2
TL
2550def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
2551 # type: (CephadmContext, str, str, Union[int, str]) -> List[str]
9f95a23c
TL
2552 r = list() # type: List[str]
2553
2554 if daemon_type in Ceph.daemons and daemon_type != 'crash':
2555 r += [
2556 '--setuser', 'ceph',
2557 '--setgroup', 'ceph',
2558 '--default-log-to-file=false',
9f95a23c 2559 ]
20effc67
TL
2560 log_to_journald = should_log_to_journald(ctx)
2561 if log_to_journald:
2562 r += [
2563 '--default-log-to-journald=true',
2564 '--default-log-to-stderr=false',
2565 ]
2566 else:
2567 r += [
2568 '--default-log-to-stderr=true',
2569 '--default-log-stderr-prefix=debug ',
2570 ]
9f95a23c
TL
2571 if daemon_type == 'mon':
2572 r += [
2573 '--default-mon-cluster-log-to-file=false',
9f95a23c 2574 ]
20effc67
TL
2575 if log_to_journald:
2576 r += [
2577 '--default-mon-cluster-log-to-journald=true',
2578 '--default-mon-cluster-log-to-stderr=false',
2579 ]
2580 else:
2581 r += ['--default-mon-cluster-log-to-stderr=true']
9f95a23c
TL
2582 elif daemon_type in Monitoring.components:
2583 metadata = Monitoring.components[daemon_type]
2584 r += metadata.get('args', list())
b3b6e05e 2585 # set ip and port to bind to for node-exporter, alertmanager and prometheus
33c7a0ef 2586 if daemon_type not in ['grafana', 'loki', 'promtail']:
b3b6e05e
TL
2587 ip = ''
2588 port = Monitoring.port_map[daemon_type][0]
2589 if 'meta_json' in ctx and ctx.meta_json:
2590 meta = json.loads(ctx.meta_json) or {}
2591 if 'ip' in meta and meta['ip']:
2592 ip = meta['ip']
2593 if 'ports' in meta and meta['ports']:
2594 port = meta['ports'][0]
2595 r += [f'--web.listen-address={ip}:{port}']
33c7a0ef
TL
2596 if daemon_type == 'prometheus':
2597 scheme = 'http'
2598 host = get_fqdn()
2599 r += [f'--web.external-url={scheme}://{host}:{port}']
9f95a23c 2600 if daemon_type == 'alertmanager':
f67539c2 2601 config = get_parm(ctx.config_json)
9f95a23c
TL
2602 peers = config.get('peers', list()) # type: ignore
2603 for peer in peers:
f67539c2 2604 r += ['--cluster.peer={}'.format(peer)]
f6b5b4d7 2605 # some alertmanager versions, by default, look elsewhere for a config
f67539c2 2606 r += ['--config.file=/etc/alertmanager/alertmanager.yml']
33c7a0ef 2607 if daemon_type == 'promtail':
2a845540 2608 r += ['--config.expand-env']
33c7a0ef
TL
2609 if daemon_type == 'node-exporter':
2610 r += ['--path.procfs=/host/proc',
2611 '--path.sysfs=/host/sys',
2612 '--path.rootfs=/rootfs']
9f95a23c 2613 elif daemon_type == NFSGanesha.daemon_type:
f67539c2 2614 nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
1911f103 2615 r += nfs_ganesha.get_daemon_args()
f67539c2
TL
2616 elif daemon_type == HAproxy.daemon_type:
2617 haproxy = HAproxy.init(ctx, fsid, daemon_id)
2618 r += haproxy.get_daemon_args()
f91f0fd5 2619 elif daemon_type == CustomContainer.daemon_type:
f67539c2 2620 cc = CustomContainer.init(ctx, fsid, daemon_id)
f91f0fd5 2621 r.extend(cc.get_daemon_args())
20effc67
TL
2622 elif daemon_type == SNMPGateway.daemon_type:
2623 sc = SNMPGateway.init(ctx, fsid, daemon_id)
2624 r.extend(sc.get_daemon_args())
9f95a23c
TL
2625
2626 return r
2627
f6b5b4d7 2628
f67539c2 2629def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid,
e306af50 2630 config=None, keyring=None):
f67539c2
TL
2631 # type: (CephadmContext, str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None
2632 data_dir = make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=uid, gid=gid)
20effc67
TL
2633
2634 if daemon_type in Ceph.daemons:
2635 make_log_dir(ctx, fsid, uid=uid, gid=gid)
9f95a23c
TL
2636
2637 if config:
2638 config_path = os.path.join(data_dir, 'config')
2639 with open(config_path, 'w') as f:
2640 os.fchown(f.fileno(), uid, gid)
2641 os.fchmod(f.fileno(), 0o600)
2642 f.write(config)
f91f0fd5 2643
9f95a23c
TL
2644 if keyring:
2645 keyring_path = os.path.join(data_dir, 'keyring')
2646 with open(keyring_path, 'w') as f:
2647 os.fchmod(f.fileno(), 0o600)
2648 os.fchown(f.fileno(), uid, gid)
2649 f.write(keyring)
2650
2651 if daemon_type in Monitoring.components.keys():
522d829b
TL
2652 config_json: Dict[str, Any] = dict()
2653 if 'config_json' in ctx:
2654 config_json = get_parm(ctx.config_json)
9f95a23c
TL
2655
2656 # Set up directories specific to the monitoring component
2657 config_dir = ''
f67539c2 2658 data_dir_root = ''
9f95a23c 2659 if daemon_type == 'prometheus':
f67539c2
TL
2660 data_dir_root = get_data_dir(fsid, ctx.data_dir,
2661 daemon_type, daemon_id)
9f95a23c
TL
2662 config_dir = 'etc/prometheus'
2663 makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
2664 makedirs(os.path.join(data_dir_root, config_dir, 'alerting'), uid, gid, 0o755)
2665 makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
33c7a0ef
TL
2666 recursive_chown(os.path.join(data_dir_root, 'etc'), uid, gid)
2667 recursive_chown(os.path.join(data_dir_root, 'data'), uid, gid)
9f95a23c 2668 elif daemon_type == 'grafana':
f67539c2
TL
2669 data_dir_root = get_data_dir(fsid, ctx.data_dir,
2670 daemon_type, daemon_id)
9f95a23c
TL
2671 config_dir = 'etc/grafana'
2672 makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
2673 makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755)
2674 makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755)
2675 makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
f67539c2 2676 touch(os.path.join(data_dir_root, 'data', 'grafana.db'), uid, gid)
9f95a23c 2677 elif daemon_type == 'alertmanager':
f67539c2
TL
2678 data_dir_root = get_data_dir(fsid, ctx.data_dir,
2679 daemon_type, daemon_id)
9f95a23c
TL
2680 config_dir = 'etc/alertmanager'
2681 makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
2682 makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755)
33c7a0ef
TL
2683 elif daemon_type == 'promtail':
2684 data_dir_root = get_data_dir(fsid, ctx.data_dir,
2685 daemon_type, daemon_id)
2686 config_dir = 'etc/promtail'
2687 makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
2688 makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
2689 elif daemon_type == 'loki':
2690 data_dir_root = get_data_dir(fsid, ctx.data_dir,
2691 daemon_type, daemon_id)
2692 config_dir = 'etc/loki'
2693 makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
2694 makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
9f95a23c 2695
9f95a23c 2696 # populate the config directory for the component from the config-json
b3b6e05e
TL
2697 if 'files' in config_json:
2698 for fname in config_json['files']:
f91f0fd5 2699 content = dict_get_join(config_json['files'], fname)
b3b6e05e
TL
2700 if os.path.isabs(fname):
2701 fpath = os.path.join(data_dir_root, fname.lstrip(os.path.sep))
2702 else:
2703 fpath = os.path.join(data_dir_root, config_dir, fname)
2704 with open(fpath, 'w', encoding='utf-8') as f:
9f95a23c
TL
2705 os.fchown(f.fileno(), uid, gid)
2706 os.fchmod(f.fileno(), 0o600)
2707 f.write(content)
2708
f91f0fd5 2709 elif daemon_type == NFSGanesha.daemon_type:
f67539c2 2710 nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
9f95a23c
TL
2711 nfs_ganesha.create_daemon_dirs(data_dir, uid, gid)
2712
f91f0fd5 2713 elif daemon_type == CephIscsi.daemon_type:
f67539c2 2714 ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
1911f103
TL
2715 ceph_iscsi.create_daemon_dirs(data_dir, uid, gid)
2716
f67539c2
TL
2717 elif daemon_type == HAproxy.daemon_type:
2718 haproxy = HAproxy.init(ctx, fsid, daemon_id)
2719 haproxy.create_daemon_dirs(data_dir, uid, gid)
2720
2721 elif daemon_type == Keepalived.daemon_type:
2722 keepalived = Keepalived.init(ctx, fsid, daemon_id)
2723 keepalived.create_daemon_dirs(data_dir, uid, gid)
2724
f91f0fd5 2725 elif daemon_type == CustomContainer.daemon_type:
f67539c2 2726 cc = CustomContainer.init(ctx, fsid, daemon_id)
f91f0fd5
TL
2727 cc.create_daemon_dirs(data_dir, uid, gid)
2728
20effc67
TL
2729 elif daemon_type == SNMPGateway.daemon_type:
2730 sg = SNMPGateway.init(ctx, fsid, daemon_id)
2731 sg.create_daemon_conf()
2732
2a845540
TL
2733 _write_custom_conf_files(ctx, daemon_type, str(daemon_id), fsid, uid, gid)
2734
f6b5b4d7 2735
2a845540
TL
2736def _write_custom_conf_files(ctx: CephadmContext, daemon_type: str, daemon_id: str, fsid: str, uid: int, gid: int) -> None:
2737 # mostly making this its own function to make unit testing easier
2738 if 'config_json' not in ctx or not ctx.config_json:
2739 return
2740 config_json = get_custom_config_files(ctx.config_json)
2741 custom_config_dir = os.path.join(ctx.data_dir, fsid, 'custom_config_files', f'{daemon_type}.{daemon_id}')
2742 if not os.path.exists(custom_config_dir):
2743 makedirs(custom_config_dir, uid, gid, 0o755)
2744 mandatory_keys = ['mount_path', 'content']
2745 for ccf in config_json['custom_config_files']:
2746 if all(k in ccf for k in mandatory_keys):
2747 file_path = os.path.join(custom_config_dir, os.path.basename(ccf['mount_path']))
2748 with open(file_path, 'w+', encoding='utf-8') as f:
2749 os.fchown(f.fileno(), uid, gid)
2750 os.fchmod(f.fileno(), 0o600)
2751 f.write(ccf['content'])
9f95a23c 2752
2a845540
TL
2753
2754def get_parm(option: str) -> Dict[str, str]:
2755 js = _get_config_json(option)
2756 # custom_config_files is a special field that may be in the config
2757 # dict. It is used for mounting custom config files into daemons' containers
2758 # and should be accessed through the "get_custom_config_files" function.
2759 # For get_parm we need to discard it.
2760 js.pop('custom_config_files', None)
2761 return js
2762
2763
2764def get_custom_config_files(option: str) -> Dict[str, List[Dict[str, str]]]:
2765 js = _get_config_json(option)
2766 res: Dict[str, List[Dict[str, str]]] = {'custom_config_files': []}
2767 if 'custom_config_files' in js:
2768 res['custom_config_files'] = js['custom_config_files']
2769 return res
2770
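# Sketch of the split between the two helpers on a hypothetical inline
# config-json payload:
#
#     cfg = '{"config": "[global]\\n", "custom_config_files": [{"mount_path": "/etc/foo.conf", "content": "x=1"}]}'
#     get_parm(cfg)                 # -> {'config': '[global]\n'}
#     get_custom_config_files(cfg)  # -> {'custom_config_files': [{'mount_path': '/etc/foo.conf', 'content': 'x=1'}]}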
2771
2772def _get_config_json(option: str) -> Dict[str, Any]:
9f95a23c
TL
2773 if not option:
2774 return dict()
2775
2776 global cached_stdin
2777 if option == '-':
2778 if cached_stdin is not None:
2779 j = cached_stdin
2780 else:
f67539c2
TL
2781 j = sys.stdin.read()
2782 cached_stdin = j
9f95a23c
TL
2783 else:
2784 # inline json string
2785 if option[0] == '{' and option[-1] == '}':
2786 j = option
2787 # json file
2788 elif os.path.exists(option):
2789 with open(option, 'r') as f:
2790 j = f.read()
2791 else:
f67539c2 2792 raise Error('Config file {} not found'.format(option))
9f95a23c
TL
2793
2794 try:
2795 js = json.loads(j)
2796 except ValueError as e:
f67539c2 2797 raise Error('Invalid JSON in {}: {}'.format(option, e))
9f95a23c
TL
2798 else:
2799 return js
2800
f6b5b4d7 2801
f67539c2
TL
2802def get_config_and_keyring(ctx):
2803 # type: (CephadmContext) -> Tuple[Optional[str], Optional[str]]
801d1391
TL
2804 config = None
2805 keyring = None
2806
f67539c2
TL
2807 if 'config_json' in ctx and ctx.config_json:
2808 d = get_parm(ctx.config_json)
9f95a23c
TL
2809 config = d.get('config')
2810 keyring = d.get('keyring')
a4b75251
TL
2811 if config and keyring:
2812 return config, keyring
9f95a23c 2813
f67539c2
TL
2814 if 'config' in ctx and ctx.config:
2815 try:
2816 with open(ctx.config, 'r') as f:
2817 config = f.read()
b3b6e05e
TL
2818 except FileNotFoundError as e:
2819 raise Error(e)
f67539c2
TL
2820
2821 if 'key' in ctx and ctx.key:
2822 keyring = '[%s]\n\tkey = %s\n' % (ctx.name, ctx.key)
2823 elif 'keyring' in ctx and ctx.keyring:
2824 try:
2825 with open(ctx.keyring, 'r') as f:
2826 keyring = f.read()
b3b6e05e
TL
2827 except FileNotFoundError as e:
2828 raise Error(e)
9f95a23c 2829
f6b5b4d7
TL
2830 return config, keyring
2831
2832
f67539c2
TL
2833def get_container_binds(ctx, fsid, daemon_type, daemon_id):
2834 # type: (CephadmContext, str, str, Union[int, str, None]) -> List[List[str]]
f6b5b4d7
TL
2835 binds = list()
2836
2837 if daemon_type == CephIscsi.daemon_type:
f6b5b4d7 2838 binds.extend(CephIscsi.get_container_binds())
f91f0fd5
TL
2839 elif daemon_type == CustomContainer.daemon_type:
2840 assert daemon_id
f67539c2
TL
2841 cc = CustomContainer.init(ctx, fsid, daemon_id)
2842 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
f91f0fd5 2843 binds.extend(cc.get_container_binds(data_dir))
f6b5b4d7
TL
2844
2845 return binds
2846
9f95a23c 2847
f67539c2 2848def get_container_mounts(ctx, fsid, daemon_type, daemon_id,
9f95a23c 2849 no_config=False):
f67539c2 2850 # type: (CephadmContext, str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str]
9f95a23c
TL
2851 mounts = dict()
2852
2853 if daemon_type in Ceph.daemons:
2854 if fsid:
f67539c2 2855 run_path = os.path.join('/var/run/ceph', fsid)
9f95a23c
TL
2856 if os.path.exists(run_path):
2857 mounts[run_path] = '/var/run/ceph:z'
f67539c2 2858 log_dir = get_log_dir(fsid, ctx.log_dir)
9f95a23c
TL
2859 mounts[log_dir] = '/var/log/ceph:z'
2860 crash_dir = '/var/lib/ceph/%s/crash' % fsid
2861 if os.path.exists(crash_dir):
2862 mounts[crash_dir] = '/var/lib/ceph/crash:z'
20effc67
TL
2863 if daemon_type != 'crash' and should_log_to_journald(ctx):
2864 journald_sock_dir = '/run/systemd/journal'
2865 mounts[journald_sock_dir] = journald_sock_dir
9f95a23c
TL
2866
2867 if daemon_type in Ceph.daemons and daemon_id:
f67539c2 2868 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
9f95a23c
TL
2869 if daemon_type == 'rgw':
2870 cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id)
2871 else:
2872 cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id)
2873 if daemon_type != 'crash':
2874 mounts[data_dir] = cdata_dir + ':z'
2875 if not no_config:
2876 mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
f67539c2 2877 if daemon_type in ['rbd-mirror', 'cephfs-mirror', 'crash']:
9f95a23c
TL
2878 # these do not search for their keyrings in a data directory
2879 mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id)
2880
b3b6e05e 2881 if daemon_type in ['mon', 'osd', 'clusterless-ceph-volume']:
9f95a23c
TL
2882 mounts['/dev'] = '/dev' # FIXME: narrow this down?
2883 mounts['/run/udev'] = '/run/udev'
b3b6e05e 2884 if daemon_type in ['osd', 'clusterless-ceph-volume']:
9f95a23c 2885 mounts['/sys'] = '/sys' # for numa.cc, pick_address, cgroups, ...
b3b6e05e
TL
2886 mounts['/run/lvm'] = '/run/lvm'
2887 mounts['/run/lock/lvm'] = '/run/lock/lvm'
2888 if daemon_type == 'osd':
f67539c2
TL
2889 # selinux-policy in the container may not match the host.
2890 if HostFacts(ctx).selinux_enabled:
2891 selinux_folder = '/var/lib/ceph/%s/selinux' % fsid
2892 if not os.path.exists(selinux_folder):
2893 os.makedirs(selinux_folder, mode=0o755)
2894 mounts[selinux_folder] = '/sys/fs/selinux:ro'
20effc67 2895 mounts['/'] = '/rootfs'
9f95a23c 2896
e306af50 2897 try:
f67539c2
TL
2898 if ctx.shared_ceph_folder: # makes manager module/ceph-volume development easier
2899 ceph_folder = pathify(ctx.shared_ceph_folder)
e306af50
TL
2900 if os.path.exists(ceph_folder):
2901 mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
522d829b 2902 mounts[ceph_folder + '/src/cephadm/cephadm'] = '/usr/sbin/cephadm'
e306af50
TL
2903 mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
2904 mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
20effc67
TL
2905 mounts[ceph_folder + '/monitoring/ceph-mixin/dashboards_out'] = '/etc/grafana/dashboards/ceph-dashboard'
2906 mounts[ceph_folder + '/monitoring/ceph-mixin/prometheus_alerts.yml'] = '/etc/prometheus/ceph/ceph_default_alerts.yml'
e306af50
TL
2907 else:
2908 logger.error('{}{}{}'.format(termcolor.red,
f67539c2
TL
2909 'Ceph shared source folder does not exist.',
2910 termcolor.end))
e306af50
TL
2911 except AttributeError:
2912 pass
2913
9f95a23c 2914 if daemon_type in Monitoring.components and daemon_id:
f67539c2 2915 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
33c7a0ef 2916 log_dir = get_log_dir(fsid, ctx.log_dir)
9f95a23c
TL
2917 if daemon_type == 'prometheus':
2918 mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z'
2919 mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z'
33c7a0ef
TL
2920 elif daemon_type == 'loki':
2921 mounts[os.path.join(data_dir, 'etc/loki')] = '/etc/loki:Z'
2922 mounts[os.path.join(data_dir, 'data')] = '/loki:Z'
2923 elif daemon_type == 'promtail':
2924 mounts[os.path.join(data_dir, 'etc/promtail')] = '/etc/promtail:Z'
2925 mounts[log_dir] = '/var/log/ceph:z'
2926 mounts[os.path.join(data_dir, 'data')] = '/promtail:Z'
9f95a23c
TL
2927 elif daemon_type == 'node-exporter':
2928 mounts['/proc'] = '/host/proc:ro'
2929 mounts['/sys'] = '/host/sys:ro'
2930 mounts['/'] = '/rootfs:ro'
f67539c2 2931 elif daemon_type == 'grafana':
9f95a23c
TL
2932 mounts[os.path.join(data_dir, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
2933 mounts[os.path.join(data_dir, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
2934 mounts[os.path.join(data_dir, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
f67539c2 2935 mounts[os.path.join(data_dir, 'data/grafana.db')] = '/var/lib/grafana/grafana.db:Z'
9f95a23c 2936 elif daemon_type == 'alertmanager':
f6b5b4d7 2937 mounts[os.path.join(data_dir, 'etc/alertmanager')] = '/etc/alertmanager:Z'
9f95a23c
TL
2938
2939 if daemon_type == NFSGanesha.daemon_type:
2940 assert daemon_id
f67539c2
TL
2941 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
2942 nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
f91f0fd5 2943 mounts.update(nfs_ganesha.get_container_mounts(data_dir))
9f95a23c 2944
f67539c2
TL
2945 if daemon_type == HAproxy.daemon_type:
2946 assert daemon_id
2947 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
2948 mounts.update(HAproxy.get_container_mounts(data_dir))
2949
1911f103
TL
2950 if daemon_type == CephIscsi.daemon_type:
2951 assert daemon_id
f67539c2
TL
2952 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
2953 log_dir = get_log_dir(fsid, ctx.log_dir)
1911f103
TL
2954 mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir))
2955
f67539c2
TL
2956 if daemon_type == Keepalived.daemon_type:
2957 assert daemon_id
2958 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
2959 mounts.update(Keepalived.get_container_mounts(data_dir))
2960
f91f0fd5
TL
2961 if daemon_type == CustomContainer.daemon_type:
2962 assert daemon_id
f67539c2
TL
2963 cc = CustomContainer.init(ctx, fsid, daemon_id)
2964 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
f91f0fd5
TL
2965 mounts.update(cc.get_container_mounts(data_dir))
2966
9f95a23c
TL
2967 return mounts
2968
f6b5b4d7 2969
20effc67
TL
2970def get_ceph_volume_container(ctx: CephadmContext,
2971 privileged: bool = True,
2972 cname: str = '',
2973 volume_mounts: Dict[str, str] = {},
2974 bind_mounts: Optional[List[List[str]]] = None,
2975 args: List[str] = [],
2976 envs: Optional[List[str]] = None) -> 'CephContainer':
2977 if envs is None:
2978 envs = []
2979 envs.append('CEPH_VOLUME_SKIP_RESTORECON=yes')
2980 envs.append('CEPH_VOLUME_DEBUG=1')
2981
2982 return CephContainer(
2983 ctx,
2984 image=ctx.image,
2985 entrypoint='/usr/sbin/ceph-volume',
2986 args=args,
2987 volume_mounts=volume_mounts,
2988 bind_mounts=bind_mounts,
2989 envs=envs,
2990 privileged=privileged,
2991 cname=cname,
2992 memory_request=ctx.memory_request,
2993 memory_limit=ctx.memory_limit,
2994 )
2995
2996
f67539c2
TL
2997def get_container(ctx: CephadmContext,
2998 fsid: str, daemon_type: str, daemon_id: Union[int, str],
f91f0fd5
TL
2999 privileged: bool = False,
3000 ptrace: bool = False,
3001 container_args: Optional[List[str]] = None) -> 'CephContainer':
3002 entrypoint: str = ''
3003 name: str = ''
3004 ceph_args: List[str] = []
522d829b 3005 envs: List[str] = []
f91f0fd5
TL
3006 host_network: bool = True
3007
522d829b
TL
3008 if daemon_type in Ceph.daemons:
3009 envs.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728')
f91f0fd5
TL
3010 if container_args is None:
3011 container_args = []
9f95a23c
TL
3012 if daemon_type in ['mon', 'osd']:
3013 # mon and osd need privileged in order for libudev to query devices
3014 privileged = True
3015 if daemon_type == 'rgw':
3016 entrypoint = '/usr/bin/radosgw'
3017 name = 'client.rgw.%s' % daemon_id
3018 elif daemon_type == 'rbd-mirror':
3019 entrypoint = '/usr/bin/rbd-mirror'
3020 name = 'client.rbd-mirror.%s' % daemon_id
f67539c2
TL
3021 elif daemon_type == 'cephfs-mirror':
3022 entrypoint = '/usr/bin/cephfs-mirror'
3023 name = 'client.cephfs-mirror.%s' % daemon_id
9f95a23c
TL
3024 elif daemon_type == 'crash':
3025 entrypoint = '/usr/bin/ceph-crash'
3026 name = 'client.crash.%s' % daemon_id
3027 elif daemon_type in ['mon', 'mgr', 'mds', 'osd']:
3028 entrypoint = '/usr/bin/ceph-' + daemon_type
3029 name = '%s.%s' % (daemon_type, daemon_id)
3030 elif daemon_type in Monitoring.components:
3031 entrypoint = ''
9f95a23c
TL
3032 elif daemon_type == NFSGanesha.daemon_type:
3033 entrypoint = NFSGanesha.entrypoint
3034 name = '%s.%s' % (daemon_type, daemon_id)
f91f0fd5 3035 envs.extend(NFSGanesha.get_container_envs())
f67539c2
TL
3036 elif daemon_type == HAproxy.daemon_type:
3037 name = '%s.%s' % (daemon_type, daemon_id)
522d829b 3038 container_args.extend(['--user=root']) # haproxy 2.4 defaults to a different user
f67539c2
TL
3039 elif daemon_type == Keepalived.daemon_type:
3040 name = '%s.%s' % (daemon_type, daemon_id)
3041 envs.extend(Keepalived.get_container_envs())
3042 container_args.extend(['--cap-add=NET_ADMIN', '--cap-add=NET_RAW'])
1911f103
TL
3043 elif daemon_type == CephIscsi.daemon_type:
3044 entrypoint = CephIscsi.entrypoint
3045 name = '%s.%s' % (daemon_type, daemon_id)
e306af50
TL
3046 # So the container can modprobe iscsi_target_mod and have write perms
3047 # to configfs, we need to make this a privileged container.
3048 privileged = True
f91f0fd5 3049 elif daemon_type == CustomContainer.daemon_type:
f67539c2 3050 cc = CustomContainer.init(ctx, fsid, daemon_id)
f91f0fd5
TL
3051 entrypoint = cc.entrypoint
3052 host_network = False
3053 envs.extend(cc.get_container_envs())
3054 container_args.extend(cc.get_container_args())
9f95a23c 3055
9f95a23c 3056 if daemon_type in Monitoring.components:
f67539c2 3057 uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
9f95a23c
TL
3058 monitoring_args = [
3059 '--user',
3060 str(uid),
3061 # FIXME: disable cpu/memory limits for the time being (not supported
3062 # by ubuntu 18.04 kernel!)
9f95a23c
TL
3063 ]
3064 container_args.extend(monitoring_args)
33c7a0ef
TL
3065 if daemon_type == 'node-exporter':
3066 # in order to support setting '--path.procfs=/host/proc','--path.sysfs=/host/sys',
3067 # '--path.rootfs=/rootfs' for node-exporter we need to disable selinux separation
3068 # between the node-exporter container and the host to avoid selinux denials
3069 container_args.extend(['--security-opt', 'label=disable'])
9f95a23c
TL
3070 elif daemon_type == 'crash':
3071 ceph_args = ['-n', name]
3072 elif daemon_type in Ceph.daemons:
3073 ceph_args = ['-n', name, '-f']
20effc67
TL
3074 elif daemon_type == SNMPGateway.daemon_type:
3075 sg = SNMPGateway.init(ctx, fsid, daemon_id)
3076 container_args.append(
3077 f'--env-file={sg.conf_file_path}'
3078 )
9f95a23c 3079
f91f0fd5
TL
3080 # if using podman, set -d, --conmon-pidfile & --cidfile flags
3081 # so the service can have Type=Forking
f67539c2 3082 if isinstance(ctx.container_engine, Podman):
f91f0fd5 3083 runtime_dir = '/run'
f67539c2
TL
3084 container_args.extend([
3085 '-d', '--log-driver', 'journald',
f91f0fd5
TL
3086 '--conmon-pidfile',
3087 runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, daemon_id),
3088 '--cidfile',
f67539c2
TL
3089 runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, daemon_id),
3090 ])
3091 if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION:
3092 container_args.append('--cgroups=split')
9f95a23c 3093
522d829b 3094 return CephContainer.for_daemon(
f67539c2 3095 ctx,
522d829b
TL
3096 fsid=fsid,
3097 daemon_type=daemon_type,
3098 daemon_id=str(daemon_id),
9f95a23c 3099 entrypoint=entrypoint,
f67539c2 3100 args=ceph_args + get_daemon_args(ctx, fsid, daemon_type, daemon_id),
9f95a23c 3101 container_args=container_args,
f67539c2
TL
3102 volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
3103 bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
9f95a23c
TL
3104 envs=envs,
3105 privileged=privileged,
3106 ptrace=ptrace,
f91f0fd5 3107 host_network=host_network,
9f95a23c
TL
3108 )
3109
f6b5b4d7 3110
f67539c2
TL
3111def extract_uid_gid(ctx, img='', file_path='/var/lib/ceph'):
3112 # type: (CephadmContext, str, Union[str, List[str]]) -> Tuple[int, int]
9f95a23c
TL
3113
3114 if not img:
f67539c2 3115 img = ctx.image
9f95a23c 3116
f6b5b4d7
TL
3117 if isinstance(file_path, str):
3118 paths = [file_path]
3119 else:
3120 paths = file_path
3121
20effc67
TL
3122 ex: Optional[Tuple[str, RuntimeError]] = None
3123
f6b5b4d7
TL
3124 for fp in paths:
3125 try:
3126 out = CephContainer(
f67539c2 3127 ctx,
f6b5b4d7
TL
3128 image=img,
3129 entrypoint='stat',
3130 args=['-c', '%u %g', fp]
2a845540 3131 ).run(verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
f6b5b4d7
TL
3132 uid, gid = out.split(' ')
3133 return int(uid), int(gid)
20effc67
TL
3134 except RuntimeError as e:
3135 ex = (fp, e)
3136 if ex:
3137 raise Error(f'Failed to extract uid/gid for path {ex[0]}: {ex[1]}')
3138
f6b5b4d7
TL
3139 raise RuntimeError('uid/gid not found')
3140
9f95a23c 3141
f67539c2 3142def deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid,
9f95a23c
TL
3143 config=None, keyring=None,
3144 osd_fsid=None,
f6b5b4d7
TL
3145 reconfig=False,
3146 ports=None):
f67539c2 3147 # type: (CephadmContext, str, str, Union[int, str], Optional[CephContainer], int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None
f6b5b4d7
TL
3148
3149 ports = ports or []
f67539c2 3150 if any([port_in_use(ctx, port) for port in ports]):
b3b6e05e
TL
3151 if daemon_type == 'mgr':
3152 # non-fatal for mgr when we are in mgr_standby_modules=false, but we can't
3153 # tell whether that is the case here.
3154 logger.warning(
3155 f"ceph-mgr TCP port(s) {','.join(map(str, ports))} already in use"
3156 )
3157 else:
3158 raise Error("TCP Port(s) '{}' required for {} already in use".format(','.join(map(str, ports)), daemon_type))
f6b5b4d7 3159
f67539c2 3160 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
3161 if reconfig and not os.path.exists(data_dir):
3162 raise Error('cannot reconfig, data path %s does not exist' % data_dir)
3163 if daemon_type == 'mon' and not os.path.exists(data_dir):
3164 assert config
3165 assert keyring
3166 # tmp keyring file
3167 tmp_keyring = write_tmp(keyring, uid, gid)
3168
3169 # tmp config file
3170 tmp_config = write_tmp(config, uid, gid)
3171
3172 # --mkfs
3173 create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid)
3174 mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', daemon_id)
3175 log_dir = get_log_dir(fsid, ctx.log_dir)
3176 CephContainer(
3177 ctx,
3178 image=ctx.image,
9f95a23c 3179 entrypoint='/usr/bin/ceph-mon',
3180 args=[
3181 '--mkfs',
3182 '-i', str(daemon_id),
3183 '--fsid', fsid,
3184 '-c', '/tmp/config',
3185 '--keyring', '/tmp/keyring',
3186 ] + get_daemon_args(ctx, fsid, 'mon', daemon_id),
3187 volume_mounts={
3188 log_dir: '/var/log/ceph:z',
3189 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id),
3190 tmp_keyring.name: '/tmp/keyring:z',
3191 tmp_config.name: '/tmp/config:z',
3192 },
3193 ).run()
3194
3195 # write conf
3196 with open(mon_dir + '/config', 'w') as f:
3197 os.fchown(f.fileno(), uid, gid)
3198 os.fchmod(f.fileno(), 0o600)
3199 f.write(config)
3200 else:
3201 # dirs, conf, keyring
3202 create_daemon_dirs(
f67539c2 3203 ctx,
3204 fsid, daemon_type, daemon_id,
3205 uid, gid,
3206 config, keyring)
3207
3208 if not reconfig:
20effc67 3209 if daemon_type == CephadmAgent.daemon_type:
3210 if ctx.config_json == '-':
3211 config_js = get_parm('-')
3212 else:
3213 config_js = get_parm(ctx.config_json)
3214 assert isinstance(config_js, dict)
3215
3216 cephadm_agent = CephadmAgent(ctx, fsid, daemon_id)
3217 cephadm_agent.deploy_daemon_unit(config_js)
3218 else:
3219 if c:
3220 deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id,
3221 c, osd_fsid=osd_fsid, ports=ports)
3222 else:
3223 raise RuntimeError('attempting to deploy a daemon without a container image')
3224
3225 if not os.path.exists(data_dir + '/unit.created'):
3226 with open(data_dir + '/unit.created', 'w') as f:
3227 os.fchmod(f.fileno(), 0o600)
3228 os.fchown(f.fileno(), uid, gid)
3229 f.write('mtime is time the daemon deployment was created\n')
3230
3231 with open(data_dir + '/unit.configured', 'w') as f:
3232 f.write('mtime is time we were last configured\n')
3233 os.fchmod(f.fileno(), 0o600)
3234 os.fchown(f.fileno(), uid, gid)
3235
f67539c2 3236 update_firewalld(ctx, daemon_type)
9f95a23c 3237
3238 # Open ports explicitly required for the daemon
3239 if ports:
f67539c2 3240 fw = Firewalld(ctx)
3241 fw.open_ports(ports)
3242 fw.apply_rules()
3243
3244 if reconfig and daemon_type not in Ceph.daemons:
3245 # ceph daemons do not need a restart; others (presumably) do to pick
3246 # up the new config
3247 call_throws(ctx, ['systemctl', 'reset-failed',
3248 get_unit_name(fsid, daemon_type, daemon_id)])
3249 call_throws(ctx, ['systemctl', 'restart',
3250 get_unit_name(fsid, daemon_type, daemon_id)])
3251
9f95a23c 3252
3253def _write_container_cmd_to_bash(ctx, file_obj, container, comment=None, background=False):
3254 # type: (CephadmContext, IO[str], CephContainer, Optional[str], Optional[bool]) -> None
f6b5b4d7 3255 if comment:
f91f0fd5 3256 # Sometimes adding a comment, especially if there are multiple containers in one
3257 # unit file, makes it easier to read and grok.
3258 file_obj.write('# ' + comment + '\n')
3259 # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
522d829b 3260 file_obj.write('! ' + ' '.join(container.rm_cmd(old_cname=True)) + ' 2> /dev/null\n')
f67539c2 3261 file_obj.write('! ' + ' '.join(container.rm_cmd()) + ' 2> /dev/null\n')
f6b5b4d7 3262 # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
3263 if isinstance(ctx.container_engine, Podman):
3264 file_obj.write(
3265 '! '
3266 + ' '.join([shlex.quote(a) for a in container.rm_cmd(storage=True)])
3267 + ' 2> /dev/null\n')
3268 file_obj.write(
3269 '! '
3270 + ' '.join([shlex.quote(a) for a in container.rm_cmd(old_cname=True, storage=True)])
3271 + ' 2> /dev/null\n')
3272
3273 # container run command
3274 file_obj.write(
3275 ' '.join([shlex.quote(a) for a in container.run_cmd()])
3276 + (' &' if background else '') + '\n')
3277
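# Illustrative sketch (not in the original source): for a podman host and a
# container named ceph-<fsid>-mon-a, the helper above emits roughly the
# following lines into unit.run:
#
#   # mon.a
#   ! /usr/bin/podman rm -f ceph-<fsid>-mon.a 2> /dev/null
#   ! /usr/bin/podman rm -f ceph-<fsid>-mon-a 2> /dev/null
#   ! /usr/bin/podman rm -f --storage ceph-<fsid>-mon-a 2> /dev/null
#   ! /usr/bin/podman rm -f --storage ceph-<fsid>-mon.a 2> /dev/null
#   /usr/bin/podman run --rm --ipc=host ... --name ceph-<fsid>-mon-a <image> ...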
3278
3279def clean_cgroup(ctx: CephadmContext, fsid: str, unit_name: str) -> None:
3280 # systemd may fail to cleanup cgroups from previous stopped unit, which will cause next "systemctl start" to fail.
3281 # see https://tracker.ceph.com/issues/50998
3282
3283 CGROUPV2_PATH = Path('/sys/fs/cgroup')
3284 if not (CGROUPV2_PATH / 'system.slice').exists():
3285 # Only unified cgroup is affected, skip if not the case
3286 return
3287
3288 slice_name = 'system-ceph\\x2d{}.slice'.format(fsid.replace('-', '\\x2d'))
3289 cg_path = CGROUPV2_PATH / 'system.slice' / slice_name / f'{unit_name}.service'
3290 if not cg_path.exists():
3291 return
3292
3293 def cg_trim(path: Path) -> None:
3294 for p in path.iterdir():
3295 if p.is_dir():
3296 cg_trim(p)
3297 path.rmdir()
3298 try:
3299 cg_trim(cg_path)
3300 except OSError:
3301 logger.warning(f'Failed to trim old cgroups {cg_path}')
3302
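# Added note (editorial): for unit ceph-<fsid>@mon.a the path trimmed above looks
# roughly like
#   /sys/fs/cgroup/system.slice/system-ceph\x2d<fsid with - escaped as \x2d>.slice/ceph-<fsid>@mon.a.service
# since systemd escapes dashes in the fsid as \x2d in the slice name.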
3303
3304def deploy_daemon_units(
3305 ctx: CephadmContext,
3306 fsid: str,
3307 uid: int,
3308 gid: int,
3309 daemon_type: str,
3310 daemon_id: Union[int, str],
3311 c: 'CephContainer',
3312 enable: bool = True,
3313 start: bool = True,
3314 osd_fsid: Optional[str] = None,
3315 ports: Optional[List[int]] = None,
3316) -> None:
9f95a23c 3317 # cmd
3318 data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
3319 with open(data_dir + '/unit.run.new', 'w') as f, \
b3b6e05e 3320 open(data_dir + '/unit.meta.new', 'w') as metaf:
f6b5b4d7 3321 f.write('set -e\n')
3322
3323 if daemon_type in Ceph.daemons:
3324 install_path = find_program('install')
3325 f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))
3326
3327 # pre-start cmd(s)
3328 if daemon_type == 'osd':
3329 # osds have a pre-start step
3330 assert osd_fsid
3331 simple_fn = os.path.join('/etc/ceph/osd',
3332 '%s-%s.json.adopted-by-cephadm' % (daemon_id, osd_fsid))
3333 if os.path.exists(simple_fn):
3334 f.write('# Simple OSDs need chown on startup:\n')
3335 for n in ['block', 'block.db', 'block.wal']:
3336 p = os.path.join(data_dir, n)
3337 f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
3338 else:
3339 # if ceph-volume does not support 'ceph-volume activate', we must
3340 # do 'ceph-volume lvm activate'.
3341 test_cv = get_ceph_volume_container(
f67539c2 3342 ctx,
3343 args=['activate', '--bad-option'],
3344 volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
3345 bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
3346 cname='ceph-%s-%s.%s-activate-test' % (fsid, daemon_type, daemon_id),
3347 )
3348 out, err, ret = call(ctx, test_cv.run_cmd(), verbosity=CallVerbosity.SILENT)
3349 # bad: ceph-volume: error: unrecognized arguments: activate --bad-option
3350 # good: ceph-volume: error: unrecognized arguments: --bad-option
3351 if 'unrecognized arguments: activate' in err:
3352 # older ceph-volume without top-level activate or --no-tmpfs
3353 cmd = [
3354 'lvm', 'activate',
3355 str(daemon_id), osd_fsid,
3356 '--no-systemd',
3357 ]
3358 else:
3359 cmd = [
3360 'activate',
3361 '--osd-id', str(daemon_id),
3362 '--osd-uuid', osd_fsid,
3363 '--no-systemd',
3364 '--no-tmpfs',
3365 ]
3366
3367 prestart = get_ceph_volume_container(
3368 ctx,
3369 args=cmd,
3370 volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
3371 bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
3372 cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
3373 )
f67539c2 3374 _write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate')
3375 elif daemon_type == CephIscsi.daemon_type:
3376 f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
f67539c2 3377 ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
f6b5b4d7 3378 tcmu_container = ceph_iscsi.get_tcmu_runner_container()
a4b75251 3379 _write_container_cmd_to_bash(ctx, f, tcmu_container, 'iscsi tcmu-runner container', background=True)
3380
3381 _write_container_cmd_to_bash(ctx, f, c, '%s.%s' % (daemon_type, str(daemon_id)))
3382
3383 # some metadata about the deploy
3384 meta: Dict[str, Any] = {}
3385 if 'meta_json' in ctx and ctx.meta_json:
3386 meta = json.loads(ctx.meta_json) or {}
3387 meta.update({
3388 'memory_request': int(ctx.memory_request) if ctx.memory_request else None,
3389 'memory_limit': int(ctx.memory_limit) if ctx.memory_limit else None,
3390 })
3391 if not meta.get('ports'):
3392 meta['ports'] = ports
3393 metaf.write(json.dumps(meta, indent=4) + '\n')
1911f103 3394
9f95a23c 3395 os.fchmod(f.fileno(), 0o600)
f67539c2 3396 os.fchmod(metaf.fileno(), 0o600)
3397 os.rename(data_dir + '/unit.run.new',
3398 data_dir + '/unit.run')
3399 os.rename(data_dir + '/unit.meta.new',
3400 data_dir + '/unit.meta')
3401
3402 # post-stop command(s)
3403 with open(data_dir + '/unit.poststop.new', 'w') as f:
3404 if daemon_type == 'osd':
3405 assert osd_fsid
20effc67 3406 poststop = get_ceph_volume_container(
f67539c2 3407 ctx,
3408 args=[
3409 'lvm', 'deactivate',
3410 str(daemon_id), osd_fsid,
3411 ],
3412 volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
3413 bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
3414 cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type,
3415 daemon_id),
3416 )
f67539c2 3417 _write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd')
1911f103 3418 elif daemon_type == CephIscsi.daemon_type:
f6b5b4d7 3419 # make sure we also stop the tcmu container
f67539c2 3420 ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
f6b5b4d7 3421 tcmu_container = ceph_iscsi.get_tcmu_runner_container()
f67539c2 3422 f.write('! ' + ' '.join(tcmu_container.stop_cmd()) + '\n')
1911f103 3423 f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
3424 os.fchmod(f.fileno(), 0o600)
3425 os.rename(data_dir + '/unit.poststop.new',
3426 data_dir + '/unit.poststop')
3427
3428 # stop command(s)
3429 with open(data_dir + '/unit.stop.new', 'w') as f:
3430 # The generated script checks whether the container exists before stopping
3431 # it. The exit code is success either if the container does not exist or if
3432 # it exists and is stopped successfully.
3433 container_exists = f'{ctx.container_engine.path} inspect %s &>/dev/null'
3434 f.write(f'! {container_exists % c.old_cname} || {" ".join(c.stop_cmd(old_cname=True))} \n')
3435 f.write(f'! {container_exists % c.cname} || {" ".join(c.stop_cmd())} \n')
3436
3437 os.fchmod(f.fileno(), 0o600)
3438 os.rename(data_dir + '/unit.stop.new',
3439 data_dir + '/unit.stop')
3440
3441 if c:
3442 with open(data_dir + '/unit.image.new', 'w') as f:
3443 f.write(c.image + '\n')
3444 os.fchmod(f.fileno(), 0o600)
3445 os.rename(data_dir + '/unit.image.new',
3446 data_dir + '/unit.image')
9f95a23c 3447
3448 # sysctl
3449 install_sysctl(ctx, fsid, daemon_type)
3450
9f95a23c 3451 # systemd
3452 install_base_units(ctx, fsid)
3453 unit = get_unit_file(ctx, fsid)
9f95a23c 3454 unit_file = 'ceph-%s@.service' % (fsid)
f67539c2 3455 with open(ctx.unit_dir + '/' + unit_file + '.new', 'w') as f:
9f95a23c 3456 f.write(unit)
3457 os.rename(ctx.unit_dir + '/' + unit_file + '.new',
3458 ctx.unit_dir + '/' + unit_file)
3459 call_throws(ctx, ['systemctl', 'daemon-reload'])
3460
3461 unit_name = get_unit_name(fsid, daemon_type, daemon_id)
f67539c2 3462 call(ctx, ['systemctl', 'stop', unit_name],
adb31ebb 3463 verbosity=CallVerbosity.DEBUG)
f67539c2 3464 call(ctx, ['systemctl', 'reset-failed', unit_name],
adb31ebb 3465 verbosity=CallVerbosity.DEBUG)
9f95a23c 3466 if enable:
f67539c2 3467 call_throws(ctx, ['systemctl', 'enable', unit_name])
9f95a23c 3468 if start:
522d829b 3469 clean_cgroup(ctx, fsid, unit_name)
f67539c2 3470 call_throws(ctx, ['systemctl', 'start', unit_name])
9f95a23c 3471
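# Added summary (editorial, not in the original source): after
# deploy_daemon_units() returns, the daemon's data dir contains unit.run,
# unit.meta, unit.stop, unit.poststop and unit.image, the per-cluster
# ceph-<fsid>@.service template has been (re)written under ctx.unit_dir and
# reloaded, and the unit has been enabled/started as requested.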
3472
3473class Firewalld(object):
3474 def __init__(self, ctx):
3475 # type: (CephadmContext) -> None
3476 self.ctx = ctx
3477 self.available = self.check()
3478
3479 def check(self):
3480 # type: () -> bool
3481 self.cmd = find_executable('firewall-cmd')
3482 if not self.cmd:
3483 logger.debug('firewalld does not appear to be present')
3484 return False
f67539c2 3485 (enabled, state, _) = check_unit(self.ctx, 'firewalld.service')
3486 if not enabled:
3487 logger.debug('firewalld.service is not enabled')
3488 return False
f67539c2 3489 if state != 'running':
3490 logger.debug('firewalld.service is not running')
3491 return False
3492
f67539c2 3493 logger.info('firewalld ready')
3494 return True
3495
3496 def enable_service_for(self, daemon_type):
3497 # type: (str) -> None
3498 if not self.available:
3499 logger.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type)
3500 return
3501
3502 if daemon_type == 'mon':
3503 svc = 'ceph-mon'
3504 elif daemon_type in ['mgr', 'mds', 'osd']:
3505 svc = 'ceph'
3506 elif daemon_type == NFSGanesha.daemon_type:
3507 svc = 'nfs'
3508 else:
3509 return
3510
3511 if not self.cmd:
3512 raise RuntimeError('command not defined')
3513
3514 out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG)
3515 if ret:
3516 logger.info('Enabling firewalld service %s in current zone...' % svc)
f67539c2 3517 out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-service', svc])
3518 if ret:
3519 raise RuntimeError(
3520 'unable to add service %s to current zone: %s' % (svc, err))
3521 else:
3522 logger.debug('firewalld service %s is enabled in current zone' % svc)
3523
3524 def open_ports(self, fw_ports):
3525 # type: (List[int]) -> None
3526 if not self.available:
3527 logger.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports)
3528 return
3529
3530 if not self.cmd:
3531 raise RuntimeError('command not defined')
3532
3533 for port in fw_ports:
3534 tcp_port = str(port) + '/tcp'
f67539c2 3535 out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
9f95a23c 3536 if ret:
f6b5b4d7 3537 logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
f67539c2 3538 out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-port', tcp_port])
3539 if ret:
3540 raise RuntimeError('unable to add port %s to current zone: %s' %
f67539c2 3541 (tcp_port, err))
3542 else:
3543 logger.debug('firewalld port %s is enabled in current zone' % tcp_port)
3544
3545 def close_ports(self, fw_ports):
3546 # type: (List[int]) -> None
3547 if not self.available:
3548 logger.debug('Not possible to close ports <%s>. firewalld.service is not available' % fw_ports)
3549 return
3550
3551 if not self.cmd:
3552 raise RuntimeError('command not defined')
3553
3554 for port in fw_ports:
3555 tcp_port = str(port) + '/tcp'
3556 out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
3557 if not ret:
3558 logger.info('Disabling port %s in current zone...' % tcp_port)
3559 out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--remove-port', tcp_port])
3560 if ret:
3561 raise RuntimeError('unable to remove port %s from current zone: %s' %
3562 (tcp_port, err))
3563 else:
3564 logger.info(f'Port {tcp_port} disabled')
3565 else:
3566 logger.info(f'firewalld port {tcp_port} already closed')
3567
3568 def apply_rules(self):
3569 # type: () -> None
3570 if not self.available:
3571 return
3572
3573 if not self.cmd:
3574 raise RuntimeError('command not defined')
f6b5b4d7 3575
f67539c2 3576 call_throws(self.ctx, [self.cmd, '--reload'])
f6b5b4d7 3577
3578
3579def update_firewalld(ctx, daemon_type):
3580 # type: (CephadmContext, str) -> None
3581 if not ('skip_firewalld' in ctx and ctx.skip_firewalld):
3582 firewall = Firewalld(ctx)
3583 firewall.enable_service_for(daemon_type)
3584 firewall.apply_rules()
f6b5b4d7 3585
f6b5b4d7 3586
3587def install_sysctl(ctx: CephadmContext, fsid: str, daemon_type: str) -> None:
3588 """
3589 Set up sysctl settings
3590 """
3591 def _write(conf: Path, lines: List[str]) -> None:
3592 lines = [
3593 '# created by cephadm',
3594 '',
3595 *lines,
3596 '',
3597 ]
3598 with open(conf, 'w') as f:
3599 f.write('\n'.join(lines))
f6b5b4d7 3600
3601 conf = Path(ctx.sysctl_dir).joinpath(f'90-ceph-{fsid}-{daemon_type}.conf')
3602 lines: Optional[List] = None
3603
3604 if daemon_type == 'osd':
3605 lines = OSD.get_sysctl_settings()
3606 elif daemon_type == 'haproxy':
3607 lines = HAproxy.get_sysctl_settings()
3608 elif daemon_type == 'keepalived':
3609 lines = Keepalived.get_sysctl_settings()
3610
3611 # apply the sysctl settings
3612 if lines:
522d829b 3613 Path(ctx.sysctl_dir).mkdir(mode=0o755, exist_ok=True)
3614 _write(conf, lines)
3615 call_throws(ctx, ['sysctl', '--system'])
9f95a23c 3616
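# Illustrative sketch (added, not in the original): for an osd daemon this writes
# /etc/sysctl.d/90-ceph-<fsid>-osd.conf with the header '# created by cephadm'
# followed by whatever OSD.get_sysctl_settings() returns, then runs
# `sysctl --system` to apply it.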
f67539c2 3617
3618def migrate_sysctl_dir(ctx: CephadmContext, fsid: str) -> None:
3619 """
3620 Cephadm once used '/usr/lib/sysctl.d' for storing sysctl configuration.
3621 This moves it to '/etc/sysctl.d'.
3622 """
3623 deprecated_location: str = '/usr/lib/sysctl.d'
3624 deprecated_confs: List[str] = glob(f'{deprecated_location}/90-ceph-{fsid}-*.conf')
3625 if not deprecated_confs:
3626 return
3627
3628 file_count: int = len(deprecated_confs)
3629 logger.info(f'Found {file_count} sysctl files in deprecated location {deprecated_location}. Starting migration.')
3630 for conf in deprecated_confs:
3631 try:
3632 shutil.move(conf, ctx.sysctl_dir)
3633 file_count -= 1
3634 except shutil.Error as err:
3635 if str(err).endswith('already exists'):
3636 logger.warning(f'Destination file already exists. Deleting {conf}.')
3637 try:
3638 os.unlink(conf)
3639 file_count -= 1
3640 except OSError as del_err:
3641 logger.warning(f'Could not remove {conf}: {del_err}.')
3642 else:
3643 logger.warning(f'Could not move {conf} from {deprecated_location} to {ctx.sysctl_dir}: {err}')
3644
3645 # Log successful migration
3646 if file_count == 0:
3647 logger.info(f'Successfully migrated sysctl config to {ctx.sysctl_dir}.')
3648 return
3649
3650 # Log partially successful / unsuccessful migration
3651 files_processed: int = len(deprecated_confs)
3652 if file_count < files_processed:
3653 status: str = f'partially successful (failed {file_count}/{files_processed})'
3654 elif file_count == files_processed:
3655 status = 'unsuccessful'
3656 logger.warning(f'Migration of sysctl configuration {status}. You may want to perform a migration manually.')
3657
3658
3659def install_base_units(ctx, fsid):
3660 # type: (CephadmContext, str) -> None
3661 """
3662 Set up ceph.target and ceph-$fsid.target units.
3663 """
3664 # global unit
3665 existed = os.path.exists(ctx.unit_dir + '/ceph.target')
3666 with open(ctx.unit_dir + '/ceph.target.new', 'w') as f:
3667 f.write('[Unit]\n'
3668 'Description=All Ceph clusters and services\n'
3669 '\n'
3670 '[Install]\n'
3671 'WantedBy=multi-user.target\n')
3672 os.rename(ctx.unit_dir + '/ceph.target.new',
3673 ctx.unit_dir + '/ceph.target')
3674 if not existed:
3675 # we disable before enable in case a different ceph.target
3676 # (from the traditional package) is present; while newer
3677 # systemd is smart enough to disable the old
3678 # (/lib/systemd/...) and enable the new (/etc/systemd/...),
3679 # some older versions of systemd error out with EEXIST.
3680 call_throws(ctx, ['systemctl', 'disable', 'ceph.target'])
3681 call_throws(ctx, ['systemctl', 'enable', 'ceph.target'])
3682 call_throws(ctx, ['systemctl', 'start', 'ceph.target'])
3683
3684 # cluster unit
3685 existed = os.path.exists(ctx.unit_dir + '/ceph-%s.target' % fsid)
3686 with open(ctx.unit_dir + '/ceph-%s.target.new' % fsid, 'w') as f:
3687 f.write(
3688 '[Unit]\n'
3689 'Description=Ceph cluster {fsid}\n'
3690 'PartOf=ceph.target\n'
3691 'Before=ceph.target\n'
3692 '\n'
3693 '[Install]\n'
3694 'WantedBy=multi-user.target ceph.target\n'.format(
3695 fsid=fsid)
9f95a23c 3696 )
3697 os.rename(ctx.unit_dir + '/ceph-%s.target.new' % fsid,
3698 ctx.unit_dir + '/ceph-%s.target' % fsid)
9f95a23c 3699 if not existed:
3700 call_throws(ctx, ['systemctl', 'enable', 'ceph-%s.target' % fsid])
3701 call_throws(ctx, ['systemctl', 'start', 'ceph-%s.target' % fsid])
3702
3703 # logrotate for the cluster
f67539c2 3704 with open(ctx.logrotate_dir + '/ceph-%s' % fsid, 'w') as f:
3705 """
3706 This is a bit sloppy in that the killall/pkill will touch all ceph daemons
3707 in all containers, but I don't see an elegant way to send SIGHUP *just* to
3708 the daemons for this cluster. (1) systemd kill -s will get the signal to
3709 podman, but podman will exit. (2) podman kill will get the signal to the
3710 first child (bash), but that isn't the ceph daemon. This is simpler and
3711 should be harmless.
3712 """
3713 f.write("""# created by cephadm
3714/var/log/ceph/%s/*.log {
3715 rotate 7
3716 daily
3717 compress
3718 sharedscripts
3719 postrotate
f67539c2 3720 killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror cephfs-mirror || pkill -1 -x 'ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror|cephfs-mirror' || true
3721 endscript
3722 missingok
3723 notifempty
3724 su root root
3725}
3726""" % fsid)
3727
f6b5b4d7 3728
3729def get_unit_file(ctx, fsid):
3730 # type: (CephadmContext, str) -> str
f91f0fd5 3731 extra_args = ''
3732 if isinstance(ctx.container_engine, Podman):
3733 extra_args = ('ExecStartPre=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
3734 'ExecStopPost=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
3735 'Type=forking\n'
3736 'PIDFile=%t/%n-pid\n')
3737 if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION:
3738 extra_args += 'Delegate=yes\n'
3739
3740 docker = isinstance(ctx.container_engine, Docker)
3741 u = """# generated by cephadm
3742[Unit]
3743Description=Ceph %i for {fsid}
3744
3745# According to:
3746# http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
3747# these can be removed once ceph-mon will dynamically change network
3748# configuration.
f67539c2 3749After=network-online.target local-fs.target time-sync.target{docker_after}
9f95a23c 3750Wants=network-online.target local-fs.target time-sync.target
f67539c2 3751{docker_requires}
3752
3753PartOf=ceph-{fsid}.target
3754Before=ceph-{fsid}.target
3755
3756[Service]
3757LimitNOFILE=1048576
3758LimitNPROC=1048576
3759EnvironmentFile=-/etc/environment
9f95a23c 3760ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
33c7a0ef 3761ExecStop=-/bin/bash -c 'bash {data_dir}/{fsid}/%i/unit.stop'
3762ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
3763KillMode=none
3764Restart=on-failure
3765RestartSec=10s
3766TimeoutStartSec=120
e306af50 3767TimeoutStopSec=120
3768StartLimitInterval=30min
3769StartLimitBurst=5
f91f0fd5 3770{extra_args}
3771[Install]
3772WantedBy=ceph-{fsid}.target
33c7a0ef 3773""".format(fsid=fsid,
3774 data_dir=ctx.data_dir,
3775 extra_args=extra_args,
3776 # if docker, we depend on docker.service
3777 docker_after=' docker.service' if docker else '',
3778 docker_requires='Requires=docker.service\n' if docker else '')
f91f0fd5 3779
3780 return u
3781
3782##################################
3783
f6b5b4d7 3784
3785class CephContainer:
3786 def __init__(self,
f67539c2 3787 ctx: CephadmContext,
3788 image: str,
3789 entrypoint: str,
3790 args: List[str] = [],
3791 volume_mounts: Dict[str, str] = {},
3792 cname: str = '',
3793 container_args: List[str] = [],
3794 envs: Optional[List[str]] = None,
3795 privileged: bool = False,
3796 ptrace: bool = False,
3797 bind_mounts: Optional[List[List[str]]] = None,
f67539c2 3798 init: Optional[bool] = None,
f91f0fd5 3799 host_network: bool = True,
3800 memory_request: Optional[str] = None,
3801 memory_limit: Optional[str] = None,
f91f0fd5 3802 ) -> None:
f67539c2 3803 self.ctx = ctx
3804 self.image = image
3805 self.entrypoint = entrypoint
3806 self.args = args
3807 self.volume_mounts = volume_mounts
522d829b 3808 self._cname = cname
3809 self.container_args = container_args
3810 self.envs = envs
3811 self.privileged = privileged
3812 self.ptrace = ptrace
f6b5b4d7 3813 self.bind_mounts = bind_mounts if bind_mounts else []
f67539c2 3814 self.init = init if init else ctx.container_init
f91f0fd5 3815 self.host_network = host_network
3816 self.memory_request = memory_request
3817 self.memory_limit = memory_limit
9f95a23c 3818
3819 @classmethod
3820 def for_daemon(cls,
3821 ctx: CephadmContext,
3822 fsid: str,
3823 daemon_type: str,
3824 daemon_id: str,
3825 entrypoint: str,
3826 args: List[str] = [],
3827 volume_mounts: Dict[str, str] = {},
3828 container_args: List[str] = [],
3829 envs: Optional[List[str]] = None,
3830 privileged: bool = False,
3831 ptrace: bool = False,
3832 bind_mounts: Optional[List[List[str]]] = None,
3833 init: Optional[bool] = None,
3834 host_network: bool = True,
3835 memory_request: Optional[str] = None,
3836 memory_limit: Optional[str] = None,
3837 ) -> 'CephContainer':
3838 return cls(
3839 ctx,
3840 image=ctx.image,
3841 entrypoint=entrypoint,
3842 args=args,
3843 volume_mounts=volume_mounts,
3844 cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id),
3845 container_args=container_args,
3846 envs=envs,
3847 privileged=privileged,
3848 ptrace=ptrace,
3849 bind_mounts=bind_mounts,
3850 init=init,
3851 host_network=host_network,
3852 memory_request=memory_request,
3853 memory_limit=memory_limit,
3854 )
3855
3856 @property
3857 def cname(self) -> str:
3858 """
3859 podman adds the current container name to the /etc/hosts
3860 file. Turns out, python's `socket.getfqdn()` differs from
3861 `hostname -f` when the container name contains
3862 dots:
3863
3864 # podman run --name foo.bar.baz.com ceph/ceph /bin/bash
3865 [root@sebastians-laptop /]# cat /etc/hosts
3866 127.0.0.1 localhost
3867 ::1 localhost
3868 127.0.1.1 sebastians-laptop foo.bar.baz.com
3869 [root@sebastians-laptop /]# hostname -f
3870 sebastians-laptop
3871 [root@sebastians-laptop /]# python3 -c 'import socket; print(socket.getfqdn())'
3872 foo.bar.baz.com
3873
3874 Fascinatingly, this doesn't happen when using dashes.
3875 """
3876 return self._cname.replace('.', '-')
3877
3878 @cname.setter
3879 def cname(self, val: str) -> None:
3880 self._cname = val
3881
3882 @property
3883 def old_cname(self) -> str:
3884 return self._cname
3885
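# Added example (not in the original): the cname property only swaps dots for
# dashes, so for a daemon container:
#
#   CephContainer.for_daemon(ctx, fsid, 'mon', 'a', 'bash').old_cname
#   -> 'ceph-<fsid>-mon.a'
#   CephContainer.for_daemon(ctx, fsid, 'mon', 'a', 'bash').cname
#   -> 'ceph-<fsid>-mon-a'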
3886 def run_cmd(self) -> List[str]:
3887 cmd_args: List[str] = [
f67539c2 3888 str(self.ctx.container_engine.path),
3889 'run',
3890 '--rm',
3891 '--ipc=host',
3892 # some containers (ahem, haproxy) override this, but we want a fast
3893 # shutdown always (and, more importantly, a successful exit even if we
3894 # fall back to SIGKILL).
3895 '--stop-signal=SIGTERM',
f91f0fd5 3896 ]
3897
3898 if isinstance(self.ctx.container_engine, Podman):
3899 if os.path.exists('/etc/ceph/podman-auth.json'):
3900 cmd_args.append('--authfile=/etc/ceph/podman-auth.json')
3901
3902 envs: List[str] = [
3903 '-e', 'CONTAINER_IMAGE=%s' % self.image,
3904 '-e', 'NODE_NAME=%s' % get_hostname(),
3905 ]
3906 vols: List[str] = []
3907 binds: List[str] = []
9f95a23c 3908
3909 if self.memory_request:
3910 cmd_args.extend(['-e', 'POD_MEMORY_REQUEST', str(self.memory_request)])
3911 if self.memory_limit:
3912 cmd_args.extend(['-e', 'POD_MEMORY_LIMIT', str(self.memory_limit)])
3913 cmd_args.extend(['--memory', str(self.memory_limit)])
3914
3915 if self.host_network:
3916 cmd_args.append('--net=host')
3917 if self.entrypoint:
3918 cmd_args.extend(['--entrypoint', self.entrypoint])
9f95a23c 3919 if self.privileged:
3920 cmd_args.extend([
3921 '--privileged',
3922 # let OSD etc read block devs that haven't been chowned
3923 '--group-add=disk'])
3924 if self.ptrace and not self.privileged:
3925 # if privileged, the SYS_PTRACE cap is already added
3926 # in addition, --cap-add and --privileged are mutually
3927 # exclusive since podman >= 2.0
3928 cmd_args.append('--cap-add=SYS_PTRACE')
3929 if self.init:
3930 cmd_args.append('--init')
f67539c2 3931 envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
3932 if self.cname:
3933 cmd_args.extend(['--name', self.cname])
3934 if self.envs:
3935 for env in self.envs:
3936 envs.extend(['-e', env])
3937
3938 vols = sum(
3939 [['-v', '%s:%s' % (host_dir, container_dir)]
3940 for host_dir, container_dir in self.volume_mounts.items()], [])
f6b5b4d7 3941 binds = sum([['--mount', '{}'.format(','.join(bind))]
3942 for bind in self.bind_mounts], [])
3943
3944 return \
3945 cmd_args + self.container_args + \
3946 envs + vols + binds + \
3947 [self.image] + self.args # type: ignore
3948
3949 def shell_cmd(self, cmd: List[str]) -> List[str]:
3950 cmd_args: List[str] = [
f67539c2 3951 str(self.ctx.container_engine.path),
3952 'run',
3953 '--rm',
e306af50 3954 '--ipc=host',
3955 ]
3956 envs: List[str] = [
3957 '-e', 'CONTAINER_IMAGE=%s' % self.image,
3958 '-e', 'NODE_NAME=%s' % get_hostname(),
3959 ]
3960 vols: List[str] = []
3961 binds: List[str] = []
9f95a23c 3962
3963 if self.host_network:
3964 cmd_args.append('--net=host')
3965 if self.ctx.no_hosts:
3966 cmd_args.append('--no-hosts')
9f95a23c 3967 if self.privileged:
3968 cmd_args.extend([
3969 '--privileged',
3970 # let OSD etc read block devs that haven't been chowned
3971 '--group-add=disk',
3972 ])
3973 if self.init:
3974 cmd_args.append('--init')
3975 envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
3976 if self.envs:
3977 for env in self.envs:
3978 envs.extend(['-e', env])
3979
3980 vols = sum(
3981 [['-v', '%s:%s' % (host_dir, container_dir)]
3982 for host_dir, container_dir in self.volume_mounts.items()], [])
3983 binds = sum([['--mount', '{}'.format(','.join(bind))]
3984 for bind in self.bind_mounts], [])
3985
3986 return cmd_args + self.container_args + envs + vols + binds + [
9f95a23c 3987 '--entrypoint', cmd[0],
f91f0fd5 3988 self.image,
3989 ] + cmd[1:]
3990
3991 def exec_cmd(self, cmd):
3992 # type: (List[str]) -> List[str]
3993 cname = get_running_container_name(self.ctx, self)
3994 if not cname:
3995 raise Error('unable to find container "{}"'.format(self.cname))
9f95a23c 3996 return [
f67539c2 3997 str(self.ctx.container_engine.path),
3998 'exec',
3999 ] + self.container_args + [
4000 self.cname,
4001 ] + cmd
4002
522d829b 4003 def rm_cmd(self, old_cname: bool = False, storage: bool = False) -> List[str]:
f6b5b4d7 4004 ret = [
f67539c2 4005 str(self.ctx.container_engine.path),
4006 'rm', '-f',
4007 ]
4008 if storage:
4009 ret.append('--storage')
4010 if old_cname:
4011 ret.append(self.old_cname)
4012 else:
4013 ret.append(self.cname)
4014 return ret
4015
522d829b 4016 def stop_cmd(self, old_cname: bool = False) -> List[str]:
f6b5b4d7 4017 ret = [
f67539c2 4018 str(self.ctx.container_engine.path),
522d829b 4019 'stop', self.old_cname if old_cname else self.cname,
4020 ]
4021 return ret
4022
4023 def run(self, timeout=DEFAULT_TIMEOUT, verbosity=CallVerbosity.VERBOSE_ON_FAILURE):
4024 # type: (Optional[int], CallVerbosity) -> str
f67539c2 4025 out, _, _ = call_throws(self.ctx, self.run_cmd(),
2a845540 4026 desc=self.entrypoint, timeout=timeout, verbosity=verbosity)
4027 return out
4028
4029
4030#####################################
4031
4032class MgrListener(Thread):
4033 def __init__(self, agent: 'CephadmAgent') -> None:
4034 self.agent = agent
4035 self.stop = False
4036 super(MgrListener, self).__init__(target=self.run)
4037
4038 def run(self) -> None:
4039 listenSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
4040 listenSocket.bind(('0.0.0.0', int(self.agent.listener_port)))
4041 listenSocket.settimeout(60)
4042 listenSocket.listen(1)
4043 ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
4044 ssl_ctx.verify_mode = ssl.CERT_REQUIRED
4045 ssl_ctx.load_cert_chain(self.agent.listener_cert_path, self.agent.listener_key_path)
4046 ssl_ctx.load_verify_locations(self.agent.ca_path)
4047 secureListenSocket = ssl_ctx.wrap_socket(listenSocket, server_side=True)
4048 while not self.stop:
4049 try:
4050 try:
4051 conn, _ = secureListenSocket.accept()
4052 except socket.timeout:
4053 continue
4054 try:
4055 length: int = int(conn.recv(10).decode())
4056 except Exception as e:
4057 err_str = f'Failed to extract length of payload from message: {e}'
4058 conn.send(err_str.encode())
4059 logger.error(err_str)
4060 while True:
4061 payload = conn.recv(length).decode()
4062 if not payload:
4063 break
4064 try:
4065 data: Dict[Any, Any] = json.loads(payload)
4066 self.handle_json_payload(data)
4067 except Exception as e:
4068 err_str = f'Failed to extract json payload from message: {e}'
4069 conn.send(err_str.encode())
4070 logger.error(err_str)
4071 else:
4072 conn.send(b'ACK')
4073 if 'config' in data:
4074 self.agent.wakeup()
4075 self.agent.ls_gatherer.wakeup()
4076 self.agent.volume_gatherer.wakeup()
4077 logger.debug(f'Got mgr message {data}')
4078 except Exception as e:
4079 logger.error(f'Mgr Listener encountered exception: {e}')
4080
4081 def shutdown(self) -> None:
4082 self.stop = True
4083
4084 def handle_json_payload(self, data: Dict[Any, Any]) -> None:
4085 self.agent.ack = int(data['counter'])
4086 if 'config' in data:
4087 logger.info('Received new config from mgr')
4088 config = data['config']
4089 for filename in config:
4090 if filename in self.agent.required_files:
4091 file_path = os.path.join(self.agent.daemon_dir, filename)
4092 with open(os.open(file_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
4093 f.write(config[filename])
4094 os.rename(file_path + '.new', file_path)
4095 self.agent.pull_conf_settings()
4096 self.agent.wakeup()
4097
4098
4099class CephadmAgent():
4100
4101 daemon_type = 'agent'
4102 default_port = 8498
4103 loop_interval = 30
4104 stop = False
4105
4106 required_files = [
4107 'agent.json',
4108 'keyring',
4109 'root_cert.pem',
4110 'listener.crt',
4111 'listener.key',
4112 ]
4113
4114 def __init__(self, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str] = ''):
4115 self.ctx = ctx
4116 self.fsid = fsid
4117 self.daemon_id = daemon_id
4118 self.starting_port = 14873
4119 self.target_ip = ''
4120 self.target_port = ''
4121 self.host = ''
4122 self.daemon_dir = os.path.join(ctx.data_dir, self.fsid, f'{self.daemon_type}.{self.daemon_id}')
4123 self.config_path = os.path.join(self.daemon_dir, 'agent.json')
4124 self.keyring_path = os.path.join(self.daemon_dir, 'keyring')
4125 self.ca_path = os.path.join(self.daemon_dir, 'root_cert.pem')
4126 self.listener_cert_path = os.path.join(self.daemon_dir, 'listener.crt')
4127 self.listener_key_path = os.path.join(self.daemon_dir, 'listener.key')
4128 self.listener_port = ''
4129 self.ack = 1
4130 self.event = Event()
4131 self.mgr_listener = MgrListener(self)
4132 self.ls_gatherer = AgentGatherer(self, lambda: self._get_ls(), 'Ls')
4133 self.volume_gatherer = AgentGatherer(self, lambda: self._ceph_volume(enhanced=False), 'Volume')
4134 self.device_enhanced_scan = False
4135 self.recent_iteration_run_times: List[float] = [0.0, 0.0, 0.0]
4136 self.recent_iteration_index: int = 0
4137 self.cached_ls_values: Dict[str, Dict[str, str]] = {}
4138
4139 def validate(self, config: Dict[str, str] = {}) -> None:
4140 # check for the required files
4141 for fname in self.required_files:
4142 if fname not in config:
4143 raise Error('required file missing from config: %s' % fname)
4144
4145 def deploy_daemon_unit(self, config: Dict[str, str] = {}) -> None:
4146 if not config:
4147 raise Error('Agent needs a config')
4148 assert isinstance(config, dict)
4149 self.validate(config)
4150
4151 # Create the required config files in the daemons dir, with restricted permissions
4152 for filename in config:
4153 if filename in self.required_files:
4154 file_path = os.path.join(self.daemon_dir, filename)
4155 with open(os.open(file_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
4156 f.write(config[filename])
4157 os.rename(file_path + '.new', file_path)
4158
4159 unit_run_path = os.path.join(self.daemon_dir, 'unit.run')
4160 with open(os.open(unit_run_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
4161 f.write(self.unit_run())
4162 os.rename(unit_run_path + '.new', unit_run_path)
4163
4164 meta: Dict[str, Any] = {}
4165 meta_file_path = os.path.join(self.daemon_dir, 'unit.meta')
4166 if 'meta_json' in self.ctx and self.ctx.meta_json:
4167 meta = json.loads(self.ctx.meta_json) or {}
4168 with open(os.open(meta_file_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
4169 f.write(json.dumps(meta, indent=4) + '\n')
4170 os.rename(meta_file_path + '.new', meta_file_path)
4171
4172 unit_file_path = os.path.join(self.ctx.unit_dir, self.unit_name())
4173 with open(os.open(unit_file_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
4174 f.write(self.unit_file())
4175 os.rename(unit_file_path + '.new', unit_file_path)
4176
4177 call_throws(self.ctx, ['systemctl', 'daemon-reload'])
4178 call(self.ctx, ['systemctl', 'stop', self.unit_name()],
4179 verbosity=CallVerbosity.DEBUG)
4180 call(self.ctx, ['systemctl', 'reset-failed', self.unit_name()],
4181 verbosity=CallVerbosity.DEBUG)
4182 call_throws(self.ctx, ['systemctl', 'enable', '--now', self.unit_name()])
4183
4184 def unit_name(self) -> str:
4185 return '{}.service'.format(get_unit_name(self.fsid, self.daemon_type, self.daemon_id))
4186
4187 def unit_run(self) -> str:
4188 py3 = shutil.which('python3')
4189 binary_path = os.path.realpath(sys.argv[0])
4190 return ('set -e\n' + f'{py3} {binary_path} agent --fsid {self.fsid} --daemon-id {self.daemon_id} &\n')
4191
4192 def unit_file(self) -> str:
4193 return """#generated by cephadm
4194[Unit]
4195Description=cephadm agent for cluster {fsid}
4196
4197PartOf=ceph-{fsid}.target
4198Before=ceph-{fsid}.target
4199
4200[Service]
4201Type=forking
4202ExecStart=/bin/bash {data_dir}/unit.run
4203Restart=on-failure
4204RestartSec=10s
4205
4206[Install]
4207WantedBy=ceph-{fsid}.target
4208""".format(
4209 fsid=self.fsid,
4210 data_dir=self.daemon_dir
4211 )
4212
4213 def shutdown(self) -> None:
4214 self.stop = True
4215 if self.mgr_listener.is_alive():
4216 self.mgr_listener.shutdown()
4217
4218 def wakeup(self) -> None:
4219 self.event.set()
4220
4221 def pull_conf_settings(self) -> None:
4222 try:
4223 with open(self.config_path, 'r') as f:
4224 config = json.load(f)
4225 self.target_ip = config['target_ip']
4226 self.target_port = config['target_port']
4227 self.loop_interval = int(config['refresh_period'])
4228 self.starting_port = int(config['listener_port'])
4229 self.host = config['host']
4230 use_lsm = config['device_enhanced_scan']
4231 except Exception as e:
4232 self.shutdown()
4233 raise Error(f'Failed to get agent target ip and port from config: {e}')
4234
4235 try:
4236 with open(self.keyring_path, 'r') as f:
4237 self.keyring = f.read()
4238 except Exception as e:
4239 self.shutdown()
4240 raise Error(f'Failed to get agent keyring: {e}')
4241
4242 assert self.target_ip and self.target_port
4243
4244 self.device_enhanced_scan = False
4245 if use_lsm.lower() == 'true':
4246 self.device_enhanced_scan = True
4247 self.volume_gatherer.update_func(lambda: self._ceph_volume(enhanced=self.device_enhanced_scan))
4248
4249 def run(self) -> None:
4250 self.pull_conf_settings()
4251
4252 try:
4253 for _ in range(1001):
4254 if not port_in_use(self.ctx, self.starting_port):
4255 self.listener_port = str(self.starting_port)
4256 break
4257 self.starting_port += 1
4258 if not self.listener_port:
4259 raise Error(f'All 1000 ports starting at {str(self.starting_port - 1001)} taken.')
4260 except Exception as e:
4261 raise Error(f'Failed to pick port for agent to listen on: {e}')
4262
4263 if not self.mgr_listener.is_alive():
4264 self.mgr_listener.start()
4265
4266 if not self.ls_gatherer.is_alive():
4267 self.ls_gatherer.start()
4268
4269 if not self.volume_gatherer.is_alive():
4270 self.volume_gatherer.start()
4271
4272 ssl_ctx = ssl.create_default_context()
4273 ssl_ctx.check_hostname = True
4274 ssl_ctx.verify_mode = ssl.CERT_REQUIRED
4275 ssl_ctx.load_verify_locations(self.ca_path)
4276
4277 while not self.stop:
4278 start_time = time.monotonic()
4279 ack = self.ack
4280
4281 # part of the networks info is returned as a set which is not JSON
4282 # serializable. The set must be converted to a list
4283 networks = list_networks(self.ctx)
4284 networks_list = {}
4285 for key in networks.keys():
4286 for k, v in networks[key].items():
4287 networks_list[key] = {k: list(v)}
4288
4289 data = json.dumps({'host': self.host,
4290 'ls': (self.ls_gatherer.data if self.ack == self.ls_gatherer.ack
4291 and self.ls_gatherer.data is not None else []),
4292 'networks': networks_list,
4293 'facts': HostFacts(self.ctx).dump(),
4294 'volume': (self.volume_gatherer.data if self.ack == self.volume_gatherer.ack
4295 and self.volume_gatherer.data is not None else ''),
4296 'ack': str(ack),
4297 'keyring': self.keyring,
4298 'port': self.listener_port})
4299 data = data.encode('ascii')
4300
4301 url = f'https://{self.target_ip}:{self.target_port}/data'
4302 try:
4303 req = Request(url, data, {'Content-Type': 'application/json'})
4304 send_time = time.monotonic()
4305 with urlopen(req, context=ssl_ctx) as response:
4306 response_str = response.read()
4307 response_json = json.loads(response_str)
4308 total_request_time = datetime.timedelta(seconds=(time.monotonic() - send_time)).total_seconds()
4309 logger.info(f'Received mgr response: "{response_json["result"]}" {total_request_time} seconds after sending request.')
4310 except Exception as e:
4311 logger.error(f'Failed to send metadata to mgr: {e}')
4312
4313 end_time = time.monotonic()
4314 run_time = datetime.timedelta(seconds=(end_time - start_time))
4315 self.recent_iteration_run_times[self.recent_iteration_index] = run_time.total_seconds()
4316 self.recent_iteration_index = (self.recent_iteration_index + 1) % 3
4317 run_time_average = sum(self.recent_iteration_run_times, 0.0) / len([t for t in self.recent_iteration_run_times if t])
4318
4319 self.event.wait(max(self.loop_interval - int(run_time_average), 0))
4320 self.event.clear()
4321
4322 def _ceph_volume(self, enhanced: bool = False) -> Tuple[str, bool]:
4323 self.ctx.command = 'inventory --format=json'.split()
4324 if enhanced:
4325 self.ctx.command.append('--with-lsm')
4326 self.ctx.fsid = self.fsid
4327
4328 stream = io.StringIO()
4329 with redirect_stdout(stream):
4330 command_ceph_volume(self.ctx)
4331
4332 stdout = stream.getvalue()
4333
4334 if stdout:
4335 return (stdout, False)
4336 else:
4337 raise Exception('ceph-volume returned empty value')
4338
4339 def _daemon_ls_subset(self) -> Dict[str, Dict[str, Any]]:
4340 # gets a subset of ls info quickly. The results of this will tell us if our
4341 # cached info is still good or if we need to run the full ls again.
4342 # for legacy containers, we just grab the full info. For cephadmv1 containers,
4343 # we only grab enabled, state, mem_usage and container id. If container id has
4344 # not changed for any daemon, we assume our cached info is good.
4345 daemons: Dict[str, Dict[str, Any]] = {}
4346 data_dir = self.ctx.data_dir
4347 seen_memusage = {} # type: Dict[str, int]
4348 out, err, code = call(
4349 self.ctx,
4350 [self.ctx.container_engine.path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
4351 verbosity=CallVerbosity.DEBUG
4352 )
4353 seen_memusage_cid_len, seen_memusage = _parse_mem_usage(code, out)
4354 # we need a mapping from container names to ids. Later we will convert daemon
4355 # names to container names to get each daemon's container id and see if it has changed
4356 out, err, code = call(
4357 self.ctx,
4358 [self.ctx.container_engine.path, 'ps', '--format', '{{.ID}},{{.Names}}', '--no-trunc'],
4359 verbosity=CallVerbosity.DEBUG
4360 )
4361 name_id_mapping: Dict[str, str] = self._parse_container_id_name(code, out)
4362 for i in os.listdir(data_dir):
4363 if i in ['mon', 'osd', 'mds', 'mgr']:
4364 daemon_type = i
4365 for j in os.listdir(os.path.join(data_dir, i)):
4366 if '-' not in j:
4367 continue
4368 (cluster, daemon_id) = j.split('-', 1)
4369 legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
4370 (enabled, state, _) = check_unit(self.ctx, legacy_unit_name)
4371 daemons[f'{daemon_type}.{daemon_id}'] = {
4372 'style': 'legacy',
4373 'name': '%s.%s' % (daemon_type, daemon_id),
4374 'fsid': self.ctx.fsid if self.ctx.fsid is not None else 'unknown',
4375 'systemd_unit': legacy_unit_name,
4376 'enabled': 'true' if enabled else 'false',
4377 'state': state,
4378 }
4379 elif is_fsid(i):
4380 fsid = str(i) # convince mypy that fsid is a str here
4381 for j in os.listdir(os.path.join(data_dir, i)):
4382 if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)):
4383 (daemon_type, daemon_id) = j.split('.', 1)
4384 unit_name = get_unit_name(fsid, daemon_type, daemon_id)
4385 (enabled, state, _) = check_unit(self.ctx, unit_name)
4386 daemons[j] = {
4387 'style': 'cephadm:v1',
4388 'systemd_unit': unit_name,
4389 'enabled': 'true' if enabled else 'false',
4390 'state': state,
4391 }
4392 c = CephContainer.for_daemon(self.ctx, self.ctx.fsid, daemon_type, daemon_id, 'bash')
4393 container_id: Optional[str] = None
4394 for name in (c.cname, c.old_cname):
4395 if name in name_id_mapping:
4396 container_id = name_id_mapping[name]
4397 break
4398 daemons[j]['container_id'] = container_id
4399 if container_id:
4400 daemons[j]['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len])
4401 return daemons
4402
4403 def _parse_container_id_name(self, code: int, out: str) -> Dict[str, str]:
4404 # map container names to ids from ps output
4405 name_id_mapping = {} # type: Dict[str, str]
4406 if not code:
4407 for line in out.splitlines():
4408 id, name = line.split(',')
4409 name_id_mapping[name] = id
4410 return name_id_mapping
4411
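# Added example (editorial): given `ps --format '{{.ID}},{{.Names}}'` output such
# as 'f2d9...,ceph-<fsid>-mon-a', this returns {'ceph-<fsid>-mon-a': 'f2d9...'},
# so a daemon's container id can be looked up by container name above.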
4412 def _get_ls(self) -> Tuple[List[Dict[str, str]], bool]:
4413 if not self.cached_ls_values:
4414 logger.info('No cached ls output. Running full daemon ls')
4415 ls = list_daemons(self.ctx)
4416 for d in ls:
4417 self.cached_ls_values[d['name']] = d
4418 return (ls, True)
4419 else:
4420 ls_subset = self._daemon_ls_subset()
4421 need_full_ls = False
4422 state_change = False
4423 if set(self.cached_ls_values.keys()) != set(ls_subset.keys()):
4424 # case for a new daemon in ls or an old daemon no longer appearing.
4425 # If that happens we need a full ls
4426 logger.info('Change detected in state of daemons. Running full daemon ls')
4427 ls = list_daemons(self.ctx)
4428 for d in ls:
4429 self.cached_ls_values[d['name']] = d
4430 return (ls, True)
4431 for daemon, info in self.cached_ls_values.items():
4432 if info['style'] == 'legacy':
4433 # for legacy containers, ls_subset just grabs all the info
4434 self.cached_ls_values[daemon] = ls_subset[daemon]
4435 else:
4436 if info['container_id'] != ls_subset[daemon]['container_id']:
4437 # case for container id having changed. We need full ls as
4438 # info we didn't grab like version and start time could have changed
4439 need_full_ls = True
4440 break
4441
4442 # we want to know if a daemon's state changed because in those cases we want
4443 # to report back more quickly
4444 if (
4445 self.cached_ls_values[daemon]['enabled'] != ls_subset[daemon]['enabled']
4446 or self.cached_ls_values[daemon]['state'] != ls_subset[daemon]['state']
4447 ):
4448 state_change = True
4449 # if we reach here, container id matched. Update the few values we do track
4450 # from ls subset: state, enabled, memory_usage.
4451 self.cached_ls_values[daemon]['enabled'] = ls_subset[daemon]['enabled']
4452 self.cached_ls_values[daemon]['state'] = ls_subset[daemon]['state']
4453 if 'memory_usage' in ls_subset[daemon]:
4454 self.cached_ls_values[daemon]['memory_usage'] = ls_subset[daemon]['memory_usage']
4455 if need_full_ls:
4456 logger.info('Change detected in state of daemons. Running full daemon ls')
4457 ls = list_daemons(self.ctx)
4458 for d in ls:
4459 self.cached_ls_values[d['name']] = d
4460 return (ls, True)
4461 else:
4462 ls = [info for daemon, info in self.cached_ls_values.items()]
4463 return (ls, state_change)
4464
4465
4466class AgentGatherer(Thread):
4467 def __init__(self, agent: 'CephadmAgent', func: Callable, gatherer_type: str = 'Unnamed', initial_ack: int = 0) -> None:
4468 self.agent = agent
4469 self.func = func
4470 self.gatherer_type = gatherer_type
4471 self.ack = initial_ack
4472 self.event = Event()
4473 self.data: Any = None
4474 self.stop = False
4475 self.recent_iteration_run_times: List[float] = [0.0, 0.0, 0.0]
4476 self.recent_iteration_index: int = 0
4477 super(AgentGatherer, self).__init__(target=self.run)
4478
4479 def run(self) -> None:
4480 while not self.stop:
4481 try:
4482 start_time = time.monotonic()
4483
4484 ack = self.agent.ack
4485 change = False
4486 try:
4487 self.data, change = self.func()
4488 except Exception as e:
4489 logger.error(f'{self.gatherer_type} Gatherer encountered exception gathering data: {e}')
4490 self.data = None
4491 if ack != self.ack or change:
4492 self.ack = ack
4493 self.agent.wakeup()
4494
4495 end_time = time.monotonic()
4496 run_time = datetime.timedelta(seconds=(end_time - start_time))
4497 self.recent_iteration_run_times[self.recent_iteration_index] = run_time.total_seconds()
4498 self.recent_iteration_index = (self.recent_iteration_index + 1) % 3
4499 run_time_average = sum(self.recent_iteration_run_times, 0.0) / len([t for t in self.recent_iteration_run_times if t])
4500
4501 self.event.wait(max(self.agent.loop_interval - int(run_time_average), 0))
4502 self.event.clear()
4503 except Exception as e:
4504 logger.error(f'{self.gatherer_type} Gatherer encountered exception: {e}')
4505
4506 def shutdown(self) -> None:
4507 self.stop = True
4508
4509 def wakeup(self) -> None:
4510 self.event.set()
4511
4512 def update_func(self, func: Callable) -> None:
4513 self.func = func
4514
4515
4516def command_agent(ctx: CephadmContext) -> None:
4517 agent = CephadmAgent(ctx, ctx.fsid, ctx.daemon_id)
4518
4519 if not os.path.isdir(agent.daemon_dir):
4520 raise Error(f'Agent daemon directory {agent.daemon_dir} does not exist. Perhaps agent was never deployed?')
4521
4522 agent.run()
4523
4524
4525##################################
4526
f6b5b4d7 4527
9f95a23c 4528@infer_image
4529def command_version(ctx):
4530 # type: (CephadmContext) -> int
4531 c = CephContainer(ctx, ctx.image, 'ceph', ['--version'])
4532 out, err, ret = call(ctx, c.run_cmd(), desc=c.entrypoint)
4533 if not ret:
4534 print(out.strip())
4535 return ret
4536
4537##################################
4538
f6b5b4d7 4539
33c7a0ef 4540@default_image
4541def command_pull(ctx):
4542 # type: (CephadmContext) -> int
f6b5b4d7 4543
4544 try:
4545 _pull_image(ctx, ctx.image, ctx.insecure)
4546 except UnauthorizedRegistryError:
4547 err_str = 'Failed to pull container image. Check that host(s) are logged into the registry'
4548 logger.debug(f'Pulling image for `command_pull` failed: {err_str}')
4549 raise Error(err_str)
f67539c2 4550 return command_inspect_image(ctx)
9f95a23c 4551
f6b5b4d7 4552
4553def _pull_image(ctx, image, insecure=False):
4554 # type: (CephadmContext, str, bool) -> None
4555 logger.info('Pulling container image %s...' % image)
4556
4557 ignorelist = [
4558 'error creating read-write layer with ID',
4559 'net/http: TLS handshake timeout',
4560 'Digest did not match, expected',
4561 ]
4562
f67539c2 4563 cmd = [ctx.container_engine.path, 'pull', image]
4564 if isinstance(ctx.container_engine, Podman):
4565 if insecure:
4566 cmd.append('--tls-verify=false')
4567
4568 if os.path.exists('/etc/ceph/podman-auth.json'):
4569 cmd.append('--authfile=/etc/ceph/podman-auth.json')
4570 cmd_str = ' '.join(cmd)
4571
4572 for sleep_secs in [1, 4, 25]:
2a845540 4573 out, err, ret = call(ctx, cmd, verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
4574 if not ret:
4575 return
4576
4577 if 'unauthorized' in err:
4578 raise UnauthorizedRegistryError()
4579
f6b5b4d7 4580 if not any(pattern in err for pattern in ignorelist):
a4b75251 4581 raise Error('Failed command: %s' % cmd_str)
f6b5b4d7 4582
f67539c2 4583 logger.info('`%s` failed transiently. Retrying. waiting %s seconds...' % (cmd_str, sleep_secs))
4584 time.sleep(sleep_secs)
4585
a4b75251 4586 raise Error('Failed command: %s: maximum retries reached' % cmd_str)
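# Added note (editorial): only failures matching the ignorelist above are treated
# as transient; those are retried after sleeping 1, 4 and 25 seconds before the
# final 'maximum retries reached' error is raised.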
f67539c2 4587
4588##################################
4589
f6b5b4d7 4590
9f95a23c 4591@infer_image
4592def command_inspect_image(ctx):
4593 # type: (CephadmContext) -> int
4594 out, err, ret = call_throws(ctx, [
4595 ctx.container_engine.path, 'inspect',
cd265ab1 4596 '--format', '{{.ID}},{{.RepoDigests}}',
f67539c2 4597 ctx.image])
4598 if ret:
4599 return errno.ENOENT
f67539c2 4600 info_from = get_image_info_from_inspect(out.strip(), ctx.image)
f91f0fd5 4601
f67539c2 4602 ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
4603 info_from['ceph_version'] = ver
4604
4605 print(json.dumps(info_from, indent=4, sort_keys=True))
4606 return 0
4607
4608
522d829b 4609def normalize_image_digest(digest: str) -> str:
4610 """
4611 Normal case:
4612 >>> normalize_image_digest('ceph/ceph', 'docker.io')
4613 'docker.io/ceph/ceph'
4614
4615 No change:
4616 >>> normalize_image_digest('quay.ceph.io/ceph/ceph', 'docker.io')
4617 'quay.ceph.io/ceph/ceph'
4618
4619 >>> normalize_image_digest('docker.io/ubuntu', 'docker.io')
4620 'docker.io/ubuntu'
4621
4622 >>> normalize_image_digest('localhost/ceph', 'docker.io')
4623 'localhost/ceph'
4624 """
4625 known_shortnames = [
4626 'ceph/ceph',
4627 'ceph/daemon',
4628 'ceph/daemon-base',
4629 ]
4630 for image in known_shortnames:
4631 if digest.startswith(image):
4632 return f'{DEFAULT_REGISTRY}/{digest}'
4633 return digest
4634
4635
f91f0fd5 4636def get_image_info_from_inspect(out, image):
f67539c2 4637 # type: (str, str) -> Dict[str, Union[str,List[str]]]
4638 image_id, digests = out.split(',', 1)
4639 if not out:
4640 raise Error('inspect {}: empty result'.format(image))
9f95a23c 4641 r = {
f91f0fd5 4642 'image_id': normalize_container_id(image_id)
f67539c2 4643 } # type: Dict[str, Union[str,List[str]]]
f91f0fd5 4644 if digests:
20effc67 4645 r['repo_digests'] = list(map(normalize_image_digest, digests[1: -1].split(' ')))
4646 return r
4647
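# Added example (hypothetical values, editorial): for inspect output
#   'sha256:9becf...,[quay.io/ceph/ceph@sha256:2b0ab...]'
# this returns roughly
#   {'image_id': <normalized id>, 'repo_digests': ['quay.io/ceph/ceph@sha256:2b0ab...']}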
4648##################################
4649
f91f0fd5 4650
4651def check_subnet(subnets: str) -> Tuple[int, List[int], str]:
4652 """Determine whether the given string is a valid subnet
4653
4654 :param subnets: subnet string, a single definition or comma separated list of CIDR subnets
4655 :returns: return code, IP version list of the subnets, and a msg describing any validation errors
4656 """
4657
4658 rc = 0
4659 versions = set()
4660 errors = []
4661 subnet_list = subnets.split(',')
4662 for subnet in subnet_list:
4663 # ensure the format of the string is as expected address/netmask
33c7a0ef 4664 subnet = subnet.strip()
4665 if not re.search(r'\/\d+$', subnet):
4666 rc = 1
4667 errors.append(f'{subnet} is not in CIDR format (address/netmask)')
4668 continue
4669 try:
4670 v = ipaddress.ip_network(subnet).version
4671 versions.add(v)
4672 except ValueError as e:
4673 rc = 1
4674 errors.append(f'{subnet} invalid: {str(e)}')
4675
4676 return rc, list(versions), ', '.join(errors)
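
# Illustrative examples of check_subnet() return values (a sketch; the order
# of the IP-version list comes from set iteration and is not guaranteed):
#
#   check_subnet('192.168.0.0/24')         -> (0, [4], '')
#   check_subnet('10.0.0.0/8, fd00::/64')  -> (0, [4, 6], '')
#   check_subnet('10.0.0.1')               -> (1, [], '10.0.0.1 is not in CIDR format (address/netmask)')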


def unwrap_ipv6(address):
    # type: (str) -> str
    if address.startswith('[') and address.endswith(']'):
        return address[1:-1]
    return address


def wrap_ipv6(address):
    # type: (str) -> str

    # We cannot assume this is an unwrapped IPv6 address: if it is already
    # wrapped, or is not an IPv6 address at all (e.g. a hostname), parsing
    # will raise ValueError and we return the input unchanged.
    try:
        if ipaddress.ip_address(address).version == 6:
            return f'[{address}]'
    except ValueError:
        pass

    return address
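
# A quick sketch of how the wrapping helpers behave:
#
#   wrap_ipv6('fe80::1')      -> '[fe80::1]'
#   wrap_ipv6('10.1.2.3')     -> '10.1.2.3'   (not IPv6, unchanged)
#   wrap_ipv6('myhost')       -> 'myhost'     (not an IP, unchanged)
#   unwrap_ipv6('[fe80::1]')  -> 'fe80::1'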


def is_ipv6(address):
    # type: (str) -> bool
    address = unwrap_ipv6(address)
    try:
        return ipaddress.ip_address(address).version == 6
    except ValueError:
        logger.warning('Address: {} is not a valid IP address'.format(address))
        return False


def ip_in_subnets(ip_addr: str, subnets: str) -> bool:
    """Determine if the ip_addr belongs to any of the subnets list."""
    subnet_list = [x.strip() for x in subnets.split(',')]
    for subnet in subnet_list:
        ip_address = unwrap_ipv6(ip_addr) if is_ipv6(ip_addr) else ip_addr
        if ipaddress.ip_address(ip_address) in ipaddress.ip_network(subnet):
            return True
    return False
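
# For example (illustrative values):
#
#   ip_in_subnets('192.168.100.1', '10.0.0.0/8,192.168.0.0/16')  -> True
#   ip_in_subnets('172.16.0.5', '10.0.0.0/8,192.168.0.0/16')     -> False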


def parse_mon_addrv(addrv_arg: str) -> List[EndPoint]:
    """Parse mon-addrv param into a list of mon end points."""
    r = re.compile(r':(\d+)$')
    addrv_args = []
    addr_arg = addrv_arg
    if addr_arg[0] != '[' or addr_arg[-1] != ']':
        raise Error(f'--mon-addrv value {addr_arg} must use square brackets')

    for addr in addr_arg[1:-1].split(','):
        hasport = r.findall(addr)
        if not hasport:
            raise Error(f'--mon-addrv value {addr_arg} must include port number')
        port_str = hasport[0]
        addr = re.sub(r'^v\d+:', '', addr)  # strip off v1: or v2: prefix
        base_ip = addr[0:-(len(port_str)) - 1]
        addrv_args.append(EndPoint(base_ip, int(port_str)))

    return addrv_args
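
# The expected --mon-addrv form is a bracketed, comma-separated list with
# explicit ports, optionally prefixed with the messenger version, e.g.
# (illustrative):
#
#   '[v2:192.168.100.1:3300,v1:192.168.100.1:6789]'
#
# which parses into EndPoint('192.168.100.1', 3300) and
# EndPoint('192.168.100.1', 6789).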


def parse_mon_ip(mon_ip: str) -> List[EndPoint]:
    """Parse mon-ip param into a list of mon end points."""
    r = re.compile(r':(\d+)$')
    addrv_args = []
    hasport = r.findall(mon_ip)
    if hasport:
        port_str = hasport[0]
        base_ip = mon_ip[0:-(len(port_str)) - 1]
        addrv_args.append(EndPoint(base_ip, int(port_str)))
    else:
        # No port provided: use fixed ports for ceph monitor
        addrv_args.append(EndPoint(mon_ip, 3300))
        addrv_args.append(EndPoint(mon_ip, 6789))

    return addrv_args


def build_addrv_params(addrv: List[EndPoint]) -> str:
    """Convert mon end-points (ip:port) into the format: [v[1|2]:ip:port,...]"""
    if len(addrv) > 2:
        raise Error('Detected a local mon-addrv list with more than 2 entries.')
    port_to_ver: Dict[int, str] = {6789: 'v1', 3300: 'v2'}
    addr_arg_list: List[str] = []
    for ep in addrv:
        if ep.port in port_to_ver:
            ver = port_to_ver[ep.port]
        else:
            ver = 'v2'  # default mon protocol version if port is not provided
            logger.warning(f'Using msgr2 protocol for unrecognized port {ep}')
        addr_arg_list.append(f'{ver}:{ep.ip}:{ep.port}')

    addr_arg = '[{0}]'.format(','.join(addr_arg_list))
    return addr_arg
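
# Putting the two helpers together (illustrative round trip):
#
#   build_addrv_params(parse_mon_ip('192.168.100.1'))
#       -> '[v2:192.168.100.1:3300,v1:192.168.100.1:6789]'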


def get_public_net_from_cfg(ctx: CephadmContext) -> Optional[str]:
    """Get mon public network from configuration file."""
    cp = read_config(ctx.config)
    if not cp.has_option('global', 'public_network'):
        return None

    # Ensure all public CIDR networks are valid
    public_network = cp.get('global', 'public_network')
    rc, _, err_msg = check_subnet(public_network)
    if rc:
        raise Error(f'Invalid public_network {public_network} parameter: {err_msg}')

    # Ensure all public CIDR networks are configured locally
    configured_subnets = set([x.strip() for x in public_network.split(',')])
    local_subnets = set([x[0] for x in list_networks(ctx).items()])
    valid_public_net = False
    for net in configured_subnets:
        if net in local_subnets:
            valid_public_net = True
        else:
            logger.warning(f'The public CIDR network {net} (from -c conf file) is not configured locally.')
    if not valid_public_net:
        raise Error(f'None of the public CIDR network(s) {configured_subnets} (from -c conf file) is configured locally.')

    # Ensure public_network is compatible with the provided mon-ip (or mon-addrv)
    if ctx.mon_ip:
        if not ip_in_subnets(ctx.mon_ip, public_network):
            raise Error(f'The provided --mon-ip {ctx.mon_ip} does not belong to any public_network(s) {public_network}')
    elif ctx.mon_addrv:
        addrv_args = parse_mon_addrv(ctx.mon_addrv)
        for addrv in addrv_args:
            if not ip_in_subnets(addrv.ip, public_network):
                raise Error(f'The provided --mon-addrv {addrv.ip} ip does not belong to any public_network(s) {public_network}')

    logger.debug(f'Using mon public network from configuration file {public_network}')
    return public_network
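
# The expected ceph.conf input looks roughly like this (illustrative values):
#
#   [global]
#   public_network = 192.168.100.0/24,fd00:100::/64
#
# Every listed CIDR must be valid, at least one must be configured on the
# local host, and the given --mon-ip/--mon-addrv must fall inside the list,
# otherwise an Error is raised.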


def infer_mon_network(ctx: CephadmContext, mon_eps: List[EndPoint]) -> Optional[str]:
    """Infer mon public network from local network."""
    # Make sure IP is configured locally, and then figure out the CIDR network
    mon_networks = []
    for net, ifaces in list_networks(ctx).items():
        # build local_ips list for the specified network
        local_ips: List[Union[ipaddress.IPv4Address, ipaddress.IPv6Address]] = []
        for _, ls in ifaces.items():
            local_ips.extend([ipaddress.ip_address(ip) for ip in ls])

        # check if any of the mon IPs belong to this net
        for mon_ep in mon_eps:
            try:
                if ipaddress.ip_address(unwrap_ipv6(mon_ep.ip)) in local_ips:
                    mon_networks.append(net)
                    logger.info(f'Mon IP `{mon_ep.ip}` is in CIDR network `{net}`')
            except ValueError as e:
                logger.warning(f'Cannot infer CIDR network for mon IP `{mon_ep.ip}` : {e}')

    if not mon_networks:
        raise Error('Cannot infer CIDR network. Pass --skip-mon-network to configure it later')
    else:
        logger.debug(f'Inferred mon public CIDR from local network configuration {mon_networks}')

    mon_networks = list(set(mon_networks))  # remove duplicates
    return ','.join(mon_networks)


def prepare_mon_addresses(ctx: CephadmContext) -> Tuple[str, bool, Optional[str]]:
    """Get mon public network configuration."""
    ipv6 = False
    addrv_args: List[EndPoint] = []
    mon_addrv: str = ''  # i.e: [v2:192.168.100.1:3300,v1:192.168.100.1:6789]

    if ctx.mon_ip:
        ipv6 = is_ipv6(ctx.mon_ip)
        if ipv6:
            ctx.mon_ip = wrap_ipv6(ctx.mon_ip)
        addrv_args = parse_mon_ip(ctx.mon_ip)
        mon_addrv = build_addrv_params(addrv_args)
    elif ctx.mon_addrv:
        ipv6 = ctx.mon_addrv.count('[') > 1
        addrv_args = parse_mon_addrv(ctx.mon_addrv)
        mon_addrv = ctx.mon_addrv
    else:
        raise Error('must specify --mon-ip or --mon-addrv')

    if addrv_args:
        for end_point in addrv_args:
            check_ip_port(ctx, end_point)

    logger.debug(f'Base mon IP(s) is {addrv_args}, mon addrv is {mon_addrv}')
    mon_network = None
    if not ctx.skip_mon_network:
        mon_network = get_public_net_from_cfg(ctx) or infer_mon_network(ctx, addrv_args)

    return (mon_addrv, ipv6, mon_network)


def prepare_cluster_network(ctx: CephadmContext) -> Tuple[str, bool]:
    # the cluster network may not exist on this node, so all we can do is
    # validate that the address given is a valid ipv4 or ipv6 subnet
    ipv6_cluster_network = False
    cp = read_config(ctx.config)
    cluster_network = ctx.cluster_network
    if cluster_network is None and cp.has_option('global', 'cluster_network'):
        cluster_network = cp.get('global', 'cluster_network')

    if cluster_network:
        cluster_nets = set([x.strip() for x in cluster_network.split(',')])
        local_subnets = set([x[0] for x in list_networks(ctx).items()])
        for net in cluster_nets:
            if net not in local_subnets:
                logger.warning(f'The cluster CIDR network {net} is not configured locally.')

        rc, versions, err_msg = check_subnet(cluster_network)
        if rc:
            raise Error(f'Invalid --cluster-network parameter: {err_msg}')
        ipv6_cluster_network = True if 6 in versions else False
    else:
        logger.info('Internal network (--cluster-network) has not '
                    'been provided, OSD replication will default to '
                    'the public_network')

    return cluster_network, ipv6_cluster_network


def create_initial_keys(
    ctx: CephadmContext,
    uid: int, gid: int,
    mgr_id: str
) -> Tuple[str, str, str, Any, Any]:  # type: ignore

    _image = ctx.image

    # create some initial keys
    logger.info('Creating initial keys...')
    mon_key = CephContainer(
        ctx,
        image=_image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()
    admin_key = CephContainer(
        ctx,
        image=_image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()
    mgr_key = CephContainer(
        ctx,
        image=_image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()

    keyring = ('[mon.]\n'
               '\tkey = %s\n'
               '\tcaps mon = allow *\n'
               '[client.admin]\n'
               '\tkey = %s\n'
               '\tcaps mon = allow *\n'
               '\tcaps mds = allow *\n'
               '\tcaps mgr = allow *\n'
               '\tcaps osd = allow *\n'
               '[mgr.%s]\n'
               '\tkey = %s\n'
               '\tcaps mon = profile mgr\n'
               '\tcaps mds = allow *\n'
               '\tcaps osd = allow *\n'
               % (mon_key, admin_key, mgr_id, mgr_key))

    admin_keyring = write_tmp('[client.admin]\n'
                              '\tkey = ' + admin_key + '\n',
                              uid, gid)

    # tmp keyring file
    bootstrap_keyring = write_tmp(keyring, uid, gid)
    return (mon_key, mgr_key, admin_key,
            bootstrap_keyring, admin_keyring)


def create_initial_monmap(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str,
    mon_id: str, mon_addr: str
) -> Any:
    logger.info('Creating initial monmap...')
    monmap = write_tmp('', 0, 0)
    out = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/monmaptool',
        args=[
            '--create',
            '--clobber',
            '--fsid', fsid,
            '--addv', mon_id, mon_addr,
            '/tmp/monmap'
        ],
        volume_mounts={
            monmap.name: '/tmp/monmap:z',
        },
    ).run()
    logger.debug(f'monmaptool for {mon_id} {mon_addr} on {out}')

    # pass monmap file to ceph user for use by ceph-mon --mkfs below
    os.fchown(monmap.fileno(), uid, gid)
    return monmap


def prepare_create_mon(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mon_id: str,
    bootstrap_keyring_path: str,
    monmap_path: str
) -> Tuple[str, str]:
    logger.info('Creating mon...')
    create_daemon_dirs(ctx, fsid, 'mon', mon_id, uid, gid)
    mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', mon_id)
    log_dir = get_log_dir(fsid, ctx.log_dir)
    out = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/ceph-mon',
        args=[
            '--mkfs',
            '-i', mon_id,
            '--fsid', fsid,
            '-c', '/dev/null',
            '--monmap', '/tmp/monmap',
            '--keyring', '/tmp/keyring',
        ] + get_daemon_args(ctx, fsid, 'mon', mon_id),
        volume_mounts={
            log_dir: '/var/log/ceph:z',
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
            bootstrap_keyring_path: '/tmp/keyring:z',
            monmap_path: '/tmp/monmap:z',
        },
    ).run()
    logger.debug(f'create mon.{mon_id} on {out}')
    return (mon_dir, log_dir)


def create_mon(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mon_id: str
) -> None:
    mon_c = get_container(ctx, fsid, 'mon', mon_id)
    ctx.meta_json = json.dumps({'service_name': 'mon'})
    deploy_daemon(ctx, fsid, 'mon', mon_id, mon_c, uid, gid,
                  config=None, keyring=None)


def wait_for_mon(
    ctx: CephadmContext,
    mon_id: str, mon_dir: str,
    admin_keyring_path: str, config_path: str
) -> None:
    logger.info('Waiting for mon to start...')
    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/ceph',
        args=[
            'status'],
        volume_mounts={
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
            admin_keyring_path: '/etc/ceph/ceph.client.admin.keyring:z',
            config_path: '/etc/ceph/ceph.conf:z',
        },
    )

    # wait for the service to become available
    def is_mon_available():
        # type: () -> bool
        timeout = ctx.timeout if ctx.timeout else 60  # seconds
        out, err, ret = call(ctx, c.run_cmd(),
                             desc=c.entrypoint,
                             timeout=timeout,
                             verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
        return ret == 0

    is_available(ctx, 'mon', is_mon_available)


def create_mgr(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mgr_id: str, mgr_key: str,
    config: str, clifunc: Callable
) -> None:
    logger.info('Creating mgr...')
    mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key)
    mgr_c = get_container(ctx, fsid, 'mgr', mgr_id)
    # Note: the default port used by the Prometheus node exporter is opened in fw
    ctx.meta_json = json.dumps({'service_name': 'mgr'})
    deploy_daemon(ctx, fsid, 'mgr', mgr_id, mgr_c, uid, gid,
                  config=config, keyring=mgr_keyring, ports=[9283])

    # wait for the service to become available
    logger.info('Waiting for mgr to start...')

    def is_mgr_available():
        # type: () -> bool
        timeout = ctx.timeout if ctx.timeout else 60  # seconds
        try:
            out = clifunc(['status', '-f', 'json-pretty'],
                          timeout=timeout,
                          verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
            j = json.loads(out)
            return j.get('mgrmap', {}).get('available', False)
        except Exception as e:
            logger.debug('status failed: %s' % e)
            return False
    is_available(ctx, 'mgr', is_mgr_available)


def prepare_ssh(
    ctx: CephadmContext,
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:

    cli(['cephadm', 'set-user', ctx.ssh_user])

    if ctx.ssh_config:
        logger.info('Using provided ssh config...')
        mounts = {
            pathify(ctx.ssh_config.name): '/tmp/cephadm-ssh-config:z',
        }
        cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts=mounts)

    if ctx.ssh_private_key and ctx.ssh_public_key:
        logger.info('Using provided ssh keys...')
        mounts = {
            pathify(ctx.ssh_private_key.name): '/tmp/cephadm-ssh-key:z',
            pathify(ctx.ssh_public_key.name): '/tmp/cephadm-ssh-key.pub:z'
        }
        cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts)
        cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts=mounts)
        ssh_pub = cli(['cephadm', 'get-pub-key'])
    else:
        logger.info('Generating ssh key...')
        cli(['cephadm', 'generate-key'])
        ssh_pub = cli(['cephadm', 'get-pub-key'])
        with open(ctx.output_pub_ssh_key, 'w') as f:
            f.write(ssh_pub)
        logger.info('Wrote public SSH key to %s' % ctx.output_pub_ssh_key)

    authorize_ssh_key(ssh_pub, ctx.ssh_user)

    host = get_hostname()
    logger.info('Adding host %s...' % host)
    try:
        args = ['orch', 'host', 'add', host]
        if ctx.mon_ip:
            args.append(unwrap_ipv6(ctx.mon_ip))
        elif ctx.mon_addrv:
            addrv_args = parse_mon_addrv(ctx.mon_addrv)
            args.append(unwrap_ipv6(addrv_args[0].ip))
        cli(args)
    except RuntimeError as e:
        raise Error('Failed to add host <%s>: %s' % (host, e))

    for t in ['mon', 'mgr']:
        if not ctx.orphan_initial_daemons:
            logger.info('Deploying %s service with default placement...' % t)
            cli(['orch', 'apply', t])
        else:
            logger.info('Deploying unmanaged %s service...' % t)
            cli(['orch', 'apply', t, '--unmanaged'])

    if not ctx.orphan_initial_daemons:
        logger.info('Deploying crash service with default placement...')
        cli(['orch', 'apply', 'crash'])

    if not ctx.skip_monitoring_stack:
        for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
            logger.info('Deploying %s service with default placement...' % t)
            cli(['orch', 'apply', t])

    if ctx.with_centralized_logging:
        for t in ['loki', 'promtail']:
            logger.info('Deploying %s service with default placement...' % t)
            cli(['orch', 'apply', t])


def enable_cephadm_mgr_module(
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:

    logger.info('Enabling cephadm module...')
    cli(['mgr', 'module', 'enable', 'cephadm'])
    wait_for_mgr_restart()
    logger.info('Setting orchestrator backend to cephadm...')
    cli(['orch', 'set', 'backend', 'cephadm'])


def prepare_dashboard(
    ctx: CephadmContext,
    uid: int, gid: int,
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:

    # Configure SSL port (cephadm only allows configuring the dashboard SSL port);
    # if the user does not want to use SSL they can change this setting once the cluster is up
    cli(['config', 'set', 'mgr', 'mgr/dashboard/ssl_server_port', str(ctx.ssl_dashboard_port)])

    # configuring dashboard parameters
    logger.info('Enabling the dashboard module...')
    cli(['mgr', 'module', 'enable', 'dashboard'])
    wait_for_mgr_restart()

    # dashboard crt and key
    if ctx.dashboard_key and ctx.dashboard_crt:
        logger.info('Using provided dashboard certificate...')
        mounts = {
            pathify(ctx.dashboard_crt.name): '/tmp/dashboard.crt:z',
            pathify(ctx.dashboard_key.name): '/tmp/dashboard.key:z'
        }
        cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts)
        cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts)
    else:
        logger.info('Generating a dashboard self-signed certificate...')
        cli(['dashboard', 'create-self-signed-cert'])

    logger.info('Creating initial admin user...')
    password = ctx.initial_dashboard_password or generate_password()
    tmp_password_file = write_tmp(password, uid, gid)
    cmd = ['dashboard', 'ac-user-create', ctx.initial_dashboard_user, '-i', '/tmp/dashboard.pw', 'administrator', '--force-password']
    if not ctx.dashboard_password_noupdate:
        cmd.append('--pwd-update-required')
    cli(cmd, extra_mounts={pathify(tmp_password_file.name): '/tmp/dashboard.pw:z'})
    logger.info('Fetching dashboard port number...')
    out = cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
    port = int(out)

    # Open dashboard port
    if not ('skip_firewalld' in ctx and ctx.skip_firewalld):
        fw = Firewalld(ctx)
        fw.open_ports([port])
        fw.apply_rules()

    logger.info('Ceph Dashboard is now available at:\n\n'
                '\t     URL: https://%s:%s/\n'
                '\t    User: %s\n'
                '\tPassword: %s\n' % (
                    get_fqdn(), port,
                    ctx.initial_dashboard_user,
                    password))


def prepare_bootstrap_config(
    ctx: CephadmContext,
    fsid: str, mon_addr: str, image: str
) -> str:

    cp = read_config(ctx.config)
    if not cp.has_section('global'):
        cp.add_section('global')
    cp.set('global', 'fsid', fsid)
    cp.set('global', 'mon_host', mon_addr)
    cp.set('global', 'container_image', image)

    if not cp.has_section('mon'):
        cp.add_section('mon')
    if (
        not cp.has_option('mon', 'auth_allow_insecure_global_id_reclaim')
        and not cp.has_option('mon', 'auth allow insecure global id reclaim')
    ):
        cp.set('mon', 'auth_allow_insecure_global_id_reclaim', 'false')

    if ctx.single_host_defaults:
        logger.info('Adjusting default settings to suit single-host cluster...')
        # replicate across osds, not hosts
        if (
            not cp.has_option('global', 'osd_crush_chooseleaf_type')
            and not cp.has_option('global', 'osd crush chooseleaf type')
        ):
            cp.set('global', 'osd_crush_chooseleaf_type', '0')
        # replica 2x
        if (
            not cp.has_option('global', 'osd_pool_default_size')
            and not cp.has_option('global', 'osd pool default size')
        ):
            cp.set('global', 'osd_pool_default_size', '2')
        # disable mgr standby modules (so we can colocate multiple mgrs on one host)
        if not cp.has_section('mgr'):
            cp.add_section('mgr')
        if (
            not cp.has_option('mgr', 'mgr_standby_modules')
            and not cp.has_option('mgr', 'mgr standby modules')
        ):
            cp.set('mgr', 'mgr_standby_modules', 'false')
    if ctx.log_to_file:
        cp.set('global', 'log_to_file', 'true')
        cp.set('global', 'log_to_stderr', 'false')
        cp.set('global', 'log_to_journald', 'false')
        cp.set('global', 'mon_cluster_log_to_file', 'true')
        cp.set('global', 'mon_cluster_log_to_stderr', 'false')
        cp.set('global', 'mon_cluster_log_to_journald', 'false')

    cpf = StringIO()
    cp.write(cpf)
    config = cpf.getvalue()

    if ctx.registry_json or ctx.registry_url:
        command_registry_login(ctx)

    return config


def finish_bootstrap_config(
    ctx: CephadmContext,
    fsid: str,
    config: str,
    mon_id: str, mon_dir: str,
    mon_network: Optional[str], ipv6: bool,
    cli: Callable,
    cluster_network: Optional[str], ipv6_cluster_network: bool
) -> None:
    if not ctx.no_minimize_config:
        logger.info('Assimilating anything we can from ceph.conf...')
        cli([
            'config', 'assimilate-conf',
            '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
        ], {
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
        })
        logger.info('Generating new minimal ceph.conf...')
        cli([
            'config', 'generate-minimal-conf',
            '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
        ], {
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
        })
        # re-read our minimized config
        with open(mon_dir + '/config', 'r') as f:
            config = f.read()
        logger.info('Restarting the monitor...')
        call_throws(ctx, [
            'systemctl',
            'restart',
            get_unit_name(fsid, 'mon', mon_id)
        ])
    elif 'image' in ctx and ctx.image:
        # we still want to assimilate the given container image if provided
        cli(['config', 'set', 'global', 'container_image', f'{ctx.image}'])

    if mon_network:
        logger.info(f'Setting mon public_network to {mon_network}')
        cli(['config', 'set', 'mon', 'public_network', mon_network])

    if cluster_network:
        logger.info(f'Setting cluster_network to {cluster_network}')
        cli(['config', 'set', 'global', 'cluster_network', cluster_network])

    if ipv6 or ipv6_cluster_network:
        logger.info('Enabling IPv6 (ms_bind_ipv6) binding')
        cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])

    with open(ctx.output_config, 'w') as f:
        f.write(config)
    logger.info('Wrote config to %s' % ctx.output_config)


# funcs to process spec file for apply spec
def _parse_yaml_docs(f: Iterable[str]) -> List[List[str]]:
    docs = []
    current_doc = []  # type: List[str]
    for line in f:
        if re.search(r'^---\s+', line):
            if current_doc:
                docs.append(current_doc)
            current_doc = []
        else:
            current_doc.append(line.rstrip())
    if current_doc:
        docs.append(current_doc)
    return docs


def _parse_yaml_obj(doc: List[str]) -> Dict[str, str]:
    # note: this only parses the first layer of yaml
    obj = {}  # type: Dict[str, str]
    current_key = ''
    for line in doc:
        if line.startswith(' '):
            obj[current_key] += line.strip()
        elif line.endswith(':'):
            current_key = line.strip(':')
            obj[current_key] = ''
        else:
            current_key, val = line.split(':')
            obj[current_key] = val.strip()
    return obj


def parse_yaml_objs(f: Iterable[str]) -> List[Dict[str, str]]:
    objs = []
    for d in _parse_yaml_docs(f):
        objs.append(_parse_yaml_obj(d))
    return objs
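
# A rough sketch of the (intentionally shallow) parsing these helpers do,
# using an illustrative host spec document:
#
#   ---
#   service_type: host
#   hostname: node-01
#   addr: 192.168.100.11
#
# parse_yaml_objs() would yield roughly:
#
#   [{'service_type': 'host', 'hostname': 'node-01', 'addr': '192.168.100.11'}]
#
# Nested values are flattened onto their parent key, which is why callers
# only rely on top-level scalar keys such as service_type and hostname.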


def _distribute_ssh_keys(ctx: CephadmContext, host_spec: Dict[str, str], bootstrap_hostname: str) -> int:
    # copy ssh key to hosts in host spec (used for apply spec)
    ssh_key = CEPH_DEFAULT_PUBKEY
    if ctx.ssh_public_key:
        ssh_key = ctx.ssh_public_key.name

    if bootstrap_hostname != host_spec['hostname']:
        if 'addr' in host_spec:
            addr = host_spec['addr']
        else:
            addr = host_spec['hostname']
        out, err, code = call(ctx, ['sudo', '-u', ctx.ssh_user, 'ssh-copy-id', '-f', '-i', ssh_key, '-o StrictHostKeyChecking=no', '%s@%s' % (ctx.ssh_user, addr)])
        if code:
            logger.info('\nCopying ssh key to host %s at address %s failed!\n' % (host_spec['hostname'], addr))
            return 1
        else:
            logger.info('Added ssh key to host %s at address %s\n' % (host_spec['hostname'], addr))
    return 0


def save_cluster_config(ctx: CephadmContext, uid: int, gid: int, fsid: str) -> None:
    """Save cluster configuration to the per-fsid directory."""
    def copy_file(src: str, dst: str) -> None:
        if src:
            shutil.copyfile(src, dst)

    conf_dir = f'{ctx.data_dir}/{fsid}/{CEPH_CONF_DIR}'
    makedirs(conf_dir, uid, gid, DATA_DIR_MODE)
    if os.path.exists(conf_dir):
        logger.info(f'Saving cluster configuration to {conf_dir} directory')
        copy_file(ctx.output_config, os.path.join(conf_dir, CEPH_CONF))
        copy_file(ctx.output_keyring, os.path.join(conf_dir, CEPH_KEYRING))
        # ctx.output_pub_ssh_key may not exist if user has provided custom ssh keys
        if os.path.exists(ctx.output_pub_ssh_key):
            copy_file(ctx.output_pub_ssh_key, os.path.join(conf_dir, CEPH_PUBKEY))
    else:
        logger.warning(f'Cannot create cluster configuration directory {conf_dir}')
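
# With the default DATA_DIR this typically results in a layout like
# (illustrative fsid):
#
#   /var/lib/ceph/<fsid>/config/ceph.conf
#   /var/lib/ceph/<fsid>/config/ceph.client.admin.keyring
#   /var/lib/ceph/<fsid>/config/ceph.pub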


@default_image
def command_bootstrap(ctx):
    # type: (CephadmContext) -> int

    ctx.error_code = 0

    if not ctx.output_config:
        ctx.output_config = os.path.join(ctx.output_dir, CEPH_CONF)
    if not ctx.output_keyring:
        ctx.output_keyring = os.path.join(ctx.output_dir, CEPH_KEYRING)
    if not ctx.output_pub_ssh_key:
        ctx.output_pub_ssh_key = os.path.join(ctx.output_dir, CEPH_PUBKEY)

    if bool(ctx.ssh_private_key) is not bool(ctx.ssh_public_key):
        raise Error('--ssh-private-key and --ssh-public-key must be provided together or not at all.')

    if ctx.fsid:
        data_dir_base = os.path.join(ctx.data_dir, ctx.fsid)
        if os.path.exists(data_dir_base):
            raise Error(f"A cluster with the same fsid '{ctx.fsid}' already exists.")
        else:
            logger.warning('Specifying an fsid for your cluster offers no advantages and may increase the likelihood of fsid conflicts.')

    # verify output files
    for f in [ctx.output_config, ctx.output_keyring,
              ctx.output_pub_ssh_key]:
        if not ctx.allow_overwrite:
            if os.path.exists(f):
                raise Error('%s already exists; delete or pass '
                            '--allow-overwrite to overwrite' % f)
        dirname = os.path.dirname(f)
        if dirname and not os.path.exists(dirname):
            fname = os.path.basename(f)
            logger.info(f'Creating directory {dirname} for {fname}')
            try:
                # use makedirs to create intermediate missing dirs
                os.makedirs(dirname, 0o755)
            except PermissionError:
                raise Error(f'Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.')

    (user_conf, _) = get_config_and_keyring(ctx)

    if ctx.ssh_user != 'root':
        check_ssh_connectivity(ctx)

    if not ctx.skip_prepare_host:
        command_prepare_host(ctx)
    else:
        logger.info('Skip prepare_host')

    # initial vars
    fsid = ctx.fsid or make_fsid()
    if not is_fsid(fsid):
        raise Error('not an fsid: %s' % fsid)
    logger.info('Cluster fsid: %s' % fsid)

    hostname = get_hostname()
    if '.' in hostname and not ctx.allow_fqdn_hostname:
        raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0]))
    mon_id = ctx.mon_id or hostname
    mgr_id = ctx.mgr_id or generate_service_id()

    lock = FileLock(ctx, fsid)
    lock.acquire()

    (addr_arg, ipv6, mon_network) = prepare_mon_addresses(ctx)
    cluster_network, ipv6_cluster_network = prepare_cluster_network(ctx)

    config = prepare_bootstrap_config(ctx, fsid, addr_arg, ctx.image)

    if not ctx.skip_pull:
        try:
            _pull_image(ctx, ctx.image)
        except UnauthorizedRegistryError:
            err_str = 'Failed to pull container image. Check that correct registry credentials are provided in bootstrap by --registry-url, --registry-username, --registry-password, or supply --registry-json with credentials'
            logger.debug(f'Pulling image for bootstrap on {hostname} failed: {err_str}')
            raise Error(err_str)

    image_ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
    logger.info(f'Ceph version: {image_ver}')

    if not ctx.allow_mismatched_release:
        image_release = image_ver.split()[4]
        if image_release not in \
                [DEFAULT_IMAGE_RELEASE, LATEST_STABLE_RELEASE]:
            raise Error(
                f'Container release {image_release} != cephadm release {DEFAULT_IMAGE_RELEASE};'
                ' please use matching version of cephadm (pass --allow-mismatched-release to continue anyway)'
            )

    logger.info('Extracting ceph user uid/gid from container image...')
    (uid, gid) = extract_uid_gid(ctx)

    # create some initial keys
    (mon_key, mgr_key, admin_key, bootstrap_keyring, admin_keyring) = create_initial_keys(ctx, uid, gid, mgr_id)

    monmap = create_initial_monmap(ctx, uid, gid, fsid, mon_id, addr_arg)
    (mon_dir, log_dir) = prepare_create_mon(ctx, uid, gid, fsid, mon_id,
                                            bootstrap_keyring.name, monmap.name)

    with open(mon_dir + '/config', 'w') as f:
        os.fchown(f.fileno(), uid, gid)
        os.fchmod(f.fileno(), 0o600)
        f.write(config)

    make_var_run(ctx, fsid, uid, gid)
    create_mon(ctx, uid, gid, fsid, mon_id)

    # config to issue various CLI commands
    tmp_config = write_tmp(config, uid, gid)

    # a CLI helper to reduce our typing
    def cli(cmd, extra_mounts={}, timeout=DEFAULT_TIMEOUT, verbosity=CallVerbosity.VERBOSE_ON_FAILURE):
        # type: (List[str], Dict[str, str], Optional[int], CallVerbosity) -> str
        mounts = {
            log_dir: '/var/log/ceph:z',
            admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
            tmp_config.name: '/etc/ceph/ceph.conf:z',
        }
        for k, v in extra_mounts.items():
            mounts[k] = v
        timeout = timeout or ctx.timeout
        return CephContainer(
            ctx,
            image=ctx.image,
            entrypoint='/usr/bin/ceph',
            args=cmd,
            volume_mounts=mounts,
        ).run(timeout=timeout, verbosity=verbosity)

    wait_for_mon(ctx, mon_id, mon_dir, admin_keyring.name, tmp_config.name)

    finish_bootstrap_config(ctx, fsid, config, mon_id, mon_dir,
                            mon_network, ipv6, cli,
                            cluster_network, ipv6_cluster_network)

    # output files
    with open(ctx.output_keyring, 'w') as f:
        os.fchmod(f.fileno(), 0o600)
        f.write('[client.admin]\n'
                '\tkey = ' + admin_key + '\n')
    logger.info('Wrote keyring to %s' % ctx.output_keyring)

    # create mgr
    create_mgr(ctx, uid, gid, fsid, mgr_id, mgr_key, config, cli)

    if user_conf:
        # user given config settings were already assimilated earlier
        # but if the given settings contained any attributes in
        # the mgr (e.g. mgr/cephadm/container_image_prometheus)
        # they don't seem to be stored if there isn't a mgr yet.
        # Since re-assimilating the same conf settings should be
        # idempotent we can just do it again here.
        with tempfile.NamedTemporaryFile(buffering=0) as tmp:
            tmp.write(user_conf.encode('utf-8'))
            cli(['config', 'assimilate-conf',
                 '-i', '/var/lib/ceph/user.conf'],
                {tmp.name: '/var/lib/ceph/user.conf:z'})

    # wait for mgr to restart (after enabling a module)
    def wait_for_mgr_restart() -> None:
        # first get latest mgrmap epoch from the mon. try newer 'mgr
        # stat' command first, then fall back to 'mgr dump' if
        # necessary
        try:
            j = json_loads_retry(lambda: cli(['mgr', 'stat'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR))
        except Exception:
            j = json_loads_retry(lambda: cli(['mgr', 'dump'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR))
        epoch = j['epoch']

        # wait for mgr to have it
        logger.info('Waiting for the mgr to restart...')

        def mgr_has_latest_epoch():
            # type: () -> bool
            try:
                out = cli(['tell', 'mgr', 'mgr_status'])
                j = json.loads(out)
                return j['mgrmap_epoch'] >= epoch
            except Exception as e:
                logger.debug('tell mgr mgr_status failed: %s' % e)
                return False
        is_available(ctx, 'mgr epoch %d' % epoch, mgr_has_latest_epoch)

    enable_cephadm_mgr_module(cli, wait_for_mgr_restart)

    # ssh
    if not ctx.skip_ssh:
        prepare_ssh(ctx, cli, wait_for_mgr_restart)

    if ctx.registry_url and ctx.registry_username and ctx.registry_password:
        registry_credentials = {'url': ctx.registry_url, 'username': ctx.registry_username, 'password': ctx.registry_password}
        cli(['config-key', 'set', 'mgr/cephadm/registry_credentials', json.dumps(registry_credentials)])

    cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(ctx.container_init), '--force'])

    if not ctx.skip_dashboard:
        prepare_dashboard(ctx, uid, gid, cli, wait_for_mgr_restart)

    if ctx.output_config == CEPH_DEFAULT_CONF and not ctx.skip_admin_label and not ctx.no_minimize_config:
        logger.info('Enabling client.admin keyring and conf on hosts with "admin" label')
        try:
            cli(['orch', 'client-keyring', 'set', 'client.admin', 'label:_admin'])
            cli(['orch', 'host', 'label', 'add', get_hostname(), '_admin'])
        except Exception:
            logger.info('Unable to set up "admin" label; assuming older version of Ceph')

    if ctx.apply_spec:
        logger.info('Applying %s to cluster' % ctx.apply_spec)
        # copy ssh key to hosts in spec file
        with open(ctx.apply_spec) as f:
            try:
                for spec in parse_yaml_objs(f):
                    if spec.get('service_type') == 'host':
                        _distribute_ssh_keys(ctx, spec, hostname)
            except ValueError:
                logger.info('Unable to parse %s successfully' % ctx.apply_spec)

        mounts = {}
        mounts[pathify(ctx.apply_spec)] = '/tmp/spec.yml:ro'
        try:
            out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
            logger.info(out)
        except Exception:
            ctx.error_code = -errno.EINVAL
            logger.info('\nApplying %s to cluster failed!\n' % ctx.apply_spec)

    save_cluster_config(ctx, uid, gid, fsid)

    # enable autotune for osd_memory_target
    logger.info('Enabling autotune for osd_memory_target')
    cli(['config', 'set', 'osd', 'osd_memory_target_autotune', 'true'])

    # Notify the Dashboard to show the 'Expand cluster' page on first log in.
    cli(['config-key', 'set', 'mgr/dashboard/cluster/status', 'INSTALLED'])

    logger.info('You can access the Ceph CLI as follows in case of multi-cluster or non-default config:\n\n'
                '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
                    sys.argv[0],
                    fsid,
                    ctx.output_config,
                    ctx.output_keyring))

    logger.info('Or, if you are only running a single cluster on this host:\n\n\tsudo %s shell \n' % (sys.argv[0]))

    logger.info('Please consider enabling telemetry to help improve Ceph:\n\n'
                '\tceph telemetry on\n\n'
                'For more information see:\n\n'
                '\thttps://docs.ceph.com/docs/master/mgr/telemetry/\n')
    logger.info('Bootstrap complete.')
    return ctx.error_code

##################################


def command_registry_login(ctx: CephadmContext) -> int:
    if ctx.registry_json:
        logger.info('Pulling custom registry login info from %s.' % ctx.registry_json)
        d = get_parm(ctx.registry_json)
        if d.get('url') and d.get('username') and d.get('password'):
            ctx.registry_url = d.get('url')
            ctx.registry_username = d.get('username')
            ctx.registry_password = d.get('password')
            registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
        else:
            raise Error('json provided for custom registry login did not include all necessary fields. '
                        'Please set up the json file as\n'
                        '{\n'
                        ' "url": "REGISTRY_URL",\n'
                        ' "username": "REGISTRY_USERNAME",\n'
                        ' "password": "REGISTRY_PASSWORD"\n'
                        '}\n')
    elif ctx.registry_url and ctx.registry_username and ctx.registry_password:
        registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
    else:
        raise Error('Invalid custom registry arguments received. To login to a custom registry include '
                    '--registry-url, --registry-username and --registry-password '
                    'options or --registry-json option')
    return 0


def registry_login(ctx: CephadmContext, url: Optional[str], username: Optional[str], password: Optional[str]) -> None:
    logger.info('Logging into custom registry.')
    try:
        engine = ctx.container_engine
        cmd = [engine.path, 'login',
               '-u', username, '-p', password,
               url]
        if isinstance(engine, Podman):
            cmd.append('--authfile=/etc/ceph/podman-auth.json')
        out, _, _ = call_throws(ctx, cmd)
        if isinstance(engine, Podman):
            os.chmod('/etc/ceph/podman-auth.json', 0o600)
    except Exception:
        raise Error('Failed to login to custom registry @ %s as %s with given password' % (ctx.registry_url, ctx.registry_username))

##################################


def extract_uid_gid_monitoring(ctx, daemon_type):
    # type: (CephadmContext, str) -> Tuple[int, int]

    if daemon_type == 'prometheus':
        uid, gid = extract_uid_gid(ctx, file_path='/etc/prometheus')
    elif daemon_type == 'node-exporter':
        uid, gid = 65534, 65534
    elif daemon_type == 'grafana':
        uid, gid = extract_uid_gid(ctx, file_path='/var/lib/grafana')
    elif daemon_type == 'loki':
        uid, gid = extract_uid_gid(ctx, file_path='/etc/loki')
    elif daemon_type == 'promtail':
        uid, gid = extract_uid_gid(ctx, file_path='/etc/promtail')
    elif daemon_type == 'alertmanager':
        uid, gid = extract_uid_gid(ctx, file_path=['/etc/alertmanager', '/etc/prometheus'])
    else:
        raise Error('{} not implemented yet'.format(daemon_type))
    return uid, gid


def get_deployment_container(ctx: CephadmContext,
                             fsid: str, daemon_type: str, daemon_id: Union[int, str],
                             privileged: bool = False,
                             ptrace: bool = False,
                             container_args: Optional[List[str]] = None) -> 'CephContainer':
    # wrapper for get_container specifically for containers made during the `cephadm deploy`
    # command. Adds some extra things such as extra container args and custom config files
    c = get_container(ctx, fsid, daemon_type, daemon_id, privileged, ptrace, container_args)
    if 'extra_container_args' in ctx and ctx.extra_container_args:
        c.container_args.extend(ctx.extra_container_args)
    if 'config_json' in ctx and ctx.config_json:
        conf_files = get_custom_config_files(ctx.config_json)
        mandatory_keys = ['mount_path', 'content']
        for conf in conf_files['custom_config_files']:
            if all(k in conf for k in mandatory_keys):
                mount_path = conf['mount_path']
                file_path = os.path.join(
                    ctx.data_dir,
                    fsid,
                    'custom_config_files',
                    f'{daemon_type}.{daemon_id}',
                    os.path.basename(mount_path)
                )
                c.volume_mounts[file_path] = mount_path
    return c
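
# The custom config handling above expects a config-json shaped roughly like
# the following (illustrative values); each entry carrying both 'mount_path'
# and 'content' gets a bind mount from the daemon's custom_config_files
# directory into the container at its mount_path:
#
#   {
#       "custom_config_files": [
#           {"mount_path": "/etc/example.conf", "content": "key = value\n"}
#       ]
#   }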


@default_image
def command_deploy(ctx):
    # type: (CephadmContext) -> None
    daemon_type, daemon_id = ctx.name.split('.', 1)

    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    if daemon_type not in get_supported_daemons():
        raise Error('daemon type %s not recognized' % daemon_type)

    redeploy = False
    unit_name = get_unit_name(ctx.fsid, daemon_type, daemon_id)
    (_, state, _) = check_unit(ctx, unit_name)
    if state == 'running' or is_container_running(ctx, CephContainer.for_daemon(ctx, ctx.fsid, daemon_type, daemon_id, 'bash')):
        redeploy = True

    if ctx.reconfig:
        logger.info('%s daemon %s ...' % ('Reconfig', ctx.name))
    elif redeploy:
        logger.info('%s daemon %s ...' % ('Redeploy', ctx.name))
    else:
        logger.info('%s daemon %s ...' % ('Deploy', ctx.name))

    # Migrate sysctl conf files from /usr/lib to /etc
    migrate_sysctl_dir(ctx, ctx.fsid)

    # Get and check ports explicitly required to be opened
    daemon_ports = []  # type: List[int]

    # only check port in use if not reconfig or redeploy since service
    # we are redeploying/reconfiguring will already be using the port
    if not ctx.reconfig and not redeploy:
        if ctx.tcp_ports:
            daemon_ports = list(map(int, ctx.tcp_ports.split()))

    if daemon_type in Ceph.daemons:
        config, keyring = get_config_and_keyring(ctx)
        uid, gid = extract_uid_gid(ctx)
        make_var_run(ctx, ctx.fsid, uid, gid)

        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id,
                                     ptrace=ctx.allow_ptrace)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      osd_fsid=ctx.osd_fsid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type in Monitoring.components:
        # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
        # Default Checks
        # make sure provided config-json is sufficient
        config = get_parm(ctx.config_json)  # type: ignore
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())
        required_args = Monitoring.components[daemon_type].get('config-json-args', list())
        if required_files:
            if not config or not all(c in config.get('files', {}).keys() for c in required_files):  # type: ignore
                raise Error('{} deployment requires config-json which must '
                            'contain file content for {}'.format(daemon_type.capitalize(), ', '.join(required_files)))
        if required_args:
            if not config or not all(c in config.keys() for c in required_args):  # type: ignore
                raise Error('{} deployment requires config-json which must '
                            'contain arg for {}'.format(daemon_type.capitalize(), ', '.join(required_args)))

        uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == NFSGanesha.daemon_type:
        if not ctx.reconfig and not redeploy and not daemon_ports:
            daemon_ports = list(NFSGanesha.port_map.values())

        config, keyring = get_config_and_keyring(ctx)
        # TODO: extract ganesha uid/gid (997, 994) ?
        uid, gid = extract_uid_gid(ctx)
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CephIscsi.daemon_type:
        config, keyring = get_config_and_keyring(ctx)
        uid, gid = extract_uid_gid(ctx)
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, ctx.fsid, daemon_id)
        uid, gid = haproxy.extract_uid_gid_haproxy()
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == Keepalived.daemon_type:
        keepalived = Keepalived.init(ctx, ctx.fsid, daemon_id)
        uid, gid = keepalived.extract_uid_gid_keepalived()
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, ctx.fsid, daemon_id)
        if not ctx.reconfig and not redeploy:
            daemon_ports.extend(cc.ports)
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id,
                                     privileged=cc.privileged,
                                     ptrace=ctx.allow_ptrace)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c,
                      uid=cc.uid, gid=cc.gid, config=None,
                      keyring=None, reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CephadmAgent.daemon_type:
        # get current user gid and uid
        uid = os.getuid()
        gid = os.getgid()
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, None,
                      uid, gid, ports=daemon_ports)

    elif daemon_type == SNMPGateway.daemon_type:
        sc = SNMPGateway.init(ctx, ctx.fsid, daemon_id)
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c,
                      sc.uid, sc.gid,
                      ports=daemon_ports)

    else:
        raise Error('daemon type {} not implemented in command_deploy function'
                    .format(daemon_type))

##################################


@infer_image
def command_run(ctx):
    # type: (CephadmContext) -> int
    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    c = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
    command = c.run_cmd()
    return call_timeout(ctx, command, ctx.timeout)

##################################


@infer_fsid
@infer_config
@infer_image
@validate_fsid
def command_shell(ctx):
    # type: (CephadmContext) -> int
    cp = read_config(ctx.config)
    if cp.has_option('global', 'fsid') and \
            cp.get('global', 'fsid') != ctx.fsid:
        raise Error('fsid does not match ceph.conf')

    if ctx.name:
        if '.' in ctx.name:
            (daemon_type, daemon_id) = ctx.name.split('.', 1)
        else:
            daemon_type = ctx.name
            daemon_id = None
    else:
        daemon_type = 'osd'  # get the most mounts
        daemon_id = None

    if ctx.fsid and daemon_type in Ceph.daemons:
        make_log_dir(ctx, ctx.fsid)

    if daemon_id and not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    # in case a dedicated keyring for the specified fsid is found we use it.
    # Otherwise, use /etc/ceph files by default, if present. We do this instead of
    # making these defaults in the arg parser because we don't want an error
    # if they don't exist.
    if not ctx.keyring:
        keyring_file = f'{ctx.data_dir}/{ctx.fsid}/{CEPH_CONF_DIR}/{CEPH_KEYRING}'
        if os.path.exists(keyring_file):
            ctx.keyring = keyring_file
        elif os.path.exists(CEPH_DEFAULT_KEYRING):
            ctx.keyring = CEPH_DEFAULT_KEYRING

    container_args: List[str] = ['-i']
    mounts = get_container_mounts(ctx, ctx.fsid, daemon_type, daemon_id,
                                  no_config=True if ctx.config else False)
    binds = get_container_binds(ctx, ctx.fsid, daemon_type, daemon_id)
    if ctx.config:
        mounts[pathify(ctx.config)] = '/etc/ceph/ceph.conf:z'
    if ctx.keyring:
        mounts[pathify(ctx.keyring)] = '/etc/ceph/ceph.keyring:z'
    if ctx.mount:
        for _mount in ctx.mount:
            split_src_dst = _mount.split(':')
            mount = pathify(split_src_dst[0])
            filename = os.path.basename(split_src_dst[0])
            if len(split_src_dst) > 1:
                dst = split_src_dst[1]
                if len(split_src_dst) == 3:
                    dst = '{}:{}'.format(dst, split_src_dst[2])
                mounts[mount] = dst
            else:
                mounts[mount] = '/mnt/{}'.format(filename)
    if ctx.command:
        command = ctx.command
    else:
        command = ['bash']
        container_args += [
            '-t',
            '-e', 'LANG=C',
            '-e', 'PS1=%s' % CUSTOM_PS1,
        ]
    if ctx.fsid:
        home = os.path.join(ctx.data_dir, ctx.fsid, 'home')
        if not os.path.exists(home):
            logger.debug('Creating root home at %s' % home)
            makedirs(home, 0, 0, 0o660)
        if os.path.exists('/etc/skel'):
            for f in os.listdir('/etc/skel'):
                if f.startswith('.bash'):
                    shutil.copyfile(os.path.join('/etc/skel', f),
                                    os.path.join(home, f))
        mounts[home] = '/root'

    for i in ctx.volume:
        a, b = i.split(':', 1)
        mounts[a] = b

    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='doesnotmatter',
        args=[],
        container_args=container_args,
        volume_mounts=mounts,
        bind_mounts=binds,
        envs=ctx.env,
        privileged=True)
    command = c.shell_cmd(command)

    return call_timeout(ctx, command, ctx.timeout)

##################################


@infer_fsid
def command_enter(ctx):
    # type: (CephadmContext) -> int
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')
    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    container_args = ['-i']  # type: List[str]
    if ctx.command:
        command = ctx.command
    else:
        command = ['sh']
        container_args += [
            '-t',
            '-e', 'LANG=C',
            '-e', 'PS1=%s' % CUSTOM_PS1,
        ]
    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='doesnotmatter',
        container_args=container_args,
        cname='ceph-%s-%s.%s' % (ctx.fsid, daemon_type, daemon_id),
    )
    command = c.exec_cmd(command)
    return call_timeout(ctx, command, ctx.timeout)

##################################


@infer_fsid
@infer_image
@validate_fsid
def command_ceph_volume(ctx):
    # type: (CephadmContext) -> None
    cp = read_config(ctx.config)
    if cp.has_option('global', 'fsid') and \
            cp.get('global', 'fsid') != ctx.fsid:
        raise Error('fsid does not match ceph.conf')

    if ctx.fsid:
        make_log_dir(ctx, ctx.fsid)

        lock = FileLock(ctx, ctx.fsid)
        lock.acquire()

    (uid, gid) = (0, 0)  # ceph-volume runs as root
    mounts = get_container_mounts(ctx, ctx.fsid, 'osd', None)

    tmp_config = None
    tmp_keyring = None

    (config, keyring) = get_config_and_keyring(ctx)

    if config:
        # tmp config file
        tmp_config = write_tmp(config, uid, gid)
        mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'

    if keyring:
        # tmp keyring file
        tmp_keyring = write_tmp(keyring, uid, gid)
        mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'

    c = get_ceph_volume_container(
        ctx,
        envs=ctx.env,
        args=ctx.command,
        volume_mounts=mounts,
    )

    out, err, code = call_throws(ctx, c.run_cmd(), verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
    if not code:
        print(out)

##################################


@infer_fsid
def command_unit(ctx):
    # type: (CephadmContext) -> int
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    _, _, code = call(
        ctx,
        ['systemctl', ctx.command, unit_name],
        verbosity=CallVerbosity.VERBOSE,
        desc=''
    )
    return code

##################################
6117
f6b5b4d7 6118
9f95a23c 6119@infer_fsid
f67539c2
TL
6120def command_logs(ctx):
6121 # type: (CephadmContext) -> None
6122 if not ctx.fsid:
9f95a23c
TL
6123 raise Error('must pass --fsid to specify cluster')
6124
f67539c2 6125 unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)
9f95a23c
TL
6126
6127 cmd = [find_program('journalctl')]
6128 cmd.extend(['-u', unit_name])
f67539c2
TL
6129 if ctx.command:
6130 cmd.extend(ctx.command)
9f95a23c
TL
6131
6132 # call this directly, without our wrapper, so that journalctl output goes
6133 # straight to stdout, unmolested by our logger prefixing.
f67539c2 6134 logger.debug('Running command: %s' % ' '.join(cmd))
522d829b 6135 subprocess.call(cmd, env=os.environ.copy()) # type: ignore
9f95a23c
TL
6136
6137##################################
6138
f6b5b4d7 6139
f67539c2 6140def list_networks(ctx):
522d829b 6141 # type: (CephadmContext) -> Dict[str,Dict[str, Set[str]]]
9f95a23c 6142
f67539c2
TL
6143 # sadly, 18.04's iproute2 4.15.0-2ubun doesn't support the -j flag,
6144 # so we'll need to use a regex to parse 'ip' command output.
6145 #
6146 # out, _, _ = call_throws(['ip', '-j', 'route', 'ls'])
6147 # j = json.loads(out)
6148 # for x in j:
f67539c2
TL
6149 res = _list_ipv4_networks(ctx)
6150 res.update(_list_ipv6_networks(ctx))
f6b5b4d7
TL
6151 return res
6152
6153
522d829b 6154def _list_ipv4_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]:
f67539c2
TL
6155 execstr: Optional[str] = find_executable('ip')
6156 if not execstr:
6157 raise FileNotFoundError("unable to find 'ip' command")
2a845540 6158 out, _, _ = call_throws(ctx, [execstr, 'route', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
f6b5b4d7
TL
6159 return _parse_ipv4_route(out)
6160
9f95a23c 6161
522d829b
TL
6162def _parse_ipv4_route(out: str) -> Dict[str, Dict[str, Set[str]]]:
6163 r = {} # type: Dict[str, Dict[str, Set[str]]]
33c7a0ef 6164 p = re.compile(r'^(\S+) (?:via \S+)? ?dev (\S+) (.*)scope link (.*)src (\S+)')
9f95a23c
TL
6165 for line in out.splitlines():
6166 m = p.findall(line)
6167 if not m:
6168 continue
6169 net = m[0][0]
33c7a0ef
TL
6170 if '/' not in net: # aggregate /32 mask for single host sub-networks
6171 net += '/32'
f67539c2
TL
6172 iface = m[0][1]
6173 ip = m[0][4]
9f95a23c 6174 if net not in r:
f67539c2
TL
6175 r[net] = {}
6176 if iface not in r[net]:
522d829b
TL
6177 r[net][iface] = set()
6178 r[net][iface].add(ip)
9f95a23c
TL
6179 return r
6180
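# Illustrative sketch (not part of upstream cephadm): the sample route line
# below is hypothetical, but shows the shape _parse_ipv4_route() returns --
# a mapping of network -> interface -> set of local addresses.
#
#   _parse_ipv4_route('10.1.2.0/24 dev eth0 proto kernel scope link src 10.1.2.3')
#   => {'10.1.2.0/24': {'eth0': {'10.1.2.3'}}}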
f6b5b4d7 6181
522d829b 6182def _list_ipv6_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]:
f67539c2
TL
6183 execstr: Optional[str] = find_executable('ip')
6184 if not execstr:
6185 raise FileNotFoundError("unable to find 'ip' command")
2a845540
TL
6186 routes, _, _ = call_throws(ctx, [execstr, '-6', 'route', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
6187 ips, _, _ = call_throws(ctx, [execstr, '-6', 'addr', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
f6b5b4d7
TL
6188 return _parse_ipv6_route(routes, ips)
6189
6190
522d829b
TL
6191def _parse_ipv6_route(routes: str, ips: str) -> Dict[str, Dict[str, Set[str]]]:
6192 r = {} # type: Dict[str, Dict[str, Set[str]]]
f6b5b4d7
TL
6193 route_p = re.compile(r'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$')
6194 ip_p = re.compile(r'^\s+inet6 (\S+)/(.*)scope (.*)$')
f67539c2 6195 iface_p = re.compile(r'^(\d+): (\S+): (.*)$')
f6b5b4d7
TL
6196 for line in routes.splitlines():
6197 m = route_p.findall(line)
6198 if not m or m[0][0].lower() == 'default':
6199 continue
6200 net = m[0][0]
33c7a0ef
TL
6201 if '/' not in net: # aggregate /128 mask for single host sub-networks
6202 net += '/128'
f67539c2 6203 iface = m[0][1]
33c7a0ef
TL
6204 if iface == 'lo': # skip loopback devices
6205 continue
f6b5b4d7 6206 if net not in r:
f67539c2
TL
6207 r[net] = {}
6208 if iface not in r[net]:
522d829b 6209 r[net][iface] = set()
f6b5b4d7 6210
f67539c2 6211 iface = None
f6b5b4d7
TL
6212 for line in ips.splitlines():
6213 m = ip_p.findall(line)
6214 if not m:
f67539c2
TL
6215 m = iface_p.findall(line)
6216 if m:
6217 # drop @... suffix, if present
6218 iface = m[0][1].split('@')[0]
f6b5b4d7
TL
6219 continue
6220 ip = m[0][0]
6221 # find the network it belongs to
6222 net = [n for n in r.keys()
f67539c2 6223 if ipaddress.ip_address(ip) in ipaddress.ip_network(n)]
20effc67 6224 if net and iface in r[net[0]]:
2a845540 6225 assert iface
522d829b 6226 r[net[0]][iface].add(ip)
f6b5b4d7
TL
6227
6228 return r
6229
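# Illustrative sketch (not part of upstream cephadm); the route and addr
# lines are hypothetical examples of `ip -6 route ls` / `ip -6 addr ls`
# output, showing how the two parsing passes in _parse_ipv6_route() combine:
#
#   routes = 'fe80::/64 dev eth0 proto kernel metric 256 pref medium'
#   ips = ('2: eth0: <BROADCAST,MULTICAST,UP> mtu 1500\n'
#          '    inet6 fe80::1234:5678:9abc:def0/64 scope link')
#   _parse_ipv6_route(routes, ips)
#   => {'fe80::/64': {'eth0': {'fe80::1234:5678:9abc:def0'}}}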
6230
f67539c2
TL
6231def command_list_networks(ctx):
6232 # type: (CephadmContext) -> None
6233 r = list_networks(ctx)
522d829b
TL
6234
6235 def serialize_sets(obj: Any) -> Any:
6236 return list(obj) if isinstance(obj, set) else obj
6237
6238 print(json.dumps(r, indent=4, default=serialize_sets))
9f95a23c
TL
6239
6240##################################
6241
f6b5b4d7 6242
f67539c2
TL
6243def command_ls(ctx):
6244 # type: (CephadmContext) -> None
6245 ls = list_daemons(ctx, detail=not ctx.no_detail,
6246 legacy_dir=ctx.legacy_dir)
9f95a23c
TL
6247 print(json.dumps(ls, indent=4))
6248
f6b5b4d7 6249
f67539c2
TL
6250def with_units_to_int(v: str) -> int:
6251 if v.endswith('iB'):
6252 v = v[:-2]
6253 elif v.endswith('B'):
6254 v = v[:-1]
6255 mult = 1
6256 if v[-1].upper() == 'K':
6257 mult = 1024
6258 v = v[:-1]
6259 elif v[-1].upper() == 'M':
6260 mult = 1024 * 1024
6261 v = v[:-1]
6262 elif v[-1].upper() == 'G':
6263 mult = 1024 * 1024 * 1024
6264 v = v[:-1]
6265 elif v[-1].upper() == 'T':
6266 mult = 1024 * 1024 * 1024 * 1024
6267 v = v[:-1]
6268 return int(float(v) * mult)
6269
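# Illustrative examples (not part of upstream cephadm) of the unit parsing
# done by with_units_to_int(), as applied to `podman stats` MemUsage values:
#
#   with_units_to_int('512B')    => 512
#   with_units_to_int('100MiB')  => 104857600
#   with_units_to_int('1.5GiB')  => 1610612736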
6270
6271def list_daemons(ctx, detail=True, legacy_dir=None):
6272 # type: (CephadmContext, bool, Optional[str]) -> List[Dict[str, str]]
6273 host_version: Optional[str] = None
9f95a23c 6274 ls = []
f67539c2 6275 container_path = ctx.container_engine.path
9f95a23c 6276
f67539c2 6277 data_dir = ctx.data_dir
9f95a23c
TL
6278 if legacy_dir is not None:
6279 data_dir = os.path.abspath(legacy_dir + data_dir)
6280
6281 # keep track of ceph versions we see
6282 seen_versions = {} # type: Dict[str, Optional[str]]
6283
f67539c2
TL
6284 # keep track of image digests
6285 seen_digests = {} # type: Dict[str, List[str]]
6286
33c7a0ef 6287 # keep track of memory and cpu usage we've seen
f67539c2 6288 seen_memusage = {} # type: Dict[str, int]
33c7a0ef 6289 seen_cpuperc = {} # type: Dict[str, str]
f67539c2
TL
6290 out, err, code = call(
6291 ctx,
6292 [container_path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
2a845540 6293 verbosity=CallVerbosity.QUIET
f67539c2 6294 )
522d829b 6295 seen_memusage_cid_len, seen_memusage = _parse_mem_usage(code, out)
f67539c2 6296
33c7a0ef
TL
6297 out, err, code = call(
6298 ctx,
6299 [container_path, 'stats', '--format', '{{.ID}},{{.CPUPerc}}', '--no-stream'],
2a845540 6300 verbosity=CallVerbosity.QUIET
33c7a0ef
TL
6301 )
6302 seen_cpuperc_cid_len, seen_cpuperc = _parse_cpu_perc(code, out)
6303
9f95a23c
TL
6304 # /var/lib/ceph
6305 if os.path.exists(data_dir):
6306 for i in os.listdir(data_dir):
6307 if i in ['mon', 'osd', 'mds', 'mgr']:
6308 daemon_type = i
6309 for j in os.listdir(os.path.join(data_dir, i)):
6310 if '-' not in j:
6311 continue
6312 (cluster, daemon_id) = j.split('-', 1)
f67539c2
TL
6313 fsid = get_legacy_daemon_fsid(ctx,
6314 cluster, daemon_type, daemon_id,
6315 legacy_dir=legacy_dir)
e306af50 6316 legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
f67539c2 6317 val: Dict[str, Any] = {
9f95a23c
TL
6318 'style': 'legacy',
6319 'name': '%s.%s' % (daemon_type, daemon_id),
6320 'fsid': fsid if fsid is not None else 'unknown',
e306af50 6321 'systemd_unit': legacy_unit_name,
9f95a23c
TL
6322 }
6323 if detail:
20effc67 6324 (val['enabled'], val['state'], _) = check_unit(ctx, legacy_unit_name)
9f95a23c
TL
6325 if not host_version:
6326 try:
f67539c2
TL
6327 out, err, code = call(ctx,
6328 ['ceph', '-v'],
2a845540 6329 verbosity=CallVerbosity.QUIET)
9f95a23c
TL
6330 if not code and out.startswith('ceph version '):
6331 host_version = out.split(' ')[2]
6332 except Exception:
6333 pass
f67539c2
TL
6334 val['host_version'] = host_version
6335 ls.append(val)
9f95a23c
TL
6336 elif is_fsid(i):
6337 fsid = str(i) # convince mypy that fsid is a str here
6338 for j in os.listdir(os.path.join(data_dir, i)):
f67539c2 6339 if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)):
9f95a23c
TL
6340 name = j
6341 (daemon_type, daemon_id) = j.split('.', 1)
6342 unit_name = get_unit_name(fsid,
6343 daemon_type,
6344 daemon_id)
6345 else:
6346 continue
f67539c2 6347 val = {
9f95a23c
TL
6348 'style': 'cephadm:v1',
6349 'name': name,
6350 'fsid': fsid,
e306af50 6351 'systemd_unit': unit_name,
9f95a23c
TL
6352 }
6353 if detail:
6354 # get container id
20effc67 6355 (val['enabled'], val['state'], _) = check_unit(ctx, unit_name)
9f95a23c
TL
6356 container_id = None
6357 image_name = None
6358 image_id = None
f67539c2 6359 image_digests = None
9f95a23c
TL
6360 version = None
6361 start_stamp = None
6362
522d829b 6363 out, err, code = get_container_stats(ctx, container_path, fsid, daemon_type, daemon_id)
9f95a23c
TL
6364 if not code:
6365 (container_id, image_name, image_id, start,
6366 version) = out.strip().split(',')
6367 image_id = normalize_container_id(image_id)
6368 daemon_type = name.split('.', 1)[0]
6369 start_stamp = try_convert_datetime(start)
f67539c2
TL
6370
6371 # collect digests for this image id
6372 image_digests = seen_digests.get(image_id)
6373 if not image_digests:
6374 out, err, code = call(
6375 ctx,
6376 [
6377 container_path, 'image', 'inspect', image_id,
6378 '--format', '{{.RepoDigests}}',
6379 ],
2a845540 6380 verbosity=CallVerbosity.QUIET)
f67539c2 6381 if not code:
18d92ca7
TL
6382 image_digests = list(set(map(
6383 normalize_image_digest,
6384 out.strip()[1:-1].split(' '))))
f67539c2
TL
6385 seen_digests[image_id] = image_digests
6386
6387 # identify software version inside the container (if we can)
9f95a23c
TL
6388 if not version or '.' not in version:
6389 version = seen_versions.get(image_id, None)
6390 if daemon_type == NFSGanesha.daemon_type:
f67539c2 6391 version = NFSGanesha.get_version(ctx, container_id)
1911f103 6392 if daemon_type == CephIscsi.daemon_type:
f67539c2 6393 version = CephIscsi.get_version(ctx, container_id)
9f95a23c
TL
6394 elif not version:
6395 if daemon_type in Ceph.daemons:
f67539c2
TL
6396 out, err, code = call(ctx,
6397 [container_path, 'exec', container_id,
6398 'ceph', '-v'],
2a845540 6399 verbosity=CallVerbosity.QUIET)
9f95a23c
TL
6400 if not code and \
6401 out.startswith('ceph version '):
6402 version = out.split(' ')[2]
6403 seen_versions[image_id] = version
6404 elif daemon_type == 'grafana':
f67539c2
TL
6405 out, err, code = call(ctx,
6406 [container_path, 'exec', container_id,
6407 'grafana-server', '-v'],
2a845540 6408 verbosity=CallVerbosity.QUIET)
9f95a23c
TL
6409 if not code and \
6410 out.startswith('Version '):
6411 version = out.split(' ')[1]
6412 seen_versions[image_id] = version
6413 elif daemon_type in ['prometheus',
6414 'alertmanager',
33c7a0ef
TL
6415 'node-exporter',
6416 'loki',
6417 'promtail']:
f67539c2
TL
6418 version = Monitoring.get_version(ctx, container_id, daemon_type)
6419 seen_versions[image_id] = version
6420 elif daemon_type == 'haproxy':
6421 out, err, code = call(ctx,
6422 [container_path, 'exec', container_id,
6423 'haproxy', '-v'],
2a845540 6424 verbosity=CallVerbosity.QUIET)
f67539c2
TL
6425 if not code and \
6426 out.startswith('HA-Proxy version '):
6427 version = out.split(' ')[2]
6428 seen_versions[image_id] = version
6429 elif daemon_type == 'keepalived':
6430 out, err, code = call(ctx,
6431 [container_path, 'exec', container_id,
6432 'keepalived', '--version'],
2a845540 6433 verbosity=CallVerbosity.QUIET)
9f95a23c 6434 if not code and \
f67539c2
TL
6435 err.startswith('Keepalived '):
6436 version = err.split(' ')[1]
6437 if version[0] == 'v':
6438 version = version[1:]
9f95a23c 6439 seen_versions[image_id] = version
f91f0fd5
TL
6440 elif daemon_type == CustomContainer.daemon_type:
6441 # Because a custom container can contain
6442 # everything, we do not know which command
6443 # to execute to get the version.
6444 pass
20effc67
TL
6445 elif daemon_type == SNMPGateway.daemon_type:
6446 version = SNMPGateway.get_version(ctx, fsid, daemon_id)
6447 seen_versions[image_id] = version
9f95a23c 6448 else:
f91f0fd5 6449 logger.warning('version for unknown daemon type %s' % daemon_type)
9f95a23c 6450 else:
f67539c2 6451 vfile = os.path.join(data_dir, fsid, j, 'unit.image') # type: ignore
9f95a23c
TL
6452 try:
6453 with open(vfile, 'r') as f:
6454 image_name = f.read().strip() or None
6455 except IOError:
6456 pass
f67539c2
TL
6457
6458 # unit.meta?
6459 mfile = os.path.join(data_dir, fsid, j, 'unit.meta') # type: ignore
6460 try:
6461 with open(mfile, 'r') as f:
6462 meta = json.loads(f.read())
6463 val.update(meta)
6464 except IOError:
6465 pass
6466
6467 val['container_id'] = container_id
6468 val['container_image_name'] = image_name
6469 val['container_image_id'] = image_id
6470 val['container_image_digests'] = image_digests
6471 if container_id:
6472 val['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len])
33c7a0ef 6473 val['cpu_percentage'] = seen_cpuperc.get(container_id[0:seen_cpuperc_cid_len])
f67539c2
TL
6474 val['version'] = version
6475 val['started'] = start_stamp
6476 val['created'] = get_file_timestamp(
9f95a23c
TL
6477 os.path.join(data_dir, fsid, j, 'unit.created')
6478 )
f67539c2 6479 val['deployed'] = get_file_timestamp(
9f95a23c 6480 os.path.join(data_dir, fsid, j, 'unit.image'))
f67539c2 6481 val['configured'] = get_file_timestamp(
9f95a23c 6482 os.path.join(data_dir, fsid, j, 'unit.configured'))
f67539c2 6483 ls.append(val)
9f95a23c 6484
9f95a23c
TL
6485 return ls
6486
6487
522d829b
TL
6488def _parse_mem_usage(code: int, out: str) -> Tuple[int, Dict[str, int]]:
6489 # keep track of memory usage we've seen
6490 seen_memusage = {} # type: Dict[str, int]
6491 seen_memusage_cid_len = 0
6492 if not code:
6493 for line in out.splitlines():
6494 (cid, usage) = line.split(',')
6495 (used, limit) = usage.split(' / ')
6496 try:
6497 seen_memusage[cid] = with_units_to_int(used)
6498 if not seen_memusage_cid_len:
6499 seen_memusage_cid_len = len(cid)
6500 except ValueError:
6501 logger.info('unable to parse memory usage line\n>{}'.format(line))
6502 pass
6503 return seen_memusage_cid_len, seen_memusage
6504
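# Illustrative sketch (not part of upstream cephadm): with a hypothetical
# `podman stats --format {{.ID}},{{.MemUsage}}` line, _parse_mem_usage()
# yields the container-id prefix length plus a cid -> bytes map:
#
#   _parse_mem_usage(0, '8f2a1b3c4d5e,1.5GiB / 4GiB')
#   => (12, {'8f2a1b3c4d5e': 1610612736})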
6505
33c7a0ef
TL
6506def _parse_cpu_perc(code: int, out: str) -> Tuple[int, Dict[str, str]]:
6507 seen_cpuperc = {}
6508 seen_cpuperc_cid_len = 0
6509 if not code:
6510 for line in out.splitlines():
6511 (cid, cpuperc) = line.split(',')
6512 try:
6513 seen_cpuperc[cid] = cpuperc
6514 if not seen_cpuperc_cid_len:
6515 seen_cpuperc_cid_len = len(cid)
6516 except ValueError:
6517 logger.info('unable to parse cpu percentage line\n>{}'.format(line))
6518 pass
6519 return seen_cpuperc_cid_len, seen_cpuperc
6520
6521
f67539c2
TL
6522def get_daemon_description(ctx, fsid, name, detail=False, legacy_dir=None):
6523 # type: (CephadmContext, str, str, bool, Optional[str]) -> Dict[str, str]
e306af50 6524
f67539c2 6525 for d in list_daemons(ctx, detail=detail, legacy_dir=legacy_dir):
e306af50
TL
6526 if d['fsid'] != fsid:
6527 continue
6528 if d['name'] != name:
6529 continue
6530 return d
6531 raise Error('Daemon not found: {}. See `cephadm ls`'.format(name))
6532
522d829b
TL
6533
6534def get_container_stats(ctx: CephadmContext, container_path: str, fsid: str, daemon_type: str, daemon_id: str) -> Tuple[str, str, int]:
6535 c = CephContainer.for_daemon(ctx, fsid, daemon_type, daemon_id, 'bash')
6536 out, err, code = '', '', -1
6537 for name in (c.cname, c.old_cname):
6538 cmd = [
6539 container_path, 'inspect',
6540 '--format', '{{.Id}},{{.Config.Image}},{{.Image}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}',
6541 name
6542 ]
2a845540 6543 out, err, code = call(ctx, cmd, verbosity=CallVerbosity.QUIET)
522d829b
TL
6544 if not code:
6545 break
6546 return out, err, code
6547
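# Hedged usage sketch (not part of upstream cephadm): callers such as
# list_daemons() split the five comma-separated fields produced by the
# inspect --format string above. The fsid/daemon values here are assumptions.
#
#   out, err, code = get_container_stats(ctx, container_path, fsid, 'mon', 'host1')
#   if not code:
#       (container_id, image_name, image_id, created,
#        version) = out.strip().split(',')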
9f95a23c
TL
6548##################################
6549
f67539c2 6550
9f95a23c 6551@default_image
f67539c2
TL
6552def command_adopt(ctx):
6553 # type: (CephadmContext) -> None
9f95a23c 6554
f67539c2 6555 if not ctx.skip_pull:
33c7a0ef
TL
6556 try:
6557 _pull_image(ctx, ctx.image)
6558 except UnauthorizedRegistryError:
6559 err_str = 'Failed to pull container image. Host may not be logged into container registry. Try `cephadm registry-login --registry-url <url> --registry-username <username> --registry-password <password>` or supply login info via a json file with `cephadm registry-login --registry-json <file>`'
6560 logger.debug(f'Pulling image for `command_adopt` failed: {err_str}')
6561 raise Error(err_str)
9f95a23c 6562
f67539c2 6563 (daemon_type, daemon_id) = ctx.name.split('.', 1)
9f95a23c
TL
6564
6565 # legacy check
f67539c2
TL
6566 if ctx.style != 'legacy':
6567 raise Error('adoption of style %s not implemented' % ctx.style)
9f95a23c
TL
6568
6569 # lock
f67539c2
TL
6570 fsid = get_legacy_daemon_fsid(ctx,
6571 ctx.cluster,
9f95a23c
TL
6572 daemon_type,
6573 daemon_id,
f67539c2 6574 legacy_dir=ctx.legacy_dir)
9f95a23c
TL
6575 if not fsid:
6576 raise Error('could not detect legacy fsid; set fsid in ceph.conf')
f67539c2
TL
6577 lock = FileLock(ctx, fsid)
6578 lock.acquire()
9f95a23c
TL
6579
6580 # call correct adoption
6581 if daemon_type in Ceph.daemons:
f67539c2 6582 command_adopt_ceph(ctx, daemon_type, daemon_id, fsid)
9f95a23c 6583 elif daemon_type == 'prometheus':
f67539c2 6584 command_adopt_prometheus(ctx, daemon_id, fsid)
9f95a23c 6585 elif daemon_type == 'grafana':
f67539c2 6586 command_adopt_grafana(ctx, daemon_id, fsid)
9f95a23c
TL
6587 elif daemon_type == 'node-exporter':
6588 raise Error('adoption of node-exporter not implemented')
6589 elif daemon_type == 'alertmanager':
f67539c2 6590 command_adopt_alertmanager(ctx, daemon_id, fsid)
9f95a23c
TL
6591 else:
6592 raise Error('daemon type %s not recognized' % daemon_type)
6593
6594
1911f103 6595class AdoptOsd(object):
f67539c2
TL
6596 def __init__(self, ctx, osd_data_dir, osd_id):
6597 # type: (CephadmContext, str, str) -> None
6598 self.ctx = ctx
1911f103
TL
6599 self.osd_data_dir = osd_data_dir
6600 self.osd_id = osd_id
6601
6602 def check_online_osd(self):
6603 # type: () -> Tuple[Optional[str], Optional[str]]
6604
6605 osd_fsid, osd_type = None, None
6606
6607 path = os.path.join(self.osd_data_dir, 'fsid')
6608 try:
6609 with open(path, 'r') as f:
6610 osd_fsid = f.read().strip()
f67539c2 6611 logger.info('Found online OSD at %s' % path)
1911f103
TL
6612 except IOError:
6613 logger.info('Unable to read OSD fsid from %s' % path)
e306af50
TL
6614 if os.path.exists(os.path.join(self.osd_data_dir, 'type')):
6615 with open(os.path.join(self.osd_data_dir, 'type')) as f:
6616 osd_type = f.read().strip()
6617 else:
6618 logger.info('"type" file missing for OSD data dir')
1911f103
TL
6619
6620 return osd_fsid, osd_type
6621
6622 def check_offline_lvm_osd(self):
6623 # type: () -> Tuple[Optional[str], Optional[str]]
1911f103
TL
6624 osd_fsid, osd_type = None, None
6625
20effc67 6626 c = get_ceph_volume_container(
f67539c2 6627 self.ctx,
1911f103 6628 args=['lvm', 'list', '--format=json'],
1911f103 6629 )
f67539c2 6630 out, err, code = call_throws(self.ctx, c.run_cmd())
1911f103
TL
6631 if not code:
6632 try:
6633 js = json.loads(out)
6634 if self.osd_id in js:
f67539c2 6635 logger.info('Found offline LVM OSD {}'.format(self.osd_id))
1911f103
TL
6636 osd_fsid = js[self.osd_id][0]['tags']['ceph.osd_fsid']
6637 for device in js[self.osd_id]:
6638 if device['tags']['ceph.type'] == 'block':
6639 osd_type = 'bluestore'
6640 break
6641 if device['tags']['ceph.type'] == 'data':
6642 osd_type = 'filestore'
6643 break
6644 except ValueError as e:
f67539c2 6645 logger.info('Invalid JSON in ceph-volume lvm list: {}'.format(e))
1911f103
TL
6646
6647 return osd_fsid, osd_type
6648
6649 def check_offline_simple_osd(self):
6650 # type: () -> Tuple[Optional[str], Optional[str]]
1911f103
TL
6651 osd_fsid, osd_type = None, None
6652
f67539c2 6653 osd_file = glob('/etc/ceph/osd/{}-[a-f0-9-]*.json'.format(self.osd_id))
1911f103
TL
6654 if len(osd_file) == 1:
6655 with open(osd_file[0], 'r') as f:
6656 try:
6657 js = json.loads(f.read())
f67539c2
TL
6658 logger.info('Found offline simple OSD {}'.format(self.osd_id))
6659 osd_fsid = js['fsid']
6660 osd_type = js['type']
6661 if osd_type != 'filestore':
1911f103
TL
6662 # need this to be mounted for the adopt to work, as it
6663 # needs to move files from this directory
f67539c2 6664 call_throws(self.ctx, ['mount', js['data']['path'], self.osd_data_dir])
1911f103 6665 except ValueError as e:
f67539c2 6666 logger.info('Invalid JSON in {}: {}'.format(osd_file, e))
1911f103
TL
6667
6668 return osd_fsid, osd_type
6669
2a845540
TL
6670 def change_cluster_name(self) -> None:
6671 logger.info('Attempting to convert osd cluster name to ceph . . .')
6672 c = get_ceph_volume_container(
6673 self.ctx,
6674 args=['lvm', 'list', '{}'.format(self.osd_id), '--format=json'],
6675 )
6676 out, err, code = call_throws(self.ctx, c.run_cmd())
6677 if code:
6678 raise Exception(f'Failed to get list of LVs: {err}\nceph-volume failed with rc {code}')
6679 try:
6680 js = json.loads(out)
6681 if not js:
6682 raise RuntimeError(f'Failed to find osd.{self.osd_id}')
6683 device: Optional[Dict[Any, Any]] = None
6684 for d in js[self.osd_id]:
6685 if d['type'] == 'block':
6686 device = d
6687 break
6688 if not device:
6689 raise RuntimeError(f'Failed to find block device for osd.{self.osd_id}')
6690 vg = device['vg_name']
6691 out, err, code = call_throws(self.ctx, ['lvchange', '--deltag', f'ceph.cluster_name={self.ctx.cluster}', vg])
6692 if code:
6693 raise RuntimeError(f"Can't delete tag ceph.cluster_name={self.ctx.cluster} on osd.{self.osd_id}.\nlvchange failed with rc {code}")
6694 out, err, code = call_throws(self.ctx, ['lvchange', '--addtag', 'ceph.cluster_name=ceph', vg])
6695 if code:
6696 raise RuntimeError(f"Can't add tag ceph.cluster_name=ceph on osd.{self.osd_id}.\nlvchange failed with rc {code}")
6697 logger.info('Successfully converted osd cluster name')
6698 except (Exception, RuntimeError) as e:
6699 logger.info(f'Failed to convert osd cluster name: {e}')
6700
9f95a23c 6701
f67539c2
TL
6702def command_adopt_ceph(ctx, daemon_type, daemon_id, fsid):
6703 # type: (CephadmContext, str, str, str) -> None
9f95a23c 6704
f67539c2 6705 (uid, gid) = extract_uid_gid(ctx)
9f95a23c
TL
6706
6707 data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
f67539c2
TL
6708 (daemon_type, ctx.cluster, daemon_id))
6709 data_dir_src = os.path.abspath(ctx.legacy_dir + data_dir_src)
9f95a23c 6710
1911f103
TL
6711 if not os.path.exists(data_dir_src):
6712 raise Error("{}.{} data directory '{}' does not exist. "
f67539c2
TL
6713 'Incorrect ID specified, or daemon already adopted?'.format(
6714 daemon_type, daemon_id, data_dir_src))
1911f103 6715
9f95a23c
TL
6716 osd_fsid = None
6717 if daemon_type == 'osd':
f67539c2 6718 adopt_osd = AdoptOsd(ctx, data_dir_src, daemon_id)
1911f103
TL
6719 osd_fsid, osd_type = adopt_osd.check_online_osd()
6720 if not osd_fsid:
6721 osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
6722 if not osd_fsid:
6723 osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
6724 if not osd_fsid:
6725 raise Error('Unable to find OSD {}'.format(daemon_id))
2a845540
TL
6726 elif ctx.cluster != 'ceph':
6727 adopt_osd.change_cluster_name()
1911f103 6728 logger.info('objectstore_type is %s' % osd_type)
e306af50 6729 assert osd_type
1911f103 6730 if osd_type == 'filestore':
9f95a23c
TL
6731 raise Error('FileStore is not supported by cephadm')
6732
6733 # NOTE: implicit assumption here that the units correspond to the
6734 # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
6735 # CLUSTER field.
6736 unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
f67539c2 6737 (enabled, state, _) = check_unit(ctx, unit_name)
9f95a23c
TL
6738 if state == 'running':
6739 logger.info('Stopping old systemd unit %s...' % unit_name)
f67539c2 6740 call_throws(ctx, ['systemctl', 'stop', unit_name])
9f95a23c
TL
6741 if enabled:
6742 logger.info('Disabling old systemd unit %s...' % unit_name)
f67539c2 6743 call_throws(ctx, ['systemctl', 'disable', unit_name])
9f95a23c
TL
6744
6745 # data
6746 logger.info('Moving data...')
f67539c2 6747 data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
9f95a23c 6748 uid=uid, gid=gid)
f67539c2 6749 move_files(ctx, glob(os.path.join(data_dir_src, '*')),
9f95a23c
TL
6750 data_dir_dst,
6751 uid=uid, gid=gid)
f67539c2 6752 logger.debug('Remove dir `%s`' % (data_dir_src))
9f95a23c 6753 if os.path.ismount(data_dir_src):
f67539c2 6754 call_throws(ctx, ['umount', data_dir_src])
9f95a23c
TL
6755 os.rmdir(data_dir_src)
6756
6757 logger.info('Chowning content...')
f67539c2 6758 call_throws(ctx, ['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])
9f95a23c
TL
6759
6760 if daemon_type == 'mon':
6761 # rename *.ldb -> *.sst, in case they are coming from ubuntu
6762 store = os.path.join(data_dir_dst, 'store.db')
6763 num_renamed = 0
6764 if os.path.exists(store):
6765 for oldf in os.listdir(store):
6766 if oldf.endswith('.ldb'):
6767 newf = oldf.replace('.ldb', '.sst')
6768 oldp = os.path.join(store, oldf)
6769 newp = os.path.join(store, newf)
6770 logger.debug('Renaming %s -> %s' % (oldp, newp))
6771 os.rename(oldp, newp)
num_renamed += 1  # count renames so the summary below is actually logged
6772 if num_renamed:
6773 logger.info('Renamed %d leveldb *.ldb files to *.sst',
6774 num_renamed)
6775 if daemon_type == 'osd':
6776 for n in ['block', 'block.db', 'block.wal']:
6777 p = os.path.join(data_dir_dst, n)
6778 if os.path.exists(p):
6779 logger.info('Chowning %s...' % p)
6780 os.chown(p, uid, gid)
6781 # disable the ceph-volume 'simple' mode files on the host
6782 simple_fn = os.path.join('/etc/ceph/osd',
6783 '%s-%s.json' % (daemon_id, osd_fsid))
6784 if os.path.exists(simple_fn):
6785 new_fn = simple_fn + '.adopted-by-cephadm'
6786 logger.info('Renaming %s -> %s', simple_fn, new_fn)
6787 os.rename(simple_fn, new_fn)
6788 logger.info('Disabling host unit ceph-volume@ simple unit...')
f67539c2
TL
6789 call(ctx, ['systemctl', 'disable',
6790 'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
9f95a23c
TL
6791 else:
6792 # assume this is an 'lvm' c-v for now, but don't error
6793 # out if it's not.
6794 logger.info('Disabling host unit ceph-volume@ lvm unit...')
f67539c2
TL
6795 call(ctx, ['systemctl', 'disable',
6796 'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])
9f95a23c
TL
6797
6798 # config
f67539c2
TL
6799 config_src = '/etc/ceph/%s.conf' % (ctx.cluster)
6800 config_src = os.path.abspath(ctx.legacy_dir + config_src)
9f95a23c 6801 config_dst = os.path.join(data_dir_dst, 'config')
f67539c2 6802 copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
9f95a23c
TL
6803
6804 # logs
6805 logger.info('Moving logs...')
6806 log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
f67539c2
TL
6807 (ctx.cluster, daemon_type, daemon_id))
6808 log_dir_src = os.path.abspath(ctx.legacy_dir + log_dir_src)
6809 log_dir_dst = make_log_dir(ctx, fsid, uid=uid, gid=gid)
6810 move_files(ctx, glob(log_dir_src),
9f95a23c
TL
6811 log_dir_dst,
6812 uid=uid, gid=gid)
6813
6814 logger.info('Creating new units...')
f67539c2
TL
6815 make_var_run(ctx, fsid, uid, gid)
6816 c = get_container(ctx, fsid, daemon_type, daemon_id)
6817 deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, c,
9f95a23c 6818 enable=True, # unconditionally enable the new unit
f67539c2 6819 start=(state == 'running' or ctx.force_start),
9f95a23c 6820 osd_fsid=osd_fsid)
f67539c2 6821 update_firewalld(ctx, daemon_type)
9f95a23c
TL
6822
6823
f67539c2
TL
6824def command_adopt_prometheus(ctx, daemon_id, fsid):
6825 # type: (CephadmContext, str, str) -> None
9f95a23c 6826 daemon_type = 'prometheus'
f67539c2 6827 (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
9f95a23c 6828
f67539c2 6829 _stop_and_disable(ctx, 'prometheus')
9f95a23c 6830
f67539c2
TL
6831 data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
6832 uid=uid, gid=gid)
9f95a23c
TL
6833
6834 # config
6835 config_src = '/etc/prometheus/prometheus.yml'
f67539c2 6836 config_src = os.path.abspath(ctx.legacy_dir + config_src)
9f95a23c 6837 config_dst = os.path.join(data_dir_dst, 'etc/prometheus')
1911f103 6838 makedirs(config_dst, uid, gid, 0o755)
f67539c2 6839 copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
9f95a23c
TL
6840
6841 # data
6842 data_src = '/var/lib/prometheus/metrics/'
f67539c2 6843 data_src = os.path.abspath(ctx.legacy_dir + data_src)
9f95a23c 6844 data_dst = os.path.join(data_dir_dst, 'data')
f67539c2 6845 copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)
9f95a23c 6846
f67539c2
TL
6847 make_var_run(ctx, fsid, uid, gid)
6848 c = get_container(ctx, fsid, daemon_type, daemon_id)
6849 deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
6850 update_firewalld(ctx, daemon_type)
9f95a23c 6851
f6b5b4d7 6852
f67539c2
TL
6853def command_adopt_grafana(ctx, daemon_id, fsid):
6854 # type: (CephadmContext, str, str) -> None
9f95a23c
TL
6855
6856 daemon_type = 'grafana'
f67539c2 6857 (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
9f95a23c 6858
f67539c2 6859 _stop_and_disable(ctx, 'grafana-server')
9f95a23c 6860
f67539c2
TL
6861 data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
6862 uid=uid, gid=gid)
9f95a23c
TL
6863
6864 # config
6865 config_src = '/etc/grafana/grafana.ini'
f67539c2 6866 config_src = os.path.abspath(ctx.legacy_dir + config_src)
9f95a23c
TL
6867 config_dst = os.path.join(data_dir_dst, 'etc/grafana')
6868 makedirs(config_dst, uid, gid, 0o755)
f67539c2 6869 copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
9f95a23c
TL
6870
6871 prov_src = '/etc/grafana/provisioning/'
f67539c2 6872 prov_src = os.path.abspath(ctx.legacy_dir + prov_src)
9f95a23c 6873 prov_dst = os.path.join(data_dir_dst, 'etc/grafana')
f67539c2 6874 copy_tree(ctx, [prov_src], prov_dst, uid=uid, gid=gid)
9f95a23c
TL
6875
6876 # cert
6877 cert = '/etc/grafana/grafana.crt'
6878 key = '/etc/grafana/grafana.key'
6879 if os.path.exists(cert) and os.path.exists(key):
6880 cert_src = '/etc/grafana/grafana.crt'
f67539c2 6881 cert_src = os.path.abspath(ctx.legacy_dir + cert_src)
9f95a23c
TL
6882 makedirs(os.path.join(data_dir_dst, 'etc/grafana/certs'), uid, gid, 0o755)
6883 cert_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_file')
f67539c2 6884 copy_files(ctx, [cert_src], cert_dst, uid=uid, gid=gid)
9f95a23c
TL
6885
6886 key_src = '/etc/grafana/grafana.key'
f67539c2 6887 key_src = os.path.abspath(ctx.legacy_dir + key_src)
9f95a23c 6888 key_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_key')
f67539c2 6889 copy_files(ctx, [key_src], key_dst, uid=uid, gid=gid)
9f95a23c
TL
6890
6891 _adjust_grafana_ini(os.path.join(config_dst, 'grafana.ini'))
6892 else:
f67539c2 6893 logger.debug('Skipping ssl, missing cert {} or key {}'.format(cert, key))
9f95a23c 6894
9f95a23c
TL
6895 # data - possible custom dashboards/plugins
6896 data_src = '/var/lib/grafana/'
f67539c2 6897 data_src = os.path.abspath(ctx.legacy_dir + data_src)
9f95a23c 6898 data_dst = os.path.join(data_dir_dst, 'data')
f67539c2 6899 copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)
9f95a23c 6900
f67539c2
TL
6901 make_var_run(ctx, fsid, uid, gid)
6902 c = get_container(ctx, fsid, daemon_type, daemon_id)
6903 deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
6904 update_firewalld(ctx, daemon_type)
9f95a23c 6905
f6b5b4d7 6906
f67539c2
TL
6907def command_adopt_alertmanager(ctx, daemon_id, fsid):
6908 # type: (CephadmContext, str, str) -> None
801d1391
TL
6909
6910 daemon_type = 'alertmanager'
f67539c2 6911 (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
801d1391 6912
f67539c2 6913 _stop_and_disable(ctx, 'prometheus-alertmanager')
801d1391 6914
f67539c2
TL
6915 data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
6916 uid=uid, gid=gid)
801d1391
TL
6917
6918 # config
6919 config_src = '/etc/prometheus/alertmanager.yml'
f67539c2 6920 config_src = os.path.abspath(ctx.legacy_dir + config_src)
801d1391
TL
6921 config_dst = os.path.join(data_dir_dst, 'etc/alertmanager')
6922 makedirs(config_dst, uid, gid, 0o755)
f67539c2 6923 copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)
801d1391
TL
6924
6925 # data
6926 data_src = '/var/lib/prometheus/alertmanager/'
f67539c2 6927 data_src = os.path.abspath(ctx.legacy_dir + data_src)
801d1391 6928 data_dst = os.path.join(data_dir_dst, 'etc/alertmanager/data')
f67539c2 6929 copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)
801d1391 6930
f67539c2
TL
6931 make_var_run(ctx, fsid, uid, gid)
6932 c = get_container(ctx, fsid, daemon_type, daemon_id)
6933 deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
6934 update_firewalld(ctx, daemon_type)
801d1391 6935
f6b5b4d7 6936
9f95a23c
TL
6937def _adjust_grafana_ini(filename):
6938 # type: (str) -> None
6939
6940 # Update cert_file, cert_key pathnames in server section
6941 # ConfigParser does not preserve comments
6942 try:
f67539c2 6943 with open(filename, 'r') as grafana_ini:
9f95a23c 6944 lines = grafana_ini.readlines()
f67539c2
TL
6945 with open('{}.new'.format(filename), 'w') as grafana_ini:
6946 server_section = False
9f95a23c
TL
6947 for line in lines:
6948 if line.startswith('['):
f67539c2 6949 server_section = False
9f95a23c 6950 if line.startswith('[server]'):
f67539c2 6951 server_section = True
9f95a23c
TL
6952 if server_section:
6953 line = re.sub(r'^cert_file.*',
f67539c2 6954 'cert_file = /etc/grafana/certs/cert_file', line)
9f95a23c 6955 line = re.sub(r'^cert_key.*',
f67539c2 6956 'cert_key = /etc/grafana/certs/cert_key', line)
9f95a23c 6957 grafana_ini.write(line)
f67539c2 6958 os.rename('{}.new'.format(filename), filename)
9f95a23c 6959 except OSError as err:
f67539c2 6960 raise Error('Cannot update {}: {}'.format(filename, err))
9f95a23c
TL
6961
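# Illustrative before/after (hypothetical grafana.ini fragment) for the
# rewrite performed by _adjust_grafana_ini() above:
#
#   [server]                                   [server]
#   cert_file = /etc/grafana/grafana.crt  -->  cert_file = /etc/grafana/certs/cert_file
#   cert_key = /etc/grafana/grafana.key   -->  cert_key = /etc/grafana/certs/cert_key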
6962
f67539c2
TL
6963def _stop_and_disable(ctx, unit_name):
6964 # type: (CephadmContext, str) -> None
9f95a23c 6965
f67539c2 6966 (enabled, state, _) = check_unit(ctx, unit_name)
9f95a23c
TL
6967 if state == 'running':
6968 logger.info('Stopping old systemd unit %s...' % unit_name)
f67539c2 6969 call_throws(ctx, ['systemctl', 'stop', unit_name])
9f95a23c
TL
6970 if enabled:
6971 logger.info('Disabling old systemd unit %s...' % unit_name)
f67539c2 6972 call_throws(ctx, ['systemctl', 'disable', unit_name])
9f95a23c
TL
6973
6974##################################
6975
9f95a23c 6976
f67539c2
TL
6977def command_rm_daemon(ctx):
6978 # type: (CephadmContext) -> None
6979 lock = FileLock(ctx, ctx.fsid)
6980 lock.acquire()
9f95a23c 6981
f67539c2
TL
6982 (daemon_type, daemon_id) = ctx.name.split('.', 1)
6983 unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)
e306af50 6984
f67539c2 6985 if daemon_type in ['mon', 'osd'] and not ctx.force:
9f95a23c 6986 raise Error('must pass --force to proceed: '
f67539c2 6987 'this command may destroy precious data!')
e306af50 6988
f67539c2 6989 call(ctx, ['systemctl', 'stop', unit_name],
adb31ebb 6990 verbosity=CallVerbosity.DEBUG)
f67539c2 6991 call(ctx, ['systemctl', 'reset-failed', unit_name],
adb31ebb 6992 verbosity=CallVerbosity.DEBUG)
f67539c2 6993 call(ctx, ['systemctl', 'disable', unit_name],
adb31ebb 6994 verbosity=CallVerbosity.DEBUG)
f67539c2 6995 data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_id)
9f95a23c 6996 if daemon_type in ['mon', 'osd', 'prometheus'] and \
f67539c2 6997 not ctx.force_delete_data:
9f95a23c 6998 # rename it out of the way -- do not delete
f67539c2 6999 backup_dir = os.path.join(ctx.data_dir, ctx.fsid, 'removed')
9f95a23c
TL
7000 if not os.path.exists(backup_dir):
7001 makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
7002 dirname = '%s.%s_%s' % (daemon_type, daemon_id,
7003 datetime.datetime.utcnow().strftime(DATEFMT))
7004 os.rename(data_dir,
7005 os.path.join(backup_dir, dirname))
7006 else:
f67539c2 7007 call_throws(ctx, ['rm', '-rf', data_dir])
9f95a23c 7008
33c7a0ef
TL
7009 if 'tcp_ports' in ctx and ctx.tcp_ports is not None:
7010 ports: List[int] = [int(p) for p in ctx.tcp_ports.split()]
7011 try:
7012 fw = Firewalld(ctx)
7013 fw.close_ports(ports)
7014 fw.apply_rules()
7015 except RuntimeError as e:
7016 # in case we cannot close the ports we still remove
7017 # the daemon, but the ports are left open.
7018 logger.warning(f' Error when trying to close ports: {e}')
7019
7020
9f95a23c
TL
7021##################################
7022
f6b5b4d7 7023
522d829b 7024def _zap(ctx: CephadmContext, what: str) -> None:
b3b6e05e 7025 mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
20effc67
TL
7026 c = get_ceph_volume_container(ctx,
7027 args=['lvm', 'zap', '--destroy', what],
7028 volume_mounts=mounts,
7029 envs=ctx.env)
b3b6e05e
TL
7030 logger.info(f'Zapping {what}...')
7031 out, err, code = call_throws(ctx, c.run_cmd())
7032
7033
7034@infer_image
522d829b 7035def _zap_osds(ctx: CephadmContext) -> None:
b3b6e05e
TL
7036 # assume fsid lock already held
7037
7038 # list
7039 mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
20effc67
TL
7040 c = get_ceph_volume_container(ctx,
7041 args=['inventory', '--format', 'json'],
7042 volume_mounts=mounts,
7043 envs=ctx.env)
b3b6e05e
TL
7044 out, err, code = call_throws(ctx, c.run_cmd())
7045 if code:
7046 raise Error('failed to list osd inventory')
7047 try:
7048 ls = json.loads(out)
7049 except ValueError as e:
7050 raise Error(f'Invalid JSON in ceph-volume inventory: {e}')
7051
7052 for i in ls:
2a845540 7053 matches = [lv.get('cluster_fsid') == ctx.fsid and i.get('ceph_device') for lv in i.get('lvs', [])]
b3b6e05e
TL
7054 if any(matches) and all(matches):
7055 _zap(ctx, i.get('path'))
7056 elif any(matches):
7057 lv_names = [lv['name'] for lv in i.get('lvs', [])]
7058 # TODO: we need to map the lv_names back to device paths (the vg
7059 # id isn't part of the output here!)
7060 logger.warning(f'Not zapping LVs (not implemented): {lv_names}')
7061
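# Illustrative sketch of the matching rule above, with a hypothetical
# ceph-volume inventory entry (field names as the code reads them):
#
#   {'path': '/dev/vdb', 'ceph_device': True,
#    'lvs': [{'name': 'osd-block-0', 'cluster_fsid': '<ctx.fsid>'}]}
#
# When every LV matches this cluster, the whole device is zapped; if only
# some match, the device is skipped with the warning above.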
7062
522d829b 7063def command_zap_osds(ctx: CephadmContext) -> None:
b3b6e05e
TL
7064 if not ctx.force:
7065 raise Error('must pass --force to proceed: '
7066 'this command may destroy precious data!')
7067
7068 lock = FileLock(ctx, ctx.fsid)
7069 lock.acquire()
7070
7071 _zap_osds(ctx)
7072
7073##################################
7074
7075
33c7a0ef
TL
7076def get_ceph_cluster_count(ctx: CephadmContext) -> int:
7077 return len([c for c in os.listdir(ctx.data_dir) if is_fsid(c)])
7078
7079
f67539c2
TL
7080def command_rm_cluster(ctx):
7081 # type: (CephadmContext) -> None
7082 if not ctx.force:
9f95a23c 7083 raise Error('must pass --force to proceed: '
f67539c2 7084 'this command may destroy precious data!')
9f95a23c 7085
f67539c2
TL
7086 lock = FileLock(ctx, ctx.fsid)
7087 lock.acquire()
9f95a23c 7088
33c7a0ef 7089 def disable_systemd_service(unit_name: str) -> None:
f67539c2 7090 call(ctx, ['systemctl', 'stop', unit_name],
adb31ebb 7091 verbosity=CallVerbosity.DEBUG)
f67539c2 7092 call(ctx, ['systemctl', 'reset-failed', unit_name],
adb31ebb 7093 verbosity=CallVerbosity.DEBUG)
f67539c2 7094 call(ctx, ['systemctl', 'disable', unit_name],
adb31ebb 7095 verbosity=CallVerbosity.DEBUG)
9f95a23c 7096
33c7a0ef
TL
7097 # stop + disable individual daemon units
7098 for d in list_daemons(ctx, detail=False):
7099 if d['fsid'] != ctx.fsid:
7100 continue
7101 if d['style'] != 'cephadm:v1':
7102 continue
7103 disable_systemd_service(get_unit_name(ctx.fsid, d['name']))
7104
9f95a23c 7105 # cluster units
f67539c2 7106 for unit_name in ['ceph-%s.target' % ctx.fsid]:
33c7a0ef 7107 disable_systemd_service(unit_name)
9f95a23c 7108
522d829b 7109 slice_name = 'system-ceph\\x2d{}.slice'.format(ctx.fsid.replace('-', '\\x2d'))
f67539c2 7110 call(ctx, ['systemctl', 'stop', slice_name],
adb31ebb 7111 verbosity=CallVerbosity.DEBUG)
9f95a23c 7112
b3b6e05e
TL
7113 # osds?
7114 if ctx.zap_osds:
7115 _zap_osds(ctx)
7116
9f95a23c 7117 # rm units
b3b6e05e
TL
7118 call_throws(ctx, ['rm', '-f', ctx.unit_dir
7119 + '/ceph-%s@.service' % ctx.fsid])
7120 call_throws(ctx, ['rm', '-f', ctx.unit_dir
7121 + '/ceph-%s.target' % ctx.fsid])
f67539c2
TL
7122 call_throws(ctx, ['rm', '-rf',
7123 ctx.unit_dir + '/ceph-%s.target.wants' % ctx.fsid])
9f95a23c 7124 # rm data
f67539c2
TL
7125 call_throws(ctx, ['rm', '-rf', ctx.data_dir + '/' + ctx.fsid])
7126
7127 if not ctx.keep_logs:
7128 # rm logs
7129 call_throws(ctx, ['rm', '-rf', ctx.log_dir + '/' + ctx.fsid])
b3b6e05e
TL
7130 call_throws(ctx, ['rm', '-rf', ctx.log_dir
7131 + '/*.wants/ceph-%s@*' % ctx.fsid])
f67539c2 7132
9f95a23c 7133 # rm logrotate config
f67539c2 7134 call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/ceph-%s' % ctx.fsid])
9f95a23c 7135
33c7a0ef
TL
7136 # if last cluster on host remove shared files
7137 if get_ceph_cluster_count(ctx) == 0:
7138 disable_systemd_service('ceph.target')
7139
7140 # rm shared ceph target files
7141 call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/multi-user.target.wants/ceph.target'])
7142 call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/ceph.target'])
7143
7144 # rm cephadm logrotate config
b3b6e05e
TL
7145 call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/cephadm'])
7146
33c7a0ef
TL
7147 if not ctx.keep_logs:
7148 # remove all cephadm logs
7149 for fname in glob(f'{ctx.log_dir}/cephadm.log*'):
7150 os.remove(fname)
7151
b3b6e05e 7152 # rm sysctl settings
33c7a0ef 7153 sysctl_dirs: List[Path] = [Path(ctx.sysctl_dir), Path('/usr/lib/sysctl.d')]
b3b6e05e 7154
33c7a0ef
TL
7155 for sysctl_dir in sysctl_dirs:
7156 for p in sysctl_dir.glob(f'90-ceph-{ctx.fsid}-*.conf'):
7157 p.unlink()
1911f103 7158
33c7a0ef
TL
7159 # cleanup remaining ceph directories
7160 ceph_dirs = [f'/run/ceph/{ctx.fsid}', f'/tmp/var/lib/ceph/{ctx.fsid}', f'/var/run/ceph/{ctx.fsid}']
7161 for dd in ceph_dirs:
7162 shutil.rmtree(dd, ignore_errors=True)
7163
7164 # clean up config, keyring, and pub key files
7165 files = [CEPH_DEFAULT_CONF, CEPH_DEFAULT_PUBKEY, CEPH_DEFAULT_KEYRING]
1911f103
TL
7166 if os.path.exists(files[0]):
7167 valid_fsid = False
7168 with open(files[0]) as f:
f67539c2 7169 if ctx.fsid in f.read():
1911f103
TL
7170 valid_fsid = True
7171 if valid_fsid:
33c7a0ef 7172 # rm configuration files in /etc/ceph
1911f103
TL
7173 for n in range(0, len(files)):
7174 if os.path.exists(files[n]):
7175 os.remove(files[n])
7176
9f95a23c
TL
7177##################################
7178
f67539c2
TL
7179
7180def check_time_sync(ctx, enabler=None):
7181 # type: (CephadmContext, Optional[Packager]) -> bool
9f95a23c
TL
7182 units = [
7183 'chrony.service', # 18.04 (at least)
f67539c2 7184 'chronyd.service', # el / opensuse
9f95a23c 7185 'systemd-timesyncd.service',
f67539c2 7186 'ntpd.service', # el7 (at least)
9f95a23c 7187 'ntp.service', # 18.04 (at least)
f91f0fd5 7188 'ntpsec.service', # 20.04 (at least) / buster
522d829b 7189 'openntpd.service', # ubuntu / debian
9f95a23c 7190 ]
f67539c2 7191 if not check_units(ctx, units, enabler):
9f95a23c
TL
7192 logger.warning('No time sync service is running; checked for %s' % units)
7193 return False
7194 return True
7195
f6b5b4d7 7196
f67539c2 7197def command_check_host(ctx: CephadmContext) -> None:
1911f103 7198 errors = []
9f95a23c
TL
7199 commands = ['systemctl', 'lvcreate']
7200
f67539c2 7201 try:
a4b75251
TL
7202 engine = check_container_engine(ctx)
7203 logger.info(f'{engine} is present')
f67539c2
TL
7204 except Error as e:
7205 errors.append(str(e))
1911f103 7206
9f95a23c
TL
7207 for command in commands:
7208 try:
7209 find_program(command)
7210 logger.info('%s is present' % command)
7211 except ValueError:
1911f103 7212 errors.append('%s binary does not appear to be installed' % command)
9f95a23c
TL
7213
7214 # check for configured+running chronyd or ntp
f67539c2 7215 if not check_time_sync(ctx):
1911f103 7216 errors.append('No time synchronization is active')
9f95a23c 7217
f67539c2
TL
7218 if 'expect_hostname' in ctx and ctx.expect_hostname:
7219 if get_hostname().lower() != ctx.expect_hostname.lower():
1911f103 7220 errors.append('hostname "%s" does not match expected hostname "%s"' % (
f67539c2 7221 get_hostname(), ctx.expect_hostname))
20effc67
TL
7222 else:
7223 logger.info('Hostname "%s" matches what is expected.',
7224 ctx.expect_hostname)
9f95a23c 7225
1911f103 7226 if errors:
f67539c2 7227 raise Error('\nERROR: '.join(errors))
1911f103 7228
9f95a23c
TL
7229 logger.info('Host looks OK')
7230
7231##################################
7232
f6b5b4d7 7233
33c7a0ef
TL
7234def get_ssh_vars(ssh_user: str) -> Tuple[int, int, str]:
7235 try:
7236 s_pwd = pwd.getpwnam(ssh_user)
7237 except KeyError:
7238 raise Error('Cannot find uid/gid for ssh-user: %s' % (ssh_user))
7239
7240 ssh_uid = s_pwd.pw_uid
7241 ssh_gid = s_pwd.pw_gid
7242 ssh_dir = os.path.join(s_pwd.pw_dir, '.ssh')
7243 return ssh_uid, ssh_gid, ssh_dir
7244
7245
7246def authorize_ssh_key(ssh_pub_key: str, ssh_user: str) -> bool:
7247 """Authorize the public key for the provided ssh user"""
7248
7249 def key_in_file(path: str, key: str) -> bool:
7250 if not os.path.exists(path):
7251 return False
7252 with open(path) as f:
7253 lines = f.readlines()
7254 for line in lines:
7255 if line.strip() == key.strip():
7256 return True
7257 return False
7258
7259 logger.info(f'Adding key to {ssh_user}@localhost authorized_keys...')
7260 if ssh_pub_key is None or ssh_pub_key.isspace():
7261 raise Error('Trying to authorize an empty ssh key')
7262
7263 ssh_pub_key = ssh_pub_key.strip()
7264 ssh_uid, ssh_gid, ssh_dir = get_ssh_vars(ssh_user)
7265 if not os.path.exists(ssh_dir):
7266 makedirs(ssh_dir, ssh_uid, ssh_gid, 0o700)
7267
7268 auth_keys_file = '%s/authorized_keys' % ssh_dir
7269 if key_in_file(auth_keys_file, ssh_pub_key):
7270 logger.info(f'key already in {ssh_user}@localhost authorized_keys...')
7271 return False
7272
7273 add_newline = False
7274 if os.path.exists(auth_keys_file):
7275 with open(auth_keys_file, 'r') as f:
7276 f.seek(0, os.SEEK_END)
7277 if f.tell() > 0:
7278 f.seek(f.tell() - 1, os.SEEK_SET) # go to last char
7279 if f.read() != '\n':
7280 add_newline = True
7281
7282 with open(auth_keys_file, 'a') as f:
7283 os.fchown(f.fileno(), ssh_uid, ssh_gid) # just in case we created it
7284 os.fchmod(f.fileno(), 0o600) # just in case we created it
7285 if add_newline:
7286 f.write('\n')
7287 f.write(ssh_pub_key + '\n')
7288
7289 return True
7290
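# Hedged usage sketch (not part of upstream cephadm): the user name and key
# material below are hypothetical. authorize_ssh_key() returns True only
# when the key was newly appended, which lets callers undo their change:
#
#   pub = 'ssh-ed25519 AAAA<...> cephadm-check'   # hypothetical key
#   if authorize_ssh_key(pub, 'deploy'):
#       revoke_ssh_key(pub, 'deploy')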
7291
7292def revoke_ssh_key(key: str, ssh_user: str) -> None:
7293 """Revoke the public key authorization for the ssh user"""
7294 ssh_uid, ssh_gid, ssh_dir = get_ssh_vars(ssh_user)
7295 auth_keys_file = '%s/authorized_keys' % ssh_dir
7296 deleted = False
7297 if os.path.exists(auth_keys_file):
7298 with open(auth_keys_file, 'r') as f:
7299 lines = f.readlines()
7300 _, filename = tempfile.mkstemp()
7301 with open(filename, 'w') as f:
7302 os.fchown(f.fileno(), ssh_uid, ssh_gid)
7303 os.fchmod(f.fileno(), 0o600) # secure access to the keys file
7304 for line in lines:
7305 if line.strip() == key.strip():
7306 deleted = True
7307 else:
7308 f.write(line)
7309
7310 if deleted:
7311 shutil.move(filename, auth_keys_file)
7312 else:
7313 logger.warning('Cannot find the ssh key to be deleted')
7314
7315
7316def check_ssh_connectivity(ctx: CephadmContext) -> None:
7317
7318 def cmd_is_available(cmd: str) -> bool:
7319 if shutil.which(cmd) is None:
7320 logger.warning(f'Command not found: {cmd}')
7321 return False
7322 return True
7323
7324 if not cmd_is_available('ssh') or not cmd_is_available('ssh-keygen'):
7325 logger.warning('Cannot check ssh connectivity. Skipping...')
7326 return
7327
7328 logger.info('Verifying ssh connectivity ...')
7329 if ctx.ssh_private_key and ctx.ssh_public_key:
7330 # let's use the keys provided by the user
7331 ssh_priv_key_path = pathify(ctx.ssh_private_key.name)
7332 ssh_pub_key_path = pathify(ctx.ssh_public_key.name)
7333 else:
7334 # no custom keys, let's generate some random keys just for this check
7335 ssh_priv_key_path = f'/tmp/ssh_key_{uuid.uuid1()}'
7336 ssh_pub_key_path = f'{ssh_priv_key_path}.pub'
7337 ssh_key_gen_cmd = ['ssh-keygen', '-q', '-t', 'rsa', '-N', '', '-C', '', '-f', ssh_priv_key_path]
7338 _, _, code = call(ctx, ssh_key_gen_cmd)
7339 if code != 0:
7340 logger.warning('Cannot generate keys to check ssh connectivity.')
7341 return
7342
7343 with open(ssh_pub_key_path, 'r') as f:
7344 key = f.read().strip()
7345 new_key = authorize_ssh_key(key, ctx.ssh_user)
7346 ssh_cfg_file_arg = ['-F', pathify(ctx.ssh_config.name)] if ctx.ssh_config else []
7347 _, _, code = call(ctx, ['ssh', '-o StrictHostKeyChecking=no',
7348 *ssh_cfg_file_arg, '-i', ssh_priv_key_path,
7349 '-o PasswordAuthentication=no',
7350 f'{ctx.ssh_user}@{get_hostname()}',
7351 'sudo echo'])
7352
7353 # we only remove the key if it's a new one. If the user provided an
7354 # already existing key, we don't alter the authorized_keys file
7355 if new_key:
7356 revoke_ssh_key(key, ctx.ssh_user)
7357
7358 pub_key_msg = '- The public key file configured by --ssh-public-key is valid\n' if ctx.ssh_public_key else ''
7359 prv_key_msg = '- The private key file configured by --ssh-private-key is valid\n' if ctx.ssh_private_key else ''
7360 ssh_cfg_msg = '- The ssh configuration file configured by --ssh-config is valid\n' if ctx.ssh_config else ''
7361 err_msg = f"""
7362** Please verify your user's ssh configuration and make sure:
7363- User {ctx.ssh_user} must have passwordless sudo access
7364{pub_key_msg}{prv_key_msg}{ssh_cfg_msg}
7365"""
7366 if code != 0:
7367 raise Error(err_msg)
7368
7369
f67539c2 7370def command_prepare_host(ctx: CephadmContext) -> None:
9f95a23c
TL
7371 logger.info('Verifying podman|docker is present...')
7372 pkg = None
f67539c2
TL
7373 try:
7374 check_container_engine(ctx)
7375 except Error as e:
7376 logger.warning(str(e))
9f95a23c 7377 if not pkg:
f67539c2 7378 pkg = create_packager(ctx)
9f95a23c
TL
7379 pkg.install_podman()
7380
7381 logger.info('Verifying lvm2 is present...')
7382 if not find_executable('lvcreate'):
7383 if not pkg:
f67539c2 7384 pkg = create_packager(ctx)
9f95a23c
TL
7385 pkg.install(['lvm2'])
7386
7387 logger.info('Verifying time synchronization is in place...')
f67539c2 7388 if not check_time_sync(ctx):
9f95a23c 7389 if not pkg:
f67539c2 7390 pkg = create_packager(ctx)
9f95a23c
TL
7391 pkg.install(['chrony'])
7392 # check again, and this time try to enable
7393 # the service
f67539c2 7394 check_time_sync(ctx, enabler=pkg)
9f95a23c 7395
f67539c2
TL
7396 if 'expect_hostname' in ctx and ctx.expect_hostname and ctx.expect_hostname != get_hostname():
7397 logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), ctx.expect_hostname))
7398 call_throws(ctx, ['hostname', ctx.expect_hostname])
9f95a23c 7399 with open('/etc/hostname', 'w') as f:
f67539c2 7400 f.write(ctx.expect_hostname + '\n')
9f95a23c
TL
7401
7402 logger.info('Repeating the final host check...')
f67539c2 7403 command_check_host(ctx)
9f95a23c
TL
7404
7405##################################
7406
f6b5b4d7 7407
9f95a23c
TL
7408class CustomValidation(argparse.Action):
7409
522d829b 7410 def _check_name(self, values: str) -> None:
9f95a23c
TL
7411 try:
7412 (daemon_type, daemon_id) = values.split('.', 1)
7413 except ValueError:
7414 raise argparse.ArgumentError(self,
f67539c2 7415 'must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com')
9f95a23c
TL
7416
7417 daemons = get_supported_daemons()
7418 if daemon_type not in daemons:
7419 raise argparse.ArgumentError(self,
f67539c2
TL
7420 'name must declare the type of daemon e.g. '
7421 '{}'.format(', '.join(daemons)))
9f95a23c 7422
522d829b
TL
7423 def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace, values: Union[str, Sequence[Any], None],
7424 option_string: Optional[str] = None) -> None:
7425 assert isinstance(values, str)
f67539c2 7426 if self.dest == 'name':
9f95a23c
TL
7427 self._check_name(values)
7428 setattr(namespace, self.dest, values)
7429
7430##################################
7431
f6b5b4d7 7432
9f95a23c 7433def get_distro():
e306af50 7434 # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
9f95a23c
TL
7435 distro = None
7436 distro_version = None
7437 distro_codename = None
7438 with open('/etc/os-release', 'r') as f:
7439 for line in f.readlines():
7440 line = line.strip()
7441 if '=' not in line or line.startswith('#'):
7442 continue
7443 (var, val) = line.split('=', 1)
7444 if val[0] == '"' and val[-1] == '"':
7445 val = val[1:-1]
7446 if var == 'ID':
7447 distro = val.lower()
7448 elif var == 'VERSION_ID':
7449 distro_version = val.lower()
7450 elif var == 'VERSION_CODENAME':
7451 distro_codename = val.lower()
7452 return distro, distro_version, distro_codename
7453
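# Illustrative sketch (not part of upstream cephadm): given a hypothetical
# /etc/os-release containing
#
#   ID=ubuntu
#   VERSION_ID="20.04"
#   VERSION_CODENAME=focal
#
# get_distro() returns ('ubuntu', '20.04', 'focal').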
f6b5b4d7 7454
9f95a23c 7455class Packager(object):
f67539c2 7456 def __init__(self, ctx: CephadmContext,
522d829b
TL
7457 stable: Optional[str] = None, version: Optional[str] = None,
7458 branch: Optional[str] = None, commit: Optional[str] = None):
9f95a23c
TL
7459 assert \
7460 (stable and not version and not branch and not commit) or \
7461 (not stable and version and not branch and not commit) or \
7462 (not stable and not version and branch) or \
7463 (not stable and not version and not branch and not commit)
f67539c2 7464 self.ctx = ctx
9f95a23c
TL
7465 self.stable = stable
7466 self.version = version
7467 self.branch = branch
7468 self.commit = commit
7469
20effc67
TL
7470 def validate(self) -> None:
7471 """Validate parameters before writing any state to disk."""
7472 pass
7473
522d829b
TL
7474 def add_repo(self) -> None:
7475 raise NotImplementedError
7476
7477 def rm_repo(self) -> None:
7478 raise NotImplementedError
7479
7480 def install(self, ls: List[str]) -> None:
9f95a23c
TL
7481 raise NotImplementedError
7482
522d829b 7483 def install_podman(self) -> None:
9f95a23c
TL
7484 raise NotImplementedError
7485
522d829b 7486 def query_shaman(self, distro: str, distro_version: Any, branch: Optional[str], commit: Optional[str]) -> str:
9f95a23c 7487 # query shaman
f91f0fd5 7488 logger.info('Fetching repo metadata from shaman and chacra...')
9f95a23c
TL
7489 shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
7490 distro=distro,
7491 distro_version=distro_version,
7492 branch=branch,
7493 sha1=commit or 'latest',
7494 arch=get_arch()
7495 )
7496 try:
7497 shaman_response = urlopen(shaman_url)
7498 except HTTPError as err:
f91f0fd5 7499 logger.error('repository not found in shaman (might not be available yet)')
9f95a23c 7500 raise Error('%s, failed to fetch %s' % (err, shaman_url))
f67539c2 7501 chacra_url = ''
9f95a23c
TL
7502 try:
7503 chacra_url = shaman_response.geturl()
7504 chacra_response = urlopen(chacra_url)
7505 except HTTPError as err:
f91f0fd5 7506 logger.error('repository not found in chacra (might not be available yet)')
9f95a23c
TL
7507 raise Error('%s, failed to fetch %s' % (err, chacra_url))
7508 return chacra_response.read().decode('utf-8')
7509
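# Illustrative sketch (not part of upstream cephadm): with assumed values
# distro='ubuntu', distro_version='focal', branch='main', commit=None on an
# x86_64 host, the shaman URL built by query_shaman() above would be
#
#   https://shaman.ceph.com/api/repos/ceph/main/latest/ubuntu/focal/repo/?arch=x86_64
#
# and the returned body is the chacra repo file content that add_repo() writes.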
522d829b 7510 def repo_gpgkey(self) -> Tuple[str, str]:
f67539c2 7511 if self.ctx.gpg_url:
2a845540 7512 return self.ctx.gpg_url, 'manual'
9f95a23c 7513 if self.stable or self.version:
b3b6e05e 7514 return 'https://download.ceph.com/keys/release.gpg', 'release'
9f95a23c 7515 else:
b3b6e05e 7516 return 'https://download.ceph.com/keys/autobuild.gpg', 'autobuild'
9f95a23c 7517
522d829b 7518 def enable_service(self, service: str) -> None:
9f95a23c
TL
7519 """
7520 Start and enable the service (typically using systemd).
7521 """
f67539c2 7522 call_throws(self.ctx, ['systemctl', 'enable', '--now', service])
9f95a23c
TL
7523
7524
7525class Apt(Packager):
7526 DISTRO_NAMES = {
7527 'ubuntu': 'ubuntu',
7528 'debian': 'debian',
7529 }
7530
f67539c2 7531 def __init__(self, ctx: CephadmContext,
522d829b
TL
7532 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
7533 distro: Optional[str], distro_version: Optional[str], distro_codename: Optional[str]) -> None:
f67539c2 7534 super(Apt, self).__init__(ctx, stable=stable, version=version,
9f95a23c 7535 branch=branch, commit=commit)
522d829b 7536 assert distro
f67539c2 7537 self.ctx = ctx
9f95a23c
TL
7538 self.distro = self.DISTRO_NAMES[distro]
7539 self.distro_codename = distro_codename
f91f0fd5 7540 self.distro_version = distro_version
9f95a23c 7541
522d829b 7542 def repo_path(self) -> str:
9f95a23c
TL
7543 return '/etc/apt/sources.list.d/ceph.list'
7544
522d829b 7545 def add_repo(self) -> None:
f67539c2 7546
9f95a23c 7547 url, name = self.repo_gpgkey()
f91f0fd5 7548 logger.info('Installing repo GPG key from %s...' % url)
9f95a23c
TL
7549 try:
7550 response = urlopen(url)
7551 except HTTPError as err:
f91f0fd5 7552 logger.error('failed to fetch GPG repo key from %s: %s' % (
9f95a23c
TL
7553 url, err))
7554 raise Error('failed to fetch GPG key')
b3b6e05e
TL
7555 key = response.read()
7556 with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'wb') as f:
9f95a23c
TL
7557 f.write(key)
7558
7559 if self.version:
7560 content = 'deb %s/debian-%s/ %s main\n' % (
f67539c2 7561 self.ctx.repo_url, self.version, self.distro_codename)
9f95a23c
TL
7562 elif self.stable:
7563 content = 'deb %s/debian-%s/ %s main\n' % (
f67539c2 7564 self.ctx.repo_url, self.stable, self.distro_codename)
9f95a23c
TL
7565 else:
7566 content = self.query_shaman(self.distro, self.distro_codename, self.branch,
7567 self.commit)
7568
f91f0fd5 7569 logger.info('Installing repo file at %s...' % self.repo_path())
9f95a23c
TL
7570 with open(self.repo_path(), 'w') as f:
7571 f.write(content)
7572
b3b6e05e
TL
7573 self.update()
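    # Example of the resulting /etc/apt/sources.list.d/ceph.list entry for a
    # stable release, assuming the default repo url (download.ceph.com) and an
    # Ubuntu 22.04 ('jammy') host:
    #   deb https://download.ceph.com/debian-quincy/ jammy main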
7574
522d829b 7575 def rm_repo(self) -> None:
2a845540 7576 for name in ['autobuild', 'release', 'manual']:
9f95a23c
TL
7577 p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
7578 if os.path.exists(p):
f91f0fd5 7579 logger.info('Removing repo GPG key %s...' % p)
9f95a23c
TL
7580 os.unlink(p)
7581 if os.path.exists(self.repo_path()):
f91f0fd5 7582 logger.info('Removing repo at %s...' % self.repo_path())
9f95a23c
TL
7583 os.unlink(self.repo_path())
7584
f91f0fd5
TL
7585 if self.distro == 'ubuntu':
7586 self.rm_kubic_repo()
7587
522d829b 7588 def install(self, ls: List[str]) -> None:
f91f0fd5 7589 logger.info('Installing packages %s...' % ls)
f67539c2 7590 call_throws(self.ctx, ['apt-get', 'install', '-y'] + ls)
9f95a23c 7591
522d829b 7592 def update(self) -> None:
b3b6e05e
TL
7593 logger.info('Updating package list...')
7594 call_throws(self.ctx, ['apt-get', 'update'])
7595
522d829b 7596 def install_podman(self) -> None:
9f95a23c 7597 if self.distro == 'ubuntu':
f91f0fd5
TL
7598 logger.info('Setting up repo for podman...')
7599 self.add_kubic_repo()
b3b6e05e 7600 self.update()
9f95a23c 7601
f91f0fd5 7602 logger.info('Attempting podman install...')
9f95a23c
TL
7603 try:
7604 self.install(['podman'])
f67539c2 7605 except Error:
f91f0fd5 7606 logger.info('Podman did not work. Falling back to docker...')
9f95a23c
TL
7607 self.install(['docker.io'])
7608
522d829b 7609 def kubic_repo_url(self) -> str:
f91f0fd5
TL
7610 return 'https://download.opensuse.org/repositories/devel:/kubic:/' \
7611 'libcontainers:/stable/xUbuntu_%s/' % self.distro_version
7612
522d829b 7613 def kubic_repo_path(self) -> str:
f91f0fd5
TL
7614 return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list'
7615
522d829b 7616 def kubic_repo_gpgkey_url(self) -> str:
f91f0fd5
TL
7617 return '%s/Release.key' % self.kubic_repo_url()
7618
522d829b 7619 def kubic_repo_gpgkey_path(self) -> str:
f91f0fd5
TL
7620 return '/etc/apt/trusted.gpg.d/kubic.release.gpg'
7621
522d829b 7622 def add_kubic_repo(self) -> None:
f91f0fd5
TL
7623 url = self.kubic_repo_gpgkey_url()
7624 logger.info('Installing repo GPG key from %s...' % url)
7625 try:
7626 response = urlopen(url)
7627 except HTTPError as err:
7628 logger.error('failed to fetch GPG repo key from %s: %s' % (
7629 url, err))
7630 raise Error('failed to fetch GPG key')
7631 key = response.read().decode('utf-8')
7632 tmp_key = write_tmp(key, 0, 0)
7633 keyring = self.kubic_repo_gpgkey_path()
f67539c2 7634 call_throws(self.ctx, ['apt-key', '--keyring', keyring, 'add', tmp_key.name])
f91f0fd5
TL
7635
7636 logger.info('Installing repo file at %s...' % self.kubic_repo_path())
7637 content = 'deb %s /\n' % self.kubic_repo_url()
7638 with open(self.kubic_repo_path(), 'w') as f:
7639 f.write(content)
7640
522d829b 7641 def rm_kubic_repo(self) -> None:
f91f0fd5
TL
7642 keyring = self.kubic_repo_gpgkey_path()
7643 if os.path.exists(keyring):
7644 logger.info('Removing repo GPG key %s...' % keyring)
7645 os.unlink(keyring)
7646
7647 p = self.kubic_repo_path()
7648 if os.path.exists(p):
7649 logger.info('Removing repo at %s...' % p)
7650 os.unlink(p)
7651
f6b5b4d7 7652
9f95a23c
TL
7653class YumDnf(Packager):
7654 DISTRO_NAMES = {
7655 'centos': ('centos', 'el'),
7656 'rhel': ('centos', 'el'),
7657 'scientific': ('centos', 'el'),
b3b6e05e 7658 'rocky': ('centos', 'el'),
522d829b 7659 'almalinux': ('centos', 'el'),
2a845540 7660 'ol': ('centos', 'el'),
9f95a23c 7661 'fedora': ('fedora', 'fc'),
a4b75251 7662 'mariner': ('mariner', 'cm'),
9f95a23c
TL
7663 }
7664
f67539c2 7665 def __init__(self, ctx: CephadmContext,
522d829b
TL
7666 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
7667 distro: Optional[str], distro_version: Optional[str]) -> None:
f67539c2 7668 super(YumDnf, self).__init__(ctx, stable=stable, version=version,
9f95a23c 7669 branch=branch, commit=commit)
522d829b
TL
7670 assert distro
7671 assert distro_version
f67539c2 7672 self.ctx = ctx
9f95a23c
TL
7673 self.major = int(distro_version.split('.')[0])
7674 self.distro_normalized = self.DISTRO_NAMES[distro][0]
7675 self.distro_code = self.DISTRO_NAMES[distro][1] + str(self.major)
7676 if (self.distro_code == 'fc' and self.major >= 30) or \
7677 (self.distro_code == 'el' and self.major >= 8):
7678 self.tool = 'dnf'
a4b75251
TL
7679 elif (self.distro_code == 'cm'):
7680 self.tool = 'tdnf'
9f95a23c
TL
7681 else:
7682 self.tool = 'yum'
7683
522d829b 7684 def custom_repo(self, **kw: Any) -> str:
9f95a23c
TL
7685 """
7686 Repo files need special care in that a whole line should not be present
7687 if there is no value for it. Because we were using `format()` we could
7688 not conditionally add a line for a repo file. So the end result would
7689 contain a key with a missing value (say if we were passing `None`).
7690
7691 For example, it could look like::
7692
7693 [ceph repo]
7694 name= ceph repo
7695 proxy=
7696 gpgcheck=
7697
7698 Which breaks. This function allows us to add lines conditionally,
7699 preserving their order and avoiding empty keys.
7700
7701 Previously, and for historical purposes, this is how the template used
7702 to look::
7703
7704 custom_repo =
7705 [{repo_name}]
7706 name={name}
7707 baseurl={baseurl}
7708 enabled={enabled}
7709 gpgcheck={gpgcheck}
7710 type={_type}
7711 gpgkey={gpgkey}
7712 proxy={proxy}
7713
7714 """
7715 lines = []
7716
7717 # by using tuples (vs a dict) we preserve the order of what we want to
7718 # return, like starting with a [repo name]
7719 tmpl = (
7720 ('reponame', '[%s]'),
7721 ('name', 'name=%s'),
7722 ('baseurl', 'baseurl=%s'),
7723 ('enabled', 'enabled=%s'),
7724 ('gpgcheck', 'gpgcheck=%s'),
7725 ('_type', 'type=%s'),
7726 ('gpgkey', 'gpgkey=%s'),
7727 ('proxy', 'proxy=%s'),
7728 ('priority', 'priority=%s'),
7729 )
7730
7731 for line in tmpl:
7732 tmpl_key, tmpl_value = line # key values from tmpl
7733
7734 # ensure that there is an actual value (not None nor empty string)
7735 if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
7736 lines.append(tmpl_value % kw.get(tmpl_key))
7737
7738 return '\n'.join(lines)
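    # Usage sketch (hypothetical URLs): custom_repo(reponame='Ceph',
    # name='Ceph $basearch', baseurl='https://example.com/rpm/el8/$basearch',
    # enabled=1, gpgcheck=1, gpgkey='https://example.com/release.gpg') renders
    # only the keys that were passed, in template order:
    #   [Ceph]
    #   name=Ceph $basearch
    #   baseurl=https://example.com/rpm/el8/$basearch
    #   enabled=1
    #   gpgcheck=1
    #   gpgkey=https://example.com/release.gpg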
7739
522d829b 7740 def repo_path(self) -> str:
9f95a23c
TL
7741 return '/etc/yum.repos.d/ceph.repo'
7742
522d829b 7743 def repo_baseurl(self) -> str:
9f95a23c
TL
7744 assert self.stable or self.version
7745 if self.version:
f67539c2 7746 return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.version,
9f95a23c
TL
7747 self.distro_code)
7748 else:
f67539c2 7749 return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.stable,
9f95a23c
TL
7750 self.distro_code)
7751
20effc67 7752 def validate(self) -> None:
b3b6e05e
TL
7753 if self.distro_code.startswith('fc'):
7754 raise Error('Ceph team does not build Fedora specific packages and therefore cannot add repos for this distro')
7755 if self.distro_code == 'el7':
7756 if self.stable and self.stable >= 'pacific':
7757 raise Error('Ceph does not support pacific or later for this version of this linux distro and therefore cannot add a repo for it')
7758 if self.version and self.version.split('.')[0] >= '16':
7759 raise Error('Ceph does not support 16.y.z or later for this version of this linux distro and therefore cannot add a repo for it')
20effc67
TL
7760
7761 if self.stable or self.version:
7762 # we know that yum & dnf require there to be a
7763 # $base_url/$arch/repodata/repomd.xml so we can test if this URL
7764 # is gettable in order to validate the inputs
7765 test_url = self.repo_baseurl() + '/noarch/repodata/repomd.xml'
7766 try:
7767 urlopen(test_url)
7768 except HTTPError as err:
7769 logger.error('unable to fetch repo metadata: %r', err)
7770 raise Error('failed to fetch repository metadata. please check'
7771 ' the provided parameters are correct and try again')
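    # For example, with --release quincy on an el8 host the URL probed above is
    # <repo_url>/rpm-quincy/el8/noarch/repodata/repomd.xml.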
7772
7773 def add_repo(self) -> None:
9f95a23c
TL
7774 if self.stable or self.version:
7775 content = ''
7776 for n, t in {
7777 'Ceph': '$basearch',
7778 'Ceph-noarch': 'noarch',
7779 'Ceph-source': 'SRPMS'}.items():
7780 content += '[%s]\n' % (n)
7781 content += self.custom_repo(
7782 name='Ceph %s' % t,
7783 baseurl=self.repo_baseurl() + '/' + t,
7784 enabled=1,
7785 gpgcheck=1,
7786 gpgkey=self.repo_gpgkey()[0],
7787 )
7788 content += '\n\n'
7789 else:
7790 content = self.query_shaman(self.distro_normalized, self.major,
7791 self.branch,
7792 self.commit)
7793
f91f0fd5 7794 logger.info('Writing repo to %s...' % self.repo_path())
9f95a23c
TL
7795 with open(self.repo_path(), 'w') as f:
7796 f.write(content)
7797
7798 if self.distro_code.startswith('el'):
7799 logger.info('Enabling EPEL...')
f67539c2 7800 call_throws(self.ctx, [self.tool, 'install', '-y', 'epel-release'])
9f95a23c 7801
522d829b 7802 def rm_repo(self) -> None:
9f95a23c
TL
7803 if os.path.exists(self.repo_path()):
7804 os.unlink(self.repo_path())
9f95a23c 7805
522d829b 7806 def install(self, ls: List[str]) -> None:
9f95a23c 7807 logger.info('Installing packages %s...' % ls)
f67539c2 7808 call_throws(self.ctx, [self.tool, 'install', '-y'] + ls)
9f95a23c 7809
522d829b 7810 def install_podman(self) -> None:
9f95a23c
TL
7811 self.install(['podman'])
7812
7813
7814class Zypper(Packager):
7815 DISTRO_NAMES = [
7816 'sles',
7817 'opensuse-tumbleweed',
7818 'opensuse-leap'
7819 ]
7820
f67539c2 7821 def __init__(self, ctx: CephadmContext,
522d829b
TL
7822 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
7823 distro: Optional[str], distro_version: Optional[str]) -> None:
f67539c2 7824 super(Zypper, self).__init__(ctx, stable=stable, version=version,
9f95a23c 7825 branch=branch, commit=commit)
522d829b 7826 assert distro is not None
f67539c2 7827 self.ctx = ctx
9f95a23c
TL
7828 self.tool = 'zypper'
7829 self.distro = 'opensuse'
7830 self.distro_version = '15.1'
7831 if 'tumbleweed' not in distro and distro_version is not None:
7832 self.distro_version = distro_version
7833
522d829b 7834 def custom_repo(self, **kw: Any) -> str:
9f95a23c
TL
7835 """
7836 See YumDnf for format explanation.
7837 """
7838 lines = []
7839
7840 # by using tuples (vs a dict) we preserve the order of what we want to
7841 # return, like starting with a [repo name]
7842 tmpl = (
7843 ('reponame', '[%s]'),
7844 ('name', 'name=%s'),
7845 ('baseurl', 'baseurl=%s'),
7846 ('enabled', 'enabled=%s'),
7847 ('gpgcheck', 'gpgcheck=%s'),
7848 ('_type', 'type=%s'),
7849 ('gpgkey', 'gpgkey=%s'),
7850 ('proxy', 'proxy=%s'),
7851 ('priority', 'priority=%s'),
7852 )
7853
7854 for line in tmpl:
7855 tmpl_key, tmpl_value = line # key values from tmpl
7856
7857 # ensure that there is an actual value (not None nor empty string)
7858 if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
7859 lines.append(tmpl_value % kw.get(tmpl_key))
7860
7861 return '\n'.join(lines)
7862
522d829b 7863 def repo_path(self) -> str:
9f95a23c
TL
7864 return '/etc/zypp/repos.d/ceph.repo'
7865
522d829b 7866 def repo_baseurl(self) -> str:
9f95a23c
TL
7867 assert self.stable or self.version
7868 if self.version:
f67539c2
TL
7869 return '%s/rpm-%s/%s' % (self.ctx.repo_url,
7870 self.version, self.distro)
9f95a23c 7871 else:
f67539c2
TL
7872 return '%s/rpm-%s/%s' % (self.ctx.repo_url,
7873 self.stable, self.distro)
9f95a23c 7874
522d829b 7875 def add_repo(self) -> None:
9f95a23c
TL
7876 if self.stable or self.version:
7877 content = ''
7878 for n, t in {
7879 'Ceph': '$basearch',
7880 'Ceph-noarch': 'noarch',
7881 'Ceph-source': 'SRPMS'}.items():
7882 content += '[%s]\n' % (n)
7883 content += self.custom_repo(
7884 name='Ceph %s' % t,
7885 baseurl=self.repo_baseurl() + '/' + t,
7886 enabled=1,
7887 gpgcheck=1,
7888 gpgkey=self.repo_gpgkey()[0],
7889 )
7890 content += '\n\n'
7891 else:
7892 content = self.query_shaman(self.distro, self.distro_version,
7893 self.branch,
7894 self.commit)
7895
f91f0fd5 7896 logger.info('Writing repo to %s...' % self.repo_path())
9f95a23c
TL
7897 with open(self.repo_path(), 'w') as f:
7898 f.write(content)
7899
522d829b 7900 def rm_repo(self) -> None:
9f95a23c
TL
7901 if os.path.exists(self.repo_path()):
7902 os.unlink(self.repo_path())
7903
522d829b 7904 def install(self, ls: List[str]) -> None:
9f95a23c 7905 logger.info('Installing packages %s...' % ls)
f67539c2 7906 call_throws(self.ctx, [self.tool, 'in', '-y'] + ls)
9f95a23c 7907
522d829b 7908 def install_podman(self) -> None:
9f95a23c
TL
7909 self.install(['podman'])
7910
7911
f67539c2 7912def create_packager(ctx: CephadmContext,
522d829b
TL
7913 stable: Optional[str] = None, version: Optional[str] = None,
7914 branch: Optional[str] = None, commit: Optional[str] = None) -> Packager:
9f95a23c
TL
7915 distro, distro_version, distro_codename = get_distro()
7916 if distro in YumDnf.DISTRO_NAMES:
f67539c2 7917 return YumDnf(ctx, stable=stable, version=version,
9f95a23c 7918 branch=branch, commit=commit,
f67539c2 7919 distro=distro, distro_version=distro_version)
9f95a23c 7920 elif distro in Apt.DISTRO_NAMES:
f67539c2 7921 return Apt(ctx, stable=stable, version=version,
9f95a23c
TL
7922 branch=branch, commit=commit,
7923 distro=distro, distro_version=distro_version,
7924 distro_codename=distro_codename)
7925 elif distro in Zypper.DISTRO_NAMES:
f67539c2 7926 return Zypper(ctx, stable=stable, version=version,
9f95a23c
TL
7927 branch=branch, commit=commit,
7928 distro=distro, distro_version=distro_version)
7929 raise Error('Distro %s version %s not supported' % (distro, distro_version))
7930
7931
522d829b 7932def command_add_repo(ctx: CephadmContext) -> None:
f67539c2 7933 if ctx.version and ctx.release:
9f95a23c 7934 raise Error('you can specify either --release or --version but not both')
f67539c2 7935 if not ctx.version and not ctx.release and not ctx.dev and not ctx.dev_commit:
1911f103 7936 raise Error('please supply a --release, --version, --dev or --dev-commit argument')
f67539c2 7937 if ctx.version:
9f95a23c 7938 try:
f67539c2
TL
7939 (x, y, z) = ctx.version.split('.')
7940 except Exception:
9f95a23c 7941 raise Error('version must be in the form x.y.z (e.g., 15.2.0)')
b3b6e05e
TL
7942 if ctx.release:
7943 # Pacific =/= pacific in this case; normalize to lowercase to avoid confusion
7944 ctx.release = ctx.release.lower()
9f95a23c 7945
f67539c2
TL
7946 pkg = create_packager(ctx, stable=ctx.release,
7947 version=ctx.version,
7948 branch=ctx.dev,
7949 commit=ctx.dev_commit)
20effc67 7950 pkg.validate()
9f95a23c 7951 pkg.add_repo()
b3b6e05e 7952 logger.info('Completed adding repo.')
9f95a23c 7953
f6b5b4d7 7954
522d829b 7955def command_rm_repo(ctx: CephadmContext) -> None:
f67539c2 7956 pkg = create_packager(ctx)
9f95a23c
TL
7957 pkg.rm_repo()
7958
f6b5b4d7 7959
522d829b 7960def command_install(ctx: CephadmContext) -> None:
f67539c2
TL
7961 pkg = create_packager(ctx)
7962 pkg.install(ctx.packages)
9f95a23c 7963
2a845540
TL
7964
7965def command_rescan_disks(ctx: CephadmContext) -> str:
7966
7967 def probe_hba(scan_path: str) -> None:
7968 """Tell the adapter to rescan"""
7969 with open(scan_path, 'w') as f:
7970 f.write('- - -')
7971
7972 cmd = ctx.func.__name__.replace('command_', '')
7973 logger.info(f'{cmd}: starting')
7974 start = time.time()
7975
7976 all_scan_files = glob('/sys/class/scsi_host/*/scan')
7977 scan_files = []
7978 skipped = []
7979 for scan_path in all_scan_files:
7980 adapter_name = os.path.basename(os.path.dirname(scan_path))
7981 proc_name = read_file([os.path.join(os.path.dirname(scan_path), 'proc_name')])
7982 if proc_name in ['unknown', 'usb-storage']:
7983 skipped.append(os.path.basename(scan_path))
7984 logger.info(f'{cmd}: rescan skipping incompatible host adapter {adapter_name} : {proc_name}')
7985 continue
7986
7987 scan_files.append(scan_path)
7988
7989 if not scan_files:
7990 logger.info(f'{cmd}: no compatible HBAs found')
7991 return 'Ok. No compatible HBAs found'
7992
7993 responses = async_run(concurrent_tasks(probe_hba, scan_files))
7994 failures = [r for r in responses if r]
7995
7996 logger.info(f'{cmd}: Complete. {len(scan_files)} adapters rescanned, {len(failures)} failures, {len(skipped)} skipped')
7997
7998 elapsed = time.time() - start
7999 if failures:
8000 plural = 's' if len(failures) > 1 else ''
8001 if len(failures) == len(scan_files):
8002 return f'Failed. All {len(scan_files)} rescan requests failed'
8003 else:
8004 return f'Partial. {len(scan_files) - len(failures)} successful, {len(failures)} failure{plural} against: {", ".join(failures)}'
8005
8006 return f'Ok. {len(all_scan_files)} adapters detected: {len(scan_files)} rescanned, {len(skipped)} skipped, {len(failures)} failed ({elapsed:.2f}s)'
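
# A minimal sketch (assuming a hypothetical adapter name 'host0') of the sysfs
# interface that probe_hba above drives concurrently: writing the wildcard
# triple '- - -' (channel target lun) to an adapter's scan file asks the kernel
# to rescan every channel, target and LUN behind that host bus adapter.
def _rescan_single_hba(host_name: str = 'host0') -> None:
    scan_path = '/sys/class/scsi_host/{}/scan'.format(host_name)
    with open(scan_path, 'w') as f:  # needs root and an existing SCSI host entry
        f.write('- - -')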
8007
9f95a23c
TL
8008##################################
8009
f67539c2 8010
f91f0fd5
TL
8011def get_ipv4_address(ifname):
8012 # type: (str) -> str
522d829b 8013 def _extract(sock: socket.socket, offset: int) -> str:
f91f0fd5 8014 return socket.inet_ntop(
f67539c2
TL
8015 socket.AF_INET,
8016 fcntl.ioctl(
8017 sock.fileno(),
8018 offset,
8019 struct.pack('256s', bytes(ifname[:15], 'utf-8'))
8020 )[20:24])
f91f0fd5
TL
8021
8022 s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
8023 try:
8024 addr = _extract(s, 35093) # '0x8915' = SIOCGIFADDR
8025 dq_mask = _extract(s, 35099) # 0x891b = SIOCGIFNETMASK
8026 except OSError:
8027 # interface does not have an ipv4 address
8028 return ''
8029
8030 dec_mask = sum([bin(int(i)).count('1')
8031 for i in dq_mask.split('.')])
8032 return '{}/{}'.format(addr, dec_mask)
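
# A small helper sketch mirroring the netmask handling above: SIOCGIFNETMASK
# returns a dotted-quad mask, which get_ipv4_address reduces to a CIDR prefix
# length by counting set bits per octet (e.g. '255.255.240.0' -> 20).
def _netmask_to_prefixlen(dq_mask: str) -> int:
    return sum(bin(int(octet)).count('1') for octet in dq_mask.split('.'))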
8033
8034
8035def get_ipv6_address(ifname):
8036 # type: (str) -> str
8037 if not os.path.exists('/proc/net/if_inet6'):
8038 return ''
8039
8040 raw = read_file(['/proc/net/if_inet6'])
8041 data = raw.splitlines()
8042 # based on docs @ https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/ch11s04.html
8043 # field 0 is the address, field 2 is the prefix length (both hex)
8044 for iface_setting in data:
8045 field = iface_setting.split()
8046 if field[-1] == ifname:
8047 ipv6_raw = field[0]
f67539c2 8048 ipv6_fmtd = ':'.join([ipv6_raw[_p:_p + 4] for _p in range(0, len(field[0]), 4)])
f91f0fd5
TL
8049 # apply naming rules using ipaddress module
8050 ipv6 = ipaddress.ip_address(ipv6_fmtd)
f67539c2 8051 return '{}/{}'.format(str(ipv6), int('0x{}'.format(field[2]), 16))
f91f0fd5
TL
8052 return ''
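
# Sketch of the address handling above: the first /proc/net/if_inet6 field is a
# bare 32-hex-digit string, which get_ipv6_address regroups into 4-character
# chunks and normalizes with the ipaddress module (e.g. the loopback entry
# '00000000000000000000000000000001' becomes '::1').
def _format_if_inet6_addr(raw_hex: str) -> str:
    grouped = ':'.join(raw_hex[i:i + 4] for i in range(0, len(raw_hex), 4))
    return str(ipaddress.ip_address(grouped))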
8053
8054
8055def bytes_to_human(num, mode='decimal'):
8056 # type: (float, str) -> str
8057 """Convert a bytes value into its human-readable form.
8058
8059 :param num: number, in bytes, to convert
8060 :param mode: Either decimal (default) or binary to determine divisor
8061 :returns: string representing the bytes value in a more readable format
8062 """
8063 unit_list = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
8064 divisor = 1000.0
f67539c2 8065 yotta = 'YB'
f91f0fd5
TL
8066
8067 if mode == 'binary':
8068 unit_list = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
8069 divisor = 1024.0
f67539c2 8070 yotta = 'YiB'
f91f0fd5
TL
8071
8072 for unit in unit_list:
8073 if abs(num) < divisor:
f67539c2 8074 return '%3.1f%s' % (num, unit)
f91f0fd5 8075 num /= divisor
f67539c2 8076 return '%.1f%s' % (num, yotta)
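
# Usage sketch for bytes_to_human with the divisors defined above:
#   bytes_to_human(1234567)                -> '1.2MB'   (decimal, divisor 1000)
#   bytes_to_human(1234567, mode='binary') -> '1.2MiB'  (binary, divisor 1024)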
f91f0fd5
TL
8077
8078
8079def read_file(path_list, file_name=''):
8080 # type: (List[str], str) -> str
8081 """Returns the content of the first file found within the `path_list`
8082
8083 :param path_list: list of file paths to search
8084 :param file_name: optional file_name to be applied to a file path
8085 :returns: content of the file or 'Unknown'
8086 """
8087 for path in path_list:
8088 if file_name:
8089 file_path = os.path.join(path, file_name)
8090 else:
8091 file_path = path
8092 if os.path.exists(file_path):
8093 with open(file_path, 'r') as f:
8094 try:
8095 content = f.read().strip()
8096 except OSError:
8097 # sysfs may populate the file, but for devices like
8098 # virtio reads can fail
f67539c2 8099 return 'Unknown'
f91f0fd5
TL
8100 else:
8101 return content
f67539c2 8102 return 'Unknown'
f91f0fd5
TL
8103
8104##################################
f67539c2
TL
8105
8106
f91f0fd5
TL
8107class HostFacts():
8108 _dmi_path_list = ['/sys/class/dmi/id']
8109 _nic_path_list = ['/sys/class/net']
f91f0fd5
TL
8110 _apparmor_path_list = ['/etc/apparmor']
8111 _disk_vendor_workarounds = {
f67539c2 8112 '0x1af4': 'Virtio Block Device'
f91f0fd5 8113 }
a4b75251 8114 _excluded_block_devices = ('sr', 'zram', 'dm-')
f91f0fd5 8115
f67539c2
TL
8116 def __init__(self, ctx: CephadmContext):
8117 self.ctx: CephadmContext = ctx
8118 self.cpu_model: str = 'Unknown'
8119 self.cpu_count: int = 0
8120 self.cpu_cores: int = 0
8121 self.cpu_threads: int = 0
8122 self.interfaces: Dict[str, Any] = {}
f91f0fd5 8123
f67539c2 8124 self._meminfo: List[str] = read_file(['/proc/meminfo']).splitlines()
f91f0fd5
TL
8125 self._get_cpuinfo()
8126 self._process_nics()
f67539c2
TL
8127 self.arch: str = platform.processor()
8128 self.kernel: str = platform.release()
f91f0fd5
TL
8129
8130 def _get_cpuinfo(self):
8131 # type: () -> None
8132 """Determine cpu information via /proc/cpuinfo"""
8133 raw = read_file(['/proc/cpuinfo'])
8134 output = raw.splitlines()
8135 cpu_set = set()
8136
8137 for line in output:
f67539c2
TL
8138 field = [f.strip() for f in line.split(':')]
8139 if 'model name' in line:
f91f0fd5 8140 self.cpu_model = field[1]
f67539c2 8141 if 'physical id' in line:
f91f0fd5 8142 cpu_set.add(field[1])
f67539c2 8143 if 'siblings' in line:
f91f0fd5 8144 self.cpu_threads = int(field[1].strip())
f67539c2 8145 if 'cpu cores' in line:
f91f0fd5
TL
8146 self.cpu_cores = int(field[1].strip())
8147 pass
8148 self.cpu_count = len(cpu_set)
8149
8150 def _get_block_devs(self):
8151 # type: () -> List[str]
8152 """Determine the list of block devices by looking at /sys/block"""
8153 return [dev for dev in os.listdir('/sys/block')
a4b75251 8154 if not dev.startswith(HostFacts._excluded_block_devices)]
f91f0fd5
TL
8155
8156 def _get_devs_by_type(self, rota='0'):
8157 # type: (str) -> List[str]
8158 """Filter block devices by a given rotational attribute (0=flash, 1=spinner)"""
8159 devs = list()
8160 for blk_dev in self._get_block_devs():
8161 rot_path = '/sys/block/{}/queue/rotational'.format(blk_dev)
8162 rot_value = read_file([rot_path])
8163 if rot_value == rota:
8164 devs.append(blk_dev)
8165 return devs
8166
8167 @property
8168 def operating_system(self):
8169 # type: () -> str
8170 """Determine OS version"""
8171 raw_info = read_file(['/etc/os-release'])
8172 os_release = raw_info.splitlines()
8173 rel_str = 'Unknown'
8174 rel_dict = dict()
8175
8176 for line in os_release:
f67539c2 8177 if '=' in line:
f91f0fd5
TL
8178 var_name, var_value = line.split('=')
8179 rel_dict[var_name] = var_value.strip('"')
8180
8181 # Would normally use PRETTY_NAME, but NAME and VERSION are more
8182 # consistent
f67539c2
TL
8183 if all(_v in rel_dict for _v in ['NAME', 'VERSION']):
8184 rel_str = '{} {}'.format(rel_dict['NAME'], rel_dict['VERSION'])
f91f0fd5
TL
8185 return rel_str
8186
8187 @property
8188 def hostname(self):
8189 # type: () -> str
8190 """Return the hostname"""
8191 return platform.node()
8192
8193 @property
8194 def subscribed(self):
8195 # type: () -> str
8196 """High-level check to see if the host is subscribed to receive updates/support"""
8197 def _red_hat():
8198 # type: () -> str
8199 # RHEL 7 and RHEL 8
8200 entitlements_dir = '/etc/pki/entitlement'
8201 if os.path.exists(entitlements_dir):
8202 pems = glob('{}/*.pem'.format(entitlements_dir))
8203 if len(pems) >= 2:
f67539c2 8204 return 'Yes'
f91f0fd5 8205
f67539c2 8206 return 'No'
f91f0fd5
TL
8207
8208 os_name = self.operating_system
f67539c2 8209 if os_name.upper().startswith('RED HAT'):
f91f0fd5
TL
8210 return _red_hat()
8211
f67539c2 8212 return 'Unknown'
f91f0fd5
TL
8213
8214 @property
8215 def hdd_count(self):
8216 # type: () -> int
8217 """Return a count of HDDs (spinners)"""
8218 return len(self._get_devs_by_type(rota='1'))
8219
8220 def _get_capacity(self, dev):
8221 # type: (str) -> int
8222 """Determine the size of a given device"""
8223 size_path = os.path.join('/sys/block', dev, 'size')
8224 size_blocks = int(read_file([size_path]))
8225 blk_path = os.path.join('/sys/block', dev, 'queue', 'logical_block_size')
8226 blk_count = int(read_file([blk_path]))
8227 return size_blocks * blk_count
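    # Worked example for _get_capacity: a device reporting 62914560 sectors with
    # a 512-byte logical block size is 62914560 * 512 = 32212254720 bytes
    # (about '32.2GB' in bytes_to_human's decimal form).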
8228
8229 def _get_capacity_by_type(self, rota='0'):
8230 # type: (str) -> int
8231 """Return the total capacity of a category of device (flash or hdd)"""
8232 devs = self._get_devs_by_type(rota=rota)
8233 capacity = 0
8234 for dev in devs:
8235 capacity += self._get_capacity(dev)
8236 return capacity
8237
8238 def _dev_list(self, dev_list):
8239 # type: (List[str]) -> List[Dict[str, object]]
8240 """Return a 'pretty' name list for each device in the `dev_list`"""
8241 disk_list = list()
8242
8243 for dev in dev_list:
8244 disk_model = read_file(['/sys/block/{}/device/model'.format(dev)]).strip()
8245 disk_rev = read_file(['/sys/block/{}/device/rev'.format(dev)]).strip()
8246 disk_wwid = read_file(['/sys/block/{}/device/wwid'.format(dev)]).strip()
8247 vendor = read_file(['/sys/block/{}/device/vendor'.format(dev)]).strip()
8248 disk_vendor = HostFacts._disk_vendor_workarounds.get(vendor, vendor)
8249 disk_size_bytes = self._get_capacity(dev)
8250 disk_list.append({
f67539c2
TL
8251 'description': '{} {} ({})'.format(disk_vendor, disk_model, bytes_to_human(disk_size_bytes)),
8252 'vendor': disk_vendor,
8253 'model': disk_model,
8254 'rev': disk_rev,
8255 'wwid': disk_wwid,
8256 'dev_name': dev,
8257 'disk_size_bytes': disk_size_bytes,
8258 })
f91f0fd5
TL
8259 return disk_list
8260
8261 @property
8262 def hdd_list(self):
8263 # type: () -> List[Dict[str, object]]
8264 """Return a list of devices that are HDDs (spinners)"""
8265 devs = self._get_devs_by_type(rota='1')
8266 return self._dev_list(devs)
8267
8268 @property
8269 def flash_list(self):
8270 # type: () -> List[Dict[str, object]]
8271 """Return a list of devices that are flash based (SSD, NVMe)"""
8272 devs = self._get_devs_by_type(rota='0')
8273 return self._dev_list(devs)
8274
8275 @property
8276 def hdd_capacity_bytes(self):
8277 # type: () -> int
8278 """Return the total capacity for all HDD devices (bytes)"""
8279 return self._get_capacity_by_type(rota='1')
8280
8281 @property
8282 def hdd_capacity(self):
8283 # type: () -> str
8284 """Return the total capacity for all HDD devices (human readable format)"""
8285 return bytes_to_human(self.hdd_capacity_bytes)
8286
8287 @property
8288 def cpu_load(self):
8289 # type: () -> Dict[str, float]
8290 """Return the cpu load average data for the host"""
8291 raw = read_file(['/proc/loadavg']).strip()
8292 data = raw.split()
8293 return {
f67539c2
TL
8294 '1min': float(data[0]),
8295 '5min': float(data[1]),
8296 '15min': float(data[2]),
f91f0fd5
TL
8297 }
8298
8299 @property
8300 def flash_count(self):
8301 # type: () -> int
8302 """Return the number of flash devices in the system (SSD, NVMe)"""
8303 return len(self._get_devs_by_type(rota='0'))
8304
8305 @property
8306 def flash_capacity_bytes(self):
8307 # type: () -> int
8308 """Return the total capacity for all flash devices (bytes)"""
8309 return self._get_capacity_by_type(rota='0')
8310
8311 @property
8312 def flash_capacity(self):
8313 # type: () -> str
8314 """Return the total capacity for all Flash devices (human readable format)"""
8315 return bytes_to_human(self.flash_capacity_bytes)
8316
8317 def _process_nics(self):
8318 # type: () -> None
8319 """Look at the NIC devices and extract network related metadata"""
8320 # from https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
8321 hw_lookup = {
f67539c2
TL
8322 '1': 'ethernet',
8323 '32': 'infiniband',
8324 '772': 'loopback',
f91f0fd5
TL
8325 }
8326
8327 for nic_path in HostFacts._nic_path_list:
8328 if not os.path.exists(nic_path):
8329 continue
8330 for iface in os.listdir(nic_path):
8331
33c7a0ef
TL
8332 if os.path.exists(os.path.join(nic_path, iface, 'bridge')):
8333 nic_type = 'bridge'
8334 elif os.path.exists(os.path.join(nic_path, iface, 'bonding')):
8335 nic_type = 'bonding'
8336 else:
8337 nic_type = hw_lookup.get(read_file([os.path.join(nic_path, iface, 'type')]), 'Unknown')
8338
8339 if nic_type == 'loopback': # skip loopback devices
8340 continue
8341
f67539c2
TL
8342 lower_devs_list = [os.path.basename(link.replace('lower_', '')) for link in glob(os.path.join(nic_path, iface, 'lower_*'))]
8343 upper_devs_list = [os.path.basename(link.replace('upper_', '')) for link in glob(os.path.join(nic_path, iface, 'upper_*'))]
f91f0fd5
TL
8344
8345 try:
8346 mtu = int(read_file([os.path.join(nic_path, iface, 'mtu')]))
8347 except ValueError:
8348 mtu = 0
8349
8350 operstate = read_file([os.path.join(nic_path, iface, 'operstate')])
8351 try:
8352 speed = int(read_file([os.path.join(nic_path, iface, 'speed')]))
8353 except (OSError, ValueError):
8354 # OSError : device doesn't support the ethtool get_link_ksettings
8355 # ValueError : raised when the read fails, and returns Unknown
8356 #
8357 # Either way, we show a -1 when speed isn't available
8358 speed = -1
8359
f91f0fd5
TL
8360 dev_link = os.path.join(nic_path, iface, 'device')
8361 if os.path.exists(dev_link):
8362 iftype = 'physical'
8363 driver_path = os.path.join(dev_link, 'driver')
8364 if os.path.exists(driver_path):
f67539c2 8365 driver = os.path.basename(os.path.realpath(driver_path))
f91f0fd5
TL
8366 else:
8367 driver = 'Unknown'
8368
8369 else:
8370 iftype = 'logical'
8371 driver = ''
8372
8373 self.interfaces[iface] = {
f67539c2
TL
8374 'mtu': mtu,
8375 'upper_devs_list': upper_devs_list,
8376 'lower_devs_list': lower_devs_list,
8377 'operstate': operstate,
8378 'iftype': iftype,
8379 'nic_type': nic_type,
8380 'driver': driver,
8381 'speed': speed,
8382 'ipv4_address': get_ipv4_address(iface),
8383 'ipv6_address': get_ipv6_address(iface),
f91f0fd5
TL
8384 }
8385
8386 @property
8387 def nic_count(self):
8388 # type: () -> int
8389 """Return a total count of all physical NICs detected in the host"""
8390 phys_devs = []
8391 for iface in self.interfaces:
f67539c2 8392 if self.interfaces[iface]['iftype'] == 'physical':
f91f0fd5
TL
8393 phys_devs.append(iface)
8394 return len(phys_devs)
8395
f91f0fd5
TL
8396 def _get_mem_data(self, field_name):
8397 # type: (str) -> int
8398 for line in self._meminfo:
8399 if line.startswith(field_name):
8400 _d = line.split()
8401 return int(_d[1])
8402 return 0
8403
8404 @property
8405 def memory_total_kb(self):
8406 # type: () -> int
8407 """Determine the memory installed (kb)"""
8408 return self._get_mem_data('MemTotal')
8409
8410 @property
8411 def memory_free_kb(self):
8412 # type: () -> int
8413 """Determine the memory free (not cache, immediately usable)"""
8414 return self._get_mem_data('MemFree')
8415
8416 @property
8417 def memory_available_kb(self):
8418 # type: () -> int
8419 """Determine the memory available to new applications without swapping"""
8420 return self._get_mem_data('MemAvailable')
8421
8422 @property
8423 def vendor(self):
8424 # type: () -> str
8425 """Determine server vendor from DMI data in sysfs"""
f67539c2 8426 return read_file(HostFacts._dmi_path_list, 'sys_vendor')
f91f0fd5
TL
8427
8428 @property
8429 def model(self):
8430 # type: () -> str
8431 """Determine server model information from DMI data in sysfs"""
f67539c2
TL
8432 family = read_file(HostFacts._dmi_path_list, 'product_family')
8433 product = read_file(HostFacts._dmi_path_list, 'product_name')
f91f0fd5 8434 if family == 'Unknown' and product:
f67539c2 8435 return '{}'.format(product)
f91f0fd5 8436
f67539c2 8437 return '{} ({})'.format(family, product)
f91f0fd5
TL
8438
8439 @property
8440 def bios_version(self):
8441 # type: () -> str
8442 """Determine server BIOS version from DMI data in sysfs"""
f67539c2 8443 return read_file(HostFacts._dmi_path_list, 'bios_version')
f91f0fd5
TL
8444
8445 @property
8446 def bios_date(self):
8447 # type: () -> str
8448 """Determine server BIOS date from DMI data in sysfs"""
f67539c2 8449 return read_file(HostFacts._dmi_path_list, 'bios_date')
f91f0fd5
TL
8450
8451 @property
8452 def timestamp(self):
8453 # type: () -> float
8454 """Return the current time as Epoch seconds"""
8455 return time.time()
8456
8457 @property
8458 def system_uptime(self):
8459 # type: () -> float
8460 """Return the system uptime (in secs)"""
8461 raw_time = read_file(['/proc/uptime'])
8462 up_secs, _ = raw_time.split()
8463 return float(up_secs)
8464
f67539c2 8465 @property
f91f0fd5
TL
8466 def kernel_security(self):
8467 # type: () -> Dict[str, str]
8468 """Determine the security features enabled in the kernel - SELinux, AppArmor"""
f67539c2 8469 def _fetch_selinux() -> Dict[str, str]:
522d829b 8470 """Get the selinux status"""
f91f0fd5 8471 security = {}
522d829b
TL
8472 try:
8473 out, err, code = call(self.ctx, ['sestatus'],
2a845540 8474 verbosity=CallVerbosity.QUIET)
522d829b
TL
8475 security['type'] = 'SELinux'
8476 status, mode, policy = '', '', ''
8477 for line in out.split('\n'):
8478 if line.startswith('SELinux status:'):
8479 k, v = line.split(':')
8480 status = v.strip()
8481 elif line.startswith('Current mode:'):
8482 k, v = line.split(':')
8483 mode = v.strip()
8484 elif line.startswith('Loaded policy name:'):
8485 k, v = line.split(':')
8486 policy = v.strip()
8487 if status == 'disabled':
8488 security['description'] = 'SELinux: Disabled'
8489 else:
8490 security['description'] = 'SELinux: Enabled({}, {})'.format(mode, policy)
8491 except Exception as e:
8492 logger.info('unable to get selinux status: %s' % e)
8493 return security
f91f0fd5 8494
f67539c2 8495 def _fetch_apparmor() -> Dict[str, str]:
f91f0fd5
TL
8496 """Read the apparmor profiles directly, returning an overview of AppArmor status"""
8497 security = {}
8498 for apparmor_path in HostFacts._apparmor_path_list:
8499 if os.path.exists(apparmor_path):
f67539c2
TL
8500 security['type'] = 'AppArmor'
8501 security['description'] = 'AppArmor: Enabled'
f91f0fd5
TL
8502 try:
8503 profiles = read_file(['/sys/kernel/security/apparmor/profiles'])
b3b6e05e
TL
8504 if len(profiles) == 0:
8505 return {}
f91f0fd5
TL
8506 except OSError:
8507 pass
8508 else:
8509 summary = {} # type: Dict[str, int]
8510 for line in profiles.split('\n'):
8511 item, mode = line.split(' ')
f67539c2 8512 mode = mode.strip('()')
f91f0fd5
TL
8513 if mode in summary:
8514 summary[mode] += 1
8515 else:
8516 summary[mode] = 1
f67539c2
TL
8517 summary_str = ','.join(['{} {}'.format(v, k) for k, v in summary.items()])
8518 security = {**security, **summary} # type: ignore
8519 security['description'] += '({})'.format(summary_str)
f91f0fd5
TL
8520
8521 return security
f67539c2 8522 return {}
f91f0fd5 8523
f67539c2 8524 ret = {}
f91f0fd5
TL
8525 if os.path.exists('/sys/kernel/security/lsm'):
8526 lsm = read_file(['/sys/kernel/security/lsm']).strip()
8527 if 'selinux' in lsm:
f67539c2 8528 ret = _fetch_selinux()
f91f0fd5 8529 elif 'apparmor' in lsm:
f67539c2 8530 ret = _fetch_apparmor()
f91f0fd5
TL
8531 else:
8532 return {
f67539c2
TL
8533 'type': 'Unknown',
8534 'description': 'Linux Security Module framework is active, but is not using SELinux or AppArmor'
f91f0fd5
TL
8535 }
8536
f67539c2
TL
8537 if ret:
8538 return ret
8539
f91f0fd5 8540 return {
f67539c2
TL
8541 'type': 'None',
8542 'description': 'Linux Security Module framework is not available'
f91f0fd5
TL
8543 }
8544
f67539c2 8545 @property
522d829b 8546 def selinux_enabled(self) -> bool:
f67539c2
TL
8547 return (self.kernel_security['type'] == 'SELinux') and \
8548 (self.kernel_security['description'] != 'SELinux: Disabled')
8549
adb31ebb
TL
8550 @property
8551 def kernel_parameters(self):
8552 # type: () -> Dict[str, str]
8553 """Get kernel parameters required/used in Ceph clusters"""
8554
8555 k_param = {}
f67539c2 8556 out, _, _ = call_throws(self.ctx, ['sysctl', '-a'], verbosity=CallVerbosity.SILENT)
adb31ebb
TL
8557 if out:
8558 param_list = out.split('\n')
f67539c2 8559 param_dict = {param.split(' = ')[0]: param.split(' = ')[-1] for param in param_list}
adb31ebb
TL
8560
8561 # return only desired parameters
8562 if 'net.ipv4.ip_nonlocal_bind' in param_dict:
8563 k_param['net.ipv4.ip_nonlocal_bind'] = param_dict['net.ipv4.ip_nonlocal_bind']
8564
8565 return k_param
8566
522d829b
TL
8567 @staticmethod
8568 def _process_net_data(tcp_file: str, protocol: str = 'tcp') -> List[int]:
8569 listening_ports = []
8570 # Connections state documentation
8571 # tcp - https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/net/tcp_states.h
8572 # udp - uses 07 (TCP_CLOSE or UNCONN, since udp is stateless. test with netcat -ul <port>)
8573 listening_state = {
8574 'tcp': '0A',
8575 'udp': '07'
8576 }
8577
8578 if protocol not in listening_state.keys():
8579 return []
8580
8581 if os.path.exists(tcp_file):
8582 with open(tcp_file) as f:
8583 tcp_data = f.readlines()[1:]
8584
8585 for con in tcp_data:
8586 con_info = con.strip().split()
8587 if con_info[3] == listening_state[protocol]:
8588 local_port = int(con_info[1].split(':')[1], 16)
8589 listening_ports.append(local_port)
8590
8591 return listening_ports
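    # Worked example for the parsing above: a /proc/net/tcp row whose
    # local_address column ends in ':1A85' and whose state column is '0A'
    # (LISTEN) is reported as int('1A85', 16) == 6789, the Ceph mon v1 port.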
8592
8593 @property
8594 def tcp_ports_used(self) -> List[int]:
8595 return HostFacts._process_net_data('/proc/net/tcp')
8596
8597 @property
8598 def tcp6_ports_used(self) -> List[int]:
8599 return HostFacts._process_net_data('/proc/net/tcp6')
8600
8601 @property
8602 def udp_ports_used(self) -> List[int]:
8603 return HostFacts._process_net_data('/proc/net/udp', 'udp')
8604
8605 @property
8606 def udp6_ports_used(self) -> List[int]:
8607 return HostFacts._process_net_data('/proc/net/udp6', 'udp')
8608
f91f0fd5
TL
8609 def dump(self):
8610 # type: () -> str
8611 """Return the attributes of this HostFacts object as json"""
f67539c2
TL
8612 data = {
8613 k: getattr(self, k) for k in dir(self)
8614 if not k.startswith('_')
8615 and isinstance(getattr(self, k), (float, int, str, list, dict, tuple))
f91f0fd5
TL
8616 }
8617 return json.dumps(data, indent=2, sort_keys=True)
8618
8619##################################
8620
f67539c2 8621
522d829b 8622def command_gather_facts(ctx: CephadmContext) -> None:
f91f0fd5 8623 """gather_facts is intended to provide host-related metadata to the caller"""
f67539c2 8624 host = HostFacts(ctx)
f91f0fd5
TL
8625 print(host.dump())
8626
f67539c2
TL
8627
8628##################################
8629
8630
a4b75251 8631def systemd_target_state(ctx: CephadmContext, target_name: str, subsystem: str = 'ceph') -> bool:
f67539c2
TL
8632 # TODO: UNITTEST
8633 return os.path.exists(
8634 os.path.join(
a4b75251 8635 ctx.unit_dir,
f67539c2
TL
8636 f'{subsystem}.target.wants',
8637 target_name
8638 )
8639 )
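
# Example: with the default unit dir, systemd_target_state(ctx, 'ceph-<fsid>.target')
# simply checks for /etc/systemd/system/ceph.target.wants/ceph-<fsid>.target,
# i.e. whether the per-cluster target is still wanted by ceph.target.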
8640
8641
a4b75251
TL
8642def target_exists(ctx: CephadmContext) -> bool:
8643 return os.path.exists(ctx.unit_dir + '/ceph.target')
8644
8645
f67539c2 8646@infer_fsid
522d829b 8647def command_maintenance(ctx: CephadmContext) -> str:
f67539c2 8648 if not ctx.fsid:
a4b75251 8649 raise Error('failed - must pass --fsid to specify cluster')
f67539c2
TL
8650
8651 target = f'ceph-{ctx.fsid}.target'
8652
8653 if ctx.maintenance_action.lower() == 'enter':
8654 logger.info('Requested to place host into maintenance')
a4b75251 8655 if systemd_target_state(ctx, target):
f67539c2
TL
8656 _out, _err, code = call(ctx,
8657 ['systemctl', 'disable', target],
8658 verbosity=CallVerbosity.DEBUG)
8659 if code:
8660 logger.error(f'Failed to disable the {target} target')
8661 return 'failed - unable to disable the target'
8662 else:
8663 # stopping a target waits by default
8664 _out, _err, code = call(ctx,
8665 ['systemctl', 'stop', target],
8666 verbosity=CallVerbosity.DEBUG)
8667 if code:
8668 logger.error(f'Failed to stop the {target} target')
8669 return 'failed - unable to stop the target'
8670 else:
8671 return f'success - systemd target {target} disabled'
8672
8673 else:
8674 return 'skipped - target already disabled'
8675
8676 else:
8677 logger.info('Requested to exit maintenance state')
a4b75251
TL
8678 # if we've never deployed a daemon on this host there will be no systemd
8679 # target to disable so attempting a disable will fail. We still need to
8680 # return success here or host will be permanently stuck in maintenance mode
8681 # as no daemons can be deployed so no systemd target will ever exist to disable.
8682 if not target_exists(ctx):
8683 return 'skipped - systemd target not present on this host. Host removed from maintenance mode.'
f67539c2 8684 # exit maintenance request
a4b75251 8685 if not systemd_target_state(ctx, target):
f67539c2
TL
8686 _out, _err, code = call(ctx,
8687 ['systemctl', 'enable', target],
8688 verbosity=CallVerbosity.DEBUG)
8689 if code:
8690 logger.error(f'Failed to enable the {target} target')
8691 return 'failed - unable to enable the target'
8692 else:
8693 # starting a target waits by default
8694 _out, _err, code = call(ctx,
8695 ['systemctl', 'start', target],
8696 verbosity=CallVerbosity.DEBUG)
8697 if code:
8698 logger.error(f'Failed to start the {target} target')
8699 return 'failed - unable to start the target'
8700 else:
8701 return f'success - systemd target {target} enabled and started'
522d829b 8702 return f'success - systemd target {target} enabled and started'
f91f0fd5
TL
8703
8704##################################
8705
f6b5b4d7 8706
9f95a23c
TL
8707def _get_parser():
8708 # type: () -> argparse.ArgumentParser
8709 parser = argparse.ArgumentParser(
8710 description='Bootstrap Ceph daemons with systemd and containers.',
8711 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
8712 parser.add_argument(
8713 '--image',
8714 help='container image. Can also be set via the "CEPHADM_IMAGE" '
8715 'env var')
8716 parser.add_argument(
8717 '--docker',
8718 action='store_true',
8719 help='use docker instead of podman')
8720 parser.add_argument(
8721 '--data-dir',
8722 default=DATA_DIR,
8723 help='base directory for daemon data')
8724 parser.add_argument(
8725 '--log-dir',
8726 default=LOG_DIR,
8727 help='base directory for daemon logs')
8728 parser.add_argument(
8729 '--logrotate-dir',
8730 default=LOGROTATE_DIR,
8731 help='location of logrotate configuration files')
b3b6e05e
TL
8732 parser.add_argument(
8733 '--sysctl-dir',
8734 default=SYSCTL_DIR,
8735 help='location of sysctl configuration files')
9f95a23c
TL
8736 parser.add_argument(
8737 '--unit-dir',
8738 default=UNIT_DIR,
8739 help='base directory for systemd units')
8740 parser.add_argument(
8741 '--verbose', '-v',
8742 action='store_true',
8743 help='Show debug-level log messages')
8744 parser.add_argument(
8745 '--timeout',
8746 type=int,
8747 default=DEFAULT_TIMEOUT,
8748 help='timeout in seconds')
8749 parser.add_argument(
8750 '--retry',
8751 type=int,
8752 default=DEFAULT_RETRY,
8753 help='max number of retries')
e306af50
TL
8754 parser.add_argument(
8755 '--env', '-e',
8756 action='append',
8757 default=[],
8758 help='set environment variable')
f67539c2
TL
8759 parser.add_argument(
8760 '--no-container-init',
8761 action='store_true',
8762 default=not CONTAINER_INIT,
8763 help='Do not run podman/docker with `--init`')
9f95a23c
TL
8764
8765 subparsers = parser.add_subparsers(help='sub-command')
8766
8767 parser_version = subparsers.add_parser(
8768 'version', help='get ceph version from container')
8769 parser_version.set_defaults(func=command_version)
8770
8771 parser_pull = subparsers.add_parser(
33c7a0ef 8772 'pull', help='pull the default container image')
9f95a23c 8773 parser_pull.set_defaults(func=command_pull)
a4b75251
TL
8774 parser_pull.add_argument(
8775 '--insecure',
8776 action='store_true',
8777 help=argparse.SUPPRESS,
8778 )
9f95a23c
TL
8779
8780 parser_inspect_image = subparsers.add_parser(
8781 'inspect-image', help='inspect local container image')
8782 parser_inspect_image.set_defaults(func=command_inspect_image)
8783
8784 parser_ls = subparsers.add_parser(
8785 'ls', help='list daemon instances on this host')
8786 parser_ls.set_defaults(func=command_ls)
8787 parser_ls.add_argument(
8788 '--no-detail',
8789 action='store_true',
8790 help='Do not include daemon status')
8791 parser_ls.add_argument(
8792 '--legacy-dir',
8793 default='/',
8794 help='base directory for legacy daemon data')
8795
8796 parser_list_networks = subparsers.add_parser(
8797 'list-networks', help='list IP networks')
8798 parser_list_networks.set_defaults(func=command_list_networks)
8799
8800 parser_adopt = subparsers.add_parser(
8801 'adopt', help='adopt daemon deployed with a different tool')
8802 parser_adopt.set_defaults(func=command_adopt)
8803 parser_adopt.add_argument(
8804 '--name', '-n',
8805 required=True,
8806 help='daemon name (type.id)')
8807 parser_adopt.add_argument(
8808 '--style',
8809 required=True,
8810 help='deployment style (legacy, ...)')
8811 parser_adopt.add_argument(
8812 '--cluster',
8813 default='ceph',
8814 help='cluster name')
8815 parser_adopt.add_argument(
8816 '--legacy-dir',
8817 default='/',
8818 help='base directory for legacy daemon data')
8819 parser_adopt.add_argument(
8820 '--config-json',
8821 help='Additional configuration information in JSON format')
8822 parser_adopt.add_argument(
8823 '--skip-firewalld',
8824 action='store_true',
8825 help='Do not configure firewalld')
8826 parser_adopt.add_argument(
8827 '--skip-pull',
8828 action='store_true',
33c7a0ef 8829 help='do not pull the default image before adopting')
1911f103
TL
8830 parser_adopt.add_argument(
8831 '--force-start',
8832 action='store_true',
f67539c2 8833 help='start newly adopted daemon, even if it was not running previously')
f91f0fd5
TL
8834 parser_adopt.add_argument(
8835 '--container-init',
8836 action='store_true',
f67539c2
TL
8837 default=CONTAINER_INIT,
8838 help=argparse.SUPPRESS)
9f95a23c
TL
8839
8840 parser_rm_daemon = subparsers.add_parser(
8841 'rm-daemon', help='remove daemon instance')
8842 parser_rm_daemon.set_defaults(func=command_rm_daemon)
8843 parser_rm_daemon.add_argument(
8844 '--name', '-n',
8845 required=True,
8846 action=CustomValidation,
8847 help='daemon name (type.id)')
33c7a0ef
TL
8848 parser_rm_daemon.add_argument(
8849 '--tcp-ports',
8850 help='List of tcp ports to close in the host firewall')
9f95a23c
TL
8851 parser_rm_daemon.add_argument(
8852 '--fsid',
8853 required=True,
8854 help='cluster FSID')
8855 parser_rm_daemon.add_argument(
8856 '--force',
8857 action='store_true',
8858 help='proceed, even though this may destroy valuable data')
8859 parser_rm_daemon.add_argument(
8860 '--force-delete-data',
8861 action='store_true',
8862 help='delete valuable daemon data instead of making a backup')
8863
8864 parser_rm_cluster = subparsers.add_parser(
8865 'rm-cluster', help='remove all daemons for a cluster')
8866 parser_rm_cluster.set_defaults(func=command_rm_cluster)
8867 parser_rm_cluster.add_argument(
8868 '--fsid',
8869 required=True,
8870 help='cluster FSID')
8871 parser_rm_cluster.add_argument(
8872 '--force',
8873 action='store_true',
8874 help='proceed, even though this may destroy valuable data')
f67539c2
TL
8875 parser_rm_cluster.add_argument(
8876 '--keep-logs',
8877 action='store_true',
8878 help='do not remove log files')
b3b6e05e
TL
8879 parser_rm_cluster.add_argument(
8880 '--zap-osds',
8881 action='store_true',
8882 help='zap OSD devices for this cluster')
9f95a23c
TL
8883
8884 parser_run = subparsers.add_parser(
8885 'run', help='run a ceph daemon, in a container, in the foreground')
8886 parser_run.set_defaults(func=command_run)
8887 parser_run.add_argument(
8888 '--name', '-n',
8889 required=True,
8890 help='daemon name (type.id)')
8891 parser_run.add_argument(
8892 '--fsid',
8893 required=True,
8894 help='cluster FSID')
8895
8896 parser_shell = subparsers.add_parser(
8897 'shell', help='run an interactive shell inside a daemon container')
8898 parser_shell.set_defaults(func=command_shell)
20effc67
TL
8899 parser_shell.add_argument(
8900 '--shared_ceph_folder',
8901 metavar='CEPH_SOURCE_FOLDER',
8902 help='Development mode: map several container folders to sub-folders of the given Ceph source folder')
9f95a23c
TL
8903 parser_shell.add_argument(
8904 '--fsid',
8905 help='cluster FSID')
8906 parser_shell.add_argument(
8907 '--name', '-n',
8908 help='daemon name (type.id)')
8909 parser_shell.add_argument(
8910 '--config', '-c',
8911 help='ceph.conf to pass through to the container')
8912 parser_shell.add_argument(
8913 '--keyring', '-k',
8914 help='ceph.keyring to pass through to the container')
e306af50
TL
8915 parser_shell.add_argument(
8916 '--mount', '-m',
f67539c2
TL
8917 help=('mount a file or directory in the container. '
8918 'Supports multiple mounts, '
8919 'e.g. `--mount /foo /bar:/bar`. '
8920 'When no destination is passed, the default is /mnt'),
8921 nargs='+')
9f95a23c
TL
8922 parser_shell.add_argument(
8923 '--env', '-e',
8924 action='append',
8925 default=[],
8926 help='set environment variable')
b3b6e05e
TL
8927 parser_shell.add_argument(
8928 '--volume', '-v',
8929 action='append',
8930 default=[],
8931 help='mount a volume inside the container')
9f95a23c 8932 parser_shell.add_argument(
e306af50 8933 'command', nargs=argparse.REMAINDER,
9f95a23c 8934 help='command (optional)')
b3b6e05e
TL
8935 parser_shell.add_argument(
8936 '--no-hosts',
8937 action='store_true',
8938 help='do not pass /etc/hosts through to the container')
9f95a23c
TL
8939
8940 parser_enter = subparsers.add_parser(
8941 'enter', help='run an interactive shell inside a running daemon container')
8942 parser_enter.set_defaults(func=command_enter)
8943 parser_enter.add_argument(
8944 '--fsid',
8945 help='cluster FSID')
8946 parser_enter.add_argument(
8947 '--name', '-n',
8948 required=True,
8949 help='daemon name (type.id)')
8950 parser_enter.add_argument(
e306af50 8951 'command', nargs=argparse.REMAINDER,
9f95a23c
TL
8952 help='command')
8953
8954 parser_ceph_volume = subparsers.add_parser(
8955 'ceph-volume', help='run ceph-volume inside a container')
8956 parser_ceph_volume.set_defaults(func=command_ceph_volume)
20effc67
TL
8957 parser_ceph_volume.add_argument(
8958 '--shared_ceph_folder',
8959 metavar='CEPH_SOURCE_FOLDER',
8960 help='Development mode: map several container folders to sub-folders of the given Ceph source folder')
9f95a23c
TL
8961 parser_ceph_volume.add_argument(
8962 '--fsid',
8963 help='cluster FSID')
8964 parser_ceph_volume.add_argument(
8965 '--config-json',
20effc67 8966 help='JSON file with config and (client.bootstrap-osd) key')
801d1391
TL
8967 parser_ceph_volume.add_argument(
8968 '--config', '-c',
8969 help='ceph conf file')
8970 parser_ceph_volume.add_argument(
8971 '--keyring', '-k',
8972 help='ceph.keyring to pass through to the container')
9f95a23c 8973 parser_ceph_volume.add_argument(
e306af50 8974 'command', nargs=argparse.REMAINDER,
9f95a23c
TL
8975 help='command')
8976
b3b6e05e
TL
8977 parser_zap_osds = subparsers.add_parser(
8978 'zap-osds', help='zap all OSDs associated with a particular fsid')
8979 parser_zap_osds.set_defaults(func=command_zap_osds)
8980 parser_zap_osds.add_argument(
8981 '--fsid',
8982 required=True,
8983 help='cluster FSID')
8984 parser_zap_osds.add_argument(
8985 '--force',
8986 action='store_true',
8987 help='proceed, even though this may destroy valuable data')
8988
9f95a23c 8989 parser_unit = subparsers.add_parser(
f67539c2 8990 'unit', help="operate on the daemon's systemd unit")
9f95a23c
TL
8991 parser_unit.set_defaults(func=command_unit)
8992 parser_unit.add_argument(
8993 'command',
8994 help='systemd command (start, stop, restart, enable, disable, ...)')
8995 parser_unit.add_argument(
8996 '--fsid',
8997 help='cluster FSID')
8998 parser_unit.add_argument(
8999 '--name', '-n',
9000 required=True,
9001 help='daemon name (type.id)')
9002
9003 parser_logs = subparsers.add_parser(
9004 'logs', help='print journald logs for a daemon container')
9005 parser_logs.set_defaults(func=command_logs)
9006 parser_logs.add_argument(
9007 '--fsid',
9008 help='cluster FSID')
9009 parser_logs.add_argument(
9010 '--name', '-n',
9011 required=True,
9012 help='daemon name (type.id)')
9013 parser_logs.add_argument(
9014 'command', nargs='*',
9015 help='additional journalctl args')
9016
9017 parser_bootstrap = subparsers.add_parser(
9018 'bootstrap', help='bootstrap a cluster (mon + mgr daemons)')
9019 parser_bootstrap.set_defaults(func=command_bootstrap)
9020 parser_bootstrap.add_argument(
9021 '--config', '-c',
9022 help='ceph conf file to incorporate')
9023 parser_bootstrap.add_argument(
9024 '--mon-id',
9025 required=False,
9026 help='mon id (default: local hostname)')
33c7a0ef
TL
9027 group = parser_bootstrap.add_mutually_exclusive_group()
9028 group.add_argument(
9f95a23c
TL
9029 '--mon-addrv',
9030 help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])')
33c7a0ef 9031 group.add_argument(
9f95a23c
TL
9032 '--mon-ip',
9033 help='mon IP')
9034 parser_bootstrap.add_argument(
9035 '--mgr-id',
9036 required=False,
9037 help='mgr id (default: randomly generated)')
9038 parser_bootstrap.add_argument(
9039 '--fsid',
9040 help='cluster FSID')
9041 parser_bootstrap.add_argument(
9042 '--output-dir',
9043 default='/etc/ceph',
9044 help='directory to write config, keyring, and pub key files')
9045 parser_bootstrap.add_argument(
9046 '--output-keyring',
9047 help='location to write keyring file with new cluster admin and mon keys')
9048 parser_bootstrap.add_argument(
9049 '--output-config',
9050 help='location to write conf file to connect to new cluster')
9051 parser_bootstrap.add_argument(
9052 '--output-pub-ssh-key',
f67539c2 9053 help="location to write the cluster's public SSH key")
b3b6e05e
TL
9054 parser_bootstrap.add_argument(
9055 '--skip-admin-label',
9056 action='store_true',
9057 help='do not create admin label for ceph.conf and client.admin keyring distribution')
9f95a23c
TL
9058 parser_bootstrap.add_argument(
9059 '--skip-ssh',
9060 action='store_true',
9061 help='skip setup of ssh key on local host')
9062 parser_bootstrap.add_argument(
9063 '--initial-dashboard-user',
9064 default='admin',
9065 help='Initial user for the dashboard')
9066 parser_bootstrap.add_argument(
9067 '--initial-dashboard-password',
9068 help='Initial password for the initial dashboard user')
f6b5b4d7
TL
9069 parser_bootstrap.add_argument(
9070 '--ssl-dashboard-port',
9071 type=int,
f67539c2 9072 default=8443,
f6b5b4d7 9073 help='Port number used to connect with dashboard using SSL')
9f95a23c
TL
9074 parser_bootstrap.add_argument(
9075 '--dashboard-key',
e306af50 9076 type=argparse.FileType('r'),
9f95a23c
TL
9077 help='Dashboard key')
9078 parser_bootstrap.add_argument(
9079 '--dashboard-crt',
e306af50 9080 type=argparse.FileType('r'),
9f95a23c
TL
9081 help='Dashboard certificate')
9082
e306af50
TL
9083 parser_bootstrap.add_argument(
9084 '--ssh-config',
9085 type=argparse.FileType('r'),
9086 help='SSH config')
9087 parser_bootstrap.add_argument(
9088 '--ssh-private-key',
9089 type=argparse.FileType('r'),
9090 help='SSH private key')
9091 parser_bootstrap.add_argument(
9092 '--ssh-public-key',
9093 type=argparse.FileType('r'),
9094 help='SSH public key')
f6b5b4d7
TL
9095 parser_bootstrap.add_argument(
9096 '--ssh-user',
9097 default='root',
9098 help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users')
9f95a23c
TL
9099 parser_bootstrap.add_argument(
9100 '--skip-mon-network',
9101 action='store_true',
9102 help='set mon public_network based on bootstrap mon ip')
9103 parser_bootstrap.add_argument(
9104 '--skip-dashboard',
9105 action='store_true',
9106 help='do not enable the Ceph Dashboard')
9107 parser_bootstrap.add_argument(
9108 '--dashboard-password-noupdate',
9109 action='store_true',
9110 help='stop forced dashboard password change')
9111 parser_bootstrap.add_argument(
9112 '--no-minimize-config',
9113 action='store_true',
9114 help='do not assimilate and minimize the config file')
9115 parser_bootstrap.add_argument(
9116 '--skip-ping-check',
9117 action='store_true',
9118 help='do not verify that mon IP is pingable')
9119 parser_bootstrap.add_argument(
9120 '--skip-pull',
9121 action='store_true',
33c7a0ef 9122 help='do not pull the default image before bootstrapping')
9f95a23c
TL
9123 parser_bootstrap.add_argument(
9124 '--skip-firewalld',
9125 action='store_true',
9126 help='Do not configure firewalld')
9127 parser_bootstrap.add_argument(
9128 '--allow-overwrite',
9129 action='store_true',
9130 help='allow overwrite of existing --output-* config/keyring/ssh files')
9131 parser_bootstrap.add_argument(
9132 '--allow-fqdn-hostname',
9133 action='store_true',
9134 help='allow hostname that is fully-qualified (contains ".")')
f67539c2
TL
9135 parser_bootstrap.add_argument(
9136 '--allow-mismatched-release',
9137 action='store_true',
9138 help="allow bootstrap of ceph that doesn't match this version of cephadm")
9f95a23c
TL
9139 parser_bootstrap.add_argument(
9140 '--skip-prepare-host',
9141 action='store_true',
9142 help='Do not prepare host')
9143 parser_bootstrap.add_argument(
9144 '--orphan-initial-daemons',
9145 action='store_true',
f67539c2 9146 help='Set mon and mgr service to `unmanaged`; do not create the crash service')
9f95a23c
TL
9147 parser_bootstrap.add_argument(
9148 '--skip-monitoring-stack',
9149 action='store_true',
9150 help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)')
2a845540
TL
9151 parser_bootstrap.add_argument(
9152 '--with-centralized-logging',
9153 action='store_true',
9154 help='Automatically provision centralized logging (promtail, loki)')
e306af50
TL
9155 parser_bootstrap.add_argument(
9156 '--apply-spec',
9157 help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)')
e306af50
TL
9158 parser_bootstrap.add_argument(
9159 '--shared_ceph_folder',
9160 metavar='CEPH_SOURCE_FOLDER',
9161 help='Development mode. Several container folders are volume-mapped to sub-folders of the ceph source folder')
9f95a23c 9162
f6b5b4d7
TL
9163 parser_bootstrap.add_argument(
9164 '--registry-url',
9165 help='url for custom registry')
9166 parser_bootstrap.add_argument(
9167 '--registry-username',
9168 help='username for custom registry')
9169 parser_bootstrap.add_argument(
9170 '--registry-password',
9171 help='password for custom registry')
9172 parser_bootstrap.add_argument(
9173 '--registry-json',
9174 help='json file with custom registry login info (URL, Username, Password)')
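# For reference, a minimal sketch of the file consumed by --registry-json.
# Illustrative only: the lowercase url/username/password keys are an assumption
# based on the help text above; the registry-login handling elsewhere in this
# script is authoritative.
import json

registry_login = {
    'url': 'registry.example.com',       # hypothetical registry host
    'username': 'myregistryuser',        # hypothetical credentials
    'password': 'myregistrypassword',
}
with open('registry.json', 'w') as f:
    json.dump(registry_login, f)
# then: cephadm bootstrap ... --registry-json registry.json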
f91f0fd5
TL
9175 parser_bootstrap.add_argument(
9176 '--container-init',
9177 action='store_true',
f67539c2
TL
9178 default=CONTAINER_INIT,
9179 help=argparse.SUPPRESS)
f67539c2
TL
9180 parser_bootstrap.add_argument(
9181 '--cluster-network',
9182 help='subnet to use for cluster replication, recovery and heartbeats (in CIDR notation network/mask)')
b3b6e05e
TL
9183 parser_bootstrap.add_argument(
9184 '--single-host-defaults',
9185 action='store_true',
9186 help='adjust configuration defaults to suit a single-host cluster')
522d829b
TL
9187 parser_bootstrap.add_argument(
9188 '--log-to-file',
9189 action='store_true',
9190 help='configure cluster to log to traditional log files in /var/log/ceph/$fsid')
f6b5b4d7 9191
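# Illustrative only: how the bootstrap options above surface in the parsed
# namespace, using _parse_args() defined further down. --mon-ip is defined
# earlier in _get_parser(), outside this excerpt; the address is a placeholder.
args = _parse_args([
    'bootstrap',
    '--mon-ip', '192.168.1.10',
    '--ssh-user', 'cephadm',
    '--single-host-defaults',
    '--skip-monitoring-stack',
])
assert args.ssh_user == 'cephadm'            # dashes become underscores
assert args.single_host_defaults is True     # store_true flags default to False
assert args.skip_monitoring_stack is True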
9f95a23c
TL
9192 parser_deploy = subparsers.add_parser(
9193 'deploy', help='deploy a daemon')
9194 parser_deploy.set_defaults(func=command_deploy)
9195 parser_deploy.add_argument(
9196 '--name',
9197 required=True,
9198 action=CustomValidation,
9199 help='daemon name (type.id)')
9200 parser_deploy.add_argument(
9201 '--fsid',
9202 required=True,
9203 help='cluster FSID')
9204 parser_deploy.add_argument(
9205 '--config', '-c',
9206 help='config file for new daemon')
9207 parser_deploy.add_argument(
9208 '--config-json',
9209 help='Additional configuration information in JSON format')
9210 parser_deploy.add_argument(
9211 '--keyring',
9212 help='keyring for new daemon')
9213 parser_deploy.add_argument(
9214 '--key',
9215 help='key for new daemon')
9216 parser_deploy.add_argument(
9217 '--osd-fsid',
9218 help='OSD uuid, if creating an OSD container')
9219 parser_deploy.add_argument(
9220 '--skip-firewalld',
9221 action='store_true',
9222 help='Do not configure firewalld')
f6b5b4d7
TL
9223 parser_deploy.add_argument(
9224 '--tcp-ports',
9225 help='List of TCP ports to open in the host firewall')
9f95a23c
TL
9226 parser_deploy.add_argument(
9227 '--reconfig',
9228 action='store_true',
9229 help='Reconfigure a previously deployed daemon')
9230 parser_deploy.add_argument(
9231 '--allow-ptrace',
9232 action='store_true',
9233 help='Allow SYS_PTRACE on daemon container')
f91f0fd5
TL
9234 parser_deploy.add_argument(
9235 '--container-init',
9236 action='store_true',
f67539c2
TL
9237 default=CONTAINER_INIT,
9238 help=argparse.SUPPRESS)
9239 parser_deploy.add_argument(
9240 '--memory-request',
9241 help='Container memory request/target'
9242 )
9243 parser_deploy.add_argument(
9244 '--memory-limit',
9245 help='Container memory hard limit'
9246 )
9247 parser_deploy.add_argument(
9248 '--meta-json',
9249 help='JSON dict of additional metadata'
9250 )
20effc67
TL
9251 parser_deploy.add_argument(
9252 '--extra-container-args',
9253 action='append',
9254 default=[],
9255 help='Additional container arguments to apply to daemon'
9256 )
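# Because --extra-container-args uses action='append', it may be repeated.
# Illustrative only (placeholder fsid); note the '=' form, which keeps argparse
# from treating a value that starts with '-' as a new option.
args = _parse_args([
    'deploy',
    '--name', 'osd.0',
    '--fsid', '00000000-0000-0000-0000-000000000000',
    '--extra-container-args=--pids-limit=4096',
    '--extra-container-args=--cpus=2',
])
assert args.extra_container_args == ['--pids-limit=4096', '--cpus=2']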
9f95a23c
TL
9257
9258 parser_check_host = subparsers.add_parser(
9259 'check-host', help='check host configuration')
9260 parser_check_host.set_defaults(func=command_check_host)
9261 parser_check_host.add_argument(
9262 '--expect-hostname',
9263 help='Check that hostname matches an expected value')
9264
9265 parser_prepare_host = subparsers.add_parser(
9266 'prepare-host', help='prepare a host for cephadm use')
9267 parser_prepare_host.set_defaults(func=command_prepare_host)
9268 parser_prepare_host.add_argument(
9269 '--expect-hostname',
9270 help='Set hostname')
9271
9272 parser_add_repo = subparsers.add_parser(
9273 'add-repo', help='configure package repository')
9274 parser_add_repo.set_defaults(func=command_add_repo)
9275 parser_add_repo.add_argument(
9276 '--release',
1911f103 9277 help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE))
9f95a23c
TL
9278 parser_add_repo.add_argument(
9279 '--version',
9280 help='use specific upstream version (x.y.z)')
9281 parser_add_repo.add_argument(
9282 '--dev',
9283 help='use specified bleeding edge build from git branch or tag')
9284 parser_add_repo.add_argument(
9285 '--dev-commit',
9286 help='use specified bleeding edge build from git commit')
9287 parser_add_repo.add_argument(
9288 '--gpg-url',
9289 help='specify alternative GPG key location')
9290 parser_add_repo.add_argument(
9291 '--repo-url',
9292 default='https://download.ceph.com',
9293 help='specify alternative repo location')
9294 # TODO: proxy?
9295
9296 parser_rm_repo = subparsers.add_parser(
9297 'rm-repo', help='remove package repository configuration')
9298 parser_rm_repo.set_defaults(func=command_rm_repo)
9299
9300 parser_install = subparsers.add_parser(
9301 'install', help='install ceph package(s)')
9302 parser_install.set_defaults(func=command_install)
9303 parser_install.add_argument(
9304 'packages', nargs='*',
9305 default=['cephadm'],
9306 help='packages')
9307
f6b5b4d7
TL
9308 parser_registry_login = subparsers.add_parser(
9309 'registry-login', help='log host into authenticated registry')
9310 parser_registry_login.set_defaults(func=command_registry_login)
9311 parser_registry_login.add_argument(
9312 '--registry-url',
9313 help='url for custom registry')
9314 parser_registry_login.add_argument(
9315 '--registry-username',
9316 help='username for custom registry')
9317 parser_registry_login.add_argument(
9318 '--registry-password',
9319 help='password for custom registry')
9320 parser_registry_login.add_argument(
9321 '--registry-json',
9322 help='json file with custom registry login info (URL, Username, Password)')
9323 parser_registry_login.add_argument(
9324 '--fsid',
9325 help='cluster FSID')
9326
f91f0fd5
TL
9327 parser_gather_facts = subparsers.add_parser(
9328 'gather-facts', help='gather and return host related information (JSON format)')
9329 parser_gather_facts.set_defaults(func=command_gather_facts)
9330
f67539c2
TL
9331 parser_maintenance = subparsers.add_parser(
9332 'host-maintenance', help='Manage the maintenance state of a host')
9333 parser_maintenance.add_argument(
9334 '--fsid',
9335 help='cluster FSID')
9336 parser_maintenance.add_argument(
9337 'maintenance_action',
9338 type=str,
9339 choices=['enter', 'exit'],
9340 help='Maintenance action - enter or exit maintenance mode')
9341 parser_maintenance.set_defaults(func=command_maintenance)
9342
20effc67
TL
9343 parser_agent = subparsers.add_parser(
9344 'agent', help='start cephadm agent')
9345 parser_agent.set_defaults(func=command_agent)
9346 parser_agent.add_argument(
9347 '--fsid',
9348 required=True,
9349 help='cluster FSID')
9350 parser_agent.add_argument(
9351 '--daemon-id',
9352 help='daemon id for agent')
9353
2a845540
TL
9354 parser_disk_rescan = subparsers.add_parser(
9355 'disk-rescan', help='rescan all HBAs to detect new/removed devices')
9356 parser_disk_rescan.set_defaults(func=command_rescan_disks)
9357
9f95a23c
TL
9358 return parser
9359
f6b5b4d7 9360
522d829b 9361def _parse_args(av: List[str]) -> argparse.Namespace:
9f95a23c 9362 parser = _get_parser()
f67539c2 9363
e306af50 9364 args = parser.parse_args(av)
f67539c2 9365 if 'command' in args and args.command and args.command[0] == '--':
e306af50 9366 args.command.pop(0)
f67539c2
TL
9367
9368 # workaround argparse to deprecate the subparser `--container-init` flag
9369 # container_init and no_container_init must always be mutually exclusive
9370 container_init_args = ('--container-init', '--no-container-init')
9371 if set(container_init_args).issubset(av):
9372 parser.error('argument %s: not allowed with argument %s' % (container_init_args))
9373 elif '--container-init' in av:
9374 args.no_container_init = not args.container_init
9375 else:
9376 args.container_init = not args.no_container_init
9377 assert args.container_init is not args.no_container_init
9378
e306af50 9379 return args
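# Behaviour sketch of the --container-init workaround above. Illustrative only;
# it assumes the top-level parser defines --no-container-init (as in the full
# script) and uses a placeholder mon IP.
a = _parse_args(['bootstrap', '--mon-ip', '10.0.0.1'])
assert a.container_init is True and a.no_container_init is False

b = _parse_args(['bootstrap', '--mon-ip', '10.0.0.1', '--container-init'])
assert b.container_init is True and b.no_container_init is False

# Passing both flags, e.g. ['--no-container-init', 'bootstrap', '--container-init'],
# is rejected via parser.error(), which raises SystemExit.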
9f95a23c 9380
f6b5b4d7 9381
b3b6e05e 9382def cephadm_init_ctx(args: List[str]) -> CephadmContext:
f67539c2
TL
9383 ctx = CephadmContext()
9384 ctx.set_args(_parse_args(args))
9385 return ctx
9386
9387
20effc67
TL
9388def cephadm_init_logging(ctx: CephadmContext, args: List[str]) -> None:
9389 """Configure the logging for cephadm as well as updating the system
9390 to have the expected log dir and logrotate configuration.
9391 """
2a845540 9392 logging.addLevelName(QUIET_LOG_LEVEL, 'QUIET')
f67539c2 9393 global logger
f91f0fd5
TL
9394 if not os.path.exists(LOG_DIR):
9395 os.makedirs(LOG_DIR)
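# bootstrap and rm-cluster are driven interactively from a terminal, so they
# use the console-oriented logging config; every other command uses the
# default config.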
33c7a0ef
TL
9396 operations = ['bootstrap', 'rm-cluster']
9397 if any(op in args for op in operations):
9398 dictConfig(interactive_logging_config)
9399 else:
9400 dictConfig(logging_config)
9401
f91f0fd5 9402 logger = logging.getLogger()
2a845540 9403 logger.setLevel(QUIET_LOG_LEVEL)
f91f0fd5 9404
b3b6e05e
TL
9405 if not os.path.exists(ctx.logrotate_dir + '/cephadm'):
9406 with open(ctx.logrotate_dir + '/cephadm', 'w') as f:
9407 f.write("""# created by cephadm
9408/var/log/ceph/cephadm.log {
9409 rotate 7
9410 daily
9411 compress
9412 missingok
9413 notifempty
2a845540 9414 su root root
b3b6e05e
TL
9415}
9416""")
9417
f67539c2 9418 if ctx.verbose:
f91f0fd5 9419 for handler in logger.handlers:
2a845540
TL
9420 if handler.name in ['console', 'log_file', 'console_stdout']:
9421 handler.setLevel(QUIET_LOG_LEVEL)
a4b75251 9422 logger.debug('%s\ncephadm %s' % ('-' * 80, args))
f67539c2
TL
9423
9424
20effc67
TL
9425def cephadm_require_root() -> None:
9426 """Exit if the process is not running as root."""
f67539c2
TL
9427 if os.geteuid() != 0:
9428 sys.stderr.write('ERROR: cephadm should be run as root\n')
9f95a23c
TL
9429 sys.exit(1)
9430
20effc67
TL
9431
9432def main() -> None:
f67539c2
TL
9433 av: List[str] = []
9434 av = sys.argv[1:]
9435
20effc67 9436 ctx = cephadm_init_ctx(av)
b3b6e05e
TL
9437 if not ctx.has_function():
9438 sys.stderr.write('No command specified; pass -h or --help for usage\n')
f67539c2 9439 sys.exit(1)
1911f103 9440
20effc67
TL
9441 cephadm_require_root()
9442 cephadm_init_logging(ctx, av)
9f95a23c 9443 try:
f67539c2
TL
9444 # podman or docker?
9445 ctx.container_engine = find_container_engine(ctx)
9446 if ctx.func not in \
a4b75251
TL
9447 [
9448 command_check_host,
9449 command_prepare_host,
9450 command_add_repo,
9451 command_rm_repo,
9452 command_install
9453 ]:
f67539c2
TL
9454 check_container_engine(ctx)
9455 # command handler
9456 r = ctx.func(ctx)
9f95a23c 9457 except Error as e:
f67539c2 9458 if ctx.verbose:
9f95a23c 9459 raise
f67539c2 9460 logger.error('ERROR: %s' % e)
9f95a23c
TL
9461 sys.exit(1)
9462 if not r:
9463 r = 0
9464 sys.exit(r)
f67539c2
TL
9465
9466
9467if __name__ == '__main__':
9468 main()