]>
Commit | Line | Data |
---|---|---|
9f95a23c TL |
1 | #!/usr/bin/python3 |
2 | ||
f67539c2 TL |
3 | import asyncio |
4 | import asyncio.subprocess | |
5 | import argparse | |
6 | import datetime | |
7 | import fcntl | |
8 | import ipaddress | |
20effc67 | 9 | import io |
f67539c2 TL |
10 | import json |
11 | import logging | |
12 | from logging.config import dictConfig | |
13 | import os | |
14 | import platform | |
15 | import pwd | |
16 | import random | |
17 | import shlex | |
18 | import shutil | |
19 | import socket | |
20 | import string | |
21 | import subprocess | |
22 | import sys | |
23 | import tempfile | |
24 | import time | |
25 | import errno | |
26 | import struct | |
f67539c2 TL |
27 | import ssl |
28 | from enum import Enum | |
a4b75251 | 29 | from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, Callable, IO, Sequence, TypeVar, cast, Set, Iterable |
f67539c2 TL |
30 | |
31 | import re | |
32 | import uuid | |
33 | ||
34 | from configparser import ConfigParser | |
20effc67 | 35 | from contextlib import redirect_stdout |
f67539c2 TL |
36 | from functools import wraps |
37 | from glob import glob | |
38 | from io import StringIO | |
20effc67 TL |
39 | from threading import Thread, Event |
40 | from urllib.error import HTTPError, URLError | |
41 | from urllib.request import urlopen, Request | |
f67539c2 TL |
42 | from pathlib import Path |
43 | ||
522d829b TL |
44 | FuncT = TypeVar('FuncT', bound=Callable) |
45 | ||
f67539c2 | 46 | # Default container images ----------------------------------------------------- |
1d09f67e TL |
47 | DEFAULT_IMAGE = 'quay.io/ceph/ceph:v17' |
48 | DEFAULT_IMAGE_IS_MASTER = False | |
20effc67 | 49 | DEFAULT_IMAGE_RELEASE = 'quincy' |
1d09f67e | 50 | DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.33.4' |
33c7a0ef TL |
51 | DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.4.0' |
52 | DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.4.0' | |
1d09f67e TL |
53 | DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.3.1' |
54 | DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.23.0' | |
55 | DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/ceph-grafana:8.3.5' | |
2a845540 TL |
56 | DEFAULT_HAPROXY_IMAGE = 'quay.io/ceph/haproxy:2.3' |
57 | DEFAULT_KEEPALIVED_IMAGE = 'quay.io/ceph/keepalived:2.1.5' | |
20effc67 | 58 | DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1' |
f67539c2 TL |
59 | DEFAULT_REGISTRY = 'docker.io' # normalize unqualified digests to this |
60 | # ------------------------------------------------------------------------------ | |
61 | ||
1d09f67e | 62 | LATEST_STABLE_RELEASE = 'quincy' |
f6b5b4d7 TL |
63 | DATA_DIR = '/var/lib/ceph' |
64 | LOG_DIR = '/var/log/ceph' | |
65 | LOCK_DIR = '/run/cephadm' | |
66 | LOGROTATE_DIR = '/etc/logrotate.d' | |
33c7a0ef | 67 | SYSCTL_DIR = '/etc/sysctl.d' |
f6b5b4d7 | 68 | UNIT_DIR = '/etc/systemd/system' |
33c7a0ef TL |
69 | CEPH_CONF_DIR = 'config' |
70 | CEPH_CONF = 'ceph.conf' | |
71 | CEPH_PUBKEY = 'ceph.pub' | |
72 | CEPH_KEYRING = 'ceph.client.admin.keyring' | |
73 | CEPH_DEFAULT_CONF = f'/etc/ceph/{CEPH_CONF}' | |
74 | CEPH_DEFAULT_KEYRING = f'/etc/ceph/{CEPH_KEYRING}' | |
75 | CEPH_DEFAULT_PUBKEY = f'/etc/ceph/{CEPH_PUBKEY}' | |
f6b5b4d7 TL |
76 | LOG_DIR_MODE = 0o770 |
77 | DATA_DIR_MODE = 0o700 | |
f67539c2 TL |
78 | CONTAINER_INIT = True |
79 | MIN_PODMAN_VERSION = (2, 0, 2) | |
80 | CGROUPS_SPLIT_PODMAN_VERSION = (2, 1, 0) | |
f6b5b4d7 TL |
81 | CUSTOM_PS1 = r'[ceph: \u@\h \W]\$ ' |
82 | DEFAULT_TIMEOUT = None # in seconds | |
f67539c2 | 83 | DEFAULT_RETRY = 15 |
f67539c2 | 84 | DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ' |
2a845540 | 85 | QUIET_LOG_LEVEL = 9 # DEBUG is 10, so using 9 to be lower level than DEBUG |
f67539c2 TL |
86 | |
87 | logger: logging.Logger = None # type: ignore | |
9f95a23c TL |
88 | |
89 | """ | |
90 | You can invoke cephadm in two ways: | |
91 | ||
92 | 1. The normal way, at the command line. | |
93 | ||
94 | 2. By piping the script to the python3 binary. In this latter case, you should | |
95 | prepend one or more lines to the beginning of the script. | |
96 | ||
97 | For arguments, | |
98 | ||
99 | injected_argv = [...] | |
100 | ||
101 | e.g., | |
102 | ||
103 | injected_argv = ['ls'] | |
104 | ||
105 | For reading stdin from the '--config-json -' argument, | |
106 | ||
107 | injected_stdin = '...' | |
108 | """ | |
f67539c2 | 109 | cached_stdin = None |
f91f0fd5 | 110 | |
2a845540 | 111 | |
f67539c2 | 112 | ################################## |
9f95a23c | 113 | |
9f95a23c | 114 | |
2a845540 TL |
async def run_func(func: Callable, cmd: str) -> subprocess.CompletedProcess:
    """Invoke *func* with *cmd*, wrapped as a coroutine so many calls can
    be gathered concurrently (see concurrent_tasks)."""
    logger.debug(f'running function {func.__name__}, with parms: {cmd}')
    return func(cmd)
119 | ||
120 | ||
async def concurrent_tasks(func: Callable, cmd_list: List[str]) -> List[Any]:
    """Fan *func* out over every entry in *cmd_list* and await all results.

    Results come back in the same order as *cmd_list*.
    """
    pending = [run_func(func, cmd) for cmd in cmd_list]
    return await asyncio.gather(*pending)
129 | ||
130 | ||
33c7a0ef TL |
class EndPoint:
    """A network endpoint rendered in ``ip:port`` form."""

    def __init__(self, ip: str, port: int) -> None:
        self.ip = ip
        self.port = port

    def __str__(self) -> str:
        return f'{self.ip}:{self.port}'

    # repr intentionally mirrors str: same 'ip:port' text.
    __repr__ = __str__
143 | ||
144 | ||
class ContainerInfo:
    """Value object describing one container: id, image, start time, version.

    Instances compare equal iff every field matches.  Note: defining
    __eq__ makes instances unhashable (default __hash__ is disabled).
    """

    def __init__(self, container_id: str,
                 image_name: str,
                 image_id: str,
                 start: str,
                 version: str) -> None:
        self.container_id = container_id
        self.image_name = image_name
        self.image_id = image_id
        self.start = start
        self.version = version

    def _astuple(self) -> Tuple[str, str, str, str, str]:
        # Single source of truth for equality and repr.
        return (self.container_id, self.image_name, self.image_id,
                self.start, self.version)

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, ContainerInfo):
            return NotImplemented
        return self._astuple() == other._astuple()

    def __repr__(self) -> str:
        return ('ContainerInfo(container_id=%r, image_name=%r, image_id=%r, '
                'start=%r, version=%r)' % self._astuple())
165 | ||
166 | ||
class BaseConfig:
    """Holds cephadm's tunable settings, initialized to module defaults.

    A parsed argparse.Namespace can be overlaid on these defaults via
    set_from_args(); only names already present here are copied over.
    """

    def __init__(self) -> None:
        self.image: str = ''
        self.docker: bool = False
        self.data_dir: str = DATA_DIR
        self.log_dir: str = LOG_DIR
        self.logrotate_dir: str = LOGROTATE_DIR
        self.sysctl_dir: str = SYSCTL_DIR
        self.unit_dir: str = UNIT_DIR
        self.verbose: bool = False
        self.timeout: Optional[int] = DEFAULT_TIMEOUT
        self.retry: int = DEFAULT_RETRY
        self.env: List[str] = []
        self.memory_request: Optional[int] = None
        self.memory_limit: Optional[int] = None
        self.log_to_journald: Optional[bool] = None

        self.container_init: bool = CONTAINER_INIT
        self.container_engine: Optional[ContainerEngine] = None

    def set_from_args(self, args: argparse.Namespace) -> None:
        """Copy every attribute of *args* that this config already defines."""
        for key, value in vars(args).items():
            if hasattr(self, key):
                setattr(self, key, value)
193 | ||
194 | ||
class CephadmContext:
    """Single attribute namespace over BaseConfig defaults and parsed CLI args.

    Reads fall through: _conf first, then _args, then normal lookup.
    Writes go to whichever of _conf/_args already owns the name, else to
    the instance itself.
    """

    def __init__(self) -> None:
        # Assign through __dict__ to bypass our own __setattr__, which
        # needs _conf/_args to already exist and would recurse otherwise.
        self.__dict__['_args'] = None
        self.__dict__['_conf'] = BaseConfig()

    def set_args(self, args: argparse.Namespace) -> None:
        # Overlay parsed args onto the config; keep the raw namespace for
        # attributes BaseConfig does not know about.
        self._conf.set_from_args(args)
        self._args = args

    def has_function(self) -> bool:
        # argparse subcommands store their handler under 'func'.
        return 'func' in self._args

    def __contains__(self, name: str) -> bool:
        return hasattr(self, name)

    def __getattr__(self, name: str) -> Any:
        # Called only when normal attribute lookup fails: delegate to
        # _conf, then _args, then fall back to default lookup (raises).
        if '_conf' in self.__dict__ and hasattr(self._conf, name):
            return getattr(self._conf, name)
        elif '_args' in self.__dict__ and hasattr(self._args, name):
            return getattr(self._args, name)
        else:
            return super().__getattribute__(name)

    def __setattr__(self, name: str, value: Any) -> None:
        # Route the write to the object that already defines the name, so
        # later reads via __getattr__ observe the new value.
        if hasattr(self._conf, name):
            setattr(self._conf, name, value)
        elif hasattr(self._args, name):
            setattr(self._args, name, value)
        else:
            super().__setattr__(name, value)
226 | ||
227 | ||
class ContainerEngine:
    """Common base for the supported container engines (Podman, Docker).

    Subclasses must set EXE to their executable name; instantiation
    resolves it to an absolute path via find_program().
    """

    # NOTE: this used to be a chained @classmethod @property raising
    # NotImplementedError.  Chaining classmethod with property is
    # deprecated since Python 3.11 and removed in 3.13, so a plain class
    # attribute plus a constructor guard is used instead.
    EXE: str = ''

    def __init__(self) -> None:
        if not self.EXE:
            raise NotImplementedError('container engine must define EXE')
        self.path = find_program(self.EXE)

    def __str__(self) -> str:
        return f'{self.EXE} ({self.path})'
239 | ||
f67539c2 TL |
240 | |
class Podman(ContainerEngine):
    """Podman engine wrapper that additionally caches the client version."""

    EXE = 'podman'

    def __init__(self) -> None:
        super().__init__()
        self._version: Optional[Tuple[int, ...]] = None

    @property
    def version(self) -> Tuple[int, ...]:
        """Cached client version; get_version() must have run first."""
        if self._version is None:
            raise RuntimeError('Please call `get_version` first')
        return self._version

    def get_version(self, ctx: CephadmContext) -> None:
        """Ask the podman binary for its client version and cache it."""
        out, _, _ = call_throws(ctx, [self.path, 'version', '--format', '{{.Client.Version}}'], verbosity=CallVerbosity.QUIET)
        self._version = _parse_podman_version(out)

    def __str__(self) -> str:
        version = '.'.join(str(part) for part in self.version)
        return f'{self.EXE} ({self.path}) version {version}'
261 | ||
f67539c2 TL |
262 | |
class Docker(ContainerEngine):
    """Docker engine; resolves the ``docker`` executable."""
    EXE = 'docker'


CONTAINER_PREFERENCE = (Podman, Docker)  # prefer podman to docker
9f95a23c | 268 | |
9f95a23c | 269 | |
33c7a0ef TL |
# During normal cephadm operations (cephadm ls, gather-facts, etc ) we use:
# stdout: for JSON output only
# stderr: for error, debug, info, etc
logging_config = {
    'version': 1,
    'disable_existing_loggers': True,
    'formatters': {
        'cephadm': {
            'format': '%(asctime)s %(thread)x %(levelname)s %(message)s'
        },
    },
    'handlers': {
        # Console (stderr by default) shows INFO and above, unformatted.
        'console': {
            'level': 'INFO',
            'class': 'logging.StreamHandler',
        },
        # Full DEBUG trail goes to the log file; WatchedFileHandler
        # reopens the file if logrotate moves it.
        'log_file': {
            'level': 'DEBUG',
            'class': 'logging.handlers.WatchedFileHandler',
            'formatter': 'cephadm',
            'filename': '%s/cephadm.log' % LOG_DIR,
        }
    },
    'loggers': {
        # Root logger: everything at DEBUG, fanned out to both handlers.
        '': {
            'level': 'DEBUG',
            'handlers': ['console', 'log_file'],
        }
    }
}
e306af50 | 300 | |
f67539c2 | 301 | |
33c7a0ef TL |
class ExcludeErrorsFilter(logging.Filter):
    """Admits only records strictly below WARNING severity."""

    def filter(self, record: logging.LogRecord) -> bool:
        """Only lets through log messages with log level below WARNING."""
        return logging.WARNING > record.levelno
306 | ||
307 | ||
# When cephadm is used as standard binary (bootstrap, rm-cluster, etc) we use:
# stdout: for debug and info
# stderr: for errors and warnings
interactive_logging_config = {
    'version': 1,
    'filters': {
        # Keeps WARNING+ off stdout so the two streams do not duplicate.
        'exclude_errors': {
            '()': ExcludeErrorsFilter
        }
    },
    'disable_existing_loggers': True,
    'formatters': {
        'cephadm': {
            'format': '%(asctime)s %(thread)x %(levelname)s %(message)s'
        },
    },
    'handlers': {
        # INFO and below -> stdout (errors filtered out).
        'console_stdout': {
            'level': 'INFO',
            'class': 'logging.StreamHandler',
            'filters': ['exclude_errors'],
            'stream': sys.stdout
        },
        # WARNING and above -> stderr.
        'console_stderr': {
            'level': 'WARNING',
            'class': 'logging.StreamHandler',
            'stream': sys.stderr
        },
        # Full DEBUG trail to the cephadm log file (survives logrotate).
        'log_file': {
            'level': 'DEBUG',
            'class': 'logging.handlers.WatchedFileHandler',
            'formatter': 'cephadm',
            'filename': '%s/cephadm.log' % LOG_DIR,
        }
    },
    'loggers': {
        # Root logger: everything at DEBUG, fanned out to all handlers.
        '': {
            'level': 'DEBUG',
            'handlers': ['console_stdout', 'console_stderr', 'log_file'],
        }
    }
}
350 | ||
351 | ||
e306af50 TL |
class termcolor:
    """ANSI escape sequences used to colorize terminal output."""
    yellow = '\033[93m'
    red = '\033[31m'
    end = '\033[0m'  # reset attributes back to default
356 | ||
f6b5b4d7 | 357 | |
9f95a23c TL |
class Error(Exception):
    """Base class for cephadm-specific errors (see subclasses below)."""
    pass
360 | ||
f6b5b4d7 | 361 | |
9f95a23c TL |
class TimeoutExpired(Error):
    """Raised when a cephadm operation exceeds its allotted time."""
    pass
364 | ||
33c7a0ef TL |
365 | |
class UnauthorizedRegistryError(Error):
    """Raised when a container registry rejects our credentials."""
    pass
368 | ||
9f95a23c TL |
369 | ################################## |
370 | ||
f6b5b4d7 | 371 | |
9f95a23c | 372 | class Ceph(object): |
33c7a0ef | 373 | daemons = ('mon', 'mgr', 'osd', 'mds', 'rgw', 'rbd-mirror', |
f67539c2 | 374 | 'crash', 'cephfs-mirror') |
9f95a23c TL |
375 | |
376 | ################################## | |
377 | ||
f6b5b4d7 | 378 | |
b3b6e05e TL |
class OSD(object):
    @staticmethod
    def get_sysctl_settings() -> List[str]:
        """Sysctl lines (sysctl.d format) recommended for OSD hosts."""
        settings = [
            '# allow a large number of OSDs',
            'fs.aio-max-nr = 1048576',
            'kernel.pid_max = 4194304',
        ]
        return settings
387 | ||
20effc67 | 388 | |
b3b6e05e TL |
389 | ################################## |
390 | ||
391 | ||
20effc67 TL |
class SNMPGateway:
    """Defines an SNMP gateway between Prometheus and SNMP monitoring Frameworks"""
    daemon_type = 'snmp-gateway'
    SUPPORTED_VERSIONS = ['V2c', 'V3']
    default_image = DEFAULT_SNMP_GATEWAY_IMAGE
    DEFAULT_PORT = 9464
    env_filename = 'snmp-gateway.conf'

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str,
                 daemon_id: Union[int, str],
                 config_json: Dict[str, Any],
                 image: Optional[str] = None) -> None:
        """Capture identity and config-json settings, then validate them.

        :raises Error: via validate(), on bad fsid/version/destination.
        """
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image or SNMPGateway.default_image

        # ownership of the env file written by create_daemon_conf
        self.uid = config_json.get('uid', 0)
        self.gid = config_json.get('gid', 0)

        self.destination = config_json.get('destination', '')
        self.snmp_version = config_json.get('snmp_version', 'V2c')
        self.snmp_community = config_json.get('snmp_community', 'public')
        self.log_level = config_json.get('log_level', 'info')
        self.snmp_v3_auth_username = config_json.get('snmp_v3_auth_username', '')
        self.snmp_v3_auth_password = config_json.get('snmp_v3_auth_password', '')
        self.snmp_v3_auth_protocol = config_json.get('snmp_v3_auth_protocol', '')
        self.snmp_v3_priv_protocol = config_json.get('snmp_v3_priv_protocol', '')
        self.snmp_v3_priv_password = config_json.get('snmp_v3_priv_password', '')
        self.snmp_v3_engine_id = config_json.get('snmp_v3_engine_id', '')

        self.validate()

    @classmethod
    def init(cls, ctx: CephadmContext, fsid: str,
             daemon_id: Union[int, str]) -> 'SNMPGateway':
        """Alternate constructor pulling settings from ctx.config_json."""
        assert ctx.config_json
        return cls(ctx, fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    @staticmethod
    def get_version(ctx: CephadmContext, fsid: str, daemon_id: str) -> Optional[str]:
        """Return the version of the notifier from its http endpoint.

        Reads the daemon's unit.meta for the listening port, then scrapes
        the snmp-notifier landing page for its '(version=...' banner.
        Returns None on any I/O or parse failure.
        """
        path = os.path.join(ctx.data_dir, fsid, f'snmp-gateway.{daemon_id}', 'unit.meta')
        try:
            with open(path, 'r') as env:
                metadata = json.loads(env.read())
        except (OSError, json.JSONDecodeError):
            return None

        ports = metadata.get('ports', [])
        if not ports:
            return None

        try:
            with urlopen(f'http://127.0.0.1:{ports[0]}/') as r:
                html = r.read().decode('utf-8').split('\n')
        except (HTTPError, URLError):
            return None

        for h in html:
            stripped = h.strip()
            if stripped.startswith(('<pre>', '<PRE>')) and \
               stripped.endswith(('</pre>', '</PRE>')):
                # <pre>(version=1.2.1, branch=HEAD, revision=7...
                return stripped.split(',')[0].split('version=')[1]

        return None

    @property
    def port(self) -> int:
        """First configured tcp port, or DEFAULT_PORT if none are set."""
        # NOTE: previously had an unreachable nested else-branch; a truthy
        # tcp_ports string always has len() > 0, so the check collapses.
        if not self.ctx.tcp_ports:
            return self.DEFAULT_PORT
        return int(self.ctx.tcp_ports.split()[0])

    def get_daemon_args(self) -> List[str]:
        """Build the snmp-notifier command line from the stored settings."""
        v3_args = []
        base_args = [
            f'--web.listen-address=:{self.port}',
            f'--snmp.destination={self.destination}',
            f'--snmp.version={self.snmp_version}',
            f'--log.level={self.log_level}',
            '--snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl'
        ]

        if self.snmp_version == 'V3':
            # common auth settings
            v3_args.extend([
                '--snmp.authentication-enabled',
                f'--snmp.authentication-protocol={self.snmp_v3_auth_protocol}',
                f'--snmp.security-engine-id={self.snmp_v3_engine_id}'
            ])
            # authPriv setting is applied if we have a privacy protocol setting
            if self.snmp_v3_priv_protocol:
                v3_args.extend([
                    '--snmp.private-enabled',
                    f'--snmp.private-protocol={self.snmp_v3_priv_protocol}'
                ])

        return base_args + v3_args

    @property
    def data_dir(self) -> str:
        """Host directory holding this daemon's state."""
        return os.path.join(self.ctx.data_dir, self.ctx.fsid, f'{self.daemon_type}.{self.daemon_id}')

    @property
    def conf_file_path(self) -> str:
        """Full path of the environment ('secrets') file."""
        return os.path.join(self.data_dir, self.env_filename)

    def create_daemon_conf(self) -> None:
        """Creates the environment file holding 'secrets' passed to the snmp-notifier daemon"""
        # 0o600: the file carries credentials, keep it owner-only.
        with open(os.open(self.conf_file_path, os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
            if self.snmp_version == 'V2c':
                f.write(f'SNMP_NOTIFIER_COMMUNITY={self.snmp_community}\n')
            else:
                f.write(f'SNMP_NOTIFIER_AUTH_USERNAME={self.snmp_v3_auth_username}\n')
                f.write(f'SNMP_NOTIFIER_AUTH_PASSWORD={self.snmp_v3_auth_password}\n')
                if self.snmp_v3_priv_password:
                    f.write(f'SNMP_NOTIFIER_PRIV_PASSWORD={self.snmp_v3_priv_password}\n')

    def validate(self) -> None:
        """Validate the settings

        Raises:
            Error: if the fsid doesn't look like an fsid
            Error: if the snmp version is not supported
            Error: destination IP and port address missing
        """
        if not is_fsid(self.fsid):
            raise Error(f'not a valid fsid: {self.fsid}')

        if self.snmp_version not in SNMPGateway.SUPPORTED_VERSIONS:
            raise Error(f'not a valid snmp version: {self.snmp_version}')

        if not self.destination:
            raise Error('config is missing destination attribute(<ip>:<port>) of the target SNMP listener')
534 | ||
535 | ||
536 | ################################## | |
9f95a23c TL |
class Monitoring(object):
    """Define the configs for the monitoring containers"""

    # Fixed host ports published for each monitoring daemon type.
    port_map = {
        'prometheus': [9095],  # Avoid default 9090, due to conflict with cockpit UI
        'node-exporter': [9100],
        'grafana': [3000],
        'alertmanager': [9093, 9094],
        'loki': [3100],
        'promtail': [9080]
    }

    # Per-component deployment defaults: image, resource hints, daemon
    # arguments and the config-json file/arg names each one consumes.
    components = {
        'prometheus': {
            'image': DEFAULT_PROMETHEUS_IMAGE,
            'cpus': '2',
            'memory': '4GB',
            'args': [
                '--config.file=/etc/prometheus/prometheus.yml',
                '--storage.tsdb.path=/prometheus',
            ],
            'config-json-files': [
                'prometheus.yml',
            ],
        },
        'loki': {
            'image': DEFAULT_LOKI_IMAGE,
            'cpus': '1',
            'memory': '1GB',
            'args': [
                '--config.file=/etc/loki/loki.yml',
            ],
            'config-json-files': [
                'loki.yml'
            ],
        },
        'promtail': {
            'image': DEFAULT_PROMTAIL_IMAGE,
            'cpus': '1',
            'memory': '1GB',
            'args': [
                '--config.file=/etc/promtail/promtail.yml',
            ],
            'config-json-files': [
                'promtail.yml',
            ],
        },
        'node-exporter': {
            'image': DEFAULT_NODE_EXPORTER_IMAGE,
            'cpus': '1',
            'memory': '1GB',
            'args': [
                '--no-collector.timex',
            ],
        },
        'grafana': {
            'image': DEFAULT_GRAFANA_IMAGE,
            'cpus': '2',
            'memory': '4GB',
            'args': [],
            'config-json-files': [
                'grafana.ini',
                'provisioning/datasources/ceph-dashboard.yml',
                'certs/cert_file',
                'certs/cert_key',
            ],
        },
        'alertmanager': {
            'image': DEFAULT_ALERT_MANAGER_IMAGE,
            'cpus': '2',
            'memory': '2GB',
            'args': [
                # second alertmanager port is the cluster/mesh listener
                '--cluster.listen-address=:{}'.format(port_map['alertmanager'][1]),
            ],
            'config-json-files': [
                'alertmanager.yml',
            ],
            'config-json-args': [
                'peers',
            ],
        },
    }  # type: ignore

    @staticmethod
    def get_version(ctx, container_id, daemon_type):
        # type: (CephadmContext, str, str) -> str
        """
        :param: daemon_type Either "prometheus", "alertmanager", "loki", "promtail" or "node-exporter"
        """
        assert daemon_type in ('prometheus', 'alertmanager', 'node-exporter', 'loki', 'promtail')
        cmd = daemon_type.replace('-', '_')
        code = -1
        err = ''
        version = ''
        if daemon_type == 'alertmanager':
            # The binary name differs across alertmanager packagings; try
            # each candidate until one responds to --version.
            for cmd in ['alertmanager', 'prometheus-alertmanager']:
                _, err, code = call(ctx, [
                    ctx.container_engine.path, 'exec', container_id, cmd,
                    '--version'
                ], verbosity=CallVerbosity.QUIET)
                if code == 0:
                    break
            cmd = 'alertmanager'  # reset cmd for version extraction
        else:
            _, err, code = call(ctx, [
                ctx.container_engine.path, 'exec', container_id, cmd, '--version'
            ], verbosity=CallVerbosity.QUIET)
        # The daemons print e.g. "prometheus, version 2.33.4 (...)" on
        # stderr; take the third whitespace-separated token.
        if code == 0 and \
                err.startswith('%s, version ' % cmd):
            version = err.split(' ')[2]
        return version
648 | ||
9f95a23c TL |
649 | ################################## |
650 | ||
f6b5b4d7 | 651 | |
f67539c2 TL |
def populate_files(config_dir: str, config_files: Dict, uid: int, gid: int) -> None:
    """Create config files for different services.

    Each key of *config_files* becomes a file under *config_dir*, owned by
    *uid*:*gid* with mode 0o600 (the contents may hold secrets).
    """
    for fname in config_files:
        config_file = os.path.join(config_dir, fname)
        config_content = dict_get_join(config_files, fname)
        # lazy %-args: let logging do the formatting only if emitted
        logger.info('Write file: %s', config_file)
        with open(config_file, 'w', encoding='utf-8') as f:
            # chown/chmod via the open fd to avoid a path race
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config_content)
663 | ||
664 | ||
9f95a23c TL |
class NFSGanesha(object):
    """Defines a NFS-Ganesha container"""

    daemon_type = 'nfs'
    entrypoint = '/usr/bin/ganesha.nfsd'
    # run in foreground, log to stderr
    daemon_args = ['-F', '-L', 'STDERR']

    required_files = ['ganesha.conf']

    port_map = {
        'nfs': 2049,
    }

    def __init__(self,
                 ctx,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
        """Capture identity and config-json settings, then validate them."""
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.pool = dict_get(config_json, 'pool', require=True)
        self.namespace = dict_get(config_json, 'namespace')
        self.userid = dict_get(config_json, 'userid')
        self.extra_args = dict_get(config_json, 'extra_args', [])
        self.files = dict_get(config_json, 'files', {})
        self.rgw = dict_get(config_json, 'rgw', {})

        # validate the supplied args
        self.validate()

    @classmethod
    def init(cls, ctx, fsid, daemon_id):
        # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha
        """Alternate constructor: read settings from ctx.config_json/ctx.image."""
        return cls(ctx, fsid, daemon_id, get_parm(ctx.config_json), ctx.image)

    def get_container_mounts(self, data_dir):
        # type: (str) -> Dict[str, str]
        """Map host paths under *data_dir* to their in-container mounts."""
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z'
        if self.rgw:
            # expose the RGW keyring where radosgw expects it
            cluster = self.rgw.get('cluster', 'ceph')
            rgw_user = self.rgw.get('user', 'admin')
            mounts[os.path.join(data_dir, 'keyring.rgw')] = \
                '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user)
        return mounts

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        """Environment variables passed into the container."""
        envs = [
            'CEPH_CONF=%s' % (CEPH_DEFAULT_CONF)
        ]
        return envs

    @staticmethod
    def get_version(ctx, container_id):
        # type: (CephadmContext, str) -> Optional[str]
        """Parse the ganesha release out of the entrypoint's -v output."""
        version = None
        out, err, code = call(ctx,
                              [ctx.container_engine.path, 'exec', container_id,
                               NFSGanesha.entrypoint, '-v'],
                              verbosity=CallVerbosity.QUIET)
        if code == 0:
            match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
            if match:
                version = match.group(1)
        return version

    def validate(self):
        # type: () -> None
        """Raise Error if identity, required files or RGW config are bad."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

        # check for an RGW config
        if self.rgw:
            if not self.rgw.get('keyring'):
                raise Error('RGW keyring is missing')
            if not self.rgw.get('user'):
                raise Error('RGW user is missing')

    def get_daemon_name(self):
        # type: () -> str
        """Return the '<type>.<id>' name for this daemon."""
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        """Container name 'ceph-<fsid>-<daemon>', plus '-<desc>' if given."""
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def get_daemon_args(self):
        # type: () -> List[str]
        """Fixed daemon args followed by any config-json extra_args."""
        return self.daemon_args + self.extra_args

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ganesha config...')

        # create the ganesha conf dir
        config_dir = os.path.join(data_dir, 'etc/ganesha')
        makedirs(config_dir, uid, gid, 0o755)

        # populate files from the config-json
        populate_files(config_dir, self.files, uid, gid)

        # write the RGW keyring
        if self.rgw:
            keyring_path = os.path.join(data_dir, 'keyring.rgw')
            with open(keyring_path, 'w') as f:
                # restrict before writing: the keyring is a secret
                os.fchmod(f.fileno(), 0o600)
                os.fchown(f.fileno(), uid, gid)
                f.write(self.rgw.get('keyring', ''))
800 | ||
9f95a23c TL |
801 | ################################## |
802 | ||
f6b5b4d7 | 803 | |
1911f103 TL |
804 | class CephIscsi(object): |
805 | """Defines a Ceph-Iscsi container""" | |
806 | ||
807 | daemon_type = 'iscsi' | |
808 | entrypoint = '/usr/bin/rbd-target-api' | |
809 | ||
810 | required_files = ['iscsi-gateway.cfg'] | |
811 | ||
    def __init__(self,
                 ctx,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
        """Capture identity and config-json settings, then validate them."""
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        # validate the supplied args
        self.validate()
829 | ||
    @classmethod
    def init(cls, ctx, fsid, daemon_id):
        # type: (CephadmContext, str, Union[int, str]) -> CephIscsi
        """Alternate constructor: read settings from ctx.config_json/ctx.image."""
        return cls(ctx, fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)
1911f103 TL |
835 | |
836 | @staticmethod | |
837 | def get_container_mounts(data_dir, log_dir): | |
838 | # type: (str, str) -> Dict[str, str] | |
839 | mounts = dict() | |
840 | mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z' | |
841 | mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z' | |
842 | mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z' | |
f91f0fd5 | 843 | mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config' |
a4b75251 | 844 | mounts[log_dir] = '/var/log:z' |
f91f0fd5 | 845 | mounts['/dev'] = '/dev' |
1911f103 TL |
846 | return mounts |
847 | ||
f6b5b4d7 TL |
848 | @staticmethod |
849 | def get_container_binds(): | |
850 | # type: () -> List[List[str]] | |
851 | binds = [] | |
852 | lib_modules = ['type=bind', | |
853 | 'source=/lib/modules', | |
854 | 'destination=/lib/modules', | |
855 | 'ro=true'] | |
856 | binds.append(lib_modules) | |
857 | return binds | |
858 | ||
1911f103 | 859 | @staticmethod |
f67539c2 TL |
860 | def get_version(ctx, container_id): |
861 | # type: (CephadmContext, str) -> Optional[str] | |
1911f103 | 862 | version = None |
f67539c2 TL |
863 | out, err, code = call(ctx, |
864 | [ctx.container_engine.path, 'exec', container_id, | |
865 | '/usr/bin/python3', '-c', "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"], | |
2a845540 | 866 | verbosity=CallVerbosity.QUIET) |
1911f103 | 867 | if code == 0: |
f6b5b4d7 | 868 | version = out.strip() |
1911f103 TL |
869 | return version |
870 | ||
871 | def validate(self): | |
e306af50 | 872 | # type: () -> None |
1911f103 TL |
873 | if not is_fsid(self.fsid): |
874 | raise Error('not an fsid: %s' % self.fsid) | |
875 | if not self.daemon_id: | |
876 | raise Error('invalid daemon_id: %s' % self.daemon_id) | |
877 | if not self.image: | |
878 | raise Error('invalid image: %s' % self.image) | |
879 | ||
880 | # check for the required files | |
881 | if self.required_files: | |
882 | for fname in self.required_files: | |
883 | if fname not in self.files: | |
884 | raise Error('required file missing from config-json: %s' % fname) | |
885 | ||
886 | def get_daemon_name(self): | |
887 | # type: () -> str | |
888 | return '%s.%s' % (self.daemon_type, self.daemon_id) | |
889 | ||
890 | def get_container_name(self, desc=None): | |
891 | # type: (Optional[str]) -> str | |
892 | cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name()) | |
893 | if desc: | |
894 | cname = '%s-%s' % (cname, desc) | |
895 | return cname | |
896 | ||
1911f103 TL |
897 | def create_daemon_dirs(self, data_dir, uid, gid): |
898 | # type: (str, int, int) -> None | |
899 | """Create files under the container data dir""" | |
900 | if not os.path.isdir(data_dir): | |
901 | raise OSError('data_dir is not a directory: %s' % (data_dir)) | |
902 | ||
903 | logger.info('Creating ceph-iscsi config...') | |
904 | configfs_dir = os.path.join(data_dir, 'configfs') | |
905 | makedirs(configfs_dir, uid, gid, 0o755) | |
906 | ||
907 | # populate files from the config-json | |
f67539c2 | 908 | populate_files(data_dir, self.files, uid, gid) |
1911f103 TL |
909 | |
910 | @staticmethod | |
911 | def configfs_mount_umount(data_dir, mount=True): | |
e306af50 | 912 | # type: (str, bool) -> List[str] |
1911f103 TL |
913 | mount_path = os.path.join(data_dir, 'configfs') |
914 | if mount: | |
f67539c2 TL |
915 | cmd = 'if ! grep -qs {0} /proc/mounts; then ' \ |
916 | 'mount -t configfs none {0}; fi'.format(mount_path) | |
1911f103 | 917 | else: |
f67539c2 TL |
918 | cmd = 'if grep -qs {0} /proc/mounts; then ' \ |
919 | 'umount {0}; fi'.format(mount_path) | |
1911f103 TL |
920 | return cmd.split() |
921 | ||
f6b5b4d7 TL |
922 | def get_tcmu_runner_container(self): |
923 | # type: () -> CephContainer | |
f67539c2 TL |
924 | tcmu_container = get_container(self.ctx, self.fsid, self.daemon_type, self.daemon_id) |
925 | tcmu_container.entrypoint = '/usr/bin/tcmu-runner' | |
f6b5b4d7 | 926 | tcmu_container.cname = self.get_container_name(desc='tcmu') |
f91f0fd5 TL |
927 | # remove extra container args for tcmu container. |
928 | # extra args could cause issue with forking service type | |
929 | tcmu_container.container_args = [] | |
f6b5b4d7 TL |
930 | return tcmu_container |
931 | ||
1911f103 TL |
932 | ################################## |
933 | ||
f6b5b4d7 | 934 | |
f67539c2 TL |
class HAproxy(object):
    """Defines an HAproxy container"""
    daemon_type = 'haproxy'
    required_files = ['haproxy.cfg']
    default_image = DEFAULT_HAPROXY_IMAGE

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        """Record daemon identity, read config-json and validate."""
        self.ctx, self.fsid = ctx, fsid
        self.daemon_id, self.image = daemon_id, image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        self.validate()

    @classmethod
    def init(cls, ctx: CephadmContext,
             fsid: str, daemon_id: Union[int, str]) -> 'HAproxy':
        """Alternate constructor reading config-json from the context."""
        return cls(ctx, fsid, daemon_id, get_parm(ctx.config_json),
                   ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        # create additional directories in data dir for HAproxy to use
        subdir = os.path.join(data_dir, 'haproxy')
        if not os.path.isdir(subdir):
            makedirs(subdir, uid, gid, DATA_DIR_MODE)

        # note: config files land inside the haproxy/ subdirectory
        populate_files(subdir, self.files, uid, gid)

    def get_daemon_args(self) -> List[str]:
        """Arguments appended to the container entrypoint."""
        return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg']

    def validate(self):
        # type: () -> None
        """Raise Error on invalid fsid/daemon_id/image or missing files."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files (report the first one missing)
        missing = next((f for f in self.required_files or []
                        if f not in self.files), None)
        if missing is not None:
            raise Error('required file missing from config-json: %s' % missing)

    def get_daemon_name(self):
        # type: () -> str
        """Return '<daemon_type>.<daemon_id>'."""
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        """Container name 'ceph-<fsid>-<daemon-name>[-<desc>]'."""
        base = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        return '%s-%s' % (base, desc) if desc else base

    def extract_uid_gid_haproxy(self) -> Tuple[int, int]:
        # better directory for this?
        return extract_uid_gid(self.ctx, file_path='/var/lib')

    @staticmethod
    def get_container_mounts(data_dir: str) -> Dict[str, str]:
        """Map the host haproxy/ dir to /var/lib/haproxy in the container."""
        return {os.path.join(data_dir, 'haproxy'): '/var/lib/haproxy'}

    @staticmethod
    def get_sysctl_settings() -> List[str]:
        """Sysctl lines cephadm installs for this daemon type."""
        return [
            '# IP forwarding',
            'net.ipv4.ip_forward = 1',
        ]
1018 | ||
f67539c2 TL |
1019 | ################################## |
1020 | ||
1021 | ||
class Keepalived(object):
    """Defines an Keepalived container"""
    daemon_type = 'keepalived'
    required_files = ['keepalived.conf']
    default_image = DEFAULT_KEEPALIVED_IMAGE

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        """Record daemon identity, read config-json and validate."""
        self.ctx, self.fsid = ctx, fsid
        self.daemon_id, self.image = daemon_id, image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        self.validate()

    @classmethod
    def init(cls, ctx: CephadmContext, fsid: str,
             daemon_id: Union[int, str]) -> 'Keepalived':
        """Alternate constructor reading config-json from the context."""
        return cls(ctx, fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        # create additional directories in data dir for keepalived to use
        subdir = os.path.join(data_dir, 'keepalived')
        if not os.path.isdir(subdir):
            makedirs(subdir, uid, gid, DATA_DIR_MODE)

        # populate files from the config-json (into data_dir itself,
        # not the keepalived/ subdir)
        populate_files(data_dir, self.files, uid, gid)

    def validate(self):
        # type: () -> None
        """Raise Error on invalid fsid/daemon_id/image or missing files."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files (report the first one missing)
        missing = next((f for f in self.required_files or []
                        if f not in self.files), None)
        if missing is not None:
            raise Error('required file missing from config-json: %s' % missing)

    def get_daemon_name(self):
        # type: () -> str
        """Return '<daemon_type>.<daemon_id>'."""
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        """Container name 'ceph-<fsid>-<daemon-name>[-<desc>]'."""
        base = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        return '%s-%s' % (base, desc) if desc else base

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        """Environment variables consumed by the keepalived image."""
        return [
            'KEEPALIVED_AUTOCONF=false',
            'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf',
            'KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf',
            'KEEPALIVED_DEBUG=false'
        ]

    @staticmethod
    def get_sysctl_settings() -> List[str]:
        """Sysctl lines cephadm installs for this daemon type."""
        return [
            '# IP forwarding and non-local bind',
            'net.ipv4.ip_forward = 1',
            'net.ipv4.ip_nonlocal_bind = 1',
        ]

    def extract_uid_gid_keepalived(self) -> Tuple[int, int]:
        # better directory for this?
        return extract_uid_gid(self.ctx, file_path='/var/lib')

    @staticmethod
    def get_container_mounts(data_dir: str) -> Dict[str, str]:
        """Mount the rendered keepalived.conf into the container."""
        return {os.path.join(data_dir, 'keepalived.conf'):
                '/etc/keepalived/keepalived.conf'}
1114 | ||
1115 | ################################## | |
1116 | ||
1117 | ||
f91f0fd5 TL |
class CustomContainer(object):
    """Defines a custom container"""
    daemon_type = 'container'

    def __init__(self,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        """Record identity and copy every supported config-json option."""
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options; every key shares its attribute name.
        # uid/gid default to 65534 (the 'nobody' user/group).
        option_defaults = (
            ('entrypoint', None),
            ('uid', 65534),
            ('gid', 65534),
            ('volume_mounts', {}),
            ('args', []),
            ('envs', []),
            ('privileged', False),
            ('bind_mounts', []),
            ('ports', []),
            ('dirs', []),
            ('files', {}),
        )
        for option, default in option_defaults:
            setattr(self, option, dict_get(config_json, option, default))

    @classmethod
    def init(cls, ctx: CephadmContext,
             fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer':
        """Alternate constructor reading config-json from the context."""
        return cls(fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """
        Create dirs/files below the container data directory.
        """
        logger.info('Creating custom container configuration '
                    'dirs/files in {} ...'.format(data_dir))

        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % data_dir)

        for rel_dir in self.dirs:
            logger.info('Creating directory: {}'.format(rel_dir))
            makedirs(os.path.join(data_dir, rel_dir.strip('/')),
                     uid, gid, 0o755)

        for rel_file in self.files:
            logger.info('Creating file: {}'.format(rel_file))
            content = dict_get_join(self.files, rel_file)
            target = os.path.join(data_dir, rel_file.strip('/'))
            # chown/chmod through the open fd so mode is set before writing
            with open(target, 'w', encoding='utf-8') as f:
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)
                f.write(content)

    def get_daemon_args(self) -> List[str]:
        """No extra entrypoint arguments for custom containers."""
        return []

    def get_container_args(self) -> List[str]:
        """Raw container-engine args straight from config-json."""
        return self.args

    def get_container_envs(self) -> List[str]:
        """Environment variables straight from config-json."""
        return self.envs

    def get_container_mounts(self, data_dir: str) -> Dict[str, str]:
        """
        Get the volume mounts. Relative source paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

        Example:
        {
            /foo/conf: /conf
            foo/conf: /conf
        }
        becomes
        {
            /foo/conf: /conf
            /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
        }
        """
        # os.path.join leaves absolute sources untouched
        return {os.path.join(data_dir, src): dst
                for src, dst in self.volume_mounts.items()}

    def get_container_binds(self, data_dir: str) -> List[List[str]]:
        """
        Get the bind mounts. Relative `source=...` paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.

        Example:
        [
            'type=bind',
            'source=lib/modules',
            'destination=/lib/modules',
            'ro=true'
        ]
        becomes
        [
            ...
            'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
            ...
        ]
        """
        # NOTE(review): .copy() is shallow, so the inner lists (shared with
        # self.bind_mounts) are rewritten in place — preserved as-is since
        # os.path.join keeps already-absolute sources unchanged.
        binds = self.bind_mounts.copy()
        for bind in binds:
            for idx, token in enumerate(bind):
                matched = re.match(r'^source=(.+)$', token)
                if matched:
                    bind[idx] = 'source={}'.format(
                        os.path.join(data_dir, matched.group(1)))
        return binds
1230 | ||
1231 | ################################## | |
1232 | ||
1233 | ||
f67539c2 TL |
def touch(file_path: str, uid: Optional[int] = None, gid: Optional[int] = None) -> None:
    """Create `file_path` if it does not exist (like touch(1)) and
    optionally change its ownership.

    :param file_path: path of the file to create/update.
    :param uid: owner uid; both uid and gid must be given to chown.
    :param gid: owner gid.

    Note: compare against None rather than truthiness — uid/gid of 0
    (root) is a perfectly valid owner and must not skip the chown.
    """
    Path(file_path).touch()
    if uid is not None and gid is not None:
        os.chown(file_path, uid, gid)
1238 | ||
1239 | ||
1240 | ################################## | |
1241 | ||
1242 | ||
f91f0fd5 TL |
def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
    """
    Fetch `key` from dictionary `d`.

    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :param default: Value returned when the key is absent (default None).
    :param require: When True, a missing key raises instead of
        returning the default.
    :return: The value stored under `key`, or `default`.
    :raises: :exc:`Error` when `require` is True and the key is absent.
    """
    if require and key not in d:
        raise Error('{} missing from dict'.format(key))
    return d.get(key, default)  # type: ignore
f91f0fd5 TL |
1260 | |
1261 | ################################## | |
1262 | ||
1263 | ||
def dict_get_join(d: Dict, key: str) -> Any:
    """
    Fetch `key` from dictionary `d`, flattening list values.

    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :return: The stored value; a `list` is joined into a single string
        with newline separators, anything else (including a missing
        key, which yields None) is returned unchanged.
    """
    value = d.get(key)
    return '\n'.join(map(str, value)) if isinstance(value, list) else value
1278 | ||
1279 | ################################## | |
1280 | ||
1281 | ||
def get_supported_daemons():
    # type: () -> List[str]
    """Return every daemon type this cephadm build knows how to deploy.

    The uniqueness assert guards against a daemon type being registered
    twice as new classes get added.
    """
    supported = list(Ceph.daemons) + list(Monitoring.components)
    supported += [
        NFSGanesha.daemon_type,
        CephIscsi.daemon_type,
        CustomContainer.daemon_type,
        HAproxy.daemon_type,
        Keepalived.daemon_type,
        CephadmAgent.daemon_type,
        SNMPGateway.daemon_type,
    ]
    assert len(supported) == len(set(supported))
    return supported
1295 | ||
1296 | ################################## | |
1297 | ||
f6b5b4d7 | 1298 | |
f67539c2 TL |
class PortOccupiedError(Error):
    """Raised by attempt_bind() when the requested address/port is
    already bound (EADDRINUSE)."""
    pass
1301 | ||
1302 | ||
def attempt_bind(ctx, s, address, port):
    # type: (CephadmContext, socket.socket, str, int) -> None
    """Try to bind socket `s` to (address, port).

    Raises PortOccupiedError when the port is taken (EADDRINUSE) and
    Error for any other failure. The socket is always closed before
    returning, success or not.
    """
    try:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((address, port))
    except OSError as e:
        if e.errno != errno.EADDRINUSE:
            raise Error(e)
        msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
        logger.warning(msg)
        raise PortOccupiedError(msg)
    except Exception as e:
        raise Error(e)
    finally:
        s.close()
1319 | ||
f6b5b4d7 | 1320 | |
f67539c2 TL |
def port_in_use(ctx, port_num):
    # type: (CephadmContext, int) -> bool
    """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
    logger.info('Verifying port %d ...' % port_num)

    def _check(af, address):
        # type: (socket.AddressFamily, str) -> bool
        try:
            attempt_bind(ctx, socket.socket(af, socket.SOCK_STREAM),
                         address, port_num)
        except PortOccupiedError:
            return True
        except OSError as e:
            # Ignore EAFNOSUPPORT and EADDRNOTAVAIL as two interfaces are
            # being tested here and one might be intentionally be disabled.
            # In that case no error should be raised.
            if e.errno not in (errno.EAFNOSUPPORT, errno.EADDRNOTAVAIL):
                raise e
        return False

    # short-circuits exactly like any() over the two address families
    return _check(socket.AF_INET, '0.0.0.0') or _check(socket.AF_INET6, '::')
9f95a23c | 1345 | |
f6b5b4d7 | 1346 | |
33c7a0ef TL |
def check_ip_port(ctx, ep):
    # type: (CephadmContext, EndPoint) -> None
    """Verify that ep.ip:ep.port can be bound locally; no-op when the
    --skip-ping-check flag was given. Raises via attempt_bind() on failure."""
    if ctx.skip_ping_check:
        return
    logger.info(f'Verifying IP {ep.ip} port {ep.port} ...')
    if is_ipv6(ep.ip):
        sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
        ip = unwrap_ipv6(ep.ip)
    else:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        ip = ep.ip
    attempt_bind(ctx, sock, ip, ep.port)
1358 | |
1359 | ################################## | |
1360 | ||
f67539c2 | 1361 | |
9f95a23c TL |
1362 | # this is an abbreviated version of |
1363 | # https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py | |
1364 | # that drops all of the compatibility (this is Unix/Linux only). | |
1365 | ||
9f95a23c TL |
class Timeout(TimeoutError):
    """
    Raised when the lock could not be acquired in *timeout*
    seconds.
    """

    def __init__(self, lock_file: str) -> None:
        """Remember the path of the lock that timed out."""
        #: The path of the file lock.
        self.lock_file = lock_file

    def __str__(self) -> str:
        return "The file lock '{}' could not be acquired."\
            .format(self.lock_file)
1383 | ||
1384 | ||
1385 | class _Acquire_ReturnProxy(object): | |
522d829b | 1386 | def __init__(self, lock: 'FileLock') -> None: |
9f95a23c TL |
1387 | self.lock = lock |
1388 | return None | |
1389 | ||
522d829b | 1390 | def __enter__(self) -> 'FileLock': |
9f95a23c TL |
1391 | return self.lock |
1392 | ||
522d829b | 1393 | def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: |
9f95a23c TL |
1394 | self.lock.release() |
1395 | return None | |
1396 | ||
1397 | ||
class FileLock(object):
    """An inter-process advisory lock backed by fcntl.flock() on a file
    under LOCK_DIR, with counter-based nested (re-entrant) acquire/release.

    Abbreviated from py-filelock (Unix/Linux only); see the comment above
    the class for the upstream reference.
    """

    def __init__(self, ctx: CephadmContext, name: str, timeout: int = -1) -> None:
        # LOCK_DIR is created lazily, owner-only (0700)
        if not os.path.exists(LOCK_DIR):
            os.mkdir(LOCK_DIR, 0o700)
        self._lock_file = os.path.join(LOCK_DIR, name + '.lock')
        self.ctx = ctx

        # The file descriptor for the *_lock_file* as it is returned by the
        # os.open() function.
        # This file lock is only NOT None, if the object currently holds the
        # lock.
        self._lock_file_fd: Optional[int] = None
        # default timeout for acquire(); < 0 means wait forever
        self.timeout = timeout
        # The lock counter is used for implementing the nested locking
        # mechanism. Whenever the lock is acquired, the counter is increased and
        # the lock is only released, when this value is 0 again.
        self._lock_counter = 0
        return None

    @property
    def is_locked(self) -> bool:
        # True while this object holds the flock (fd is open)
        return self._lock_file_fd is not None

    def acquire(self, timeout: Optional[int] = None, poll_intervall: float = 0.05) -> _Acquire_ReturnProxy:
        """
        Acquires the file lock or fails with a :exc:`Timeout` error.
        .. code-block:: python
            # You can use this method in the context manager (recommended)
            with lock.acquire():
                pass
            # Or use an equivalent try-finally construct:
            lock.acquire()
            try:
                pass
            finally:
                lock.release()
        :arg float timeout:
            The maximum time waited for the file lock.
            If ``timeout < 0``, there is no timeout and this method will
            block until the lock could be acquired.
            If ``timeout`` is None, the default :attr:`~timeout` is used.
        :arg float poll_intervall:
            We check once in *poll_intervall* seconds if we can acquire the
            file lock.
        :raises Timeout:
            if the lock could not be acquired in *timeout* seconds.
        .. versionchanged:: 2.0.0
            This method returns now a *proxy* object instead of *self*,
            so that it can be used in a with statement without side effects.
        """

        # Use the default timeout, if no timeout is provided.
        if timeout is None:
            timeout = self.timeout

        # Increment the number right at the beginning.
        # We can still undo it, if something fails.
        self._lock_counter += 1

        lock_id = id(self)
        lock_filename = self._lock_file
        start_time = time.time()
        try:
            while True:
                if not self.is_locked:
                    logger.log(QUIET_LOG_LEVEL, 'Acquiring lock %s on %s', lock_id,
                               lock_filename)
                    self._acquire()

                if self.is_locked:
                    logger.log(QUIET_LOG_LEVEL, 'Lock %s acquired on %s', lock_id,
                               lock_filename)
                    break
                elif timeout >= 0 and time.time() - start_time > timeout:
                    logger.warning('Timeout acquiring lock %s on %s', lock_id,
                                   lock_filename)
                    raise Timeout(self._lock_file)
                else:
                    logger.log(
                        QUIET_LOG_LEVEL,
                        'Lock %s not acquired on %s, waiting %s seconds ...',
                        lock_id, lock_filename, poll_intervall
                    )
                    time.sleep(poll_intervall)
        except Exception:
            # Something did go wrong, so decrement the counter.
            self._lock_counter = max(0, self._lock_counter - 1)

            raise
        return _Acquire_ReturnProxy(lock=self)

    def release(self, force: bool = False) -> None:
        """
        Releases the file lock.
        Please note, that the lock is only completly released, if the lock
        counter is 0.
        Also note, that the lock file itself is not automatically deleted.
        :arg bool force:
            If true, the lock counter is ignored and the lock is released in
            every case.
        """
        if self.is_locked:
            self._lock_counter -= 1

            if self._lock_counter == 0 or force:
                # lock_id = id(self)
                # lock_filename = self._lock_file

                # Can't log in shutdown:
                #   File "/usr/lib64/python3.9/logging/__init__.py", line 1175, in _open
                #   NameError: name 'open' is not defined
                # logger.debug('Releasing lock %s on %s', lock_id, lock_filename)
                self._release()
                self._lock_counter = 0
                # logger.debug('Lock %s released on %s', lock_id, lock_filename)

        return None

    def __enter__(self) -> 'FileLock':
        self.acquire()
        return self

    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
        self.release()
        return None

    def __del__(self) -> None:
        # force=True so an abandoned FileLock never leaves the flock held
        self.release(force=True)
        return None

    def _acquire(self) -> None:
        # Non-blocking flock attempt; on failure the fd is closed and
        # is_locked stays False so acquire() keeps polling.
        open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
        fd = os.open(self._lock_file, open_mode)

        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except (IOError, OSError):
            os.close(fd)
        else:
            self._lock_file_fd = fd
        return None

    def _release(self) -> None:
        # Do not remove the lockfile:
        #
        # https://github.com/benediktschmitt/py-filelock/issues/31
        # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
        fd = self._lock_file_fd
        self._lock_file_fd = None
        fcntl.flock(fd, fcntl.LOCK_UN)  # type: ignore
        os.close(fd)  # type: ignore
        return None
1550 | ||
1551 | ||
1552 | ################################## | |
1553 | # Popen wrappers, lifted from ceph-volume | |
1554 | ||
class CallVerbosity(Enum):
    #####
    # Format:
    # Normal Operation: <log-level-when-no-errors>, Errors: <log-level-when-error>
    #
    # NOTE: QUIET log level is custom level only used when --verbose is passed
    #####

    # Normal Operation: None, Errors: None
    SILENT = 0
    # Normal Operation: QUIET, Error: QUIET
    QUIET = 1
    # Normal Operation: DEBUG, Error: DEBUG
    DEBUG = 2
    # Normal Operation: QUIET, Error: INFO
    QUIET_UNLESS_ERROR = 3
    # Normal Operation: DEBUG, Error: INFO
    VERBOSE_ON_FAILURE = 4
    # Normal Operation: INFO, Error: INFO
    VERBOSE = 5

    def _log_levels(self) -> Tuple[int, int]:
        """Return the (success, error) log-level pair for this verbosity."""
        table = {
            CallVerbosity.SILENT: (0, 0),
            CallVerbosity.QUIET: (QUIET_LOG_LEVEL, QUIET_LOG_LEVEL),
            CallVerbosity.DEBUG: (logging.DEBUG, logging.DEBUG),
            CallVerbosity.QUIET_UNLESS_ERROR: (QUIET_LOG_LEVEL, logging.INFO),
            CallVerbosity.VERBOSE_ON_FAILURE: (logging.DEBUG, logging.INFO),
            CallVerbosity.VERBOSE: (logging.INFO, logging.INFO),
        }
        return table[self]

    def success_log_level(self) -> int:
        """Log level used when the wrapped call exits cleanly."""
        return self._log_levels()[0]

    def error_log_level(self) -> int:
        """Log level used when the wrapped call fails."""
        return self._log_levels()[1]
adb31ebb TL |
1597 | |
1598 | ||
f67539c2 TL |
1599 | if sys.version_info < (3, 8): |
1600 | import itertools | |
1601 | import threading | |
1602 | import warnings | |
1603 | from asyncio import events | |
1604 | ||
    class ThreadedChildWatcher(asyncio.AbstractChildWatcher):
        """Threaded child watcher implementation.
        The watcher uses a thread per process
        for waiting for the process finish.
        It doesn't require subscription on POSIX signal
        but a thread creation is not free.
        The watcher has O(1) complexity, its performance doesn't depend
        on amount of spawn processes.

        Backport for Python < 3.8 (see the version check guarding this
        class); mirrors the watcher that ships with newer asyncio.
        """

        def __init__(self) -> None:
            # monotonically increasing ids used in waiter thread names
            self._pid_counter = itertools.count(0)
            # pid -> waiter Thread, removed by _do_waitpid when the child exits
            self._threads: Dict[Any, Any] = {}

        def is_active(self) -> bool:
            return True

        def close(self) -> None:
            self._join_threads()

        def _join_threads(self) -> None:
            """Internal: Join all non-daemon threads"""
            threads = [thread for thread in list(self._threads.values())
                       if thread.is_alive() and not thread.daemon]
            for thread in threads:
                thread.join()

        def __enter__(self) -> Any:
            return self

        def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
            pass

        def __del__(self, _warn: Any = warnings.warn) -> None:
            # warn (not raise) about children that were never waited on
            threads = [thread for thread in list(self._threads.values())
                       if thread.is_alive()]
            if threads:
                _warn(f'{self.__class__} has registered but not finished child processes',
                      ResourceWarning,
                      source=self)

        def add_child_handler(self, pid: Any, callback: Any, *args: Any) -> None:
            # spawn a daemon thread that blocks in waitpid() for this child
            loop = events.get_event_loop()
            thread = threading.Thread(target=self._do_waitpid,
                                      name=f'waitpid-{next(self._pid_counter)}',
                                      args=(loop, pid, callback, args),
                                      daemon=True)
            self._threads[pid] = thread
            thread.start()

        def remove_child_handler(self, pid: Any) -> bool:
            # asyncio never calls remove_child_handler() !!!
            # The method is no-op but is implemented because
            # abstract base classe requires it
            return True

        def attach_loop(self, loop: Any) -> None:
            pass

        def _do_waitpid(self, loop: Any, expected_pid: Any, callback: Any, args: Any) -> None:
            """Waiter-thread body: reap one child, then schedule `callback`
            on the event loop with (pid, returncode, *args)."""
            assert expected_pid > 0

            try:
                pid, status = os.waitpid(expected_pid, 0)
            except ChildProcessError:
                # The child process is already reaped
                # (may happen if waitpid() is called elsewhere).
                pid = expected_pid
                returncode = 255
                logger.warning(
                    'Unknown child process pid %d, will report returncode 255',
                    pid)
            else:
                if os.WIFEXITED(status):
                    returncode = os.WEXITSTATUS(status)
                elif os.WIFSIGNALED(status):
                    # negative returncode mirrors subprocess convention
                    returncode = -os.WTERMSIG(status)
                else:
                    raise ValueError(f'unknown wait status {status}')
                if loop.get_debug():
                    logger.debug('process %s exited with returncode %s',
                                 expected_pid, returncode)

            if loop.is_closed():
                logger.warning('Loop %r that handles pid %r is closed', loop, pid)
            else:
                loop.call_soon_threadsafe(callback, pid, returncode, *args)

            self._threads.pop(expected_pid)
1694 | ||
# unlike SafeChildWatcher which handles SIGCHLD in the main thread,
# ThreadedChildWatcher runs in a separate thread, hence allows us to
# run create_subprocess_exec() in non-main thread, see
# https://bugs.python.org/issue35621
asyncio.set_child_watcher(ThreadedChildWatcher())
1700 | ||
1701 | ||
try:
    # Python >= 3.7 ships asyncio.run(); use it directly.
    from asyncio import run as async_run  # type: ignore[attr-defined]
except ImportError:
    # Python 3.6 fallback: emulate asyncio.run() with a throwaway loop.
    def async_run(coro):  # type: ignore
        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)
            return loop.run_until_complete(coro)
        finally:
            try:
                # Finalize any pending async generators before closing.
                loop.run_until_complete(loop.shutdown_asyncgens())
            finally:
                asyncio.set_event_loop(None)
                loop.close()
1716 | ||
1717 | ||
def call(ctx: CephadmContext,
         command: List[str],
         desc: Optional[str] = None,
         verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
         timeout: Optional[int] = DEFAULT_TIMEOUT,
         **kwargs: Any) -> Tuple[str, str, int]:
    """
    Wrap subprocess.Popen to

    - log stdout/stderr to a logger,
    - decode utf-8
    - cleanly return out, err, returncode

    :param desc: logging prefix; defaults to the executable name
    :param verbosity: controls the log level of stdout/stderr lines
    :param timeout: timeout in seconds
    :return: (stdout, stderr, returncode); returncode is 124 on timeout
    """

    prefix = command[0] if desc is None else desc
    if prefix:
        prefix += ': '
    timeout = timeout or ctx.timeout

    async def tee(reader: asyncio.StreamReader) -> str:
        # Accumulate a stream line-by-line, decoding as utf-8.
        collected = StringIO()
        async for line in reader:
            message = line.decode('utf-8')
            collected.write(message)
        return collected.getvalue()

    async def run_with_timeout() -> Tuple[str, str, int]:
        process = await asyncio.create_subprocess_exec(
            *command,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=os.environ.copy())
        assert process.stdout
        assert process.stderr
        try:
            stdout, stderr = await asyncio.gather(tee(process.stdout),
                                                  tee(process.stderr))
            returncode = await asyncio.wait_for(process.wait(), timeout)
        except asyncio.TimeoutError:
            logger.info(prefix + f'timeout after {timeout} seconds')
            # BUGFIX: don't leak the child — it is still running when we
            # time out, so kill it before reporting the timeout.
            try:
                process.kill()
            except ProcessLookupError:
                # already exited between wait_for() failing and kill()
                pass
            return '', '', 124
        else:
            return stdout, stderr, returncode

    stdout, stderr, returncode = async_run(run_with_timeout())
    log_level = verbosity.success_log_level()
    if returncode != 0:
        log_level = verbosity.error_log_level()
        logger.log(log_level, f'Non-zero exit code {returncode} from {" ".join(command)}')
    for line in stdout.splitlines():
        logger.log(log_level, prefix + 'stdout ' + line)
    for line in stderr.splitlines():
        logger.log(log_level, prefix + 'stderr ' + line)
    return stdout, stderr, returncode
1774 | ||
1775 | ||
def call_throws(
        ctx: CephadmContext,
        command: List[str],
        desc: Optional[str] = None,
        verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
        timeout: Optional[int] = DEFAULT_TIMEOUT,
        **kwargs: Any) -> Tuple[str, str, int]:
    """Like call(), but raise RuntimeError on a non-zero exit code."""
    out, err, ret = call(ctx, command, desc, verbosity, timeout, **kwargs)
    if not ret:
        return out, err, ret
    # Include a stream in the error if it holds a short, readable message.
    for stream in (out, err):
        if stream.strip() and len(stream.splitlines()) <= 2:  # readable message?
            raise RuntimeError(f'Failed command: {" ".join(command)}: {stream}')
    raise RuntimeError('Failed command: %s' % ' '.join(command))
1790 | ||
1791 | ||
f67539c2 TL |
def call_timeout(ctx, command, timeout):
    # type: (CephadmContext, List[str], int) -> int
    """Run *command* without capturing output, enforcing *timeout* seconds.

    Raises TimeoutExpired when the command does not finish in time.
    """
    logger.debug('Running command (timeout=%s): %s'
                 % (timeout, ' '.join(command)))
    try:
        return subprocess.call(command, timeout=timeout, env=os.environ.copy())
    except subprocess.TimeoutExpired:
        # Translate the stdlib exception into cephadm's own error type.
        msg = 'Command `%s` timed out after %s seconds' % (command, timeout)
        logger.debug(msg)
        raise TimeoutExpired(msg)
9f95a23c TL |
1807 | |
1808 | ################################## | |
1809 | ||
f6b5b4d7 | 1810 | |
def json_loads_retry(cli_func: Callable[[], str]) -> Any:
    """Parse cli_func()'s output as JSON, retrying a few times on bad output.

    The final attempt is made without a guard so the JSONDecodeError
    propagates to the caller.
    """
    for delay in (1, 4, 4):
        try:
            return json.loads(cli_func())
        except json.JSONDecodeError:
            logger.debug('Invalid JSON. Retrying in %s seconds...' % delay)
            time.sleep(delay)
    return json.loads(cli_func())
1819 | ||
1820 | ||
f67539c2 TL |
def is_available(ctx, what, func):
    # type: (CephadmContext, str, Callable[[], bool]) -> None
    """
    Wait for a service to become available

    :param what: the name of the service
    :param func: the callable object that determines availability
    :raises Error: after ctx.retry unsuccessful attempts
    """
    retry = ctx.retry
    logger.info('Waiting for %s...' % what)
    attempt = 1
    while not func():
        if attempt > retry:
            raise Error('%s not available after %s tries'
                        % (what, retry))
        logger.info('%s not available, waiting (%s/%s)...'
                    % (what, attempt, retry))
        attempt += 1
        time.sleep(2)
    logger.info('%s is available'
                % what)
9f95a23c TL |
1846 | |
1847 | ||
def read_config(fn):
    # type: (Optional[str]) -> ConfigParser
    """Parse the ini file at *fn*; an unset path yields an empty parser."""
    parser = ConfigParser()
    if fn:
        # ConfigParser.read() silently ignores missing files.
        parser.read(fn)
    return parser
1854 | ||
f6b5b4d7 | 1855 | |
9f95a23c TL |
def pathify(p):
    # type: (str) -> str
    """Expand a leading '~' and return the absolute form of *p*."""
    return os.path.abspath(os.path.expanduser(p))
9f95a23c | 1860 | |
f6b5b4d7 | 1861 | |
def get_file_timestamp(fn):
    # type: (str) -> Optional[str]
    """Return fn's mtime rendered as a DATEFMT UTC string, or None on error."""
    try:
        mtime = os.path.getmtime(fn)
        stamp = datetime.datetime.fromtimestamp(
            mtime, tz=datetime.timezone.utc)
        return stamp.strftime(DATEFMT)
    except Exception:
        # Missing/unreadable file: callers treat None as "no timestamp".
        return None
1871 | ||
f6b5b4d7 | 1872 | |
def try_convert_datetime(s):
    # type: (str) -> Optional[str]
    """Normalize a docker/podman timestamp to DATEFMT in UTC, or None.

    Container engines emit several incompatible formats, e.g.:
        docker 18.09.7: 2020-03-03T09:21:43.636153304Z
        podman 1.7.0:   2020-03-03T15:52:30.136257504-06:00
                        2020-03-03 15:52:30.136257504 -0600 CST
    (podman even uses different formats for 'inspect' and
    'inspect --format {{.Created}}'), and strptime can parse none of
    them directly, so massage the string first.
    """
    # strptime understands at most 6 fractional-second digits; truncate.
    s = re.sub(r'(\.[\d]{6})[\d]*', r'\1', s)

    # replace trailing Z with -0000, since (on python 3.6.8) it won't parse
    if s.endswith('Z'):
        s = s[:-1] + '-0000'

    # cut off a redundant trailing zone *name* (e.g. 'CST') that
    # strptime can't parse, if present.
    s = ' '.join(s.split(' ')[0:3])

    # try parsing with several format strings
    for fmt in ('%Y-%m-%dT%H:%M:%S.%f%z',
                '%Y-%m-%d %H:%M:%S.%f %z'):
        try:
            # return timestamp normalized to UTC, rendered as DATEFMT.
            parsed = datetime.datetime.strptime(s, fmt)
            return parsed.astimezone(tz=datetime.timezone.utc).strftime(DATEFMT)
        except ValueError:
            pass
    return None
1912 | ||
f6b5b4d7 | 1913 | |
f67539c2 | 1914 | def _parse_podman_version(version_str): |
9f95a23c | 1915 | # type: (str) -> Tuple[int, ...] |
522d829b | 1916 | def to_int(val: str, org_e: Optional[Exception] = None) -> int: |
9f95a23c TL |
1917 | if not val and org_e: |
1918 | raise org_e | |
1919 | try: | |
1920 | return int(val) | |
1921 | except ValueError as e: | |
1922 | return to_int(val[0:-1], org_e or e) | |
1923 | ||
1924 | return tuple(map(to_int, version_str.split('.'))) | |
1925 | ||
1926 | ||
def get_hostname():
    # type: () -> str
    """Return this machine's short hostname."""
    return socket.gethostname()
1930 | ||
f6b5b4d7 | 1931 | |
9f95a23c TL |
def get_fqdn():
    # type: () -> str
    """Return the fully qualified hostname, falling back to the short one."""
    fqdn = socket.getfqdn()
    return fqdn if fqdn else socket.gethostname()
1935 | ||
f6b5b4d7 | 1936 | |
9f95a23c TL |
def get_arch():
    # type: () -> str
    """Return the machine hardware name, e.g. 'x86_64'."""
    return platform.uname().machine
1940 | ||
f6b5b4d7 | 1941 | |
9f95a23c TL |
def generate_service_id():
    # type: () -> str
    """Return '<hostname>.<six random lowercase letters>'."""
    suffix = ''.join(random.choice(string.ascii_lowercase) for _ in range(6))
    return '%s.%s' % (get_hostname(), suffix)
1946 | ||
f6b5b4d7 | 1947 | |
9f95a23c TL |
def generate_password():
    # type: () -> str
    """Return a random 10-character password of lowercase letters and digits."""
    alphabet = string.ascii_lowercase + string.digits
    return ''.join(random.choice(alphabet) for _ in range(10))
1952 | ||
f6b5b4d7 | 1953 | |
9f95a23c TL |
def normalize_container_id(i):
    # type: (str) -> str
    """Strip a leading 'sha256:' so docker and podman ids compare equal.

    docker adds the sha256: prefix, but AFAICS both docker (18.09.7 in
    bionic at least) and podman always use sha256, so leave off the
    prefix for consistency.
    """
    prefix = 'sha256:'
    return i[len(prefix):] if i.startswith(prefix) else i
1964 | ||
f6b5b4d7 | 1965 | |
9f95a23c TL |
def make_fsid():
    # type: () -> str
    """Generate a fresh cluster fsid (a uuid1 string)."""
    return str(uuid.uuid1())
1969 | ||
f6b5b4d7 | 1970 | |
9f95a23c TL |
def is_fsid(s):
    # type: (str) -> bool
    """Return True if *s* parses as a UUID."""
    try:
        uuid.UUID(s)
        return True
    except ValueError:
        return False
1978 | ||
f6b5b4d7 | 1979 | |
522d829b TL |
def validate_fsid(func: FuncT) -> FuncT:
    """Decorator: fail fast when ctx.fsid is set but is not a valid UUID."""
    @wraps(func)
    def _validate_fsid(ctx: CephadmContext) -> Any:
        fsid = ctx.fsid if 'fsid' in ctx else None
        if fsid and not is_fsid(fsid):
            raise Error('not an fsid: %s' % fsid)
        return func(ctx)
    return cast(FuncT, _validate_fsid)
1988 | ||
1989 | ||
def infer_fsid(func: FuncT) -> FuncT:
    """
    If we only find a single fsid in /var/lib/ceph/*, use that
    """
    # NOTE: @infer_config wraps the result, so config inference runs
    # before fsid inference when the command executes.
    @infer_config
    @wraps(func)
    def _infer_fsid(ctx: CephadmContext) -> Any:
        if 'fsid' in ctx and ctx.fsid:
            logger.debug('Using specified fsid: %s' % ctx.fsid)
            return func(ctx)

        # Collect candidate fsids from the config file and from deployed
        # daemons; only a single unambiguous candidate may be inferred.
        fsids = set()

        cp = read_config(ctx.config)
        if cp.has_option('global', 'fsid'):
            fsids.add(cp.get('global', 'fsid'))

        daemon_list = list_daemons(ctx, detail=False)
        for daemon in daemon_list:
            if not is_fsid(daemon['fsid']):
                # 'unknown' fsid
                continue
            elif 'name' not in ctx or not ctx.name:
                # ctx.name not specified
                fsids.add(daemon['fsid'])
            elif daemon['name'] == ctx.name:
                # ctx.name is a match
                fsids.add(daemon['fsid'])
        # rebind to a sorted list for deterministic output below
        fsids = sorted(fsids)

        if not fsids:
            # some commands do not always require an fsid
            pass
        elif len(fsids) == 1:
            logger.info('Inferring fsid %s' % fsids[0])
            ctx.fsid = fsids[0]
        else:
            raise Error('Cannot infer an fsid, one must be specified (using --fsid): %s' % fsids)
        return func(ctx)

    return cast(FuncT, _infer_fsid)
9f95a23c | 2031 | |
f6b5b4d7 | 2032 | |
def infer_config(func: FuncT) -> FuncT:
    """
    Infer the cluster configuration using the following priority order:
    1- if the user has provided custom conf file (-c option) use it
    2- otherwise if daemon --name has been provided use daemon conf
    3- otherwise find the mon daemon conf file and use it (if v1)
    4- otherwise if {ctx.data_dir}/{fsid}/{CEPH_CONF_DIR} dir exists use it
    5- finally: fallback to the default file /etc/ceph/ceph.conf
    """
    @wraps(func)
    def _infer_config(ctx: CephadmContext) -> Any:

        def config_path(daemon_type: str, daemon_name: str) -> str:
            # Path of the config file inside the daemon's data directory.
            data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_name)
            return os.path.join(data_dir, 'config')

        def get_mon_daemon_name(fsid: str) -> Optional[str]:
            # Find a cephadm:v1-style mon of this cluster whose config
            # file actually exists on disk.
            daemon_list = list_daemons(ctx, detail=False)
            for daemon in daemon_list:
                if (
                    daemon.get('name', '').startswith('mon.')
                    and daemon.get('fsid', '') == fsid
                    and daemon.get('style', '') == 'cephadm:v1'
                    and os.path.exists(config_path('mon', daemon['name'].split('.', 1)[1]))
                ):
                    return daemon['name']
            return None

        ctx.config = ctx.config if 'config' in ctx else None
        # check if user has provided conf by using -c option
        if ctx.config and (ctx.config != CEPH_DEFAULT_CONF):
            logger.debug(f'Using specified config: {ctx.config}')
            return func(ctx)

        if 'fsid' in ctx and ctx.fsid:
            name = ctx.name if ('name' in ctx and ctx.name) else get_mon_daemon_name(ctx.fsid)
            if name is not None:
                # daemon name has been specified (or inferred from mon), let's use its conf
                ctx.config = config_path(name.split('.', 1)[0], name.split('.', 1)[1])
            else:
                # no daemon, in case the cluster has a config dir then use it
                ceph_conf = f'{ctx.data_dir}/{ctx.fsid}/{CEPH_CONF_DIR}/{CEPH_CONF}'
                if os.path.exists(ceph_conf):
                    ctx.config = ceph_conf

        if ctx.config:
            logger.info(f'Inferring config {ctx.config}')
        elif os.path.exists(CEPH_DEFAULT_CONF):
            logger.debug(f'Using default config {CEPH_DEFAULT_CONF}')
            ctx.config = CEPH_DEFAULT_CONF
        return func(ctx)

    return cast(FuncT, _infer_config)
e306af50 | 2086 | |
f6b5b4d7 | 2087 | |
def _get_default_image(ctx: CephadmContext) -> str:
    """Return DEFAULT_IMAGE, warning loudly when it tracks a dev branch."""
    if DEFAULT_IMAGE_IS_MASTER:
        warn = """This is a development version of cephadm.
For information regarding the latest stable release:
    https://docs.ceph.com/docs/{}/cephadm/install
""".format(LATEST_STABLE_RELEASE)
        for line in warn.splitlines():
            logger.warning(f'{termcolor.yellow}{line}{termcolor.end}')
    return DEFAULT_IMAGE
2097 | ||
f6b5b4d7 | 2098 | |
def infer_image(func: FuncT) -> FuncT:
    """
    Use the most recent ceph image
    """
    @wraps(func)
    def _infer_image(ctx: CephadmContext) -> Any:
        # Resolve in priority order: explicit --image, CEPHADM_IMAGE env,
        # a locally pulled ceph image, then the built-in default.
        if not ctx.image:
            ctx.image = (os.environ.get('CEPHADM_IMAGE')
                         or infer_local_ceph_image(ctx, ctx.container_engine.path)
                         or _get_default_image(ctx))
        return func(ctx)

    return cast(FuncT, _infer_image)
9f95a23c | 2114 | |
f6b5b4d7 | 2115 | |
def default_image(func: FuncT) -> FuncT:
    """Decorator: derive ctx.image from the daemon name, the CEPHADM_IMAGE
    environment variable, or the built-in default — in that order."""
    @wraps(func)
    def _default_image(ctx: CephadmContext) -> Any:
        if not ctx.image:
            if 'name' in ctx and ctx.name:
                # Non-ceph daemon types ship their own container images.
                type_ = ctx.name.split('.', 1)[0]
                if type_ in Monitoring.components:
                    ctx.image = Monitoring.components[type_]['image']
                if type_ == 'haproxy':
                    ctx.image = HAproxy.default_image
                if type_ == 'keepalived':
                    ctx.image = Keepalived.default_image
                if type_ == SNMPGateway.daemon_type:
                    ctx.image = SNMPGateway.default_image
            if not ctx.image:
                ctx.image = os.environ.get('CEPHADM_IMAGE') or _get_default_image(ctx)
        return func(ctx)

    return cast(FuncT, _default_image)
9f95a23c | 2138 | |
f6b5b4d7 | 2139 | |
33c7a0ef TL |
def get_container_info(ctx: CephadmContext, daemon_filter: str, by_name: bool) -> Optional[ContainerInfo]:
    """
    :param ctx: Cephadm context
    :param daemon_filter: daemon name or type
    :param by_name: must be set to True if daemon name is provided
    :return: Container information or None
    """
    if by_name and '.' not in daemon_filter:
        logger.warning(f'Trying to get container info using invalid daemon name {daemon_filter}')
        return None

    def matches(daemon: Dict[str, str]) -> bool:
        # Compare the full name or just the type, depending on by_name.
        label = daemon['name'] if by_name else daemon['name'].split('.', 1)[0]
        return label == daemon_filter and daemon['fsid'] == ctx.fsid

    candidates = [d for d in list_daemons(ctx, detail=False) if matches(d)]
    if not candidates:
        return None
    d_type, d_id = candidates[0]['name'].split('.', 1)
    out, _, code = get_container_stats(ctx, ctx.container_engine.path, ctx.fsid, d_type, d_id)
    if code:
        return None
    (container_id, image_name, image_id, start, version) = out.strip().split(',')
    return ContainerInfo(container_id, image_name, image_id, start, version)
2162 | ||
2163 | ||
def infer_local_ceph_image(ctx: CephadmContext, container_path: str) -> Optional[str]:
    """
    Infer the local ceph image based on the following priority criteria:
    1- the image specified by --image arg (if provided).
    2- the same image as the daemon container specified by --name arg (if provided).
    3- image used by any ceph container running on the host. In this case we use daemon types.
    4- if no container is found then we use the most ceph recent image on the host.

    Note: any selected container must have the same fsid inferred previously.

    :return: The most recent local ceph image (already pulled)
    """
    # '|'-separated fields: Repository@digest | Image Id | Tag | Created
    out, _, _ = call_throws(ctx,
                            [container_path, 'images',
                             '--filter', 'label=ceph=True',
                             '--filter', 'dangling=false',
                             '--format', '{{.Repository}}@{{.Digest}}|{{.ID}}|{{.Tag}}|{{.CreatedAt}}'])

    daemon_name = None
    if 'name' in ctx and ctx.name and '.' in ctx.name:
        daemon_name = ctx.name
    # With a daemon name, restrict to that daemon; otherwise scan the
    # known ceph daemon types ('mon', 'mgr', ...).
    candidates = [daemon_name] if daemon_name is not None else Ceph.daemons
    container_info = None
    for daemon in candidates:
        container_info = get_container_info(ctx, daemon, daemon_name is not None)
        if container_info is not None:
            logger.debug(f"Using container info for daemon '{daemon}'")
            break

    for image in out.splitlines():
        if not image or image.isspace():
            continue
        (digest, image_id, tag, created_date) = image.lstrip().split('|')
        # When a running container was found, only accept its image
        # (substring match covers short podman ids).
        if container_info is not None and image_id not in container_info.image_id:
            continue
        if digest and not digest.endswith('@'):
            logger.info(f"Using ceph image with id '{image_id}' and tag '{tag}' created on {created_date}\n{digest}")
            return digest
    return None
2205 | ||
f6b5b4d7 | 2206 | |
def write_tmp(s, uid, gid):
    # type: (str, int, int) -> IO[str]
    """Write *s* to a named temporary file owned by uid:gid; return it open.

    The file is deleted when the returned handle is closed.
    """
    handle = tempfile.NamedTemporaryFile(mode='w', prefix='ceph-tmp')
    os.fchown(handle.fileno(), uid, gid)
    handle.write(s)
    handle.flush()
    return handle
2216 | ||
f6b5b4d7 | 2217 | |
9f95a23c TL |
def makedirs(dir, uid, gid, mode):
    # type: (str, int, int, int) -> None
    """mkdir -p with explicit ownership and permissions."""
    if os.path.exists(dir):
        os.chmod(dir, mode)
    else:
        os.makedirs(dir, mode=mode)
    os.chown(dir, uid, gid)
    os.chmod(dir, mode)  # the above is masked by umask...
2226 | ||
f6b5b4d7 | 2227 | |
f67539c2 TL |
def get_data_dir(fsid, data_dir, t, n):
    # type: (str, str, str, Union[int, str]) -> str
    """Return the daemon data dir: <data_dir>/<fsid>/<type>.<id>."""
    return os.path.join(data_dir, fsid, '{}.{}'.format(t, n))
9f95a23c | 2231 | |
f6b5b4d7 | 2232 | |
f67539c2 TL |
def get_log_dir(fsid, log_dir):
    # type: (str, str) -> str
    """Return the per-cluster log directory: <log_dir>/<fsid>."""
    return os.path.join(log_dir, fsid)
9f95a23c | 2236 | |
f6b5b4d7 | 2237 | |
f67539c2 TL |
def make_data_dir_base(fsid, data_dir, uid, gid):
    # type: (str, str, int, int) -> str
    """Create <data_dir>/<fsid> plus its crash/ and crash/posted/ subdirs."""
    data_dir_base = os.path.join(data_dir, fsid)
    makedirs(data_dir_base, uid, gid, DATA_DIR_MODE)
    crash_dir = os.path.join(data_dir_base, 'crash')
    makedirs(crash_dir, uid, gid, DATA_DIR_MODE)
    makedirs(os.path.join(crash_dir, 'posted'), uid, gid, DATA_DIR_MODE)
    return data_dir_base
2246 | ||
f6b5b4d7 | 2247 | |
f67539c2 TL |
def make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=None, gid=None):
    # type: (CephadmContext, str, str, Union[int, str], Optional[int], Optional[int]) -> str
    """Create (and return) a daemon's data dir, resolving uid/gid if unset."""
    if uid is None or gid is None:
        uid, gid = extract_uid_gid(ctx)
    make_data_dir_base(fsid, ctx.data_dir, uid, gid)
    path = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    makedirs(path, uid, gid, DATA_DIR_MODE)
    return path
2256 | ||
f6b5b4d7 | 2257 | |
f67539c2 TL |
def make_log_dir(ctx, fsid, uid=None, gid=None):
    # type: (CephadmContext, str, Optional[int], Optional[int]) -> str
    """Create (and return) the cluster log dir, resolving uid/gid if unset."""
    if uid is None or gid is None:
        uid, gid = extract_uid_gid(ctx)
    path = get_log_dir(fsid, ctx.log_dir)
    makedirs(path, uid, gid, LOG_DIR_MODE)
    return path
2265 | ||
f6b5b4d7 | 2266 | |
f67539c2 TL |
def make_var_run(ctx, fsid, uid, gid):
    # type: (CephadmContext, str, int, int) -> None
    """Create /var/run/ceph/<fsid> (mode 0770, owned uid:gid) via install(1)."""
    call_throws(ctx, ['install', '-d', '-m0770',
                      '-o', str(uid), '-g', str(gid),
                      '/var/run/ceph/%s' % fsid])
9f95a23c | 2271 | |
f6b5b4d7 | 2272 | |
f67539c2 TL |
def copy_tree(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """Copy each directory tree in *src* to *dst*, chowning to uid:gid."""
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_dir in src:
        # When dst is a directory, copy into it under the source basename.
        dst_dir = os.path.join(dst, os.path.basename(src_dir)) if os.path.isdir(dst) else dst

        logger.debug('copy directory `%s` -> `%s`' % (src_dir, dst_dir))
        shutil.rmtree(dst_dir, ignore_errors=True)
        shutil.copytree(src_dir, dst_dir)  # dirs_exist_ok needs python 3.8

        for dirpath, dirnames, filenames in os.walk(dst_dir):
            logger.debug('chown %s:%s `%s`' % (uid, gid, dirpath))
            os.chown(dirpath, uid, gid)
            for filename in filenames:
                logger.debug('chown %s:%s `%s`' % (uid, gid, filename))
                os.chown(os.path.join(dirpath, filename), uid, gid)
2296 | ||
2297 | ||
f67539c2 TL |
def copy_files(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """Copy each file in *src* to *dst*, chowning the copies to uid:gid."""
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_file in src:
        # When dst is a directory, copy into it under the source basename.
        dst_file = os.path.join(dst, os.path.basename(src_file)) if os.path.isdir(dst) else dst

        logger.debug('copy file `%s` -> `%s`' % (src_file, dst_file))
        shutil.copyfile(src_file, dst_file)

        logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
        os.chown(dst_file, uid, gid)
2316 | ||
f6b5b4d7 | 2317 | |
f67539c2 TL |
def move_files(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """Move each file in *src* to *dst*, chowning the result to uid:gid."""
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_file in src:
        # When dst is a directory, move into it under the source basename.
        dst_file = os.path.join(dst, os.path.basename(src_file)) if os.path.isdir(dst) else dst

        if os.path.islink(src_file):
            # shutil.move() in py2 does not handle symlinks correctly
            src_rl = os.readlink(src_file)
            logger.debug("symlink '%s' -> '%s'" % (dst_file, src_rl))
            os.symlink(src_rl, dst_file)
            os.unlink(src_file)
        else:
            logger.debug("move file '%s' -> '%s'" % (src_file, dst_file))
            shutil.move(src_file, dst_file)
        logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
        os.chown(dst_file, uid, gid)
2342 | ||
f6b5b4d7 | 2343 | |
33c7a0ef TL |
def recursive_chown(path: str, uid: int, gid: int) -> None:
    """Chown *path* and everything beneath it to uid:gid."""
    for dirpath, _dirnames, filenames in os.walk(path):
        os.chown(dirpath, uid, gid)
        for name in filenames:
            os.chown(os.path.join(dirpath, name), uid, gid)
2349 | ||
2350 | ||
f67539c2 | 2351 | # copied from distutils |
def find_executable(executable: str, path: Optional[str] = None) -> Optional[str]:
    """Tries to find 'executable' in the directories listed in 'path'.
    A string listing directories separated by 'os.pathsep'; defaults to
    os.environ['PATH']. Returns the complete filename or None if not found.
    """
    _, ext = os.path.splitext(executable)
    if (sys.platform == 'win32') and (ext != '.exe'):
        executable = executable + '.exe'

    # A direct path (absolute or relative) short-circuits the search.
    if os.path.isfile(executable):
        return executable

    if path is None:
        path = os.environ.get('PATH', None)
        if path is None:
            try:
                path = os.confstr('CS_PATH')
            except (AttributeError, ValueError):
                # os.confstr() or CS_PATH is not available
                path = os.defpath
        # bpo-35755: Don't use os.defpath if the PATH environment variable
        # is set to an empty string

    # PATH='' doesn't match, whereas PATH=':' looks in the current directory
    if not path:
        return None

    for directory in path.split(os.pathsep):
        candidate = os.path.join(directory, executable)
        if os.path.isfile(candidate):
            # the file exists, we have a shot at spawn working
            return candidate
    return None
2386 | ||
f6b5b4d7 | 2387 | |
9f95a23c TL |
def find_program(filename):
    # type: (str) -> str
    """Like find_executable(), but raise ValueError when nothing is found."""
    path = find_executable(filename)
    if path is None:
        raise ValueError('%s not found' % filename)
    return path
2394 | ||
f6b5b4d7 | 2395 | |
def find_container_engine(ctx: CephadmContext) -> Optional[ContainerEngine]:
    """Pick a container engine: Docker when --docker is set, otherwise the
    first engine in CONTAINER_PREFERENCE whose binary can be located."""
    if ctx.docker:
        return Docker()
    for engine_cls in CONTAINER_PREFERENCE:
        try:
            return engine_cls()
        except Exception:
            # binary not found; try the next preference
            pass
    return None
2406 | ||
2407 | ||
def check_container_engine(ctx: CephadmContext) -> ContainerEngine:
    """Validate ctx.container_engine and return it.

    Raises Error when no known engine binary was found, or when podman is
    present but older than MIN_PODMAN_VERSION.
    """
    engine = ctx.container_engine
    if not isinstance(engine, CONTAINER_PREFERENCE):
        # See https://github.com/python/mypy/issues/8993
        exes: List[str] = [i.EXE for i in CONTAINER_PREFERENCE]  # type: ignore
        raise Error('No container engine binary found ({}). Try run `apt/dnf/yum/zypper install <container engine>`'.format(' or '.join(exes)))
    if isinstance(engine, Podman):
        engine.get_version(ctx)
        if engine.version < MIN_PODMAN_VERSION:
            raise Error('podman version %d.%d.%d or later is required' % MIN_PODMAN_VERSION)
    return engine
f67539c2 TL |
2419 | |
2420 | ||
9f95a23c TL |
def get_unit_name(fsid, daemon_type, daemon_id=None):
    # type: (str, str, Optional[Union[int, str]]) -> str
    """Build the systemd unit name; accepts either a bare type or type + id."""
    if daemon_id is None:
        return 'ceph-%s@%s' % (fsid, daemon_type)
    return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
2428 | ||
f6b5b4d7 | 2429 | |
def get_unit_name_by_daemon_name(ctx: CephadmContext, fsid: str, name: str) -> str:
    """Look up the systemd unit for a daemon via its description record."""
    daemon = get_daemon_description(ctx, fsid, name)
    if 'systemd_unit' not in daemon:
        raise Error('Failed to get unit name for {}'.format(daemon))
    return daemon['systemd_unit']
2436 | ||
f6b5b4d7 | 2437 | |
f67539c2 TL |
def check_unit(ctx, unit_name):
    # type: (CephadmContext, str) -> Tuple[bool, str, bool]
    """Query systemd for a unit's state.

    Returns (enabled, state, installed) where state is one of
    'running', 'stopped', 'error', or 'unknown'.
    """
    # NOTE: we ignore the exit code here because systemctl outputs
    # various exit codes based on the state of the service, but the
    # string result is more explicit (and sufficient).
    enabled = False
    installed = False
    try:
        out, _, rc = call(ctx, ['systemctl', 'is-enabled', unit_name],
                          verbosity=CallVerbosity.QUIET)
        if rc == 0:
            enabled = True
            installed = True
        elif 'disabled' in out:
            installed = True
    except Exception as exc:
        logger.warning('unable to run systemctl: %s' % exc)
        enabled = False
        installed = False

    state = 'unknown'
    try:
        out, _, rc = call(ctx, ['systemctl', 'is-active', unit_name],
                          verbosity=CallVerbosity.QUIET)
        status = out.strip()
        if status == 'active':
            state = 'running'
        elif status == 'inactive':
            state = 'stopped'
        elif status in ('failed', 'auto-restart'):
            state = 'error'
        else:
            state = 'unknown'
    except Exception as exc:
        logger.warning('unable to run systemctl: %s' % exc)
        state = 'unknown'
    return (enabled, state, installed)
2475 | ||
f6b5b4d7 | 2476 | |
f67539c2 TL |
def check_units(ctx, units, enabler=None):
    # type: (CephadmContext, List[str], Optional[Packager]) -> bool
    """Return True as soon as one unit is enabled and running.

    Otherwise, optionally enable installed units through 'enabler' and
    report False.
    """
    for unit in units:
        enabled, state, installed = check_unit(ctx, unit)
        if enabled and state == 'running':
            logger.info('Unit %s is enabled and running' % unit)
            return True
        if enabler is not None and installed:
            logger.info('Enabling unit %s' % unit)
            enabler.enable_service(unit)
    return False
2489 | ||
f6b5b4d7 | 2490 | |
def is_container_running(ctx: CephadmContext, c: 'CephContainer') -> bool:
    """True when the daemon's container is currently in the running state."""
    daemon_type = ctx.name.split('.', 1)[0]
    if daemon_type in ('agent', 'cephadm-exporter'):
        # these are non-containerized daemon types
        return False
    return bool(get_running_container_name(ctx, c))
2496 | ||
2497 | ||
def get_running_container_name(ctx: CephadmContext, c: 'CephContainer') -> Optional[str]:
    """Return whichever of the container's names (current or legacy) is
    running, or None when neither is."""
    for candidate in (c.cname, c.old_cname):
        out, _, _ = call(ctx, [
            ctx.container_engine.path, 'container', 'inspect',
            '--format', '{{.State.Status}}', candidate
        ])
        if out.strip() == 'running':
            return candidate
    return None
f67539c2 TL |
2507 | |
2508 | ||
def get_legacy_config_fsid(cluster, legacy_dir=None):
    # type: (str, Optional[str]) -> Optional[str]
    """Read the fsid from a pre-cephadm /etc/ceph/<cluster>.conf, if present."""
    config_file = '/etc/ceph/%s.conf' % cluster
    if legacy_dir is not None:
        config_file = os.path.abspath(legacy_dir + config_file)

    if not os.path.exists(config_file):
        return None
    config = read_config(config_file)
    if config.has_section('global') and config.has_option('global', 'fsid'):
        return config.get('global', 'fsid')
    return None
2520 | ||
f6b5b4d7 | 2521 | |
f67539c2 TL |
def get_legacy_daemon_fsid(ctx, cluster, daemon_type, daemon_id, legacy_dir=None):
    # type: (CephadmContext, str, str, Union[int, str], Optional[str]) -> Optional[str]
    """Determine the fsid of a legacy (pre-cephadm) daemon.

    OSDs keep their fsid in a per-daemon 'ceph_fsid' file; every other
    daemon type falls back to the cluster config file.
    """
    fsid = None
    if daemon_type == 'osd':
        fsid_file = os.path.join(ctx.data_dir,
                                 daemon_type,
                                 'ceph-%s' % daemon_id,
                                 'ceph_fsid')
        if legacy_dir is not None:
            fsid_file = os.path.abspath(legacy_dir + fsid_file)
        try:
            with open(fsid_file, 'r') as f:
                fsid = f.read().strip()
        except IOError:
            # missing/unreadable fsid file: fall through to the config file
            pass
    if not fsid:
        fsid = get_legacy_config_fsid(cluster, legacy_dir=legacy_dir)
    return fsid
2541 | ||
f6b5b4d7 | 2542 | |
20effc67 TL |
def should_log_to_journald(ctx: CephadmContext) -> bool:
    """Honor an explicit ctx.log_to_journald; otherwise default to journald
    only on podman versions new enough for cgroups=split."""
    if ctx.log_to_journald is not None:
        return ctx.log_to_journald
    engine = ctx.container_engine
    return (isinstance(engine, Podman)
            and engine.version >= CGROUPS_SPLIT_PODMAN_VERSION)
2548 | ||
2549 | ||
f67539c2 TL |
def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
    # type: (CephadmContext, str, str, Union[int, str]) -> List[str]
    """Build the extra command-line arguments for a daemon's process.

    The flags depend on the daemon type: ceph daemons get user/logging
    flags, monitoring components get web/bind settings, and specialized
    daemon classes (ganesha, haproxy, custom, SNMP) contribute their own.
    """
    r = list()  # type: List[str]

    if daemon_type in Ceph.daemons and daemon_type != 'crash':
        # Core ceph daemons: drop to ceph:ceph and route logs either to
        # journald or stderr, never to in-container files.
        r += [
            '--setuser', 'ceph',
            '--setgroup', 'ceph',
            '--default-log-to-file=false',
        ]
        log_to_journald = should_log_to_journald(ctx)
        if log_to_journald:
            r += [
                '--default-log-to-journald=true',
                '--default-log-to-stderr=false',
            ]
        else:
            r += [
                '--default-log-to-stderr=true',
                '--default-log-stderr-prefix=debug ',
            ]
        if daemon_type == 'mon':
            # The mon additionally has a separate cluster log channel.
            r += [
                '--default-mon-cluster-log-to-file=false',
            ]
            if log_to_journald:
                r += [
                    '--default-mon-cluster-log-to-journald=true',
                    '--default-mon-cluster-log-to-stderr=false',
                ]
            else:
                r += ['--default-mon-cluster-log-to-stderr=true']
    elif daemon_type in Monitoring.components:
        metadata = Monitoring.components[daemon_type]
        r += metadata.get('args', list())
        # set ip and port to bind to for nodeexporter,alertmanager,prometheus
        if daemon_type not in ['grafana', 'loki', 'promtail']:
            ip = ''
            port = Monitoring.port_map[daemon_type][0]
            # ctx.meta_json (when present) can override bind ip/port.
            if 'meta_json' in ctx and ctx.meta_json:
                meta = json.loads(ctx.meta_json) or {}
                if 'ip' in meta and meta['ip']:
                    ip = meta['ip']
                if 'ports' in meta and meta['ports']:
                    port = meta['ports'][0]
            r += [f'--web.listen-address={ip}:{port}']
            if daemon_type == 'prometheus':
                scheme = 'http'
                host = get_fqdn()
                r += [f'--web.external-url={scheme}://{host}:{port}']
        if daemon_type == 'alertmanager':
            # Peer addresses come from the config-json blob.
            config = get_parm(ctx.config_json)
            peers = config.get('peers', list())  # type: ignore
            for peer in peers:
                r += ['--cluster.peer={}'.format(peer)]
            # some alertmanager, by default, look elsewhere for a config
            r += ['--config.file=/etc/alertmanager/alertmanager.yml']
        if daemon_type == 'promtail':
            r += ['--config.expand-env']
        if daemon_type == 'node-exporter':
            # Host filesystems are bind-mounted under /host and /rootfs.
            r += ['--path.procfs=/host/proc',
                  '--path.sysfs=/host/sys',
                  '--path.rootfs=/rootfs']
    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
        r += nfs_ganesha.get_daemon_args()
    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, fsid, daemon_id)
        r += haproxy.get_daemon_args()
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        r.extend(cc.get_daemon_args())
    elif daemon_type == SNMPGateway.daemon_type:
        sc = SNMPGateway.init(ctx, fsid, daemon_id)
        r.extend(sc.get_daemon_args())

    return r
2627 | ||
f6b5b4d7 | 2628 | |
def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid,
                       config=None, keyring=None):
    # type: (CephadmContext, str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None
    """Create and populate the on-host data directory for a daemon.

    Writes the optional ceph config/keyring (mode 0600, owned uid:gid),
    lays out per-component directory trees for monitoring daemons, and
    delegates to the daemon class for specialized types.
    """
    data_dir = make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=uid, gid=gid)

    if daemon_type in Ceph.daemons:
        make_log_dir(ctx, fsid, uid=uid, gid=gid)

    if config:
        config_path = os.path.join(data_dir, 'config')
        with open(config_path, 'w') as f:
            # chown/chmod via the open fd so permissions are set before
            # any content lands in the file
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config)

    if keyring:
        keyring_path = os.path.join(data_dir, 'keyring')
        with open(keyring_path, 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write(keyring)

    if daemon_type in Monitoring.components.keys():
        config_json: Dict[str, Any] = dict()
        if 'config_json' in ctx:
            config_json = get_parm(ctx.config_json)

        # Set up directories specific to the monitoring component
        config_dir = ''
        data_dir_root = ''
        if daemon_type == 'prometheus':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/prometheus'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'alerting'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
            recursive_chown(os.path.join(data_dir_root, 'etc'), uid, gid)
            recursive_chown(os.path.join(data_dir_root, 'data'), uid, gid)
        elif daemon_type == 'grafana':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/grafana'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
            # pre-create the sqlite db so the bind mount has a target file
            touch(os.path.join(data_dir_root, 'data', 'grafana.db'), uid, gid)
        elif daemon_type == 'alertmanager':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/alertmanager'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755)
        elif daemon_type == 'promtail':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/promtail'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
        elif daemon_type == 'loki':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/loki'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)

        # populate the config directory for the component from the config-json
        if 'files' in config_json:
            for fname in config_json['files']:
                content = dict_get_join(config_json['files'], fname)
                if os.path.isabs(fname):
                    # absolute names are placed relative to the data dir root
                    fpath = os.path.join(data_dir_root, fname.lstrip(os.path.sep))
                else:
                    fpath = os.path.join(data_dir_root, config_dir, fname)
                with open(fpath, 'w', encoding='utf-8') as f:
                    os.fchown(f.fileno(), uid, gid)
                    os.fchmod(f.fileno(), 0o600)
                    f.write(content)

    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
        nfs_ganesha.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CephIscsi.daemon_type:
        ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
        ceph_iscsi.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, fsid, daemon_id)
        haproxy.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == Keepalived.daemon_type:
        keepalived = Keepalived.init(ctx, fsid, daemon_id)
        keepalived.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        cc.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == SNMPGateway.daemon_type:
        sg = SNMPGateway.init(ctx, fsid, daemon_id)
        sg.create_daemon_conf()

    _write_custom_conf_files(ctx, daemon_type, str(daemon_id), fsid, uid, gid)
2734 | ||
f6b5b4d7 | 2735 | |
2a845540 TL |
def _write_custom_conf_files(ctx: CephadmContext, daemon_type: str, daemon_id: str, fsid: str, uid: int, gid: int) -> None:
    """Materialize the config-json 'custom_config_files' entries on disk.

    Kept as a standalone function mostly to make unit testing easier.
    """
    if 'config_json' not in ctx or not ctx.config_json:
        return
    config_json = get_custom_config_files(ctx.config_json)
    custom_config_dir = os.path.join(ctx.data_dir, fsid, 'custom_config_files', f'{daemon_type}.{daemon_id}')
    if not os.path.exists(custom_config_dir):
        makedirs(custom_config_dir, uid, gid, 0o755)
    for entry in config_json['custom_config_files']:
        # skip entries that lack either required key
        if any(k not in entry for k in ('mount_path', 'content')):
            continue
        target = os.path.join(custom_config_dir, os.path.basename(entry['mount_path']))
        with open(target, 'w+', encoding='utf-8') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(entry['content'])
9f95a23c | 2752 | |
2a845540 TL |
2753 | |
def get_parm(option: str) -> Dict[str, str]:
    """Parse a config-json option into a plain dict.

    The special 'custom_config_files' field is only meaningful to
    get_custom_config_files(), so it is stripped here.
    """
    parsed = _get_config_json(option)
    parsed.pop('custom_config_files', None)
    return parsed
2762 | ||
2763 | ||
def get_custom_config_files(option: str) -> Dict[str, List[Dict[str, str]]]:
    """Extract only the 'custom_config_files' list from a config-json option."""
    parsed = _get_config_json(option)
    return {'custom_config_files': parsed.get('custom_config_files', [])}
2770 | ||
2771 | ||
def _get_config_json(option: str) -> Dict[str, Any]:
    """Resolve a config-json option to a parsed dict.

    The option may be '-' (read stdin once, memoized in cached_stdin),
    an inline '{...}' JSON string, or a path to a JSON file.
    """
    if not option:
        return dict()

    global cached_stdin
    if option == '-':
        if cached_stdin is not None:
            raw = cached_stdin
        else:
            raw = sys.stdin.read()
            cached_stdin = raw
    elif option.startswith('{') and option.endswith('}'):
        # inline json string
        raw = option
    elif os.path.exists(option):
        # json file
        with open(option, 'r') as f:
            raw = f.read()
    else:
        raise Error('Config file {} not found'.format(option))

    try:
        return json.loads(raw)
    except ValueError as e:
        raise Error('Invalid JSON in {}: {}'.format(option, e))
2800 | ||
f6b5b4d7 | 2801 | |
f67539c2 TL |
def get_config_and_keyring(ctx):
    # type: (CephadmContext) -> Tuple[Optional[str], Optional[str]]
    """Collect ceph.conf contents and keyring text from the context.

    Values embedded in --config-json win when both are present; otherwise
    fall back to the --config/--keyring file paths or an inline --key.
    """
    config = None
    keyring = None

    if 'config_json' in ctx and ctx.config_json:
        parsed = get_parm(ctx.config_json)
        config = parsed.get('config')
        keyring = parsed.get('keyring')
        if config and keyring:
            # both supplied inline; no filesystem access needed
            return config, keyring

    if 'config' in ctx and ctx.config:
        try:
            with open(ctx.config, 'r') as f:
                config = f.read()
        except FileNotFoundError as e:
            raise Error(e)

    if 'key' in ctx and ctx.key:
        keyring = '[%s]\n\tkey = %s\n' % (ctx.name, ctx.key)
    elif 'keyring' in ctx and ctx.keyring:
        try:
            with open(ctx.keyring, 'r') as f:
                keyring = f.read()
        except FileNotFoundError as e:
            raise Error(e)

    return config, keyring
2831 | ||
2832 | ||
f67539c2 TL |
def get_container_binds(ctx, fsid, daemon_type, daemon_id):
    # type: (CephadmContext, str, str, Union[int, str, None]) -> List[List[str]]
    """Return the raw --mount bind specifications for a daemon's container."""
    binds = list()

    if daemon_type == CephIscsi.daemon_type:
        binds.extend(CephIscsi.get_container_binds())
    elif daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        binds.extend(cc.get_container_binds(data_dir))

    return binds
2846 | ||
9f95a23c | 2847 | |
def get_container_mounts(ctx, fsid, daemon_type, daemon_id,
                         no_config=False):
    # type: (CephadmContext, str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str]
    """Compute the host-path -> container-path volume map for a daemon.

    Later assignments overwrite earlier ones for the same host path, so
    ordering below matters (e.g. '/' for osd vs node-exporter).
    """
    mounts = dict()

    if daemon_type in Ceph.daemons:
        if fsid:
            run_path = os.path.join('/var/run/ceph', fsid)
            if os.path.exists(run_path):
                mounts[run_path] = '/var/run/ceph:z'
            log_dir = get_log_dir(fsid, ctx.log_dir)
            mounts[log_dir] = '/var/log/ceph:z'
            crash_dir = '/var/lib/ceph/%s/crash' % fsid
            if os.path.exists(crash_dir):
                mounts[crash_dir] = '/var/lib/ceph/crash:z'
            # journald logging needs the journal socket inside the container
            if daemon_type != 'crash' and should_log_to_journald(ctx):
                journald_sock_dir = '/run/systemd/journal'
                mounts[journald_sock_dir] = journald_sock_dir

    if daemon_type in Ceph.daemons and daemon_id:
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        if daemon_type == 'rgw':
            cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id)
        else:
            cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id)
        if daemon_type != 'crash':
            mounts[data_dir] = cdata_dir + ':z'
        if not no_config:
            mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
        if daemon_type in ['rbd-mirror', 'cephfs-mirror', 'crash']:
            # these do not search for their keyrings in a data directory
            mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id)

    if daemon_type in ['mon', 'osd', 'clusterless-ceph-volume']:
        mounts['/dev'] = '/dev'  # FIXME: narrow this down?
        mounts['/run/udev'] = '/run/udev'
    if daemon_type in ['osd', 'clusterless-ceph-volume']:
        mounts['/sys'] = '/sys'  # for numa.cc, pick_address, cgroups, ...
        mounts['/run/lvm'] = '/run/lvm'
        mounts['/run/lock/lvm'] = '/run/lock/lvm'
    if daemon_type == 'osd':
        # selinux-policy in the container may not match the host.
        if HostFacts(ctx).selinux_enabled:
            selinux_folder = '/var/lib/ceph/%s/selinux' % fsid
            if not os.path.exists(selinux_folder):
                os.makedirs(selinux_folder, mode=0o755)
            mounts[selinux_folder] = '/sys/fs/selinux:ro'
        mounts['/'] = '/rootfs'

    try:
        if ctx.shared_ceph_folder:  # make easy manager modules/ceph-volume development
            ceph_folder = pathify(ctx.shared_ceph_folder)
            if os.path.exists(ceph_folder):
                mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
                mounts[ceph_folder + '/src/cephadm/cephadm'] = '/usr/sbin/cephadm'
                mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
                mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
                mounts[ceph_folder + '/monitoring/ceph-mixin/dashboards_out'] = '/etc/grafana/dashboards/ceph-dashboard'
                mounts[ceph_folder + '/monitoring/ceph-mixin/prometheus_alerts.yml'] = '/etc/prometheus/ceph/ceph_default_alerts.yml'
            else:
                logger.error('{}{}{}'.format(termcolor.red,
                                             'Ceph shared source folder does not exist.',
                                             termcolor.end))
    except AttributeError:
        # ctx has no shared_ceph_folder attribute for this command
        pass

    if daemon_type in Monitoring.components and daemon_id:
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        log_dir = get_log_dir(fsid, ctx.log_dir)
        if daemon_type == 'prometheus':
            mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z'
            mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z'
        elif daemon_type == 'loki':
            mounts[os.path.join(data_dir, 'etc/loki')] = '/etc/loki:Z'
            mounts[os.path.join(data_dir, 'data')] = '/loki:Z'
        elif daemon_type == 'promtail':
            mounts[os.path.join(data_dir, 'etc/promtail')] = '/etc/promtail:Z'
            # promtail tails the cluster logs from the host
            mounts[log_dir] = '/var/log/ceph:z'
            mounts[os.path.join(data_dir, 'data')] = '/promtail:Z'
        elif daemon_type == 'node-exporter':
            mounts['/proc'] = '/host/proc:ro'
            mounts['/sys'] = '/host/sys:ro'
            mounts['/'] = '/rootfs:ro'
        elif daemon_type == 'grafana':
            mounts[os.path.join(data_dir, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
            mounts[os.path.join(data_dir, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
            mounts[os.path.join(data_dir, 'data/grafana.db')] = '/var/lib/grafana/grafana.db:Z'
        elif daemon_type == 'alertmanager':
            mounts[os.path.join(data_dir, 'etc/alertmanager')] = '/etc/alertmanager:Z'

    if daemon_type == NFSGanesha.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
        mounts.update(nfs_ganesha.get_container_mounts(data_dir))

    if daemon_type == HAproxy.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        mounts.update(HAproxy.get_container_mounts(data_dir))

    if daemon_type == CephIscsi.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        log_dir = get_log_dir(fsid, ctx.log_dir)
        mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir))

    if daemon_type == Keepalived.daemon_type:
        assert daemon_id
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        mounts.update(Keepalived.get_container_mounts(data_dir))

    if daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        mounts.update(cc.get_container_mounts(data_dir))

    return mounts
2968 | ||
f6b5b4d7 | 2969 | |
20effc67 TL |
def get_ceph_volume_container(ctx: CephadmContext,
                              privileged: bool = True,
                              cname: str = '',
                              volume_mounts: Optional[Dict[str, str]] = None,
                              bind_mounts: Optional[List[List[str]]] = None,
                              args: Optional[List[str]] = None,
                              envs: Optional[List[str]] = None) -> 'CephContainer':
    """Return a CephContainer configured to run /usr/sbin/ceph-volume.

    :param ctx: cephadm context (supplies image and memory settings)
    :param privileged: run privileged (default True)
    :param cname: container name, if any
    :param volume_mounts: host-path -> container-path mounts
    :param bind_mounts: raw bind-mount argument lists
    :param args: arguments passed through to ceph-volume
    :param envs: extra environment variables; the caller's list is copied,
                 not mutated
    """
    # Fix two hazards of the previous version: mutable default arguments
    # ({} / []) shared across calls, and in-place mutation of a
    # caller-supplied ``envs`` list via append().
    envs = list(envs) if envs is not None else []
    envs.append('CEPH_VOLUME_SKIP_RESTORECON=yes')
    envs.append('CEPH_VOLUME_DEBUG=1')

    return CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/sbin/ceph-volume',
        args=args if args is not None else [],
        volume_mounts=volume_mounts if volume_mounts is not None else {},
        bind_mounts=bind_mounts,
        envs=envs,
        privileged=privileged,
        cname=cname,
        memory_request=ctx.memory_request,
        memory_limit=ctx.memory_limit,
    )
2995 | ||
2996 | ||
f67539c2 TL |
2997 | def get_container(ctx: CephadmContext, |
2998 | fsid: str, daemon_type: str, daemon_id: Union[int, str], | |
f91f0fd5 TL |
2999 | privileged: bool = False, |
3000 | ptrace: bool = False, | |
3001 | container_args: Optional[List[str]] = None) -> 'CephContainer': | |
3002 | entrypoint: str = '' | |
3003 | name: str = '' | |
3004 | ceph_args: List[str] = [] | |
522d829b | 3005 | envs: List[str] = [] |
f91f0fd5 TL |
3006 | host_network: bool = True |
3007 | ||
522d829b TL |
3008 | if daemon_type in Ceph.daemons: |
3009 | envs.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728') | |
f91f0fd5 TL |
3010 | if container_args is None: |
3011 | container_args = [] | |
9f95a23c TL |
3012 | if daemon_type in ['mon', 'osd']: |
3013 | # mon and osd need privileged in order for libudev to query devices | |
3014 | privileged = True | |
3015 | if daemon_type == 'rgw': | |
3016 | entrypoint = '/usr/bin/radosgw' | |
3017 | name = 'client.rgw.%s' % daemon_id | |
3018 | elif daemon_type == 'rbd-mirror': | |
3019 | entrypoint = '/usr/bin/rbd-mirror' | |
3020 | name = 'client.rbd-mirror.%s' % daemon_id | |
f67539c2 TL |
3021 | elif daemon_type == 'cephfs-mirror': |
3022 | entrypoint = '/usr/bin/cephfs-mirror' | |
3023 | name = 'client.cephfs-mirror.%s' % daemon_id | |
9f95a23c TL |
3024 | elif daemon_type == 'crash': |
3025 | entrypoint = '/usr/bin/ceph-crash' | |
3026 | name = 'client.crash.%s' % daemon_id | |
3027 | elif daemon_type in ['mon', 'mgr', 'mds', 'osd']: | |
3028 | entrypoint = '/usr/bin/ceph-' + daemon_type | |
3029 | name = '%s.%s' % (daemon_type, daemon_id) | |
3030 | elif daemon_type in Monitoring.components: | |
3031 | entrypoint = '' | |
9f95a23c TL |
3032 | elif daemon_type == NFSGanesha.daemon_type: |
3033 | entrypoint = NFSGanesha.entrypoint | |
3034 | name = '%s.%s' % (daemon_type, daemon_id) | |
f91f0fd5 | 3035 | envs.extend(NFSGanesha.get_container_envs()) |
f67539c2 TL |
3036 | elif daemon_type == HAproxy.daemon_type: |
3037 | name = '%s.%s' % (daemon_type, daemon_id) | |
522d829b | 3038 | container_args.extend(['--user=root']) # haproxy 2.4 defaults to a different user |
f67539c2 TL |
3039 | elif daemon_type == Keepalived.daemon_type: |
3040 | name = '%s.%s' % (daemon_type, daemon_id) | |
3041 | envs.extend(Keepalived.get_container_envs()) | |
3042 | container_args.extend(['--cap-add=NET_ADMIN', '--cap-add=NET_RAW']) | |
1911f103 TL |
3043 | elif daemon_type == CephIscsi.daemon_type: |
3044 | entrypoint = CephIscsi.entrypoint | |
3045 | name = '%s.%s' % (daemon_type, daemon_id) | |
e306af50 TL |
3046 | # So the container can modprobe iscsi_target_mod and have write perms |
3047 | # to configfs we need to make this a privileged container. | |
3048 | privileged = True | |
f91f0fd5 | 3049 | elif daemon_type == CustomContainer.daemon_type: |
f67539c2 | 3050 | cc = CustomContainer.init(ctx, fsid, daemon_id) |
f91f0fd5 TL |
3051 | entrypoint = cc.entrypoint |
3052 | host_network = False | |
3053 | envs.extend(cc.get_container_envs()) | |
3054 | container_args.extend(cc.get_container_args()) | |
9f95a23c | 3055 | |
9f95a23c | 3056 | if daemon_type in Monitoring.components: |
f67539c2 | 3057 | uid, gid = extract_uid_gid_monitoring(ctx, daemon_type) |
9f95a23c TL |
3058 | monitoring_args = [ |
3059 | '--user', | |
3060 | str(uid), | |
3061 | # FIXME: disable cpu/memory limits for the time being (not supported | |
3062 | # by ubuntu 18.04 kernel!) | |
9f95a23c TL |
3063 | ] |
3064 | container_args.extend(monitoring_args) | |
33c7a0ef TL |
3065 | if daemon_type == 'node-exporter': |
3066 | # in order to support setting '--path.procfs=/host/proc','--path.sysfs=/host/sys', | |
3067 | # '--path.rootfs=/rootfs' for node-exporter we need to disable selinux separation | |
3068 | # between the node-exporter container and the host to avoid selinux denials | |
3069 | container_args.extend(['--security-opt', 'label=disable']) | |
9f95a23c TL |
3070 | elif daemon_type == 'crash': |
3071 | ceph_args = ['-n', name] | |
3072 | elif daemon_type in Ceph.daemons: | |
3073 | ceph_args = ['-n', name, '-f'] | |
20effc67 TL |
3074 | elif daemon_type == SNMPGateway.daemon_type: |
3075 | sg = SNMPGateway.init(ctx, fsid, daemon_id) | |
3076 | container_args.append( | |
3077 | f'--env-file={sg.conf_file_path}' | |
3078 | ) | |
9f95a23c | 3079 | |
f91f0fd5 TL |
3080 | # if using podman, set -d, --conmon-pidfile & --cidfile flags |
3081 | # so service can have Type=Forking | |
f67539c2 | 3082 | if isinstance(ctx.container_engine, Podman): |
f91f0fd5 | 3083 | runtime_dir = '/run' |
f67539c2 TL |
3084 | container_args.extend([ |
3085 | '-d', '--log-driver', 'journald', | |
f91f0fd5 TL |
3086 | '--conmon-pidfile', |
3087 | runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, daemon_id), | |
3088 | '--cidfile', | |
f67539c2 TL |
3089 | runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, daemon_id), |
3090 | ]) | |
3091 | if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION: | |
3092 | container_args.append('--cgroups=split') | |
9f95a23c | 3093 | |
522d829b | 3094 | return CephContainer.for_daemon( |
f67539c2 | 3095 | ctx, |
522d829b TL |
3096 | fsid=fsid, |
3097 | daemon_type=daemon_type, | |
3098 | daemon_id=str(daemon_id), | |
9f95a23c | 3099 | entrypoint=entrypoint, |
f67539c2 | 3100 | args=ceph_args + get_daemon_args(ctx, fsid, daemon_type, daemon_id), |
9f95a23c | 3101 | container_args=container_args, |
f67539c2 TL |
3102 | volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id), |
3103 | bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id), | |
9f95a23c TL |
3104 | envs=envs, |
3105 | privileged=privileged, | |
3106 | ptrace=ptrace, | |
f91f0fd5 | 3107 | host_network=host_network, |
9f95a23c TL |
3108 | ) |
3109 | ||
f6b5b4d7 | 3110 | |
f67539c2 TL |
def extract_uid_gid(ctx, img='', file_path='/var/lib/ceph'):
    # type: (CephadmContext, str, Union[str, List[str]]) -> Tuple[int, int]
    """Return the (uid, gid) owning *file_path* inside the container image.

    Runs ``stat`` inside a throwaway container for each candidate path and
    returns the first owner found.  Raises Error if every candidate fails,
    or RuntimeError when no candidate paths were given at all.
    """
    img = img or ctx.image
    candidates = [file_path] if isinstance(file_path, str) else file_path

    last_failure: Optional[Tuple[str, RuntimeError]] = None
    for candidate in candidates:
        stat_ctr = CephContainer(
            ctx,
            image=img,
            entrypoint='stat',
            args=['-c', '%u %g', candidate])
        try:
            out = stat_ctr.run(verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
        except RuntimeError as e:
            # remember the most recent failure; try the next candidate
            last_failure = (candidate, e)
            continue
        uid, gid = out.split(' ')
        return int(uid), int(gid)

    if last_failure:
        raise Error(f'Failed to extract uid/gid for path {last_failure[0]}: {last_failure[1]}')

    raise RuntimeError('uid/gid not found')
3140 | ||
9f95a23c | 3141 | |
def deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid,
                  config=None, keyring=None,
                  osd_fsid=None,
                  reconfig=False,
                  ports=None):
    # type: (CephadmContext, str, str, Union[int, str], Optional[CephContainer], int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None
    """Deploy (or reconfigure) one daemon instance on this host.

    Creates the daemon's data directories/config/keyring, writes its systemd
    unit files (via deploy_daemon_units, or the cephadm agent's own unit
    writer), opens firewalld ports, and updates the unit.created /
    unit.configured stamp files.

    :param c: container the daemon runs in; may be None only for the cephadm
        agent, which is deployed without a container image.
    :param reconfig: when True, refresh config/keyring and unit files for an
        already-deployed daemon (its data dir must exist); the daemon itself
        is only restarted for non-ceph daemon types.
    :param ports: TCP ports the daemon requires; checked for conflicts and
        opened in firewalld.
    """
    ports = ports or []
    # Refuse to deploy over an in-use port -- except for the mgr, where a
    # standby holding the port is a legitimate situation.
    if any([port_in_use(ctx, port) for port in ports]):
        if daemon_type == 'mgr':
            # non-fatal for mgr when we are in mgr_standby_modules=false, but we can't
            # tell whether that is the case here.
            logger.warning(
                f"ceph-mgr TCP port(s) {','.join(map(str, ports))} already in use"
            )
        else:
            raise Error("TCP Port(s) '{}' required for {} already in use".format(','.join(map(str, ports)), daemon_type))

    data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    if reconfig and not os.path.exists(data_dir):
        raise Error('cannot reconfig, data path %s does not exist' % data_dir)
    if daemon_type == 'mon' and not os.path.exists(data_dir):
        # first-time mon deployment: run `ceph-mon --mkfs` before anything else
        assert config
        assert keyring
        # tmp keyring file
        tmp_keyring = write_tmp(keyring, uid, gid)

        # tmp config file
        tmp_config = write_tmp(config, uid, gid)

        # --mkfs
        create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid)
        mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', daemon_id)
        log_dir = get_log_dir(fsid, ctx.log_dir)
        CephContainer(
            ctx,
            image=ctx.image,
            entrypoint='/usr/bin/ceph-mon',
            args=[
                '--mkfs',
                '-i', str(daemon_id),
                '--fsid', fsid,
                '-c', '/tmp/config',
                '--keyring', '/tmp/keyring',
            ] + get_daemon_args(ctx, fsid, 'mon', daemon_id),
            volume_mounts={
                log_dir: '/var/log/ceph:z',
                mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id),
                tmp_keyring.name: '/tmp/keyring:z',
                tmp_config.name: '/tmp/config:z',
            },
        ).run()

        # write conf
        with open(mon_dir + '/config', 'w') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config)
    else:
        # dirs, conf, keyring
        create_daemon_dirs(
            ctx,
            fsid, daemon_type, daemon_id,
            uid, gid,
            config, keyring)

    if not reconfig:
        if daemon_type == CephadmAgent.daemon_type:
            # the agent is not containerized and writes its own unit files
            if ctx.config_json == '-':
                config_js = get_parm('-')
            else:
                config_js = get_parm(ctx.config_json)
            assert isinstance(config_js, dict)

            cephadm_agent = CephadmAgent(ctx, fsid, daemon_id)
            cephadm_agent.deploy_daemon_unit(config_js)
        else:
            if c:
                deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id,
                                    c, osd_fsid=osd_fsid, ports=ports)
            else:
                raise RuntimeError('attempting to deploy a daemon without a container image')

    # stamp files: their mtimes record when the deployment was created /
    # last configured
    if not os.path.exists(data_dir + '/unit.created'):
        with open(data_dir + '/unit.created', 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write('mtime is time the daemon deployment was created\n')

    with open(data_dir + '/unit.configured', 'w') as f:
        f.write('mtime is time we were last configured\n')
        os.fchmod(f.fileno(), 0o600)
        os.fchown(f.fileno(), uid, gid)

    update_firewalld(ctx, daemon_type)

    # Open ports explicitly required for the daemon
    if ports:
        fw = Firewalld(ctx)
        fw.open_ports(ports)
        fw.apply_rules()

    if reconfig and daemon_type not in Ceph.daemons:
        # ceph daemons do not need a restart; others (presumably) do to pick
        # up the new config
        call_throws(ctx, ['systemctl', 'reset-failed',
                          get_unit_name(fsid, daemon_type, daemon_id)])
        call_throws(ctx, ['systemctl', 'restart',
                          get_unit_name(fsid, daemon_type, daemon_id)])
3251 | ||
9f95a23c | 3252 | |
f67539c2 TL |
def _write_container_cmd_to_bash(ctx, file_obj, container, comment=None, background=False):
    # type: (CephadmContext, IO[str], CephContainer, Optional[str], Optional[bool]) -> None
    """Append the shell commands that (re)start *container* to a unit.run script.

    Emits best-effort cleanup of any leftover container (old and current
    naming schemes), then the actual run command, optionally backgrounded.
    """
    def _best_effort(cmd_words):
        # '!' keeps a failing cleanup from aborting the `set -e` script;
        # stderr is discarded since "no such container" is expected.
        return '! ' + ' '.join(cmd_words) + ' 2> /dev/null\n'

    if comment:
        # A heading comment helps when several containers share one unit file.
        file_obj.write('# ' + comment + '\n')

    # `--rm` on the run command is not always honored, so remove any stale
    # container by hand first (old container name, then current name).
    file_obj.write(_best_effort(container.rm_cmd(old_cname=True)))
    file_obj.write(_best_effort(container.rm_cmd()))

    # `podman rm` can fail to find the container; retry with `--storage`.
    if isinstance(ctx.container_engine, Podman):
        for rm_words in (container.rm_cmd(storage=True),
                         container.rm_cmd(old_cname=True, storage=True)):
            file_obj.write(_best_effort([shlex.quote(a) for a in rm_words]))

    # finally, the actual container run command
    suffix = ' &' if background else ''
    file_obj.write(' '.join([shlex.quote(a) for a in container.run_cmd()]) + suffix + '\n')
3277 | ||
3278 | ||
522d829b TL |
def clean_cgroup(ctx: CephadmContext, fsid: str, unit_name: str) -> None:
    """Remove stale cgroup directories left behind by a stopped unit.

    systemd may fail to clean up the cgroups of a previously stopped unit,
    which makes the next "systemctl start" fail.
    See https://tracker.ceph.com/issues/50998
    """
    cgroup_root = Path('/sys/fs/cgroup')
    # Only the unified (cgroup v2) hierarchy is affected; skip otherwise.
    if not (cgroup_root / 'system.slice').exists():
        return

    # systemd escapes '-' as '\x2d' inside slice names
    slice_name = 'system-ceph\\x2d{}.slice'.format(fsid.replace('-', '\\x2d'))
    unit_cgroup = cgroup_root / 'system.slice' / slice_name / f'{unit_name}.service'
    if not unit_cgroup.exists():
        return

    def _remove_tree(node: Path) -> None:
        # cgroupfs only allows rmdir on empty directories, so remove
        # children depth-first before the directory itself.
        for child in node.iterdir():
            if child.is_dir():
                _remove_tree(child)
        node.rmdir()

    try:
        _remove_tree(unit_cgroup)
    except OSError:
        logger.warning(f'Failed to trim old cgroups {unit_cgroup}')
3302 | ||
3303 | ||
f67539c2 TL |
def deploy_daemon_units(
    ctx: CephadmContext,
    fsid: str,
    uid: int,
    gid: int,
    daemon_type: str,
    daemon_id: Union[int, str],
    c: 'CephContainer',
    enable: bool = True,
    start: bool = True,
    osd_fsid: Optional[str] = None,
    ports: Optional[List[int]] = None,
) -> None:
    """Generate the per-daemon unit.{run,meta,stop,poststop,image} scripts
    and wire the daemon into systemd (base targets plus the per-cluster
    template unit), then optionally enable and start the unit.

    All generated files are written as '<name>.new' and renamed into place
    so a crash mid-write never leaves a truncated script behind.
    """
    # cmd
    data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    with open(data_dir + '/unit.run.new', 'w') as f, \
            open(data_dir + '/unit.meta.new', 'w') as metaf:
        f.write('set -e\n')

        if daemon_type in Ceph.daemons:
            # ceph daemons need the shared admin-socket/run dir to exist
            install_path = find_program('install')
            f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))

        # pre-start cmd(s)
        if daemon_type == 'osd':
            # osds have a pre-start step
            assert osd_fsid
            simple_fn = os.path.join('/etc/ceph/osd',
                                     '%s-%s.json.adopted-by-cephadm' % (daemon_id, osd_fsid))
            if os.path.exists(simple_fn):
                f.write('# Simple OSDs need chown on startup:\n')
                for n in ['block', 'block.db', 'block.wal']:
                    p = os.path.join(data_dir, n)
                    f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
            else:
                # if ceph-volume does not support 'ceph-volume activate', we must
                # do 'ceph-volume lvm activate'.  Probe by running it with a bad
                # option and inspecting the usage error it produces.
                test_cv = get_ceph_volume_container(
                    ctx,
                    args=['activate', '--bad-option'],
                    volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                    bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                    cname='ceph-%s-%s.%s-activate-test' % (fsid, daemon_type, daemon_id),
                )
                out, err, ret = call(ctx, test_cv.run_cmd(), verbosity=CallVerbosity.SILENT)
                # bad: ceph-volume: error: unrecognized arguments: activate --bad-option
                # good: ceph-volume: error: unrecognized arguments: --bad-option
                if 'unrecognized arguments: activate' in err:
                    # older ceph-volume without top-level activate or --no-tmpfs
                    cmd = [
                        'lvm', 'activate',
                        str(daemon_id), osd_fsid,
                        '--no-systemd',
                    ]
                else:
                    cmd = [
                        'activate',
                        '--osd-id', str(daemon_id),
                        '--osd-uuid', osd_fsid,
                        '--no-systemd',
                        '--no-tmpfs',
                    ]

                prestart = get_ceph_volume_container(
                    ctx,
                    args=cmd,
                    volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                    bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                    cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
                )
                _write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate')
        elif daemon_type == CephIscsi.daemon_type:
            # iscsi needs configfs mounted and a companion tcmu-runner
            # container started in the background before the main container
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
            ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            _write_container_cmd_to_bash(ctx, f, tcmu_container, 'iscsi tcmu-runner container', background=True)

        _write_container_cmd_to_bash(ctx, f, c, '%s.%s' % (daemon_type, str(daemon_id)))

        # some metadata about the deploy
        meta: Dict[str, Any] = {}
        if 'meta_json' in ctx and ctx.meta_json:
            meta = json.loads(ctx.meta_json) or {}
        meta.update({
            'memory_request': int(ctx.memory_request) if ctx.memory_request else None,
            'memory_limit': int(ctx.memory_limit) if ctx.memory_limit else None,
        })
        if not meta.get('ports'):
            meta['ports'] = ports
        metaf.write(json.dumps(meta, indent=4) + '\n')

        os.fchmod(f.fileno(), 0o600)
        os.fchmod(metaf.fileno(), 0o600)
        os.rename(data_dir + '/unit.run.new',
                  data_dir + '/unit.run')
        os.rename(data_dir + '/unit.meta.new',
                  data_dir + '/unit.meta')

    # post-stop command(s)
    with open(data_dir + '/unit.poststop.new', 'w') as f:
        if daemon_type == 'osd':
            assert osd_fsid
            poststop = get_ceph_volume_container(
                ctx,
                args=[
                    'lvm', 'deactivate',
                    str(daemon_id), osd_fsid,
                ],
                volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type,
                                                    daemon_id),
            )
            _write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd')
        elif daemon_type == CephIscsi.daemon_type:
            # make sure we also stop the tcmu container
            ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            f.write('! ' + ' '.join(tcmu_container.stop_cmd()) + '\n')
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
        os.fchmod(f.fileno(), 0o600)
        os.rename(data_dir + '/unit.poststop.new',
                  data_dir + '/unit.poststop')

    # stop command(s)
    with open(data_dir + '/unit.stop.new', 'w') as f:
        # following generated script basically checks if the container exists
        # before stopping it. Exit code will be success either if it doesn't
        # exist or if it exists and is stopped successfully.
        container_exists = f'{ctx.container_engine.path} inspect %s &>/dev/null'
        f.write(f'! {container_exists % c.old_cname} || {" ".join(c.stop_cmd(old_cname=True))} \n')
        f.write(f'! {container_exists % c.cname} || {" ".join(c.stop_cmd())} \n')

        os.fchmod(f.fileno(), 0o600)
        os.rename(data_dir + '/unit.stop.new',
                  data_dir + '/unit.stop')

    if c:
        # record which image this daemon was deployed with
        with open(data_dir + '/unit.image.new', 'w') as f:
            f.write(c.image + '\n')
            os.fchmod(f.fileno(), 0o600)
            os.rename(data_dir + '/unit.image.new',
                      data_dir + '/unit.image')

    # sysctl
    install_sysctl(ctx, fsid, daemon_type)

    # systemd
    install_base_units(ctx, fsid)
    unit = get_unit_file(ctx, fsid)
    unit_file = 'ceph-%s@.service' % (fsid)
    with open(ctx.unit_dir + '/' + unit_file + '.new', 'w') as f:
        f.write(unit)
        os.rename(ctx.unit_dir + '/' + unit_file + '.new',
                  ctx.unit_dir + '/' + unit_file)
    call_throws(ctx, ['systemctl', 'daemon-reload'])

    unit_name = get_unit_name(fsid, daemon_type, daemon_id)
    # clear any previous failed state before (re)starting
    call(ctx, ['systemctl', 'stop', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'reset-failed', unit_name],
         verbosity=CallVerbosity.DEBUG)
    if enable:
        call_throws(ctx, ['systemctl', 'enable', unit_name])
    if start:
        clean_cgroup(ctx, fsid, unit_name)
        call_throws(ctx, ['systemctl', 'start', unit_name])
9f95a23c | 3471 | |
f6b5b4d7 TL |
3472 | |
class Firewalld(object):
    """Best-effort wrapper around the `firewall-cmd` CLI.

    Availability is probed once at construction; every mutating method
    silently no-ops when firewalld is not installed, enabled and running,
    so callers do not need to check first.
    """

    def __init__(self, ctx):
        # type: (CephadmContext) -> None
        self.ctx = ctx
        # probed once here; consulted by all other methods
        self.available = self.check()

    def check(self):
        # type: () -> bool
        """Return True if `firewall-cmd` exists and firewalld.service is
        both enabled and running; caches the executable path in self.cmd."""
        self.cmd = find_executable('firewall-cmd')
        if not self.cmd:
            logger.debug('firewalld does not appear to be present')
            return False
        (enabled, state, _) = check_unit(self.ctx, 'firewalld.service')
        if not enabled:
            logger.debug('firewalld.service is not enabled')
            return False
        if state != 'running':
            logger.debug('firewalld.service is not running')
            return False

        logger.info('firewalld ready')
        return True

    def enable_service_for(self, daemon_type):
        # type: (str) -> None
        """Permanently enable the firewalld service matching *daemon_type*
        in the current zone.  No-op for daemon types with no mapping."""
        if not self.available:
            logger.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type)
            return

        # map daemon type -> firewalld service name
        if daemon_type == 'mon':
            svc = 'ceph-mon'
        elif daemon_type in ['mgr', 'mds', 'osd']:
            svc = 'ceph'
        elif daemon_type == NFSGanesha.daemon_type:
            svc = 'nfs'
        else:
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        # query first so we only log/mutate when a change is actually needed
        out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG)
        if ret:
            logger.info('Enabling firewalld service %s in current zone...' % svc)
            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-service', svc])
            if ret:
                raise RuntimeError(
                    'unable to add service %s to current zone: %s' % (svc, err))
        else:
            logger.debug('firewalld service %s is enabled in current zone' % svc)

    def open_ports(self, fw_ports):
        # type: (List[int]) -> None
        """Permanently open each TCP port in *fw_ports* in the current zone.
        Changes only take effect after apply_rules()."""
        if not self.available:
            logger.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports)
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        for port in fw_ports:
            tcp_port = str(port) + '/tcp'
            # --query-port exits 0 when the port is already open
            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
            if ret:
                logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
                out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-port', tcp_port])
                if ret:
                    raise RuntimeError('unable to add port %s to current zone: %s' %
                                       (tcp_port, err))
            else:
                logger.debug('firewalld port %s is enabled in current zone' % tcp_port)

    def close_ports(self, fw_ports):
        # type: (List[int]) -> None
        """Permanently close each TCP port in *fw_ports* in the current zone.
        Ports that are not open are skipped."""
        if not self.available:
            logger.debug('Not possible to close ports <%s>. firewalld.service is not available' % fw_ports)
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        for port in fw_ports:
            tcp_port = str(port) + '/tcp'
            # --query-port exits 0 when the port is open
            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
            if not ret:
                logger.info('Disabling port %s in current zone...' % tcp_port)
                out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--remove-port', tcp_port])
                if ret:
                    raise RuntimeError('unable to remove port %s from current zone: %s' %
                                       (tcp_port, err))
                else:
                    logger.info(f'Port {tcp_port} disabled')
            else:
                logger.info(f'firewalld port {tcp_port} already closed')

    def apply_rules(self):
        # type: () -> None
        """Reload firewalld so queued --permanent changes take effect."""
        if not self.available:
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        call_throws(self.ctx, [self.cmd, '--reload'])
f6b5b4d7 | 3577 | |
f67539c2 TL |
3578 | |
def update_firewalld(ctx, daemon_type):
    # type: (CephadmContext, str) -> None
    """Enable the firewalld service matching *daemon_type* and reload the
    firewall, unless the user asked to skip firewalld handling."""
    if 'skip_firewalld' in ctx and ctx.skip_firewalld:
        return
    fw = Firewalld(ctx)
    fw.enable_service_for(daemon_type)
    fw.apply_rules()
f6b5b4d7 | 3585 | |
f6b5b4d7 | 3586 | |
b3b6e05e TL |
def install_sysctl(ctx: CephadmContext, fsid: str, daemon_type: str) -> None:
    """Install the per-daemon sysctl settings file and apply it.

    Only a few daemon types ship sysctl settings; for every other type this
    is a no-op.
    """
    def _render(dest: Path, body: List[str]) -> None:
        # frame the settings with a provenance header and a trailing newline
        content = '\n'.join(['# created by cephadm', '', *body, ''])
        with open(dest, 'w') as fh:
            fh.write(content)

    # daemon type -> class providing the settings
    sources = {
        'osd': OSD,
        'haproxy': HAproxy,
        'keepalived': Keepalived,
    }
    provider = sources.get(daemon_type)
    settings = provider.get_sysctl_settings() if provider else None

    # apply the sysctl settings
    if settings:
        conf_path = Path(ctx.sysctl_dir).joinpath(f'90-ceph-{fsid}-{daemon_type}.conf')
        Path(ctx.sysctl_dir).mkdir(mode=0o755, exist_ok=True)
        _render(conf_path, settings)
        call_throws(ctx, ['sysctl', '--system'])
9f95a23c | 3616 | |
f67539c2 | 3617 | |
33c7a0ef TL |
def migrate_sysctl_dir(ctx: CephadmContext, fsid: str) -> None:
    """
    Cephadm once used '/usr/lib/sysctl.d' for storing sysctl configuration.
    This moves it to '/etc/sysctl.d'.
    """
    legacy_dir: str = '/usr/lib/sysctl.d'
    legacy_confs: List[str] = glob(f'{legacy_dir}/90-ceph-{fsid}-*.conf')
    if not legacy_confs:
        return

    total: int = len(legacy_confs)
    remaining: int = total
    logger.info(f'Found sysctl {total} files in deprecated location {legacy_dir}. Starting Migration.')
    for conf_file in legacy_confs:
        try:
            shutil.move(conf_file, ctx.sysctl_dir)
            remaining -= 1
        except shutil.Error as err:
            if str(err).endswith('already exists'):
                # already migrated (or hand-copied): just drop the legacy copy
                logger.warning(f'Destination file already exists. Deleting {conf_file}.')
                try:
                    os.unlink(conf_file)
                    remaining -= 1
                except OSError as del_err:
                    logger.warning(f'Could not remove {conf_file}: {del_err}.')
            else:
                logger.warning(f'Could not move {conf_file} from {legacy_dir} to {ctx.sysctl_dir}: {err}')

    # Log successful migration
    if remaining == 0:
        logger.info(f'Successfully migrated sysctl config to {ctx.sysctl_dir}.')
        return

    # Log partially successful / unsuccessful migration
    status: str = 'unsuccessful'
    if remaining < total:
        status = f'partially successful (failed {remaining}/{total})'
    logger.warning(f'Migration of sysctl configuration {status}. You may want to perform a migration manually.')
3657 | ||
3658 | ||
f67539c2 TL |
def install_base_units(ctx, fsid):
    # type: (CephadmContext, str) -> None
    """
    Set up ceph.target and ceph-$fsid.target units.

    Also installs the per-cluster logrotate configuration.  Unit files are
    written as '.new' and renamed into place for atomic replacement.
    """
    # global unit
    existed = os.path.exists(ctx.unit_dir + '/ceph.target')
    with open(ctx.unit_dir + '/ceph.target.new', 'w') as f:
        f.write('[Unit]\n'
                'Description=All Ceph clusters and services\n'
                '\n'
                '[Install]\n'
                'WantedBy=multi-user.target\n')
    os.rename(ctx.unit_dir + '/ceph.target.new',
              ctx.unit_dir + '/ceph.target')
    if not existed:
        # we disable before enable in case a different ceph.target
        # (from the traditional package) is present; while newer
        # systemd is smart enough to disable the old
        # (/lib/systemd/...) and enable the new (/etc/systemd/...),
        # some older versions of systemd error out with EEXIST.
        call_throws(ctx, ['systemctl', 'disable', 'ceph.target'])
        call_throws(ctx, ['systemctl', 'enable', 'ceph.target'])
        call_throws(ctx, ['systemctl', 'start', 'ceph.target'])

    # cluster unit
    existed = os.path.exists(ctx.unit_dir + '/ceph-%s.target' % fsid)
    with open(ctx.unit_dir + '/ceph-%s.target.new' % fsid, 'w') as f:
        f.write(
            '[Unit]\n'
            'Description=Ceph cluster {fsid}\n'
            'PartOf=ceph.target\n'
            'Before=ceph.target\n'
            '\n'
            '[Install]\n'
            'WantedBy=multi-user.target ceph.target\n'.format(
                fsid=fsid)
        )
    os.rename(ctx.unit_dir + '/ceph-%s.target.new' % fsid,
              ctx.unit_dir + '/ceph-%s.target' % fsid)
    if not existed:
        call_throws(ctx, ['systemctl', 'enable', 'ceph-%s.target' % fsid])
        call_throws(ctx, ['systemctl', 'start', 'ceph-%s.target' % fsid])

    # logrotate for the cluster
    with open(ctx.logrotate_dir + '/ceph-%s' % fsid, 'w') as f:
        """
        This is a bit sloppy in that the killall/pkill will touch all ceph daemons
        in all containers, but I don't see an elegant way to send SIGHUP *just* to
        the daemons for this cluster. (1) systemd kill -s will get the signal to
        podman, but podman will exit. (2) podman kill will get the signal to the
        first child (bash), but that isn't the ceph daemon. This is simpler and
        should be harmless.
        """
        f.write("""# created by cephadm
/var/log/ceph/%s/*.log {
    rotate 7
    daily
    compress
    sharedscripts
    postrotate
        killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror cephfs-mirror || pkill -1 -x 'ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror|cephfs-mirror' || true
    endscript
    missingok
    notifempty
    su root root
}
""" % fsid)
3727 | ||
f6b5b4d7 | 3728 | |
f67539c2 TL |
def get_unit_file(ctx, fsid):
    # type: (CephadmContext, str) -> str
    """Render the ceph-<fsid>@.service systemd template unit for this host's
    container engine (extra forking/pidfile handling under podman, ordering
    against docker.service under docker)."""
    engine = ctx.container_engine

    extra = ''
    if isinstance(engine, Podman):
        # podman detaches; systemd tracks the daemon via conmon's pidfile
        extra = ('ExecStartPre=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
                 'ExecStopPost=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
                 'Type=forking\n'
                 'PIDFile=%t/%n-pid\n')
        if engine.version >= CGROUPS_SPLIT_PODMAN_VERSION:
            extra += 'Delegate=yes\n'

    uses_docker = isinstance(engine, Docker)
    return """# generated by cephadm
[Unit]
Description=Ceph %i for {fsid}

# According to:
# http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
# these can be removed once ceph-mon will dynamically change network
# configuration.
After=network-online.target local-fs.target time-sync.target{docker_after}
Wants=network-online.target local-fs.target time-sync.target
{docker_requires}
PartOf=ceph-{fsid}.target
Before=ceph-{fsid}.target

[Service]
LimitNOFILE=1048576
LimitNPROC=1048576
EnvironmentFile=-/etc/environment
ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
ExecStop=-/bin/bash -c 'bash {data_dir}/{fsid}/%i/unit.stop'
ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
KillMode=none
Restart=on-failure
RestartSec=10s
TimeoutStartSec=120
TimeoutStopSec=120
StartLimitInterval=30min
StartLimitBurst=5
{extra_args}
[Install]
WantedBy=ceph-{fsid}.target
""".format(fsid=fsid,
           data_dir=ctx.data_dir,
           extra_args=extra,
           # if docker, we depend on docker.service
           docker_after=' docker.service' if uses_docker else '',
           docker_requires='Requires=docker.service\n' if uses_docker else '')
3781 | ||
3782 | ################################## | |
3783 | ||
f6b5b4d7 | 3784 | |
9f95a23c TL |
class CephContainer:
    """Builder for the container-engine command lines (run/exec/rm/stop)
    used to manage one Ceph container under podman or docker.

    Instances only assemble argv lists; nothing is executed until a list
    is passed to call()/call_throws() (see run()).
    """

    def __init__(self,
                 ctx: CephadmContext,
                 image: str,
                 entrypoint: str,
                 args: List[str] = [],
                 volume_mounts: Dict[str, str] = {},
                 cname: str = '',
                 container_args: List[str] = [],
                 envs: Optional[List[str]] = None,
                 privileged: bool = False,
                 ptrace: bool = False,
                 bind_mounts: Optional[List[List[str]]] = None,
                 init: Optional[bool] = None,
                 host_network: bool = True,
                 memory_request: Optional[str] = None,
                 memory_limit: Optional[str] = None,
                 ) -> None:
        self.ctx = ctx
        self.image = image
        self.entrypoint = entrypoint
        self.args = args
        self.volume_mounts = volume_mounts
        # raw name as given; the `cname` property sanitizes dots (see below)
        self._cname = cname
        self.container_args = container_args
        self.envs = envs
        self.privileged = privileged
        self.ptrace = ptrace
        self.bind_mounts = bind_mounts if bind_mounts else []
        # fall back to the context-wide container_init setting when unset
        self.init = init if init else ctx.container_init
        self.host_network = host_network
        self.memory_request = memory_request
        self.memory_limit = memory_limit

    @classmethod
    def for_daemon(cls,
                   ctx: CephadmContext,
                   fsid: str,
                   daemon_type: str,
                   daemon_id: str,
                   entrypoint: str,
                   args: List[str] = [],
                   volume_mounts: Dict[str, str] = {},
                   container_args: List[str] = [],
                   envs: Optional[List[str]] = None,
                   privileged: bool = False,
                   ptrace: bool = False,
                   bind_mounts: Optional[List[List[str]]] = None,
                   init: Optional[bool] = None,
                   host_network: bool = True,
                   memory_request: Optional[str] = None,
                   memory_limit: Optional[str] = None,
                   ) -> 'CephContainer':
        """Alternate constructor that derives the container name from the
        cluster fsid plus daemon type/id (ceph-<fsid>-<type>.<id>) and the
        image from the context."""
        return cls(
            ctx,
            image=ctx.image,
            entrypoint=entrypoint,
            args=args,
            volume_mounts=volume_mounts,
            cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id),
            container_args=container_args,
            envs=envs,
            privileged=privileged,
            ptrace=ptrace,
            bind_mounts=bind_mounts,
            init=init,
            host_network=host_network,
            memory_request=memory_request,
            memory_limit=memory_limit,
        )

    @property
    def cname(self) -> str:
        """
        podman adds the current container name to the /etc/hosts
        file. Turns out, python's `socket.getfqdn()` differs from
        `hostname -f`, when we have the container names containing
        dots in it.:

        # podman run --name foo.bar.baz.com ceph/ceph /bin/bash
        [root@sebastians-laptop /]# cat /etc/hosts
        127.0.0.1   localhost
        ::1         localhost
        127.0.1.1   sebastians-laptop foo.bar.baz.com
        [root@sebastians-laptop /]# hostname -f
        sebastians-laptop
        [root@sebastians-laptop /]# python3 -c 'import socket; print(socket.getfqdn())'
        foo.bar.baz.com

        Fascinatingly, this doesn't happen when using dashes.
        """
        return self._cname.replace('.', '-')

    @cname.setter
    def cname(self, val: str) -> None:
        self._cname = val

    @property
    def old_cname(self) -> str:
        """The unsanitized name as originally passed in (dots preserved),
        used to match containers created by older cephadm versions."""
        return self._cname

    def run_cmd(self) -> List[str]:
        """Build the full `<engine> run ...` argv for this container.

        Argument ordering is significant for the container engine:
        engine flags, then container_args, env vars, volumes, binds,
        then the image and finally the in-container arguments.
        """
        cmd_args: List[str] = [
            str(self.ctx.container_engine.path),
            'run',
            '--rm',
            '--ipc=host',
            # some containers (ahem, haproxy) override this, but we want a fast
            # shutdown always (and, more importantly, a successful exit even if we
            # fall back to SIGKILL).
            '--stop-signal=SIGTERM',
        ]

        if isinstance(self.ctx.container_engine, Podman):
            if os.path.exists('/etc/ceph/podman-auth.json'):
                cmd_args.append('--authfile=/etc/ceph/podman-auth.json')

        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        vols: List[str] = []
        binds: List[str] = []

        # NOTE(review): unlike the CONTAINER_IMAGE entries above, these pass
        # the variable name and its value as two separate argv items rather
        # than NAME=value — confirm the engine accepts this form.
        if self.memory_request:
            cmd_args.extend(['-e', 'POD_MEMORY_REQUEST', str(self.memory_request)])
        if self.memory_limit:
            cmd_args.extend(['-e', 'POD_MEMORY_LIMIT', str(self.memory_limit)])
            cmd_args.extend(['--memory', str(self.memory_limit)])

        if self.host_network:
            cmd_args.append('--net=host')
        if self.entrypoint:
            cmd_args.extend(['--entrypoint', self.entrypoint])
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk'])
        if self.ptrace and not self.privileged:
            # if privileged, the SYS_PTRACE cap is already added
            # in addition, --cap-add and --privileged are mutually
            # exclusive since podman >= 2.0
            cmd_args.append('--cap-add=SYS_PTRACE')
        if self.init:
            cmd_args.append('--init')
            envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
        if self.cname:
            cmd_args.extend(['--name', self.cname])
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        binds = sum([['--mount', '{}'.format(','.join(bind))]
                     for bind in self.bind_mounts], [])

        return \
            cmd_args + self.container_args + \
            envs + vols + binds + \
            [self.image] + self.args  # type: ignore

    def shell_cmd(self, cmd: List[str]) -> List[str]:
        """Build a `<engine> run ...` argv that executes `cmd` inside a
        fresh (unnamed) container: cmd[0] becomes the entrypoint and the
        remainder its arguments."""
        cmd_args: List[str] = [
            str(self.ctx.container_engine.path),
            'run',
            '--rm',
            '--ipc=host',
        ]
        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        vols: List[str] = []
        binds: List[str] = []

        if self.host_network:
            cmd_args.append('--net=host')
        if self.ctx.no_hosts:
            cmd_args.append('--no-hosts')
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk',
            ])
        if self.init:
            cmd_args.append('--init')
            envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        binds = sum([['--mount', '{}'.format(','.join(bind))]
                     for bind in self.bind_mounts], [])

        return cmd_args + self.container_args + envs + vols + binds + [
            '--entrypoint', cmd[0],
            self.image,
        ] + cmd[1:]

    def exec_cmd(self, cmd):
        # type: (List[str]) -> List[str]
        """Build a `<engine> exec ...` argv that runs `cmd` inside this
        already-running container; raises Error if it is not running."""
        cname = get_running_container_name(self.ctx, self)
        if not cname:
            raise Error('unable to find container "{}"'.format(self.cname))
        return [
            str(self.ctx.container_engine.path),
            'exec',
        ] + self.container_args + [
            self.cname,
        ] + cmd

    def rm_cmd(self, old_cname: bool = False, storage: bool = False) -> List[str]:
        """Build a `<engine> rm -f` argv for this container.

        :param old_cname: remove by the legacy (dotted) name
        :param storage: also pass --storage (podman: remove external storage)
        """
        ret = [
            str(self.ctx.container_engine.path),
            'rm', '-f',
        ]
        if storage:
            ret.append('--storage')
        if old_cname:
            ret.append(self.old_cname)
        else:
            ret.append(self.cname)
        return ret

    def stop_cmd(self, old_cname: bool = False) -> List[str]:
        """Build a `<engine> stop` argv for this container (by legacy name
        when old_cname is True)."""
        ret = [
            str(self.ctx.container_engine.path),
            'stop', self.old_cname if old_cname else self.cname,
        ]
        return ret

    def run(self, timeout=DEFAULT_TIMEOUT, verbosity=CallVerbosity.VERBOSE_ON_FAILURE):
        # type: (Optional[int], CallVerbosity) -> str
        """Execute this container via run_cmd() and return its stdout;
        raises (via call_throws) on a non-zero exit."""
        out, _, _ = call_throws(self.ctx, self.run_cmd(),
                                desc=self.entrypoint, timeout=timeout, verbosity=verbosity)
        return out
4028 | ||
20effc67 TL |
4029 | |
4030 | ##################################### | |
4031 | ||
class MgrListener(Thread):
    """Background thread that accepts mutually-authenticated TLS connections
    from the cephadm mgr module and applies pushed messages to the agent.

    Wire format: a 10-byte ASCII integer giving the payload length,
    followed by a JSON payload. Each payload is answered with b'ACK' or an
    error string.
    """

    def __init__(self, agent: 'CephadmAgent') -> None:
        self.agent = agent
        self.stop = False
        super(MgrListener, self).__init__(target=self.run)

    def run(self) -> None:
        """Accept loop: handle one mgr connection at a time until shutdown."""
        listenSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        listenSocket.bind(('0.0.0.0', int(self.agent.listener_port)))
        # time out accept() regularly so the loop can notice self.stop
        listenSocket.settimeout(60)
        listenSocket.listen(1)
        # require the peer (the mgr) to present a client cert signed by the
        # cluster CA
        ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
        ssl_ctx.verify_mode = ssl.CERT_REQUIRED
        ssl_ctx.load_cert_chain(self.agent.listener_cert_path, self.agent.listener_key_path)
        ssl_ctx.load_verify_locations(self.agent.ca_path)
        secureListenSocket = ssl_ctx.wrap_socket(listenSocket, server_side=True)
        while not self.stop:
            try:
                try:
                    conn, _ = secureListenSocket.accept()
                except socket.timeout:
                    continue
                try:
                    length: int = int(conn.recv(10).decode())
                except Exception as e:
                    err_str = f'Failed to extract length of payload from message: {e}'
                    conn.send(err_str.encode())
                    logger.error(err_str)
                    # BUGFIX: without a valid length we cannot read the payload.
                    # Previously the code fell through to conn.recv(length)
                    # with `length` unbound, raising NameError on every bad
                    # message; skip to the next connection instead.
                    continue
                while True:
                    payload = conn.recv(length).decode()
                    if not payload:
                        break
                    try:
                        data: Dict[Any, Any] = json.loads(payload)
                        self.handle_json_payload(data)
                    except Exception as e:
                        err_str = f'Failed to extract json payload from message: {e}'
                        conn.send(err_str.encode())
                        logger.error(err_str)
                    else:
                        conn.send(b'ACK')
                        if 'config' in data:
                            self.agent.wakeup()
                        self.agent.ls_gatherer.wakeup()
                        self.agent.volume_gatherer.wakeup()
                        logger.debug(f'Got mgr message {data}')
            except Exception as e:
                logger.error(f'Mgr Listener encountered exception: {e}')

    def shutdown(self) -> None:
        """Ask the accept loop to exit after the current accept() times out."""
        self.stop = True

    def handle_json_payload(self, data: Dict[Any, Any]) -> None:
        """Record the mgr's ack counter and install any pushed config files.

        New file contents are written as <name>.new with 0o600 permissions
        and atomically renamed into place before the agent reloads its
        settings and is woken up.
        """
        self.agent.ack = int(data['counter'])
        if 'config' in data:
            logger.info('Received new config from mgr')
            config = data['config']
            for filename in config:
                if filename in self.agent.required_files:
                    file_path = os.path.join(self.agent.daemon_dir, filename)
                    with open(os.open(file_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
                        f.write(config[filename])
                    os.rename(file_path + '.new', file_path)
            self.agent.pull_conf_settings()
            self.agent.wakeup()
4097 | ||
4098 | ||
class CephadmAgent():
    """Host agent deployed by the cephadm mgr module.

    Periodically gathers host metadata (daemon inventory, host facts,
    network info, ceph-volume inventory) and POSTs it to the mgr over
    HTTPS. A MgrListener thread receives pushed config updates, and two
    AgentGatherer threads collect `ls` and ceph-volume data concurrently.
    """

    daemon_type = 'agent'
    default_port = 8498
    loop_interval = 30  # seconds between pushes to the mgr; config can override
    stop = False

    # files the mgr must supply for the agent to operate
    required_files = [
        'agent.json',
        'keyring',
        'root_cert.pem',
        'listener.crt',
        'listener.key',
    ]

    def __init__(self, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str] = ''):
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.starting_port = 14873  # first port probed for the listener
        self.target_ip = ''  # mgr endpoint; filled in by pull_conf_settings()
        self.target_port = ''
        self.host = ''
        self.daemon_dir = os.path.join(ctx.data_dir, self.fsid, f'{self.daemon_type}.{self.daemon_id}')
        self.config_path = os.path.join(self.daemon_dir, 'agent.json')
        self.keyring_path = os.path.join(self.daemon_dir, 'keyring')
        self.ca_path = os.path.join(self.daemon_dir, 'root_cert.pem')
        self.listener_cert_path = os.path.join(self.daemon_dir, 'listener.crt')
        self.listener_key_path = os.path.join(self.daemon_dir, 'listener.key')
        self.listener_port = ''
        self.ack = 1  # counter echoed back to the mgr; bumped by pushed messages
        self.event = Event()
        self.mgr_listener = MgrListener(self)
        self.ls_gatherer = AgentGatherer(self, lambda: self._get_ls(), 'Ls')
        self.volume_gatherer = AgentGatherer(self, lambda: self._ceph_volume(enhanced=False), 'Volume')
        self.device_enhanced_scan = False
        # ring buffer of the last 3 loop durations, used to keep the push
        # cadence close to loop_interval
        self.recent_iteration_run_times: List[float] = [0.0, 0.0, 0.0]
        self.recent_iteration_index: int = 0
        self.cached_ls_values: Dict[str, Dict[str, str]] = {}

    def validate(self, config: Dict[str, str] = {}) -> None:
        """Raise Error unless every entry of required_files is in config."""
        # check for the required files
        for fname in self.required_files:
            if fname not in config:
                raise Error('required file missing from config: %s' % fname)

    def deploy_daemon_unit(self, config: Dict[str, str] = {}) -> None:
        """Write the agent's config files, unit.run, unit.meta and systemd
        unit file, then (re)start the agent service.

        Every file is written as <name>.new with 0o600 permissions and
        atomically renamed into place.
        """
        if not config:
            raise Error('Agent needs a config')
        assert isinstance(config, dict)
        self.validate(config)

        # Create the required config files in the daemons dir, with restricted permissions
        for filename in config:
            if filename in self.required_files:
                file_path = os.path.join(self.daemon_dir, filename)
                with open(os.open(file_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
                    f.write(config[filename])
                os.rename(file_path + '.new', file_path)

        unit_run_path = os.path.join(self.daemon_dir, 'unit.run')
        with open(os.open(unit_run_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
            f.write(self.unit_run())
        os.rename(unit_run_path + '.new', unit_run_path)

        meta: Dict[str, Any] = {}
        meta_file_path = os.path.join(self.daemon_dir, 'unit.meta')
        if 'meta_json' in self.ctx and self.ctx.meta_json:
            meta = json.loads(self.ctx.meta_json) or {}
        with open(os.open(meta_file_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
            f.write(json.dumps(meta, indent=4) + '\n')
        os.rename(meta_file_path + '.new', meta_file_path)

        unit_file_path = os.path.join(self.ctx.unit_dir, self.unit_name())
        with open(os.open(unit_file_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
            f.write(self.unit_file())
        os.rename(unit_file_path + '.new', unit_file_path)

        call_throws(self.ctx, ['systemctl', 'daemon-reload'])
        call(self.ctx, ['systemctl', 'stop', self.unit_name()],
             verbosity=CallVerbosity.DEBUG)
        call(self.ctx, ['systemctl', 'reset-failed', self.unit_name()],
             verbosity=CallVerbosity.DEBUG)
        call_throws(self.ctx, ['systemctl', 'enable', '--now', self.unit_name()])

    def unit_name(self) -> str:
        """Return the agent's systemd unit file name (with .service suffix)."""
        return '{}.service'.format(get_unit_name(self.fsid, self.daemon_type, self.daemon_id))

    def unit_run(self) -> str:
        """Return the shell snippet systemd runs: re-invoke this cephadm
        binary in agent mode, backgrounded (the unit is Type=forking)."""
        py3 = shutil.which('python3')
        binary_path = os.path.realpath(sys.argv[0])
        return ('set -e\n' + f'{py3} {binary_path} agent --fsid {self.fsid} --daemon-id {self.daemon_id} &\n')

    def unit_file(self) -> str:
        """Return the text of the agent's systemd service unit."""
        return """#generated by cephadm
[Unit]
Description=cephadm agent for cluster {fsid}

PartOf=ceph-{fsid}.target
Before=ceph-{fsid}.target

[Service]
Type=forking
ExecStart=/bin/bash {data_dir}/unit.run
Restart=on-failure
RestartSec=10s

[Install]
WantedBy=ceph-{fsid}.target
""".format(
            fsid=self.fsid,
            data_dir=self.daemon_dir
        )

    def shutdown(self) -> None:
        """Stop the main loop and the mgr listener thread."""
        self.stop = True
        if self.mgr_listener.is_alive():
            self.mgr_listener.shutdown()

    def wakeup(self) -> None:
        """Interrupt the current wait so the next push happens immediately."""
        self.event.set()

    def pull_conf_settings(self) -> None:
        """(Re)load agent.json and the keyring from disk into instance state.

        Shuts the agent down and raises Error when either file is missing
        or malformed.
        """
        try:
            with open(self.config_path, 'r') as f:
                config = json.load(f)
                self.target_ip = config['target_ip']
                self.target_port = config['target_port']
                self.loop_interval = int(config['refresh_period'])
                self.starting_port = int(config['listener_port'])
                self.host = config['host']
                use_lsm = config['device_enhanced_scan']
        except Exception as e:
            self.shutdown()
            raise Error(f'Failed to get agent target ip and port from config: {e}')

        try:
            with open(self.keyring_path, 'r') as f:
                self.keyring = f.read()
        except Exception as e:
            self.shutdown()
            raise Error(f'Failed to get agent keyring: {e}')

        assert self.target_ip and self.target_port

        # 'device_enhanced_scan' arrives as a string; anything but 'true'
        # (case-insensitive) disables the libstoragemgmt scan
        self.device_enhanced_scan = False
        if use_lsm.lower() == 'true':
            self.device_enhanced_scan = True
        self.volume_gatherer.update_func(lambda: self._ceph_volume(enhanced=self.device_enhanced_scan))

    def run(self) -> None:
        """Main agent loop: bind a free listener port, start the helper
        threads, then push gathered metadata to the mgr roughly every
        loop_interval seconds (compensating for gather time)."""
        self.pull_conf_settings()

        try:
            for _ in range(1001):
                if not port_in_use(self.ctx, self.starting_port):
                    self.listener_port = str(self.starting_port)
                    break
                self.starting_port += 1
            if not self.listener_port:
                raise Error(f'All 1000 ports starting at {str(self.starting_port - 1001)} taken.')
        except Exception as e:
            raise Error(f'Failed to pick port for agent to listen on: {e}')

        if not self.mgr_listener.is_alive():
            self.mgr_listener.start()

        if not self.ls_gatherer.is_alive():
            self.ls_gatherer.start()

        if not self.volume_gatherer.is_alive():
            self.volume_gatherer.start()

        # verify the mgr's server cert against the cluster CA
        ssl_ctx = ssl.create_default_context()
        ssl_ctx.check_hostname = True
        ssl_ctx.verify_mode = ssl.CERT_REQUIRED
        ssl_ctx.load_verify_locations(self.ca_path)

        while not self.stop:
            start_time = time.monotonic()
            ack = self.ack

            # part of the networks info is returned as a set which is not JSON
            # serializable. The set must be converted to a list
            networks = list_networks(self.ctx)
            networks_list: Dict[str, Dict[str, List[str]]] = {}
            for key in networks.keys():
                # BUGFIX: accumulate every interface entry per network. The
                # previous code rebound networks_list[key] = {k: list(v)} on
                # each iteration, so only the last (k, v) pair survived.
                networks_list[key] = {}
                for k, v in networks[key].items():
                    networks_list[key][k] = list(v)

            # gatherer data is only trusted when its ack matches ours;
            # otherwise send an empty placeholder
            data = json.dumps({'host': self.host,
                               'ls': (self.ls_gatherer.data if self.ack == self.ls_gatherer.ack
                                      and self.ls_gatherer.data is not None else []),
                               'networks': networks_list,
                               'facts': HostFacts(self.ctx).dump(),
                               'volume': (self.volume_gatherer.data if self.ack == self.volume_gatherer.ack
                                          and self.volume_gatherer.data is not None else ''),
                               'ack': str(ack),
                               'keyring': self.keyring,
                               'port': self.listener_port})
            data = data.encode('ascii')

            url = f'https://{self.target_ip}:{self.target_port}/data'
            try:
                req = Request(url, data, {'Content-Type': 'application/json'})
                send_time = time.monotonic()
                with urlopen(req, context=ssl_ctx) as response:
                    response_str = response.read()
                    response_json = json.loads(response_str)
                total_request_time = datetime.timedelta(seconds=(time.monotonic() - send_time)).total_seconds()
                logger.info(f'Received mgr response: "{response_json["result"]}" {total_request_time} seconds after sending request.')
            except Exception as e:
                # best effort: log and retry on the next iteration
                logger.error(f'Failed to send metadata to mgr: {e}')

            end_time = time.monotonic()
            run_time = datetime.timedelta(seconds=(end_time - start_time))
            self.recent_iteration_run_times[self.recent_iteration_index] = run_time.total_seconds()
            self.recent_iteration_index = (self.recent_iteration_index + 1) % 3
            run_time_average = sum(self.recent_iteration_run_times, 0.0) / len([t for t in self.recent_iteration_run_times if t])

            self.event.wait(max(self.loop_interval - int(run_time_average), 0))
            self.event.clear()

    def _ceph_volume(self, enhanced: bool = False) -> Tuple[str, bool]:
        """Run `ceph-volume inventory --format=json` (optionally --with-lsm)
        in-process and return (json_output, False); raises when the output
        is empty."""
        self.ctx.command = 'inventory --format=json'.split()
        if enhanced:
            self.ctx.command.append('--with-lsm')
        self.ctx.fsid = self.fsid

        stream = io.StringIO()
        with redirect_stdout(stream):
            command_ceph_volume(self.ctx)

        stdout = stream.getvalue()

        if stdout:
            return (stdout, False)
        else:
            raise Exception('ceph-volume returned empty value')

    def _daemon_ls_subset(self) -> Dict[str, Dict[str, Any]]:
        # gets a subset of ls info quickly. The results of this will tell us if our
        # cached info is still good or if we need to run the full ls again.
        # for legacy containers, we just grab the full info. For cephadmv1 containers,
        # we only grab enabled, state, mem_usage and container id. If container id has
        # not changed for any daemon, we assume our cached info is good.
        daemons: Dict[str, Dict[str, Any]] = {}
        data_dir = self.ctx.data_dir
        seen_memusage = {}  # type: Dict[str, int]
        out, err, code = call(
            self.ctx,
            [self.ctx.container_engine.path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
            verbosity=CallVerbosity.DEBUG
        )
        seen_memusage_cid_len, seen_memusage = _parse_mem_usage(code, out)
        # we need a mapping from container names to ids. Later we will convert daemon
        # names to container names to get daemons container id to see if it has changed
        out, err, code = call(
            self.ctx,
            [self.ctx.container_engine.path, 'ps', '--format', '{{.ID}},{{.Names}}', '--no-trunc'],
            verbosity=CallVerbosity.DEBUG
        )
        name_id_mapping: Dict[str, str] = self._parse_container_id_name(code, out)
        for i in os.listdir(data_dir):
            if i in ['mon', 'osd', 'mds', 'mgr']:
                daemon_type = i
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '-' not in j:
                        continue
                    (cluster, daemon_id) = j.split('-', 1)
                    legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
                    (enabled, state, _) = check_unit(self.ctx, legacy_unit_name)
                    daemons[f'{daemon_type}.{daemon_id}'] = {
                        'style': 'legacy',
                        'name': '%s.%s' % (daemon_type, daemon_id),
                        'fsid': self.ctx.fsid if self.ctx.fsid is not None else 'unknown',
                        'systemd_unit': legacy_unit_name,
                        'enabled': 'true' if enabled else 'false',
                        'state': state,
                    }
            elif is_fsid(i):
                fsid = str(i)  # convince mypy that fsid is a str here
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)):
                        (daemon_type, daemon_id) = j.split('.', 1)
                        unit_name = get_unit_name(fsid, daemon_type, daemon_id)
                        (enabled, state, _) = check_unit(self.ctx, unit_name)
                        daemons[j] = {
                            'style': 'cephadm:v1',
                            'systemd_unit': unit_name,
                            'enabled': 'true' if enabled else 'false',
                            'state': state,
                        }
                        c = CephContainer.for_daemon(self.ctx, self.ctx.fsid, daemon_type, daemon_id, 'bash')
                        container_id: Optional[str] = None
                        for name in (c.cname, c.old_cname):
                            if name in name_id_mapping:
                                container_id = name_id_mapping[name]
                                break
                        daemons[j]['container_id'] = container_id
                        if container_id:
                            daemons[j]['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len])
        return daemons

    def _parse_container_id_name(self, code: int, out: str) -> Dict[str, str]:
        """Map container names to ids from `ps` output (one 'id,name' line
        each); returns an empty mapping when the ps call failed."""
        name_id_mapping = {}  # type: Dict[str, str]
        if not code:
            for line in out.splitlines():
                # renamed from `id` to avoid shadowing the builtin
                container_id, name = line.split(',')
                name_id_mapping[name] = container_id
        return name_id_mapping

    def _get_ls(self) -> Tuple[List[Dict[str, str]], bool]:
        """Return (daemon ls data, state_changed).

        Uses _daemon_ls_subset() to decide whether the cached full ls is
        still valid; falls back to a full list_daemons() run when daemons
        appeared/disappeared or a container id changed.
        """
        if not self.cached_ls_values:
            logger.info('No cached ls output. Running full daemon ls')
            ls = list_daemons(self.ctx)
            for d in ls:
                self.cached_ls_values[d['name']] = d
            return (ls, True)
        else:
            ls_subset = self._daemon_ls_subset()
            need_full_ls = False
            state_change = False
            if set(self.cached_ls_values.keys()) != set(ls_subset.keys()):
                # case for a new daemon in ls or an old daemon no longer appearing.
                # If that happens we need a full ls
                logger.info('Change detected in state of daemons. Running full daemon ls')
                ls = list_daemons(self.ctx)
                for d in ls:
                    self.cached_ls_values[d['name']] = d
                return (ls, True)
            for daemon, info in self.cached_ls_values.items():
                if info['style'] == 'legacy':
                    # for legacy containers, ls_subset just grabs all the info
                    self.cached_ls_values[daemon] = ls_subset[daemon]
                else:
                    if info['container_id'] != ls_subset[daemon]['container_id']:
                        # case for container id having changed. We need full ls as
                        # info we didn't grab like version and start time could have changed
                        need_full_ls = True
                        break

                    # want to know if a daemons state change because in those cases we want
                    # to report back quicker
                    if (
                        self.cached_ls_values[daemon]['enabled'] != ls_subset[daemon]['enabled']
                        or self.cached_ls_values[daemon]['state'] != ls_subset[daemon]['state']
                    ):
                        state_change = True
                    # if we reach here, container id matched. Update the few values we do track
                    # from ls subset: state, enabled, memory_usage.
                    self.cached_ls_values[daemon]['enabled'] = ls_subset[daemon]['enabled']
                    self.cached_ls_values[daemon]['state'] = ls_subset[daemon]['state']
                    if 'memory_usage' in ls_subset[daemon]:
                        self.cached_ls_values[daemon]['memory_usage'] = ls_subset[daemon]['memory_usage']
            if need_full_ls:
                logger.info('Change detected in state of daemons. Running full daemon ls')
                ls = list_daemons(self.ctx)
                for d in ls:
                    self.cached_ls_values[d['name']] = d
                return (ls, True)
            else:
                ls = [info for daemon, info in self.cached_ls_values.items()]
                return (ls, state_change)
4464 | ||
4465 | ||
class AgentGatherer(Thread):
    """Background thread that repeatedly runs a data-gathering callable
    (e.g. daemon ls or ceph-volume inventory) on the agent's loop interval
    and wakes the agent whenever the gathered data changed or a newer mgr
    ack needs answering.
    """

    def __init__(self, agent: 'CephadmAgent', func: Callable, gatherer_type: str = 'Unnamed', initial_ack: int = 0) -> None:
        self.agent = agent
        # callable returning a (data, changed) tuple; swappable via update_func()
        self.func = func
        # label used only in log messages
        self.gatherer_type = gatherer_type
        self.ack = initial_ack
        self.event = Event()
        self.data: Any = None
        self.stop = False
        # ring buffer of the last 3 iteration durations, used to keep the
        # gather cadence close to the agent's loop_interval
        self.recent_iteration_run_times: List[float] = [0.0, 0.0, 0.0]
        self.recent_iteration_index: int = 0
        super(AgentGatherer, self).__init__(target=self.run)

    def run(self) -> None:
        """Gather loop: call self.func, publish its data, wake the agent on
        change or ack mismatch, then wait out the rest of the interval."""
        while not self.stop:
            try:
                start_time = time.monotonic()

                ack = self.agent.ack
                change = False
                try:
                    self.data, change = self.func()
                except Exception as e:
                    # publish None so stale data is never reported as fresh
                    logger.error(f'{self.gatherer_type} Gatherer encountered exception gathering data: {e}')
                    self.data = None
                if ack != self.ack or change:
                    self.ack = ack
                    self.agent.wakeup()

                end_time = time.monotonic()
                run_time = datetime.timedelta(seconds=(end_time - start_time))
                self.recent_iteration_run_times[self.recent_iteration_index] = run_time.total_seconds()
                self.recent_iteration_index = (self.recent_iteration_index + 1) % 3
                # NOTE(review): divides by the count of non-zero samples; if every
                # recorded duration were exactly 0.0 this raises ZeroDivisionError
                # (caught by the outer except) — confirm this is acceptable.
                run_time_average = sum(self.recent_iteration_run_times, 0.0) / len([t for t in self.recent_iteration_run_times if t])

                self.event.wait(max(self.agent.loop_interval - int(run_time_average), 0))
                self.event.clear()
            except Exception as e:
                logger.error(f'{self.gatherer_type} Gatherer encountered exception: {e}')

    def shutdown(self) -> None:
        """Ask the gather loop to exit after its current iteration."""
        self.stop = True

    def wakeup(self) -> None:
        """Interrupt the current wait so the next gather starts immediately."""
        self.event.set()

    def update_func(self, func: Callable) -> None:
        """Swap the gathering callable (e.g. to toggle enhanced device scan)."""
        self.func = func
4514 | ||
4515 | ||
def command_agent(ctx: CephadmContext) -> None:
    """Entry point for `cephadm agent`: run the agent for ctx.fsid/ctx.daemon_id.

    :raises Error: if the agent's daemon directory is missing, i.e. the agent
        was never deployed on this host.
    """
    agent = CephadmAgent(ctx, ctx.fsid, ctx.daemon_id)

    if not os.path.isdir(agent.daemon_dir):
        raise Error(f'Agent daemon directory {agent.daemon_dir} does not exist. Perhaps agent was never deployed?')

    # blocks for the lifetime of the agent
    agent.run()
4523 | ||
4524 | ||
9f95a23c TL |
4525 | ################################## |
4526 | ||
f6b5b4d7 | 4527 | |
9f95a23c | 4528 | @infer_image |
f67539c2 TL |
4529 | def command_version(ctx): |
4530 | # type: (CephadmContext) -> int | |
4531 | c = CephContainer(ctx, ctx.image, 'ceph', ['--version']) | |
4532 | out, err, ret = call(ctx, c.run_cmd(), desc=c.entrypoint) | |
4533 | if not ret: | |
4534 | print(out.strip()) | |
4535 | return ret | |
9f95a23c TL |
4536 | |
4537 | ################################## | |
4538 | ||
f6b5b4d7 | 4539 | |
33c7a0ef | 4540 | @default_image |
f67539c2 TL |
4541 | def command_pull(ctx): |
4542 | # type: (CephadmContext) -> int | |
f6b5b4d7 | 4543 | |
33c7a0ef TL |
4544 | try: |
4545 | _pull_image(ctx, ctx.image, ctx.insecure) | |
4546 | except UnauthorizedRegistryError: | |
4547 | err_str = 'Failed to pull container image. Check that host(s) are logged into the registry' | |
4548 | logger.debug(f'Pulling image for `command_pull` failed: {err_str}') | |
4549 | raise Error(err_str) | |
f67539c2 | 4550 | return command_inspect_image(ctx) |
9f95a23c | 4551 | |
f6b5b4d7 | 4552 | |
a4b75251 TL |
def _pull_image(ctx, image, insecure=False):
    # type: (CephadmContext, str, bool) -> None
    """Pull *image* with the configured container engine, retrying transient failures.

    :raises UnauthorizedRegistryError: on an authorization failure
    :raises Error: on any other persistent failure, or when retries run out
    """
    logger.info('Pulling container image %s...' % image)

    # stderr substrings that indicate a transient failure worth retrying
    transient_errors = [
        'error creating read-write layer with ID',
        'net/http: TLS handshake timeout',
        'Digest did not match, expected',
    ]

    cmd = [ctx.container_engine.path, 'pull', image]
    if isinstance(ctx.container_engine, Podman):
        if insecure:
            cmd.append('--tls-verify=false')

        if os.path.exists('/etc/ceph/podman-auth.json'):
            cmd.append('--authfile=/etc/ceph/podman-auth.json')
    cmd_str = ' '.join(cmd)

    # backoff schedule: sleep after each failed attempt, then give up
    for sleep_secs in (1, 4, 25):
        out, err, ret = call(ctx, cmd, verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
        if ret == 0:
            return

        if 'unauthorized' in err:
            raise UnauthorizedRegistryError()

        if not any(pattern in err for pattern in transient_errors):
            raise Error('Failed command: %s' % cmd_str)

        logger.info('`%s` failed transiently. Retrying. waiting %s seconds...' % (cmd_str, sleep_secs))
        time.sleep(sleep_secs)

    raise Error('Failed command: %s: maximum retries reached' % cmd_str)
f67539c2 | 4587 | |
9f95a23c TL |
4588 | ################################## |
4589 | ||
f6b5b4d7 | 4590 | |
9f95a23c | 4591 | @infer_image |
f67539c2 TL |
4592 | def command_inspect_image(ctx): |
4593 | # type: (CephadmContext) -> int | |
4594 | out, err, ret = call_throws(ctx, [ | |
4595 | ctx.container_engine.path, 'inspect', | |
cd265ab1 | 4596 | '--format', '{{.ID}},{{.RepoDigests}}', |
f67539c2 | 4597 | ctx.image]) |
9f95a23c TL |
4598 | if ret: |
4599 | return errno.ENOENT | |
f67539c2 | 4600 | info_from = get_image_info_from_inspect(out.strip(), ctx.image) |
f91f0fd5 | 4601 | |
f67539c2 | 4602 | ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip() |
f91f0fd5 TL |
4603 | info_from['ceph_version'] = ver |
4604 | ||
4605 | print(json.dumps(info_from, indent=4, sort_keys=True)) | |
4606 | return 0 | |
4607 | ||
4608 | ||
522d829b | 4609 | def normalize_image_digest(digest: str) -> str: |
20effc67 TL |
4610 | """ |
4611 | Normal case: | |
4612 | >>> normalize_image_digest('ceph/ceph', 'docker.io') | |
4613 | 'docker.io/ceph/ceph' | |
4614 | ||
4615 | No change: | |
4616 | >>> normalize_image_digest('quay.ceph.io/ceph/ceph', 'docker.io') | |
4617 | 'quay.ceph.io/ceph/ceph' | |
4618 | ||
4619 | >>> normalize_image_digest('docker.io/ubuntu', 'docker.io') | |
4620 | 'docker.io/ubuntu' | |
4621 | ||
4622 | >>> normalize_image_digest('localhost/ceph', 'docker.io') | |
4623 | 'localhost/ceph' | |
4624 | """ | |
4625 | known_shortnames = [ | |
4626 | 'ceph/ceph', | |
4627 | 'ceph/daemon', | |
4628 | 'ceph/daemon-base', | |
4629 | ] | |
4630 | for image in known_shortnames: | |
4631 | if digest.startswith(image): | |
4632 | return f'{DEFAULT_REGISTRY}/{digest}' | |
f67539c2 TL |
4633 | return digest |
4634 | ||
4635 | ||
f91f0fd5 | 4636 | def get_image_info_from_inspect(out, image): |
f67539c2 | 4637 | # type: (str, str) -> Dict[str, Union[str,List[str]]] |
f91f0fd5 TL |
4638 | image_id, digests = out.split(',', 1) |
4639 | if not out: | |
4640 | raise Error('inspect {}: empty result'.format(image)) | |
9f95a23c | 4641 | r = { |
f91f0fd5 | 4642 | 'image_id': normalize_container_id(image_id) |
f67539c2 | 4643 | } # type: Dict[str, Union[str,List[str]]] |
f91f0fd5 | 4644 | if digests: |
20effc67 | 4645 | r['repo_digests'] = list(map(normalize_image_digest, digests[1: -1].split(' '))) |
f91f0fd5 TL |
4646 | return r |
4647 | ||
9f95a23c TL |
4648 | ################################## |
4649 | ||
f91f0fd5 | 4650 | |
f67539c2 TL |
def check_subnet(subnets: str) -> Tuple[int, List[int], str]:
    """Determine whether the given string is a valid subnet

    :param subnets: subnet string, a single definition or comma separated list of CIDR subnets
    :returns: return code, IP version list of the subnets and msg describing any errors validation errors
    """
    rc = 0
    versions = set()
    errors = []
    for entry in subnets.split(','):
        # ensure the format of the string is as expected address/netmask
        cidr = entry.strip()
        if re.search(r'\/\d+$', cidr) is None:
            rc = 1
            errors.append(f'{cidr} is not in CIDR format (address/netmask)')
            continue
        try:
            versions.add(ipaddress.ip_network(cidr).version)
        except ValueError as e:
            rc = 1
            errors.append(f'{cidr} invalid: {str(e)}')

    return rc, list(versions), ', '.join(errors)
4677 | ||
4678 | ||
f6b5b4d7 TL |
def unwrap_ipv6(address):
    # type: (str) -> str
    """Strip the surrounding square brackets from a bracketed IPv6 address."""
    is_wrapped = address.startswith('[') and address.endswith(']')
    return address[1: -1] if is_wrapped else address
4684 | ||
4685 | ||
f91f0fd5 TL |
def wrap_ipv6(address):
    # type: (str) -> str
    """Wrap an IPv6 address in square brackets; return anything else unchanged.

    We cannot assume it's already wrapped or even an IPv6 address: hostnames
    and already-wrapped addresses fail to parse as an IP address, hit the
    ValueError branch, and fall through untouched.
    """
    try:
        parsed = ipaddress.ip_address(address)
    except ValueError:
        return address
    return f'[{address}]' if parsed.version == 6 else address
4699 | ||
4700 | ||
f6b5b4d7 TL |
def is_ipv6(address):
    # type: (str) -> bool
    """Return True if *address* (optionally bracket-wrapped) is a valid IPv6 address."""
    # accept the bracket-wrapped form by stripping the brackets first
    if address.startswith('[') and address.endswith(']'):
        address = address[1: -1]
    try:
        return ipaddress.ip_address(address).version == 6
    except ValueError:
        logger.warning('Address: {} is not a valid IP address'.format(address))
        return False
4709 | ||
4710 | ||
33c7a0ef TL |
def ip_in_subnets(ip_addr: str, subnets: str) -> bool:
    """Determine if the ip_addr belongs to any of the subnets list."""
    # strip brackets from a wrapped IPv6 address before parsing
    unwrapped = unwrap_ipv6(ip_addr) if is_ipv6(ip_addr) else ip_addr
    return any(
        ipaddress.ip_address(unwrapped) in ipaddress.ip_network(net.strip())
        for net in subnets.split(',')
    )
4719 | ||
4720 | ||
def parse_mon_addrv(addrv_arg: str) -> List[EndPoint]:
    """Parse mon-addrv param into a list of mon end points.

    :param addrv_arg: bracketed comma separated list, e.g.
        '[v2:1.2.3.4:3300,v1:1.2.3.4:6789]'
    :raises Error: if the value is empty/unbracketed or an entry lacks a port
    """
    port_re = re.compile(r':(\d+)$')
    addrv_args = []
    addr_arg = addrv_arg
    # guard empty input so we raise Error instead of IndexError below;
    # also fixes the typo 'backets' in the user-facing message
    if not addr_arg or addr_arg[0] != '[' or addr_arg[-1] != ']':
        raise Error(f'--mon-addrv value {addr_arg} must use square brackets')

    for addr in addr_arg[1: -1].split(','):
        hasport = port_re.findall(addr)
        if not hasport:
            raise Error(f'--mon-addrv value {addr_arg} must include port number')
        port_str = hasport[0]
        addr = re.sub(r'^v\d+:', '', addr)  # strip off v1: or v2: prefix
        base_ip = addr[0:-(len(port_str)) - 1]
        addrv_args.append(EndPoint(base_ip, int(port_str)))

    return addrv_args
4739 | ||
4740 | ||
def parse_mon_ip(mon_ip: str) -> List[EndPoint]:
    """Parse mon-ip param into a list of mon end points."""
    port_match = re.search(r':(\d+)$', mon_ip)
    if port_match:
        # explicit port: a single endpoint
        port_str = port_match.group(1)
        host_part = mon_ip[0:-(len(port_str)) - 1]
        return [EndPoint(host_part, int(port_str))]
    # No port provided: use fixed ports for ceph monitor
    return [EndPoint(mon_ip, 3300), EndPoint(mon_ip, 6789)]
4756 | ||
4757 | ||
def build_addrv_params(addrv: List[EndPoint]) -> str:
    """Convert mon end-points (ip:port) into the format: [v[1|2]:ip:port1]"""
    if len(addrv) > 2:
        raise Error('Detected a local mon-addrv list with more than 2 entries.')
    port_to_ver: Dict[int, str] = {6789: 'v1', 3300: 'v2'}
    parts: List[str] = []
    for ep in addrv:
        ver = port_to_ver.get(ep.port)
        if ver is None:
            ver = 'v2'  # default mon protocol version if port is not provided
            logger.warning(f'Using msgr2 protocol for unrecognized port {ep}')
        parts.append(f'{ver}:{ep.ip}:{ep.port}')

    return '[{0}]'.format(','.join(parts))
4774 | ||
4775 | ||
def get_public_net_from_cfg(ctx: CephadmContext) -> Optional[str]:
    """Get mon public network from configuration file.

    Validates that the configured CIDR(s) are well formed, that at least one
    is present on a local interface, and that the provided --mon-ip or
    --mon-addrv falls inside the configured network(s).

    :returns: the public_network string, or None when not set in the config
    :raises Error: on invalid CIDRs, no locally-configured network, or a mon
        IP outside of the configured network(s)
    """
    cp = read_config(ctx.config)
    if not cp.has_option('global', 'public_network'):
        return None

    # Ensure all public CIDR networks are valid
    public_network = cp.get('global', 'public_network')
    rc, _, err_msg = check_subnet(public_network)
    if rc:
        raise Error(f'Invalid public_network {public_network} parameter: {err_msg}')

    # Ensure all public CIDR networks are configured locally
    configured_subnets = set([x.strip() for x in public_network.split(',')])
    local_subnets = set([x[0] for x in list_networks(ctx).items()])
    valid_public_net = False
    for net in configured_subnets:
        if net in local_subnets:
            valid_public_net = True
        else:
            # only a warning: a single locally-present subnet is sufficient
            logger.warning(f'The public CIDR network {net} (from -c conf file) is not configured locally.')
    if not valid_public_net:
        raise Error(f'None of the public CIDR network(s) {configured_subnets} (from -c conf file) is configured locally.')

    # Ensure public_network is compatible with the provided mon-ip (or mon-addrv)
    if ctx.mon_ip:
        if not ip_in_subnets(ctx.mon_ip, public_network):
            raise Error(f'The provided --mon-ip {ctx.mon_ip} does not belong to any public_network(s) {public_network}')
    elif ctx.mon_addrv:
        addrv_args = parse_mon_addrv(ctx.mon_addrv)
        for addrv in addrv_args:
            if not ip_in_subnets(addrv.ip, public_network):
                raise Error(f'The provided --mon-addrv {addrv.ip} ip does not belong to any public_network(s) {public_network}')

    logger.debug(f'Using mon public network from configuration file {public_network}')
    return public_network
4812 | ||
4813 | ||
def infer_mon_network(ctx: CephadmContext, mon_eps: List[EndPoint]) -> Optional[str]:
    """Infer mon public network from local network."""
    # Make sure IP is configured locally, and then figure out the CIDR network
    mon_networks = []
    for net, ifaces in list_networks(ctx).items():
        # collect every address configured on any interface of this network
        local_ips: List[str] = []
        for addrs in ifaces.values():
            local_ips.extend([ipaddress.ip_address(ip) for ip in addrs])

        # check if any of mon ips belong to this net
        for mon_ep in mon_eps:
            try:
                if ipaddress.ip_address(unwrap_ipv6(mon_ep.ip)) in local_ips:
                    mon_networks.append(net)
                    logger.info(f'Mon IP `{mon_ep.ip}` is in CIDR network `{net}`')
            except ValueError as e:
                logger.warning(f'Cannot infer CIDR network for mon IP `{mon_ep.ip}` : {e}')

    if not mon_networks:
        raise Error('Cannot infer CIDR network. Pass --skip-mon-network to configure it later')
    logger.debug(f'Inferred mon public CIDR from local network configuration {mon_networks}')

    # de-duplicate before joining into a comma separated string
    return ','.join(set(mon_networks))
4840 | ||
4841 | ||
def prepare_mon_addresses(ctx: CephadmContext) -> Tuple[str, bool, Optional[str]]:
    """Get mon public network configuration.

    Resolves --mon-ip / --mon-addrv into a validated mon addrv string and,
    unless --skip-mon-network was given, the mon public CIDR network.

    :returns: (mon_addrv, is_ipv6, mon_network) tuple
    :raises Error: when neither --mon-ip nor --mon-addrv was supplied, or
        when validation of the addresses/network fails
    """
    ipv6 = False
    addrv_args: List[EndPoint] = []
    mon_addrv: str = ''  # i.e: [v2:192.168.100.1:3300,v1:192.168.100.1:6789]

    if ctx.mon_ip:
        ipv6 = is_ipv6(ctx.mon_ip)
        if ipv6:
            # NOTE: mutates ctx.mon_ip so later consumers see the wrapped form
            ctx.mon_ip = wrap_ipv6(ctx.mon_ip)
        addrv_args = parse_mon_ip(ctx.mon_ip)
        mon_addrv = build_addrv_params(addrv_args)
    elif ctx.mon_addrv:
        # more than one '[' means bracketed (IPv6) member addresses, not just
        # the surrounding addrv brackets
        ipv6 = ctx.mon_addrv.count('[') > 1
        addrv_args = parse_mon_addrv(ctx.mon_addrv)
        mon_addrv = ctx.mon_addrv
    else:
        raise Error('must specify --mon-ip or --mon-addrv')

    # verify each endpoint's ip:port is actually usable on this host
    if addrv_args:
        for end_point in addrv_args:
            check_ip_port(ctx, end_point)

    logger.debug(f'Base mon IP(s) is {addrv_args}, mon addrv is {mon_addrv}')
    mon_network = None
    if not ctx.skip_mon_network:
        # prefer the config file's public_network; fall back to inference
        mon_network = get_public_net_from_cfg(ctx) or infer_mon_network(ctx, addrv_args)

    return (mon_addrv, ipv6, mon_network)
9f95a23c | 4871 | |
f6b5b4d7 | 4872 | |
f67539c2 | 4873 | def prepare_cluster_network(ctx: CephadmContext) -> Tuple[str, bool]: |
f67539c2 TL |
4874 | # the cluster network may not exist on this node, so all we can do is |
4875 | # validate that the address given is valid ipv4 or ipv6 subnet | |
33c7a0ef TL |
4876 | ipv6_cluster_network = False |
4877 | cp = read_config(ctx.config) | |
4878 | cluster_network = ctx.cluster_network | |
4879 | if cluster_network is None and cp.has_option('global', 'cluster_network'): | |
4880 | cluster_network = cp.get('global', 'cluster_network') | |
4881 | ||
4882 | if cluster_network: | |
4883 | cluser_nets = set([x.strip() for x in cluster_network.split(',')]) | |
4884 | local_subnets = set([x[0] for x in list_networks(ctx).items()]) | |
4885 | for net in cluser_nets: | |
4886 | if net not in local_subnets: | |
4887 | logger.warning(f'The cluster CIDR network {net} is not configured locally.') | |
4888 | ||
4889 | rc, versions, err_msg = check_subnet(cluster_network) | |
f67539c2 TL |
4890 | if rc: |
4891 | raise Error(f'Invalid --cluster-network parameter: {err_msg}') | |
f67539c2 TL |
4892 | ipv6_cluster_network = True if 6 in versions else False |
4893 | else: | |
33c7a0ef | 4894 | logger.info('Internal network (--cluster-network) has not ' |
f67539c2 TL |
4895 | 'been provided, OSD replication will default to ' |
4896 | 'the public_network') | |
9f95a23c | 4897 | |
f67539c2 TL |
4898 | return cluster_network, ipv6_cluster_network |
4899 | ||
4900 | ||
def create_initial_keys(
    ctx: CephadmContext,
    uid: int, gid: int,
    mgr_id: str
) -> Tuple[str, str, str, Any, Any]:  # type: ignore
    """Generate mon/admin/mgr secrets and write bootstrap + admin keyrings.

    :returns: (mon_key, mgr_key, admin_key, bootstrap_keyring, admin_keyring)
        where the keyrings are temp-file objects chowned to uid:gid
    """
    _image = ctx.image

    # create some initial keys
    logger.info('Creating initial keys...')

    def gen_key() -> str:
        # run ceph-authtool in a container to generate one fresh secret
        return CephContainer(
            ctx,
            image=_image,
            entrypoint='/usr/bin/ceph-authtool',
            args=['--gen-print-key'],
        ).run().strip()

    mon_key = gen_key()
    admin_key = gen_key()
    mgr_key = gen_key()

    keyring = ('[mon.]\n'
               '\tkey = %s\n'
               '\tcaps mon = allow *\n'
               '[client.admin]\n'
               '\tkey = %s\n'
               '\tcaps mon = allow *\n'
               '\tcaps mds = allow *\n'
               '\tcaps mgr = allow *\n'
               '\tcaps osd = allow *\n'
               '[mgr.%s]\n'
               '\tkey = %s\n'
               '\tcaps mon = profile mgr\n'
               '\tcaps mds = allow *\n'
               '\tcaps osd = allow *\n'
               % (mon_key, admin_key, mgr_id, mgr_key))

    admin_keyring = write_tmp('[client.admin]\n'
                              '\tkey = ' + admin_key + '\n',
                              uid, gid)

    # tmp keyring file
    bootstrap_keyring = write_tmp(keyring, uid, gid)
    return (mon_key, mgr_key, admin_key,
            bootstrap_keyring, admin_keyring)
4954 | ||
9f95a23c | 4955 | |
f67539c2 TL |
def create_initial_monmap(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str,
    mon_id: str, mon_addr: str
) -> Any:
    """Build the initial monmap with monmaptool and chown it to uid:gid.

    :returns: the temporary file object holding the generated monmap
    """
    logger.info('Creating initial monmap...')
    # empty temp file that the container overwrites through the bind mount
    monmap = write_tmp('', 0, 0)
    monmaptool_args = [
        '--create',
        '--clobber',
        '--fsid', fsid,
        '--addv', mon_id, mon_addr,
        '/tmp/monmap'
    ]
    out = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/monmaptool',
        args=monmaptool_args,
        volume_mounts={monmap.name: '/tmp/monmap:z'},
    ).run()
    logger.debug(f'monmaptool for {mon_id} {mon_addr} on {out}')

    # pass monmap file to ceph user for use by ceph-mon --mkfs below
    os.fchown(monmap.fileno(), uid, gid)
    return monmap
9f95a23c | 4984 | |
f67539c2 TL |
4985 | |
def prepare_create_mon(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mon_id: str,
    bootstrap_keyring_path: str,
    monmap_path: str
) -> Tuple[str, str]:
    """Run `ceph-mon --mkfs` in a container to initialize the mon data dir.

    :param bootstrap_keyring_path: host path of the bootstrap keyring,
        mounted into the container at /tmp/keyring
    :param monmap_path: host path of the initial monmap, mounted at /tmp/monmap
    :returns: (mon_dir, log_dir) host paths for the new mon
    """
    logger.info('Creating mon...')
    create_daemon_dirs(ctx, fsid, 'mon', mon_id, uid, gid)
    mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', mon_id)
    log_dir = get_log_dir(fsid, ctx.log_dir)
    out = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/ceph-mon',
        args=[
            '--mkfs',
            '-i', mon_id,
            '--fsid', fsid,
            '-c', '/dev/null',  # config is supplied via monmap/keyring mounts
            '--monmap', '/tmp/monmap',
            '--keyring', '/tmp/keyring',
        ] + get_daemon_args(ctx, fsid, 'mon', mon_id),
        volume_mounts={
            log_dir: '/var/log/ceph:z',
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
            bootstrap_keyring_path: '/tmp/keyring:z',
            monmap_path: '/tmp/monmap:z',
        },
    ).run()
    logger.debug(f'create mon.{mon_id} on {out}')
    return (mon_dir, log_dir)
5018 | ||
5019 | ||
def create_mon(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mon_id: str
) -> None:
    """Deploy the bootstrap mon daemon (container plus systemd unit)."""
    mon_c = get_container(ctx, fsid, 'mon', mon_id)
    # metadata recorded alongside the deployed daemon
    ctx.meta_json = json.dumps({'service_name': 'mon'})
    deploy_daemon(ctx, fsid, 'mon', mon_id, mon_c, uid, gid,
                  config=None, keyring=None)
5029 | ||
9f95a23c | 5030 | |
f67539c2 TL |
def wait_for_mon(
    ctx: CephadmContext,
    mon_id: str, mon_dir: str,
    admin_keyring_path: str, config_path: str
) -> None:
    """Block until the newly created mon answers `ceph status`.

    :param admin_keyring_path: host path mounted as the admin keyring
    :param config_path: host path mounted as /etc/ceph/ceph.conf
    :raises Error: via is_available() when the mon never becomes responsive
    """
    logger.info('Waiting for mon to start...')
    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/ceph',
        args=[
            'status'],
        volume_mounts={
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
            admin_keyring_path: '/etc/ceph/ceph.client.admin.keyring:z',
            config_path: '/etc/ceph/ceph.conf:z',
        },
    )

    # wait for the service to become available
    def is_mon_available():
        # type: () -> bool
        # one probe: `ceph status` returning 0 means the mon is up
        timeout = ctx.timeout if ctx.timeout else 60  # seconds
        out, err, ret = call(ctx, c.run_cmd(),
                             desc=c.entrypoint,
                             timeout=timeout,
                             verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
        return ret == 0

    is_available(ctx, 'mon', is_mon_available)
5061 | ||
5062 | ||
def create_mgr(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mgr_id: str, mgr_key: str,
    config: str, clifunc: Callable
) -> None:
    """Deploy the bootstrap mgr daemon and wait for the mgrmap to report it available.

    :param mgr_key: cephx secret for mgr.<mgr_id>
    :param clifunc: callable that runs a ceph CLI command and returns stdout
    """
    logger.info('Creating mgr...')
    mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key)
    mgr_c = get_container(ctx, fsid, 'mgr', mgr_id)
    # Note:the default port used by the Prometheus node exporter is opened in fw
    ctx.meta_json = json.dumps({'service_name': 'mgr'})
    deploy_daemon(ctx, fsid, 'mgr', mgr_id, mgr_c, uid, gid,
                  config=config, keyring=mgr_keyring, ports=[9283])

    # wait for the service to become available
    logger.info('Waiting for mgr to start...')

    def is_mgr_available():
        # type: () -> bool
        # probe via `ceph status` and read mgrmap.available from the JSON
        timeout = ctx.timeout if ctx.timeout else 60  # seconds
        try:
            out = clifunc(['status', '-f', 'json-pretty'],
                          timeout=timeout,
                          verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
            j = json.loads(out)
            return j.get('mgrmap', {}).get('available', False)
        except Exception as e:
            logger.debug('status failed: %s' % e)
            return False
    is_available(ctx, 'mgr', is_mgr_available)
5093 | ||
5094 | ||
def prepare_ssh(
    ctx: CephadmContext,
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:
    """Configure cephadm SSH access, add this host and apply initial services.

    :param cli: callable that runs a ceph CLI command (supports extra_mounts)
    :param wait_for_mgr_restart: unused here; kept for signature parity with
        the other bootstrap helpers
    :raises Error: if adding the local host to the orchestrator fails
    """
    cli(['cephadm', 'set-user', ctx.ssh_user])

    if ctx.ssh_config:
        logger.info('Using provided ssh config...')
        mounts = {
            pathify(ctx.ssh_config.name): '/tmp/cephadm-ssh-config:z',
        }
        cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts=mounts)

    if ctx.ssh_private_key and ctx.ssh_public_key:
        logger.info('Using provided ssh keys...')
        mounts = {
            pathify(ctx.ssh_private_key.name): '/tmp/cephadm-ssh-key:z',
            pathify(ctx.ssh_public_key.name): '/tmp/cephadm-ssh-key.pub:z'
        }
        cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts)
        cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts=mounts)
        ssh_pub = cli(['cephadm', 'get-pub-key'])
    else:
        logger.info('Generating ssh key...')
        cli(['cephadm', 'generate-key'])
        ssh_pub = cli(['cephadm', 'get-pub-key'])
        # only generated keys are exported to the output file
        with open(ctx.output_pub_ssh_key, 'w') as f:
            f.write(ssh_pub)
        logger.info('Wrote public SSH key to %s' % ctx.output_pub_ssh_key)

    # allow the orchestrator to ssh into this host as ctx.ssh_user
    authorize_ssh_key(ssh_pub, ctx.ssh_user)

    host = get_hostname()
    logger.info('Adding host %s...' % host)
    try:
        args = ['orch', 'host', 'add', host]
        # pass the mon address so the orchestrator does not need DNS
        if ctx.mon_ip:
            args.append(unwrap_ipv6(ctx.mon_ip))
        elif ctx.mon_addrv:
            addrv_args = parse_mon_addrv(ctx.mon_addrv)
            args.append(unwrap_ipv6(addrv_args[0].ip))
        cli(args)
    except RuntimeError as e:
        raise Error('Failed to add host <%s>: %s' % (host, e))

    for t in ['mon', 'mgr']:
        if not ctx.orphan_initial_daemons:
            logger.info('Deploying %s service with default placement...' % t)
            cli(['orch', 'apply', t])
        else:
            logger.info('Deploying unmanaged %s service...' % t)
            cli(['orch', 'apply', t, '--unmanaged'])

    if not ctx.orphan_initial_daemons:
        logger.info('Deploying crash service with default placement...')
        cli(['orch', 'apply', 'crash'])

    if not ctx.skip_monitoring_stack:
        for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
            logger.info('Deploying %s service with default placement...' % t)
            cli(['orch', 'apply', t])

    if ctx.with_centralized_logging:
        for t in ['loki', 'promtail']:
            logger.info('Deploying %s service with default placement...' % t)
            cli(['orch', 'apply', t])
5162 | ||
f67539c2 TL |
5163 | |
def enable_cephadm_mgr_module(
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:
    """Enable the cephadm mgr module and select it as the orchestrator backend.

    :param cli: callable that runs a ceph CLI command
    :param wait_for_mgr_restart: callable that blocks until the mgr has
        restarted (enabling a module restarts the active mgr)
    """
    logger.info('Enabling cephadm module...')
    cli(['mgr', 'module', 'enable', 'cephadm'])
    wait_for_mgr_restart()
    logger.info('Setting orchestrator backend to cephadm...')
    cli(['orch', 'set', 'backend', 'cephadm'])
5173 | ||
5174 | ||
def prepare_dashboard(
    ctx: CephadmContext,
    uid: int, gid: int,
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:
    """Enable the dashboard module, configure its TLS cert and admin user.

    :param cli: callable that runs a ceph CLI command (supports extra_mounts)
    :param wait_for_mgr_restart: blocks until the mgr restarts after the
        module is enabled
    """
    # Configure SSL port (cephadm only allows to configure dashboard SSL port)
    # if the user does not want to use SSL he can change this setting once the cluster is up
    cli(['config', 'set', 'mgr', 'mgr/dashboard/ssl_server_port', str(ctx.ssl_dashboard_port)])

    # configuring dashboard parameters
    logger.info('Enabling the dashboard module...')
    cli(['mgr', 'module', 'enable', 'dashboard'])
    wait_for_mgr_restart()

    # dashboard crt and key
    if ctx.dashboard_key and ctx.dashboard_crt:
        logger.info('Using provided dashboard certificate...')
        mounts = {
            pathify(ctx.dashboard_crt.name): '/tmp/dashboard.crt:z',
            pathify(ctx.dashboard_key.name): '/tmp/dashboard.key:z'
        }
        cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts)
        cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts)
    else:
        logger.info('Generating a dashboard self-signed certificate...')
        cli(['dashboard', 'create-self-signed-cert'])

    logger.info('Creating initial admin user...')
    password = ctx.initial_dashboard_password or generate_password()
    # password is passed through a temp file mounted into the container,
    # never on the command line
    tmp_password_file = write_tmp(password, uid, gid)
    cmd = ['dashboard', 'ac-user-create', ctx.initial_dashboard_user, '-i', '/tmp/dashboard.pw', 'administrator', '--force-password']
    if not ctx.dashboard_password_noupdate:
        cmd.append('--pwd-update-required')
    cli(cmd, extra_mounts={pathify(tmp_password_file.name): '/tmp/dashboard.pw:z'})
    logger.info('Fetching dashboard port number...')
    out = cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
    port = int(out)

    # Open dashboard port
    if not ('skip_firewalld' in ctx and ctx.skip_firewalld):
        fw = Firewalld(ctx)
        fw.open_ports([port])
        fw.apply_rules()

    logger.info('Ceph Dashboard is now available at:\n\n'
                '\t URL: https://%s:%s/\n'
                '\t User: %s\n'
                '\tPassword: %s\n' % (
                    get_fqdn(), port,
                    ctx.initial_dashboard_user,
                    password))
5227 | ||
5228 | ||
def prepare_bootstrap_config(
    ctx: CephadmContext,
    fsid: str, mon_addr: str, image: str

) -> str:
    """Build the initial ceph.conf contents for bootstrap.

    Starts from the user-provided config (if any), injects the cluster
    identity and image, applies single-host / logging tweaks, and performs
    a registry login when registry credentials were given.

    :returns: the rendered config file contents as a string
    """
    cp = read_config(ctx.config)
    if not cp.has_section('global'):
        cp.add_section('global')
    cp.set('global', 'fsid', fsid)
    cp.set('global', 'mon_host', mon_addr)
    cp.set('global', 'container_image', image)

    if not cp.has_section('mon'):
        cp.add_section('mon')
    # only set the default when the user did not set either spelling
    if (
            not cp.has_option('mon', 'auth_allow_insecure_global_id_reclaim')
            and not cp.has_option('mon', 'auth allow insecure global id reclaim')
    ):
        cp.set('mon', 'auth_allow_insecure_global_id_reclaim', 'false')

    if ctx.single_host_defaults:
        logger.info('Adjusting default settings to suit single-host cluster...')
        # replicate across osds, not hosts
        if (
                not cp.has_option('global', 'osd_crush_chooseleaf_type')
                and not cp.has_option('global', 'osd crush chooseleaf type')
        ):
            cp.set('global', 'osd_crush_chooseleaf_type', '0')
        # replica 2x
        if (
                not cp.has_option('global', 'osd_pool_default_size')
                and not cp.has_option('global', 'osd pool default size')
        ):
            cp.set('global', 'osd_pool_default_size', '2')
        # disable mgr standby modules (so we can colocate multiple mgrs on one host)
        if not cp.has_section('mgr'):
            cp.add_section('mgr')
        if (
                not cp.has_option('mgr', 'mgr_standby_modules')
                and not cp.has_option('mgr', 'mgr standby modules')
        ):
            cp.set('mgr', 'mgr_standby_modules', 'false')
    if ctx.log_to_file:
        # route all logging to files instead of stderr/journald
        cp.set('global', 'log_to_file', 'true')
        cp.set('global', 'log_to_stderr', 'false')
        cp.set('global', 'log_to_journald', 'false')
        cp.set('global', 'mon_cluster_log_to_file', 'true')
        cp.set('global', 'mon_cluster_log_to_stderr', 'false')
        cp.set('global', 'mon_cluster_log_to_journald', 'false')

    cpf = StringIO()
    cp.write(cpf)
    config = cpf.getvalue()

    if ctx.registry_json or ctx.registry_url:
        command_registry_login(ctx)

    return config
5288 | ||
5289 | ||
def finish_bootstrap_config(
    ctx: CephadmContext,
    fsid: str,
    config: str,
    mon_id: str, mon_dir: str,
    mon_network: Optional[str], ipv6: bool,
    cli: Callable,
    cluster_network: Optional[str], ipv6_cluster_network: bool

) -> None:
    """Finalize the bootstrap config and write it to ctx.output_config.

    Unless --no-minimize-config was given, the initial config is assimilated
    into the mon, replaced with a generated minimal config, and the mon is
    restarted to pick it up.  Network-related options (public_network,
    cluster_network, ms_bind_ipv6) are then stored via the ``cli`` helper,
    and the final config text is written to ctx.output_config.
    """
    if not ctx.no_minimize_config:
        logger.info('Assimilating anything we can from ceph.conf...')
        cli([
            'config', 'assimilate-conf',
            '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
        ], {
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
        })
        logger.info('Generating new minimal ceph.conf...')
        cli([
            'config', 'generate-minimal-conf',
            '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
        ], {
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
        })
        # re-read our minimized config so we write the minimal version below
        with open(mon_dir + '/config', 'r') as f:
            config = f.read()
        logger.info('Restarting the monitor...')
        call_throws(ctx, [
            'systemctl',
            'restart',
            get_unit_name(fsid, 'mon', mon_id)
        ])
    elif 'image' in ctx and ctx.image:
        # we still want to assimilate the given container image if provided
        cli(['config', 'set', 'global', 'container_image', f'{ctx.image}'])

    if mon_network:
        logger.info(f'Setting mon public_network to {mon_network}')
        cli(['config', 'set', 'mon', 'public_network', mon_network])

    if cluster_network:
        logger.info(f'Setting cluster_network to {cluster_network}')
        cli(['config', 'set', 'global', 'cluster_network', cluster_network])

    if ipv6 or ipv6_cluster_network:
        logger.info('Enabling IPv6 (ms_bind_ipv6) binding')
        cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])

    with open(ctx.output_config, 'w') as f:
        f.write(config)
    logger.info('Wrote config to %s' % ctx.output_config)
5344 | ||
5345 | ||
a4b75251 TL |
5346 | # funcs to process spec file for apply spec |
5347 | def _parse_yaml_docs(f: Iterable[str]) -> List[List[str]]: | |
5348 | docs = [] | |
5349 | current_doc = [] # type: List[str] | |
5350 | for line in f: | |
33c7a0ef | 5351 | if re.search(r'^---\s+', line): |
a4b75251 TL |
5352 | if current_doc: |
5353 | docs.append(current_doc) | |
5354 | current_doc = [] | |
5355 | else: | |
5356 | current_doc.append(line.rstrip()) | |
5357 | if current_doc: | |
5358 | docs.append(current_doc) | |
5359 | return docs | |
5360 | ||
5361 | ||
5362 | def _parse_yaml_obj(doc: List[str]) -> Dict[str, str]: | |
5363 | # note: this only parses the first layer of yaml | |
5364 | obj = {} # type: Dict[str, str] | |
5365 | current_key = '' | |
5366 | for line in doc: | |
5367 | if line.startswith(' '): | |
5368 | obj[current_key] += line.strip() | |
5369 | elif line.endswith(':'): | |
5370 | current_key = line.strip(':') | |
5371 | obj[current_key] = '' | |
5372 | else: | |
5373 | current_key, val = line.split(':') | |
5374 | obj[current_key] = val.strip() | |
5375 | return obj | |
5376 | ||
5377 | ||
def parse_yaml_objs(f: Iterable[str]) -> List[Dict[str, str]]:
    """Parse every YAML document in *f* into a flat key/value dict."""
    return [_parse_yaml_obj(doc) for doc in _parse_yaml_docs(f)]
5383 | ||
5384 | ||
def _distribute_ssh_keys(ctx: CephadmContext, host_spec: Dict[str, str], bootstrap_hostname: str) -> int:
    # copy ssh key to hosts in host spec (used for apply spec)
    """Run ssh-copy-id to push the cluster pubkey to the host in *host_spec*.

    The bootstrap host itself is skipped.  Returns 1 when ssh-copy-id fails,
    0 otherwise.
    """
    key_path = CEPH_DEFAULT_PUBKEY
    if ctx.ssh_public_key:
        key_path = ctx.ssh_public_key.name

    if bootstrap_hostname == host_spec['hostname']:
        # nothing to do for the host we are bootstrapping on
        return 0

    # prefer an explicit address; fall back to the hostname
    addr = host_spec.get('addr', host_spec['hostname'])
    out, err, code = call(ctx, ['sudo', '-u', ctx.ssh_user, 'ssh-copy-id', '-f', '-i', key_path, '-o StrictHostKeyChecking=no', '%s@%s' % (ctx.ssh_user, addr)])
    if code:
        logger.info('\nCopying ssh key to host %s at address %s failed!\n' % (host_spec['hostname'], addr))
        return 1
    logger.info('Added ssh key to host %s at address %s\n' % (host_spec['hostname'], addr))
    return 0
5403 | ||
5404 | ||
33c7a0ef TL |
def save_cluster_config(ctx: CephadmContext, uid: int, gid: int, fsid: str) -> None:
    """Save cluster configuration to the per fsid directory """
    def _maybe_copy(src: str, dst: str) -> None:
        # src can be empty/unset; only copy when we actually have a path
        if src:
            shutil.copyfile(src, dst)

    conf_dir = f'{ctx.data_dir}/{fsid}/{CEPH_CONF_DIR}'
    makedirs(conf_dir, uid, gid, DATA_DIR_MODE)
    if not os.path.exists(conf_dir):
        logger.warning(f'Cannot create cluster configuration directory {conf_dir}')
        return

    logger.info(f'Saving cluster configuration to {conf_dir} directory')
    _maybe_copy(ctx.output_config, os.path.join(conf_dir, CEPH_CONF))
    _maybe_copy(ctx.output_keyring, os.path.join(conf_dir, CEPH_KEYRING))
    # ctx.output_pub_ssh_key may not exist if user has provided custom ssh keys
    if os.path.exists(ctx.output_pub_ssh_key):
        _maybe_copy(ctx.output_pub_ssh_key, os.path.join(conf_dir, CEPH_PUBKEY))
5422 | ||
5423 | ||
f67539c2 TL |
@default_image
def command_bootstrap(ctx):
    # type: (CephadmContext) -> int
    """Bootstrap a new Ceph cluster on this host.

    High-level flow, as implemented below: validate/prepare output paths and
    the host, pick fsid/mon_id/mgr_id, build the bootstrap config, pull and
    version-check the container image, create the first mon and mgr, then
    drive follow-up configuration (ssh, dashboard, registry credentials,
    optional spec apply) through the in-container ``cli`` helper.  Returns
    ctx.error_code (0, or -EINVAL if applying the spec failed).
    """

    ctx.error_code = 0

    # default output file locations unless the user overrode them
    if not ctx.output_config:
        ctx.output_config = os.path.join(ctx.output_dir, CEPH_CONF)
    if not ctx.output_keyring:
        ctx.output_keyring = os.path.join(ctx.output_dir, CEPH_KEYRING)
    if not ctx.output_pub_ssh_key:
        ctx.output_pub_ssh_key = os.path.join(ctx.output_dir, CEPH_PUBKEY)

    if bool(ctx.ssh_private_key) is not bool(ctx.ssh_public_key):
        raise Error('--ssh-private-key and --ssh-public-key must be provided together or not at all.')

    if ctx.fsid:
        data_dir_base = os.path.join(ctx.data_dir, ctx.fsid)
        if os.path.exists(data_dir_base):
            raise Error(f"A cluster with the same fsid '{ctx.fsid}' already exists.")
        else:
            logger.warning('Specifying an fsid for your cluster offers no advantages and may increase the likelihood of fsid conflicts.')

    # verify output files
    for f in [ctx.output_config, ctx.output_keyring,
              ctx.output_pub_ssh_key]:
        if not ctx.allow_overwrite:
            if os.path.exists(f):
                raise Error('%s already exists; delete or pass '
                            '--allow-overwrite to overwrite' % f)
        dirname = os.path.dirname(f)
        if dirname and not os.path.exists(dirname):
            fname = os.path.basename(f)
            logger.info(f'Creating directory {dirname} for {fname}')
            try:
                # use makedirs to create intermediate missing dirs
                os.makedirs(dirname, 0o755)
            except PermissionError:
                raise Error(f'Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.')

    # user-supplied config (if any); re-assimilated again after mgr creation
    (user_conf, _) = get_config_and_keyring(ctx)

    if ctx.ssh_user != 'root':
        check_ssh_connectivity(ctx)

    if not ctx.skip_prepare_host:
        command_prepare_host(ctx)
    else:
        logger.info('Skip prepare_host')

    # initial vars
    fsid = ctx.fsid or make_fsid()
    if not is_fsid(fsid):
        raise Error('not an fsid: %s' % fsid)
    logger.info('Cluster fsid: %s' % fsid)

    hostname = get_hostname()
    if '.' in hostname and not ctx.allow_fqdn_hostname:
        raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0]))
    mon_id = ctx.mon_id or hostname
    mgr_id = ctx.mgr_id or generate_service_id()

    lock = FileLock(ctx, fsid)
    lock.acquire()

    (addr_arg, ipv6, mon_network) = prepare_mon_addresses(ctx)
    cluster_network, ipv6_cluster_network = prepare_cluster_network(ctx)

    config = prepare_bootstrap_config(ctx, fsid, addr_arg, ctx.image)

    if not ctx.skip_pull:
        try:
            _pull_image(ctx, ctx.image)
        except UnauthorizedRegistryError:
            err_str = 'Failed to pull container image. Check that correct registry credentials are provided in bootstrap by --registry-url, --registry-username, --registry-password, or supply --registry-json with credentials'
            logger.debug(f'Pulling image for bootstrap on {hostname} failed: {err_str}')
            raise Error(err_str)

    image_ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
    logger.info(f'Ceph version: {image_ver}')

    if not ctx.allow_mismatched_release:
        # release name is the 5th token of `ceph --version` output
        image_release = image_ver.split()[4]
        if image_release not in \
                [DEFAULT_IMAGE_RELEASE, LATEST_STABLE_RELEASE]:
            raise Error(
                f'Container release {image_release} != cephadm release {DEFAULT_IMAGE_RELEASE};'
                ' please use matching version of cephadm (pass --allow-mismatched-release to continue anyway)'
            )

    logger.info('Extracting ceph user uid/gid from container image...')
    (uid, gid) = extract_uid_gid(ctx)

    # create some initial keys
    (mon_key, mgr_key, admin_key, bootstrap_keyring, admin_keyring) = create_initial_keys(ctx, uid, gid, mgr_id)

    monmap = create_initial_monmap(ctx, uid, gid, fsid, mon_id, addr_arg)
    (mon_dir, log_dir) = prepare_create_mon(ctx, uid, gid, fsid, mon_id,
                                            bootstrap_keyring.name, monmap.name)

    # write the bootstrap config into the mon data dir, owned by the ceph user
    with open(mon_dir + '/config', 'w') as f:
        os.fchown(f.fileno(), uid, gid)
        os.fchmod(f.fileno(), 0o600)
        f.write(config)

    make_var_run(ctx, fsid, uid, gid)
    create_mon(ctx, uid, gid, fsid, mon_id)

    # config to issue various CLI commands
    tmp_config = write_tmp(config, uid, gid)

    # a CLI helper to reduce our typing
    # NOTE: the shared {} default for extra_mounts is safe here because it is
    # only ever read, never mutated (entries are copied into `mounts`).
    def cli(cmd, extra_mounts={}, timeout=DEFAULT_TIMEOUT, verbosity=CallVerbosity.VERBOSE_ON_FAILURE):
        # type: (List[str], Dict[str, str], Optional[int], CallVerbosity) -> str
        mounts = {
            log_dir: '/var/log/ceph:z',
            admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
            tmp_config.name: '/etc/ceph/ceph.conf:z',
        }
        for k, v in extra_mounts.items():
            mounts[k] = v
        timeout = timeout or ctx.timeout
        return CephContainer(
            ctx,
            image=ctx.image,
            entrypoint='/usr/bin/ceph',
            args=cmd,
            volume_mounts=mounts,
        ).run(timeout=timeout, verbosity=verbosity)

    wait_for_mon(ctx, mon_id, mon_dir, admin_keyring.name, tmp_config.name)

    finish_bootstrap_config(ctx, fsid, config, mon_id, mon_dir,
                            mon_network, ipv6, cli,
                            cluster_network, ipv6_cluster_network)

    # output files
    with open(ctx.output_keyring, 'w') as f:
        os.fchmod(f.fileno(), 0o600)
        f.write('[client.admin]\n'
                '\tkey = ' + admin_key + '\n')
    logger.info('Wrote keyring to %s' % ctx.output_keyring)

    # create mgr
    create_mgr(ctx, uid, gid, fsid, mgr_id, mgr_key, config, cli)

    if user_conf:
        # user given config settings were already assimilated earlier
        # but if the given settings contained any attributes in
        # the mgr (e.g. mgr/cephadm/container_image_prometheus)
        # they don't seem to be stored if there isn't a mgr yet.
        # Since re-assimilating the same conf settings should be
        # idempotent we can just do it again here.
        with tempfile.NamedTemporaryFile(buffering=0) as tmp:
            tmp.write(user_conf.encode('utf-8'))
            cli(['config', 'assimilate-conf',
                 '-i', '/var/lib/ceph/user.conf'],
                {tmp.name: '/var/lib/ceph/user.conf:z'})

    # wait for mgr to restart (after enabling a module)
    def wait_for_mgr_restart() -> None:
        # first get latest mgrmap epoch from the mon. try newer 'mgr
        # stat' command first, then fall back to 'mgr dump' if
        # necessary
        try:
            j = json_loads_retry(lambda: cli(['mgr', 'stat'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR))
        except Exception:
            j = json_loads_retry(lambda: cli(['mgr', 'dump'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR))
        epoch = j['epoch']

        # wait for mgr to have it
        logger.info('Waiting for the mgr to restart...')

        def mgr_has_latest_epoch():
            # type: () -> bool
            try:
                out = cli(['tell', 'mgr', 'mgr_status'])
                j = json.loads(out)
                return j['mgrmap_epoch'] >= epoch
            except Exception as e:
                logger.debug('tell mgr mgr_status failed: %s' % e)
                return False
        is_available(ctx, 'mgr epoch %d' % epoch, mgr_has_latest_epoch)

    enable_cephadm_mgr_module(cli, wait_for_mgr_restart)

    # ssh
    if not ctx.skip_ssh:
        prepare_ssh(ctx, cli, wait_for_mgr_restart)

    if ctx.registry_url and ctx.registry_username and ctx.registry_password:
        # store registry credentials so the orchestrator can log in later
        registry_credentials = {'url': ctx.registry_url, 'username': ctx.registry_username, 'password': ctx.registry_password}
        cli(['config-key', 'set', 'mgr/cephadm/registry_credentials', json.dumps(registry_credentials)])

    cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(ctx.container_init), '--force'])

    if not ctx.skip_dashboard:
        prepare_dashboard(ctx, uid, gid, cli, wait_for_mgr_restart)

    if ctx.output_config == CEPH_DEFAULT_CONF and not ctx.skip_admin_label and not ctx.no_minimize_config:
        logger.info('Enabling client.admin keyring and conf on hosts with "admin" label')
        try:
            cli(['orch', 'client-keyring', 'set', 'client.admin', 'label:_admin'])
            cli(['orch', 'host', 'label', 'add', get_hostname(), '_admin'])
        except Exception:
            logger.info('Unable to set up "admin" label; assuming older version of Ceph')

    if ctx.apply_spec:
        logger.info('Applying %s to cluster' % ctx.apply_spec)
        # copy ssh key to hosts in spec file
        with open(ctx.apply_spec) as f:
            try:
                for spec in parse_yaml_objs(f):
                    if spec.get('service_type') == 'host':
                        _distribute_ssh_keys(ctx, spec, hostname)
            except ValueError:
                logger.info('Unable to parse %s succesfully' % ctx.apply_spec)

        mounts = {}
        mounts[pathify(ctx.apply_spec)] = '/tmp/spec.yml:ro'
        try:
            out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
            logger.info(out)
        except Exception:
            # record the failure but keep finishing bootstrap
            ctx.error_code = -errno.EINVAL
            logger.info('\nApplying %s to cluster failed!\n' % ctx.apply_spec)

    save_cluster_config(ctx, uid, gid, fsid)

    # enable autotune for osd_memory_target
    logger.info('Enabling autotune for osd_memory_target')
    cli(['config', 'set', 'osd', 'osd_memory_target_autotune', 'true'])

    # Notify the Dashboard to show the 'Expand cluster' page on first log in.
    cli(['config-key', 'set', 'mgr/dashboard/cluster/status', 'INSTALLED'])

    logger.info('You can access the Ceph CLI as following in case of multi-cluster or non-default config:\n\n'
                '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
                    sys.argv[0],
                    fsid,
                    ctx.output_config,
                    ctx.output_keyring))

    logger.info('Or, if you are only running a single cluster on this host:\n\n\tsudo %s shell \n' % (sys.argv[0]))

    logger.info('Please consider enabling telemetry to help improve Ceph:\n\n'
                '\tceph telemetry on\n\n'
                'For more information see:\n\n'
                '\thttps://docs.ceph.com/docs/master/mgr/telemetry/\n')
    logger.info('Bootstrap complete.')
    return ctx.error_code
9f95a23c TL |
5675 | |
5676 | ################################## | |
5677 | ||
f67539c2 | 5678 | |
def command_registry_login(ctx: CephadmContext) -> int:
    """Log in to a custom container registry.

    Credentials come either from a --registry-json file or from the
    --registry-url/--registry-username/--registry-password options; the
    resolved values are stored back on ``ctx``.  Raises Error when the
    supplied credentials are incomplete.
    """
    if ctx.registry_json:
        logger.info('Pulling custom registry login info from %s.' % ctx.registry_json)
        d = get_parm(ctx.registry_json)
        if not (d.get('url') and d.get('username') and d.get('password')):
            raise Error('json provided for custom registry login did not include all necessary fields. '
                        'Please setup json file as\n'
                        '{\n'
                        ' "url": "REGISTRY_URL",\n'
                        ' "username": "REGISTRY_USERNAME",\n'
                        ' "password": "REGISTRY_PASSWORD"\n'
                        '}\n')
        # propagate the json credentials onto ctx for later consumers
        ctx.registry_url = d.get('url')
        ctx.registry_username = d.get('username')
        ctx.registry_password = d.get('password')
        registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
        return 0

    if ctx.registry_url and ctx.registry_username and ctx.registry_password:
        registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
        return 0

    raise Error('Invalid custom registry arguments received. To login to a custom registry include '
                '--registry-url, --registry-username and --registry-password '
                'options or --registry-json option')
5703 | ||
f67539c2 | 5704 | |
def registry_login(ctx: CephadmContext, url: Optional[str], username: Optional[str], password: Optional[str]) -> None:
    """Run ``<engine> login`` against *url* with the given credentials.

    For podman the credentials are written to /etc/ceph/podman-auth.json,
    which is then restricted to mode 0600.  Raises Error (chained to the
    underlying exception) on any failure.
    """
    logger.info('Logging into custom registry.')
    try:
        engine = ctx.container_engine
        cmd = [engine.path, 'login',
               '-u', username, '-p', password,
               url]
        if isinstance(engine, Podman):
            cmd.append('--authfile=/etc/ceph/podman-auth.json')
        out, _, _ = call_throws(ctx, cmd)
        if isinstance(engine, Podman):
            # the auth file contains the password; keep it owner-only
            os.chmod('/etc/ceph/podman-auth.json', 0o600)
    except Exception as e:
        # report the url/username actually used for this attempt (they may
        # differ from ctx.registry_* if a caller passed explicit values),
        # and chain the original exception for debuggability
        raise Error('Failed to login to custom registry @ %s as %s with given password' % (url, username)) from e
f6b5b4d7 TL |
5719 | |
5720 | ################################## | |
5721 | ||
5722 | ||
f67539c2 TL |
def extract_uid_gid_monitoring(ctx, daemon_type):
    # type: (CephadmContext, str) -> Tuple[int, int]
    """Return the (uid, gid) to run a monitoring daemon's container as.

    node-exporter uses the fixed nobody ids; the others are probed from a
    well-known path inside the daemon's container image.
    """
    if daemon_type == 'node-exporter':
        # fixed 'nobody' uid/gid
        return 65534, 65534

    probe_paths = {
        'prometheus': '/etc/prometheus',
        'grafana': '/var/lib/grafana',
        'loki': '/etc/loki',
        'promtail': '/etc/promtail',
        'alertmanager': ['/etc/alertmanager', '/etc/prometheus'],
    }  # type: Dict[str, Union[str, List[str]]]
    if daemon_type not in probe_paths:
        raise Error('{} not implemented yet'.format(daemon_type))
    uid, gid = extract_uid_gid(ctx, file_path=probe_paths[daemon_type])
    return uid, gid
5741 | ||
5742 | ||
2a845540 TL |
def get_deployment_container(ctx: CephadmContext,
                             fsid: str, daemon_type: str, daemon_id: Union[int, str],
                             privileged: bool = False,
                             ptrace: bool = False,
                             container_args: Optional[List[str]] = None) -> 'CephContainer':
    # wrapper for get_container specifically for containers made during the `cephadm deploy`
    # command. Adds some extra things such as extra container args and custom config files
    ctr = get_container(ctx, fsid, daemon_type, daemon_id, privileged, ptrace, container_args)
    if 'extra_container_args' in ctx and ctx.extra_container_args:
        ctr.container_args.extend(ctx.extra_container_args)
    if 'config_json' in ctx and ctx.config_json:
        mandatory_keys = ('mount_path', 'content')
        for conf in get_custom_config_files(ctx.config_json)['custom_config_files']:
            # skip entries missing either required key
            if any(k not in conf for k in mandatory_keys):
                continue
            mount_path = conf['mount_path']
            # mount the per-daemon copy of the file at the requested path
            host_path = os.path.join(
                ctx.data_dir,
                fsid,
                'custom_config_files',
                f'{daemon_type}.{daemon_id}',
                os.path.basename(mount_path)
            )
            ctr.volume_mounts[host_path] = mount_path
    return ctr
5768 | ||
5769 | ||
@default_image
def command_deploy(ctx):
    # type: (CephadmContext) -> None
    """Deploy, redeploy or reconfigure a single daemon on this host.

    ctx.name encodes the target as '<daemon_type>.<daemon_id>'.  After
    deciding whether this is a fresh deploy, a redeploy (unit/container
    already running) or a reconfig (--reconfig), the per-daemon-type branch
    below resolves the uid/gid, config/keyring and ports for the daemon and
    hands everything to deploy_daemon().
    """
    daemon_type, daemon_id = ctx.name.split('.', 1)

    # serialize against other cephadm operations on this fsid
    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    if daemon_type not in get_supported_daemons():
        raise Error('daemon type %s not recognized' % daemon_type)

    redeploy = False
    unit_name = get_unit_name(ctx.fsid, daemon_type, daemon_id)
    (_, state, _) = check_unit(ctx, unit_name)
    if state == 'running' or is_container_running(ctx, CephContainer.for_daemon(ctx, ctx.fsid, daemon_type, daemon_id, 'bash')):
        redeploy = True

    if ctx.reconfig:
        logger.info('%s daemon %s ...' % ('Reconfig', ctx.name))
    elif redeploy:
        logger.info('%s daemon %s ...' % ('Redeploy', ctx.name))
    else:
        logger.info('%s daemon %s ...' % ('Deploy', ctx.name))

    # Migrate sysctl conf files from /usr/lib to /etc
    migrate_sysctl_dir(ctx, ctx.fsid)

    # Get and check ports explicitly required to be opened
    daemon_ports = []  # type: List[int]

    # only check port in use if not reconfig or redeploy since service
    # we are redeploying/reconfiguring will already be using the port
    if not ctx.reconfig and not redeploy:
        if ctx.tcp_ports:
            daemon_ports = list(map(int, ctx.tcp_ports.split()))

    if daemon_type in Ceph.daemons:
        # core ceph daemons: uid/gid from the image, config+keyring required
        config, keyring = get_config_and_keyring(ctx)
        uid, gid = extract_uid_gid(ctx)
        make_var_run(ctx, ctx.fsid, uid, gid)

        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id,
                                     ptrace=ctx.allow_ptrace)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      osd_fsid=ctx.osd_fsid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type in Monitoring.components:
        # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
        # Default Checks
        # make sure provided config-json is sufficient
        config = get_parm(ctx.config_json)  # type: ignore
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())
        required_args = Monitoring.components[daemon_type].get('config-json-args', list())
        if required_files:
            if not config or not all(c in config.get('files', {}).keys() for c in required_files):  # type: ignore
                raise Error('{} deployment requires config-json which must '
                            'contain file content for {}'.format(daemon_type.capitalize(), ', '.join(required_files)))
        if required_args:
            if not config or not all(c in config.keys() for c in required_args):  # type: ignore
                raise Error('{} deployment requires config-json which must '
                            'contain arg for {}'.format(daemon_type.capitalize(), ', '.join(required_args)))

        uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == NFSGanesha.daemon_type:
        # default to ganesha's well-known ports unless explicit ones were given
        if not ctx.reconfig and not redeploy and not daemon_ports:
            daemon_ports = list(NFSGanesha.port_map.values())

        config, keyring = get_config_and_keyring(ctx)
        # TODO: extract ganesha uid/gid (997, 994) ?
        uid, gid = extract_uid_gid(ctx)
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CephIscsi.daemon_type:
        config, keyring = get_config_and_keyring(ctx)
        uid, gid = extract_uid_gid(ctx)
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == HAproxy.daemon_type:
        # uid/gid probed from the haproxy image
        haproxy = HAproxy.init(ctx, ctx.fsid, daemon_id)
        uid, gid = haproxy.extract_uid_gid_haproxy()
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == Keepalived.daemon_type:
        # uid/gid probed from the keepalived image
        keepalived = Keepalived.init(ctx, ctx.fsid, daemon_id)
        uid, gid = keepalived.extract_uid_gid_keepalived()
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CustomContainer.daemon_type:
        # user-defined container: uid/gid/ports come from the container spec
        cc = CustomContainer.init(ctx, ctx.fsid, daemon_id)
        if not ctx.reconfig and not redeploy:
            daemon_ports.extend(cc.ports)
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id,
                                     privileged=cc.privileged,
                                     ptrace=ctx.allow_ptrace)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c,
                      uid=cc.uid, gid=cc.gid, config=None,
                      keyring=None, reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CephadmAgent.daemon_type:
        # get current user gid and uid
        uid = os.getuid()
        gid = os.getgid()
        # the agent runs directly on the host, so no container is passed
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, None,
                      uid, gid, ports=daemon_ports)

    elif daemon_type == SNMPGateway.daemon_type:
        sc = SNMPGateway.init(ctx, ctx.fsid, daemon_id)
        c = get_deployment_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c,
                      sc.uid, sc.gid,
                      ports=daemon_ports)

    else:
        raise Error('daemon type {} not implemented in command_deploy function'
                    .format(daemon_type))
9f95a23c TL |
5908 | |
5909 | ################################## | |
5910 | ||
f6b5b4d7 | 5911 | |
@infer_image
def command_run(ctx):
    # type: (CephadmContext) -> int
    """Run the named daemon's container in the foreground, honoring ctx.timeout."""
    daemon_type, daemon_id = ctx.name.split('.', 1)
    container = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
    return call_timeout(ctx, container.run_cmd(), ctx.timeout)
9f95a23c TL |
5919 | |
5920 | ################################## | |
5921 | ||
f6b5b4d7 | 5922 | |
@infer_fsid
@infer_config
@infer_image
@validate_fsid
def command_shell(ctx):
    # type: (CephadmContext) -> int
    """Spawn an interactive shell in a new ceph container.

    Config, keyring, daemon data dirs, a persistent /root home and any
    user-requested mounts/volumes are bind-mounted in so the ceph CLI
    works inside the container.
    """
    cp = read_config(ctx.config)
    # refuse to proceed if the ceph.conf we found belongs to another cluster
    if cp.has_option('global', 'fsid') and \
       cp.get('global', 'fsid') != ctx.fsid:
        raise Error('fsid does not match ceph.conf')

    # work out which daemon's mount set to borrow for the shell
    if ctx.name:
        if '.' in ctx.name:
            (daemon_type, daemon_id) = ctx.name.split('.', 1)
        else:
            daemon_type = ctx.name
            daemon_id = None
    else:
        daemon_type = 'osd'  # get the most mounts
        daemon_id = None

    if ctx.fsid and daemon_type in Ceph.daemons:
        make_log_dir(ctx, ctx.fsid)

    if daemon_id and not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    # in case a dedicated keyring for the specified fsid is found we use it.
    # Otherwise, use /etc/ceph files by default, if present.  We do this instead of
    # making these defaults in the arg parser because we don't want an error
    # if they don't exist.
    if not ctx.keyring:
        keyring_file = f'{ctx.data_dir}/{ctx.fsid}/{CEPH_CONF_DIR}/{CEPH_KEYRING}'
        if os.path.exists(keyring_file):
            ctx.keyring = keyring_file
        elif os.path.exists(CEPH_DEFAULT_KEYRING):
            ctx.keyring = CEPH_DEFAULT_KEYRING

    container_args: List[str] = ['-i']
    mounts = get_container_mounts(ctx, ctx.fsid, daemon_type, daemon_id,
                                  no_config=True if ctx.config else False)
    binds = get_container_binds(ctx, ctx.fsid, daemon_type, daemon_id)
    if ctx.config:
        mounts[pathify(ctx.config)] = '/etc/ceph/ceph.conf:z'
    if ctx.keyring:
        mounts[pathify(ctx.keyring)] = '/etc/ceph/ceph.keyring:z'
    if ctx.mount:
        # each --mount is 'src[:dst[:options]]'; default dst is /mnt/<basename>
        for _mount in ctx.mount:
            split_src_dst = _mount.split(':')
            mount = pathify(split_src_dst[0])
            filename = os.path.basename(split_src_dst[0])
            if len(split_src_dst) > 1:
                dst = split_src_dst[1]
                if len(split_src_dst) == 3:
                    dst = '{}:{}'.format(dst, split_src_dst[2])
                mounts[mount] = dst
            else:
                mounts[mount] = '/mnt/{}'.format(filename)
    if ctx.command:
        command = ctx.command
    else:
        command = ['bash']
    container_args += [
        '-t',
        '-e', 'LANG=C',
        '-e', 'PS1=%s' % CUSTOM_PS1,
    ]
    if ctx.fsid:
        # give the shell a persistent home dir, seeded from /etc/skel
        home = os.path.join(ctx.data_dir, ctx.fsid, 'home')
        if not os.path.exists(home):
            logger.debug('Creating root home at %s' % home)
            makedirs(home, 0, 0, 0o660)
        if os.path.exists('/etc/skel'):
            for f in os.listdir('/etc/skel'):
                if f.startswith('.bash'):
                    shutil.copyfile(os.path.join('/etc/skel', f),
                                    os.path.join(home, f))
        mounts[home] = '/root'

    # additional raw --volume src:dst mappings
    for i in ctx.volume:
        a, b = i.split(':', 1)
        mounts[a] = b

    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='doesnotmatter',
        args=[],
        container_args=container_args,
        volume_mounts=mounts,
        bind_mounts=binds,
        envs=ctx.env,
        privileged=True)
    command = c.shell_cmd(command)

    return call_timeout(ctx, command, ctx.timeout)
9f95a23c TL |
6019 | |
6020 | ################################## | |
6021 | ||
f6b5b4d7 | 6022 | |
@infer_fsid
def command_enter(ctx):
    # type: (CephadmContext) -> int
    """Exec an interactive shell inside an already-running daemon container."""
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')
    daemon_type, daemon_id = ctx.name.split('.', 1)
    command = ctx.command if ctx.command else ['sh']
    container_args = [
        '-i',
        '-t',
        '-e', 'LANG=C',
        '-e', 'PS1=%s' % CUSTOM_PS1,
    ]  # type: List[str]
    ctr = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='doesnotmatter',
        container_args=container_args,
        cname='ceph-%s-%s.%s' % (ctx.fsid, daemon_type, daemon_id),
    )
    return call_timeout(ctx, ctr.exec_cmd(command), ctx.timeout)
9f95a23c TL |
6048 | |
6049 | ################################## | |
6050 | ||
f6b5b4d7 | 6051 | |
9f95a23c TL |
@infer_fsid
@infer_image
@validate_fsid
def command_ceph_volume(ctx):
    # type: (CephadmContext) -> None
    """Run an arbitrary ceph-volume command inside a container.

    Writes config/keyring to temp files mounted into the container and
    prints ceph-volume's stdout on success.
    """
    cp = read_config(ctx.config)
    # refuse to proceed if the ceph.conf we found belongs to another cluster
    if cp.has_option('global', 'fsid') and \
       cp.get('global', 'fsid') != ctx.fsid:
        raise Error('fsid does not match ceph.conf')

    if ctx.fsid:
        make_log_dir(ctx, ctx.fsid)

        # serialize against other cephadm operations on this cluster
        lock = FileLock(ctx, ctx.fsid)
        lock.acquire()

    (uid, gid) = (0, 0)  # ceph-volume runs as root
    mounts = get_container_mounts(ctx, ctx.fsid, 'osd', None)

    tmp_config = None
    tmp_keyring = None

    (config, keyring) = get_config_and_keyring(ctx)

    if config:
        # tmp config file; NamedTemporaryFile keeps it alive until we return
        tmp_config = write_tmp(config, uid, gid)
        mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'

    if keyring:
        # tmp keyring file
        tmp_keyring = write_tmp(keyring, uid, gid)
        mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'

    c = get_ceph_volume_container(
        ctx,
        envs=ctx.env,
        args=ctx.command,
        volume_mounts=mounts,
    )

    out, err, code = call_throws(ctx, c.run_cmd(), verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
    if not code:
        print(out)
6096 | ||
6097 | ################################## | |
6098 | ||
f6b5b4d7 | 6099 | |
@infer_fsid
def command_unit(ctx):
    # type: (CephadmContext) -> int
    """Run `systemctl <command>` against the named daemon's unit."""
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    _, _, rc = call(
        ctx,
        ['systemctl', ctx.command,
         get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)],
        verbosity=CallVerbosity.VERBOSE,
        desc='')
    return rc
9f95a23c TL |
6115 | |
6116 | ################################## | |
6117 | ||
f6b5b4d7 | 6118 | |
@infer_fsid
def command_logs(ctx):
    # type: (CephadmContext) -> None
    """Show journald logs for a daemon via journalctl."""
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    cmd = [find_program('journalctl'), '-u', unit_name]
    if ctx.command:
        cmd += ctx.command

    # call journalctl directly, without our call() wrapper, so that we get
    # an unmolested stdout with no logger prefixing.
    logger.debug('Running command: %s' % ' '.join(cmd))
    subprocess.call(cmd, env=os.environ.copy())  # type: ignore
9f95a23c TL |
6136 | |
6137 | ################################## | |
6138 | ||
f6b5b4d7 | 6139 | |
def list_networks(ctx):
    # type: (CephadmContext) -> Dict[str,Dict[str, Set[str]]]
    """Return {network_cidr: {interface: {ip, ...}}} for IPv4 and IPv6.

    sadly, 18.04's iproute2 4.15.0-2ubun doesn't support the -j flag,
    so the helpers parse plain `ip` output with regexes instead of JSON.
    """
    networks = _list_ipv4_networks(ctx)
    networks.update(_list_ipv6_networks(ctx))
    return networks
6152 | ||
6153 | ||
def _list_ipv4_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]:
    """List IPv4 networks by parsing `ip route ls` output."""
    ip_cmd: Optional[str] = find_executable('ip')
    if not ip_cmd:
        raise FileNotFoundError("unable to find 'ip' command")
    out, _, _ = call_throws(ctx, [ip_cmd, 'route', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
    return _parse_ipv4_route(out)
6160 | ||
9f95a23c | 6161 | |
522d829b TL |
6162 | def _parse_ipv4_route(out: str) -> Dict[str, Dict[str, Set[str]]]: |
6163 | r = {} # type: Dict[str, Dict[str, Set[str]]] | |
33c7a0ef | 6164 | p = re.compile(r'^(\S+) (?:via \S+)? ?dev (\S+) (.*)scope link (.*)src (\S+)') |
9f95a23c TL |
6165 | for line in out.splitlines(): |
6166 | m = p.findall(line) | |
6167 | if not m: | |
6168 | continue | |
6169 | net = m[0][0] | |
33c7a0ef TL |
6170 | if '/' not in net: # aggregate /32 mask for single host sub-networks |
6171 | net += '/32' | |
f67539c2 TL |
6172 | iface = m[0][1] |
6173 | ip = m[0][4] | |
9f95a23c | 6174 | if net not in r: |
f67539c2 TL |
6175 | r[net] = {} |
6176 | if iface not in r[net]: | |
522d829b TL |
6177 | r[net][iface] = set() |
6178 | r[net][iface].add(ip) | |
9f95a23c TL |
6179 | return r |
6180 | ||
f6b5b4d7 | 6181 | |
def _list_ipv6_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]:
    """List IPv6 networks by parsing `ip -6 route ls` and `ip -6 addr ls`."""
    ip_cmd: Optional[str] = find_executable('ip')
    if not ip_cmd:
        raise FileNotFoundError("unable to find 'ip' command")
    routes, _, _ = call_throws(ctx, [ip_cmd, '-6', 'route', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
    ips, _, _ = call_throws(ctx, [ip_cmd, '-6', 'addr', 'ls'], verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
    return _parse_ipv6_route(routes, ips)
6189 | ||
6190 | ||
522d829b TL |
6191 | def _parse_ipv6_route(routes: str, ips: str) -> Dict[str, Dict[str, Set[str]]]: |
6192 | r = {} # type: Dict[str, Dict[str, Set[str]]] | |
f6b5b4d7 TL |
6193 | route_p = re.compile(r'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$') |
6194 | ip_p = re.compile(r'^\s+inet6 (\S+)/(.*)scope (.*)$') | |
f67539c2 | 6195 | iface_p = re.compile(r'^(\d+): (\S+): (.*)$') |
f6b5b4d7 TL |
6196 | for line in routes.splitlines(): |
6197 | m = route_p.findall(line) | |
6198 | if not m or m[0][0].lower() == 'default': | |
6199 | continue | |
6200 | net = m[0][0] | |
33c7a0ef TL |
6201 | if '/' not in net: # aggregate /128 mask for single host sub-networks |
6202 | net += '/128' | |
f67539c2 | 6203 | iface = m[0][1] |
33c7a0ef TL |
6204 | if iface == 'lo': # skip loopback devices |
6205 | continue | |
f6b5b4d7 | 6206 | if net not in r: |
f67539c2 TL |
6207 | r[net] = {} |
6208 | if iface not in r[net]: | |
522d829b | 6209 | r[net][iface] = set() |
f6b5b4d7 | 6210 | |
f67539c2 | 6211 | iface = None |
f6b5b4d7 TL |
6212 | for line in ips.splitlines(): |
6213 | m = ip_p.findall(line) | |
6214 | if not m: | |
f67539c2 TL |
6215 | m = iface_p.findall(line) |
6216 | if m: | |
6217 | # drop @... suffix, if present | |
6218 | iface = m[0][1].split('@')[0] | |
f6b5b4d7 TL |
6219 | continue |
6220 | ip = m[0][0] | |
6221 | # find the network it belongs to | |
6222 | net = [n for n in r.keys() | |
f67539c2 | 6223 | if ipaddress.ip_address(ip) in ipaddress.ip_network(n)] |
20effc67 | 6224 | if net and iface in r[net[0]]: |
2a845540 | 6225 | assert iface |
522d829b | 6226 | r[net[0]][iface].add(ip) |
f6b5b4d7 TL |
6227 | |
6228 | return r | |
6229 | ||
6230 | ||
f67539c2 TL |
def command_list_networks(ctx):
    # type: (CephadmContext) -> None
    """Print the host's networks/interfaces/IPs as JSON."""
    networks = list_networks(ctx)

    def _jsonable(obj: Any) -> Any:
        # sets are not JSON-serializable; emit them as lists
        if isinstance(obj, set):
            return list(obj)
        return obj

    print(json.dumps(networks, indent=4, default=_jsonable))
9f95a23c TL |
6239 | |
6240 | ################################## | |
6241 | ||
f6b5b4d7 | 6242 | |
f67539c2 TL |
def command_ls(ctx):
    # type: (CephadmContext) -> None
    """Print every daemon found on this host as JSON."""
    daemons = list_daemons(ctx, detail=not ctx.no_detail,
                           legacy_dir=ctx.legacy_dir)
    print(json.dumps(daemons, indent=4))
6248 | ||
f6b5b4d7 | 6249 | |
f67539c2 TL |
def with_units_to_int(v: str) -> int:
    """Convert a human-readable size such as '123MiB', '4.5GB' or '100' to bytes.

    An optional 'B'/'iB' suffix is stripped, then an optional K/M/G/T
    multiplier (case insensitive) is applied.

    Raises:
        ValueError: for empty or otherwise unparseable input.  (Previously an
            empty string raised IndexError from ``v[-1]``, which callers such
            as ``_parse_mem_usage`` that catch ValueError did not handle.)
    """
    v = v.strip()
    if v.endswith('iB'):
        v = v[:-2]
    elif v.endswith('B'):
        v = v[:-1]
    if not v:
        raise ValueError('empty size value')
    multipliers = {
        'K': 1024,
        'M': 1024 ** 2,
        'G': 1024 ** 3,
        'T': 1024 ** 4,
    }
    mult = multipliers.get(v[-1].upper(), 1)
    if mult != 1:
        v = v[:-1]
    return int(float(v) * mult)
6269 | ||
6270 | ||
def list_daemons(ctx, detail=True, legacy_dir=None):
    # type: (CephadmContext, bool, Optional[str]) -> List[Dict[str, str]]
    """Enumerate all ceph daemons on this host, legacy and cephadm-style.

    Scans the data dir for both legacy ('<type>/<cluster>-<id>') and
    cephadm ('<fsid>/<type>.<id>') layouts.  With detail=True, also
    gathers systemd state, container/image info, software versions and
    memory/cpu usage for each daemon.
    """
    host_version: Optional[str] = None
    ls = []
    container_path = ctx.container_engine.path

    data_dir = ctx.data_dir
    if legacy_dir is not None:
        data_dir = os.path.abspath(legacy_dir + data_dir)

    # keep track of ceph versions we see
    seen_versions = {}  # type: Dict[str, Optional[str]]

    # keep track of image digests
    seen_digests = {}  # type: Dict[str, List[str]]

    # keep track of memory and cpu usage we've seen
    seen_memusage = {}  # type: Dict[str, int]
    seen_cpuperc = {}  # type: Dict[str, str]
    out, err, code = call(
        ctx,
        [container_path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
        verbosity=CallVerbosity.QUIET
    )
    seen_memusage_cid_len, seen_memusage = _parse_mem_usage(code, out)

    out, err, code = call(
        ctx,
        [container_path, 'stats', '--format', '{{.ID}},{{.CPUPerc}}', '--no-stream'],
        verbosity=CallVerbosity.QUIET
    )
    seen_cpuperc_cid_len, seen_cpuperc = _parse_cpu_perc(code, out)

    # /var/lib/ceph
    if os.path.exists(data_dir):
        for i in os.listdir(data_dir):
            if i in ['mon', 'osd', 'mds', 'mgr']:
                # legacy layout: /var/lib/ceph/<type>/<cluster>-<id>
                daemon_type = i
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '-' not in j:
                        continue
                    (cluster, daemon_id) = j.split('-', 1)
                    fsid = get_legacy_daemon_fsid(ctx,
                                                  cluster, daemon_type, daemon_id,
                                                  legacy_dir=legacy_dir)
                    legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
                    val: Dict[str, Any] = {
                        'style': 'legacy',
                        'name': '%s.%s' % (daemon_type, daemon_id),
                        'fsid': fsid if fsid is not None else 'unknown',
                        'systemd_unit': legacy_unit_name,
                    }
                    if detail:
                        (val['enabled'], val['state'], _) = check_unit(ctx, legacy_unit_name)
                        # legacy daemons run from the host's ceph install,
                        # so `ceph -v` on the host gives their version
                        if not host_version:
                            try:
                                out, err, code = call(ctx,
                                                      ['ceph', '-v'],
                                                      verbosity=CallVerbosity.QUIET)
                                if not code and out.startswith('ceph version '):
                                    host_version = out.split(' ')[2]
                            except Exception:
                                pass
                        val['host_version'] = host_version
                    ls.append(val)
            elif is_fsid(i):
                # cephadm layout: /var/lib/ceph/<fsid>/<type>.<id>
                fsid = str(i)  # convince mypy that fsid is a str here
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)):
                        name = j
                        (daemon_type, daemon_id) = j.split('.', 1)
                        unit_name = get_unit_name(fsid,
                                                  daemon_type,
                                                  daemon_id)
                    else:
                        continue
                    val = {
                        'style': 'cephadm:v1',
                        'name': name,
                        'fsid': fsid,
                        'systemd_unit': unit_name,
                    }
                    if detail:
                        # get container id
                        (val['enabled'], val['state'], _) = check_unit(ctx, unit_name)
                        container_id = None
                        image_name = None
                        image_id = None
                        image_digests = None
                        version = None
                        start_stamp = None

                        out, err, code = get_container_stats(ctx, container_path, fsid, daemon_type, daemon_id)
                        if not code:
                            (container_id, image_name, image_id, start,
                             version) = out.strip().split(',')
                            image_id = normalize_container_id(image_id)
                            daemon_type = name.split('.', 1)[0]
                            start_stamp = try_convert_datetime(start)

                            # collect digests for this image id
                            image_digests = seen_digests.get(image_id)
                            if not image_digests:
                                out, err, code = call(
                                    ctx,
                                    [
                                        container_path, 'image', 'inspect', image_id,
                                        '--format', '{{.RepoDigests}}',
                                    ],
                                    verbosity=CallVerbosity.QUIET)
                                if not code:
                                    image_digests = list(set(map(
                                        normalize_image_digest,
                                        out.strip()[1:-1].split(' '))))
                                    seen_digests[image_id] = image_digests

                            # identify software version inside the container (if we can)
                            if not version or '.' not in version:
                                version = seen_versions.get(image_id, None)
                            if daemon_type == NFSGanesha.daemon_type:
                                version = NFSGanesha.get_version(ctx, container_id)
                            if daemon_type == CephIscsi.daemon_type:
                                version = CephIscsi.get_version(ctx, container_id)
                            elif not version:
                                # exec a daemon-type specific version command
                                # inside the container, caching per image id
                                if daemon_type in Ceph.daemons:
                                    out, err, code = call(ctx,
                                                          [container_path, 'exec', container_id,
                                                           'ceph', '-v'],
                                                          verbosity=CallVerbosity.QUIET)
                                    if not code and \
                                       out.startswith('ceph version '):
                                        version = out.split(' ')[2]
                                        seen_versions[image_id] = version
                                elif daemon_type == 'grafana':
                                    out, err, code = call(ctx,
                                                          [container_path, 'exec', container_id,
                                                           'grafana-server', '-v'],
                                                          verbosity=CallVerbosity.QUIET)
                                    if not code and \
                                       out.startswith('Version '):
                                        version = out.split(' ')[1]
                                        seen_versions[image_id] = version
                                elif daemon_type in ['prometheus',
                                                     'alertmanager',
                                                     'node-exporter',
                                                     'loki',
                                                     'promtail']:
                                    version = Monitoring.get_version(ctx, container_id, daemon_type)
                                    seen_versions[image_id] = version
                                elif daemon_type == 'haproxy':
                                    out, err, code = call(ctx,
                                                          [container_path, 'exec', container_id,
                                                           'haproxy', '-v'],
                                                          verbosity=CallVerbosity.QUIET)
                                    if not code and \
                                       out.startswith('HA-Proxy version '):
                                        version = out.split(' ')[2]
                                        seen_versions[image_id] = version
                                elif daemon_type == 'keepalived':
                                    # keepalived prints its version on stderr
                                    out, err, code = call(ctx,
                                                          [container_path, 'exec', container_id,
                                                           'keepalived', '--version'],
                                                          verbosity=CallVerbosity.QUIET)
                                    if not code and \
                                       err.startswith('Keepalived '):
                                        version = err.split(' ')[1]
                                        if version[0] == 'v':
                                            version = version[1:]
                                        seen_versions[image_id] = version
                                elif daemon_type == CustomContainer.daemon_type:
                                    # Because a custom container can contain
                                    # everything, we do not know which command
                                    # to execute to get the version.
                                    pass
                                elif daemon_type == SNMPGateway.daemon_type:
                                    version = SNMPGateway.get_version(ctx, fsid, daemon_id)
                                    seen_versions[image_id] = version
                                else:
                                    logger.warning('version for unknown daemon type %s' % daemon_type)
                        else:
                            # container not running: fall back to the image
                            # recorded at deploy time
                            vfile = os.path.join(data_dir, fsid, j, 'unit.image')  # type: ignore
                            try:
                                with open(vfile, 'r') as f:
                                    image_name = f.read().strip() or None
                            except IOError:
                                pass

                        # unit.meta?
                        mfile = os.path.join(data_dir, fsid, j, 'unit.meta')  # type: ignore
                        try:
                            with open(mfile, 'r') as f:
                                meta = json.loads(f.read())
                            val.update(meta)
                        except IOError:
                            pass

                        val['container_id'] = container_id
                        val['container_image_name'] = image_name
                        val['container_image_id'] = image_id
                        val['container_image_digests'] = image_digests
                        if container_id:
                            # stats output may truncate ids; match by prefix
                            val['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len])
                            val['cpu_percentage'] = seen_cpuperc.get(container_id[0:seen_cpuperc_cid_len])
                        val['version'] = version
                        val['started'] = start_stamp
                        val['created'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.created')
                        )
                        val['deployed'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.image'))
                        val['configured'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.configured'))
                    ls.append(val)

    return ls
6486 | ||
6487 | ||
522d829b TL |
6488 | def _parse_mem_usage(code: int, out: str) -> Tuple[int, Dict[str, int]]: |
6489 | # keep track of memory usage we've seen | |
6490 | seen_memusage = {} # type: Dict[str, int] | |
6491 | seen_memusage_cid_len = 0 | |
6492 | if not code: | |
6493 | for line in out.splitlines(): | |
6494 | (cid, usage) = line.split(',') | |
6495 | (used, limit) = usage.split(' / ') | |
6496 | try: | |
6497 | seen_memusage[cid] = with_units_to_int(used) | |
6498 | if not seen_memusage_cid_len: | |
6499 | seen_memusage_cid_len = len(cid) | |
6500 | except ValueError: | |
6501 | logger.info('unable to parse memory usage line\n>{}'.format(line)) | |
6502 | pass | |
6503 | return seen_memusage_cid_len, seen_memusage | |
6504 | ||
6505 | ||
33c7a0ef TL |
6506 | def _parse_cpu_perc(code: int, out: str) -> Tuple[int, Dict[str, str]]: |
6507 | seen_cpuperc = {} | |
6508 | seen_cpuperc_cid_len = 0 | |
6509 | if not code: | |
6510 | for line in out.splitlines(): | |
6511 | (cid, cpuperc) = line.split(',') | |
6512 | try: | |
6513 | seen_cpuperc[cid] = cpuperc | |
6514 | if not seen_cpuperc_cid_len: | |
6515 | seen_cpuperc_cid_len = len(cid) | |
6516 | except ValueError: | |
6517 | logger.info('unable to parse cpu percentage line\n>{}'.format(line)) | |
6518 | pass | |
6519 | return seen_cpuperc_cid_len, seen_cpuperc | |
6520 | ||
6521 | ||
f67539c2 TL |
def get_daemon_description(ctx, fsid, name, detail=False, legacy_dir=None):
    # type: (CephadmContext, str, str, bool, Optional[str]) -> Dict[str, str]
    """Return the `cephadm ls` entry matching (fsid, name), or raise Error."""
    for daemon in list_daemons(ctx, detail=detail, legacy_dir=legacy_dir):
        if daemon['fsid'] == fsid and daemon['name'] == name:
            return daemon
    raise Error('Daemon not found: {}. See `cephadm ls`'.format(name))
6532 | ||
522d829b TL |
6533 | |
def get_container_stats(ctx: CephadmContext, container_path: str, fsid: str, daemon_type: str, daemon_id: str) -> Tuple[str, str, int]:
    """Inspect the daemon's container, trying the current then the legacy
    container name.  Returns (out, err, returncode) of the first attempt
    that succeeds (or the last attempt's results)."""
    ctr = CephContainer.for_daemon(ctx, fsid, daemon_type, daemon_id, 'bash')
    fmt = '{{.Id}},{{.Config.Image}},{{.Image}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}'
    out, err, code = '', '', -1
    for cname in (ctr.cname, ctr.old_cname):
        out, err, code = call(
            ctx,
            [container_path, 'inspect', '--format', fmt, cname],
            verbosity=CallVerbosity.QUIET)
        if not code:
            break
    return out, err, code
6547 | ||
9f95a23c TL |
6548 | ################################## |
6549 | ||
f67539c2 | 6550 | |
@default_image
def command_adopt(ctx):
    # type: (CephadmContext) -> None
    """Adopt a legacy (non-cephadm) daemon into cephadm management."""

    if not ctx.skip_pull:
        try:
            _pull_image(ctx, ctx.image)
        except UnauthorizedRegistryError:
            err_str = 'Failed to pull container image. Host may not be logged into container registry. Try `cephadm registry-login --registry-url <url> --registry-username <username> --registry-password <password>` or supply login info via a json file with `cephadm registry-login --registry-json <file>`'
            logger.debug(f'Pulling image for `command_adopt` failed: {err_str}')
            raise Error(err_str)

    (daemon_type, daemon_id) = ctx.name.split('.', 1)

    # legacy check -- only legacy-style daemons can be adopted
    if ctx.style != 'legacy':
        raise Error('adoption of style %s not implemented' % ctx.style)

    # lock -- detect the cluster fsid, then serialize against other
    # cephadm operations on that cluster
    fsid = get_legacy_daemon_fsid(ctx,
                                  ctx.cluster,
                                  daemon_type,
                                  daemon_id,
                                  legacy_dir=ctx.legacy_dir)
    if not fsid:
        raise Error('could not detect legacy fsid; set fsid in ceph.conf')
    lock = FileLock(ctx, fsid)
    lock.acquire()

    # call correct adoption routine for this daemon type
    if daemon_type in Ceph.daemons:
        command_adopt_ceph(ctx, daemon_type, daemon_id, fsid)
    elif daemon_type == 'prometheus':
        command_adopt_prometheus(ctx, daemon_id, fsid)
    elif daemon_type == 'grafana':
        command_adopt_grafana(ctx, daemon_id, fsid)
    elif daemon_type == 'node-exporter':
        raise Error('adoption of node-exporter not implemented')
    elif daemon_type == 'alertmanager':
        command_adopt_alertmanager(ctx, daemon_id, fsid)
    else:
        raise Error('daemon type %s not recognized' % daemon_type)
6593 | ||
6594 | ||
class AdoptOsd(object):
    """Helpers for adopting a legacy OSD.

    Probes where the OSD's data lives (online, offline LVM, or offline
    ceph-volume 'simple') and can normalize its LVM cluster-name tag.
    """

    def __init__(self, ctx, osd_data_dir, osd_id):
        # type: (CephadmContext, str, str) -> None
        self.ctx = ctx
        self.osd_data_dir = osd_data_dir
        self.osd_id = osd_id

    def check_online_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Read (osd_fsid, objectstore type) from a mounted OSD data dir."""

        osd_fsid, osd_type = None, None

        path = os.path.join(self.osd_data_dir, 'fsid')
        try:
            with open(path, 'r') as f:
                osd_fsid = f.read().strip()
            logger.info('Found online OSD at %s' % path)
        except IOError:
            logger.info('Unable to read OSD fsid from %s' % path)
        if os.path.exists(os.path.join(self.osd_data_dir, 'type')):
            with open(os.path.join(self.osd_data_dir, 'type')) as f:
                osd_type = f.read().strip()
        else:
            logger.info('"type" file missing for OSD data dir')

        return osd_fsid, osd_type

    def check_offline_lvm_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Look the OSD up via `ceph-volume lvm list` (not mounted)."""
        osd_fsid, osd_type = None, None

        c = get_ceph_volume_container(
            self.ctx,
            args=['lvm', 'list', '--format=json'],
        )
        out, err, code = call_throws(self.ctx, c.run_cmd())
        if not code:
            try:
                js = json.loads(out)
                if self.osd_id in js:
                    logger.info('Found offline LVM OSD {}'.format(self.osd_id))
                    osd_fsid = js[self.osd_id][0]['tags']['ceph.osd_fsid']
                    # a 'block' device implies bluestore, 'data' filestore
                    for device in js[self.osd_id]:
                        if device['tags']['ceph.type'] == 'block':
                            osd_type = 'bluestore'
                            break
                        if device['tags']['ceph.type'] == 'data':
                            osd_type = 'filestore'
                            break
            except ValueError as e:
                logger.info('Invalid JSON in ceph-volume lvm list: {}'.format(e))

        return osd_fsid, osd_type

    def check_offline_simple_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Look the OSD up via its ceph-volume 'simple' JSON file."""
        osd_fsid, osd_type = None, None

        osd_file = glob('/etc/ceph/osd/{}-[a-f0-9-]*.json'.format(self.osd_id))
        if len(osd_file) == 1:
            with open(osd_file[0], 'r') as f:
                try:
                    js = json.loads(f.read())
                    logger.info('Found offline simple OSD {}'.format(self.osd_id))
                    osd_fsid = js['fsid']
                    osd_type = js['type']
                    if osd_type != 'filestore':
                        # need this to be mounted for the adopt to work, as it
                        # needs to move files from this directory
                        call_throws(self.ctx, ['mount', js['data']['path'], self.osd_data_dir])
                except ValueError as e:
                    logger.info('Invalid JSON in {}: {}'.format(osd_file, e))

        return osd_fsid, osd_type

    def change_cluster_name(self) -> None:
        """Rewrite the LVM ceph.cluster_name tag from the legacy cluster
        name to 'ceph'.  Failures are logged, not raised."""
        logger.info('Attempting to convert osd cluster name to ceph . . .')
        c = get_ceph_volume_container(
            self.ctx,
            args=['lvm', 'list', '{}'.format(self.osd_id), '--format=json'],
        )
        out, err, code = call_throws(self.ctx, c.run_cmd())
        if code:
            raise Exception(f'Failed to get list of LVs: {err}\nceph-volume failed with rc {code}')
        try:
            js = json.loads(out)
            if not js:
                raise RuntimeError(f'Failed to find osd.{self.osd_id}')
            device: Optional[Dict[Any, Any]] = None
            for d in js[self.osd_id]:
                if d['type'] == 'block':
                    device = d
                    break
            if not device:
                raise RuntimeError(f'Failed to find block device for osd.{self.osd_id}')
            vg = device['vg_name']
            # swap the cluster_name tag on the volume group
            out, err, code = call_throws(self.ctx, ['lvchange', '--deltag', f'ceph.cluster_name={self.ctx.cluster}', vg])
            if code:
                raise RuntimeError(f"Can't delete tag ceph.cluster_name={self.ctx.cluster} on osd.{self.osd_id}.\nlvchange failed with rc {code}")
            out, err, code = call_throws(self.ctx, ['lvchange', '--addtag', 'ceph.cluster_name=ceph', vg])
            if code:
                raise RuntimeError(f"Can't add tag ceph.cluster_name=ceph on osd.{self.osd_id}.\nlvchange failed with rc {code}")
            logger.info('Successfully converted osd cluster name')
        except (Exception, RuntimeError) as e:
            logger.info(f'Failed to convert osd cluster name: {e}')
6700 | ||
9f95a23c | 6701 | |
f67539c2 TL |
def command_adopt_ceph(ctx, daemon_type, daemon_id, fsid):
    # type: (CephadmContext, str, str, str) -> None
    """Adopt a legacy (non-containerized) ceph daemon into cephadm management.

    Stops/disables the old systemd unit, moves its data and logs into the
    cephadm directory layout for `fsid`, fixes ownership, and deploys new
    containerized units.  Raises Error when the legacy data dir is missing or
    (for OSDs) the OSD cannot be located or uses FileStore.
    """
    (uid, gid) = extract_uid_gid(ctx)

    data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
                    (daemon_type, ctx.cluster, daemon_id))
    data_dir_src = os.path.abspath(ctx.legacy_dir + data_dir_src)

    if not os.path.exists(data_dir_src):
        raise Error("{}.{} data directory '{}' does not exist. "
                    'Incorrect ID specified, or daemon already adopted?'.format(
                        daemon_type, daemon_id, data_dir_src))

    osd_fsid = None
    if daemon_type == 'osd':
        # try online, then offline-lvm, then offline-simple detection
        adopt_osd = AdoptOsd(ctx, data_dir_src, daemon_id)
        osd_fsid, osd_type = adopt_osd.check_online_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
        if not osd_fsid:
            raise Error('Unable to find OSD {}'.format(daemon_id))
        elif ctx.cluster != 'ceph':
            # retag LVs so the adopted OSD belongs to cluster 'ceph'
            adopt_osd.change_cluster_name()
        logger.info('objectstore_type is %s' % osd_type)
        assert osd_type
        if osd_type == 'filestore':
            raise Error('FileStore is not supported by cephadm')

    # NOTE: implicit assumption here that the units correspond to the
    # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
    # CLUSTER field.
    unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
    (enabled, state, _) = check_unit(ctx, unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'disable', unit_name])

    # data
    logger.info('Moving data...')
    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)
    move_files(ctx, glob(os.path.join(data_dir_src, '*')),
               data_dir_dst,
               uid=uid, gid=gid)
    logger.debug('Remove dir `%s`' % (data_dir_src))
    if os.path.ismount(data_dir_src):
        call_throws(ctx, ['umount', data_dir_src])
    os.rmdir(data_dir_src)

    logger.info('Chowning content...')
    call_throws(ctx, ['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])

    if daemon_type == 'mon':
        # rename *.ldb -> *.sst, in case they are coming from ubuntu
        store = os.path.join(data_dir_dst, 'store.db')
        num_renamed = 0
        if os.path.exists(store):
            for oldf in os.listdir(store):
                if oldf.endswith('.ldb'):
                    newf = oldf.replace('.ldb', '.sst')
                    oldp = os.path.join(store, oldf)
                    newp = os.path.join(store, newf)
                    logger.debug('Renaming %s -> %s' % (oldp, newp))
                    os.rename(oldp, newp)
                    # BUGFIX: counter was never incremented, so the summary
                    # log line below could never be emitted
                    num_renamed += 1
        if num_renamed:
            logger.info('Renamed %d leveldb *.ldb files to *.sst',
                        num_renamed)
    if daemon_type == 'osd':
        for n in ['block', 'block.db', 'block.wal']:
            p = os.path.join(data_dir_dst, n)
            if os.path.exists(p):
                logger.info('Chowning %s...' % p)
                os.chown(p, uid, gid)
        # disable the ceph-volume 'simple' mode files on the host
        simple_fn = os.path.join('/etc/ceph/osd',
                                 '%s-%s.json' % (daemon_id, osd_fsid))
        if os.path.exists(simple_fn):
            new_fn = simple_fn + '.adopted-by-cephadm'
            logger.info('Renaming %s -> %s', simple_fn, new_fn)
            os.rename(simple_fn, new_fn)
            logger.info('Disabling host unit ceph-volume@ simple unit...')
            call(ctx, ['systemctl', 'disable',
                       'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
        else:
            # assume this is an 'lvm' c-v for now, but don't error
            # out if it's not.
            logger.info('Disabling host unit ceph-volume@ lvm unit...')
            call(ctx, ['systemctl', 'disable',
                       'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])

    # config
    config_src = '/etc/ceph/%s.conf' % (ctx.cluster)
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'config')
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # logs
    logger.info('Moving logs...')
    log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
                   (ctx.cluster, daemon_type, daemon_id))
    log_dir_src = os.path.abspath(ctx.legacy_dir + log_dir_src)
    log_dir_dst = make_log_dir(ctx, fsid, uid=uid, gid=gid)
    move_files(ctx, glob(log_dir_src),
               log_dir_dst,
               uid=uid, gid=gid)

    logger.info('Creating new units...')
    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True,  # unconditionally enable the new unit
                        start=(state == 'running' or ctx.force_start),
                        osd_fsid=osd_fsid)
    update_firewalld(ctx, daemon_type)
9f95a23c TL |
6822 | |
6823 | ||
f67539c2 TL |
def command_adopt_prometheus(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy host-installed prometheus into cephadm management."""
    daemon_type = 'prometheus'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)

    # make sure the legacy service is stopped before we move anything
    _stop_and_disable(ctx, 'prometheus')

    dst_root = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                             uid=uid, gid=gid)

    # config
    cfg_src = os.path.abspath(ctx.legacy_dir + '/etc/prometheus/prometheus.yml')
    cfg_dst = os.path.join(dst_root, 'etc/prometheus')
    makedirs(cfg_dst, uid, gid, 0o755)
    copy_files(ctx, [cfg_src], cfg_dst, uid=uid, gid=gid)

    # data
    metrics_src = os.path.abspath(ctx.legacy_dir + '/var/lib/prometheus/metrics/')
    metrics_dst = os.path.join(dst_root, 'data')
    copy_tree(ctx, [metrics_src], metrics_dst, uid=uid, gid=gid)

    # deploy the containerized replacement
    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)
9f95a23c | 6851 | |
f6b5b4d7 | 6852 | |
f67539c2 TL |
def command_adopt_grafana(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy host-installed grafana into cephadm management."""
    daemon_type = 'grafana'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)

    # make sure the legacy service is stopped before we move anything
    _stop_and_disable(ctx, 'grafana-server')

    dst_root = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                             uid=uid, gid=gid)

    # config
    ini_src = os.path.abspath(ctx.legacy_dir + '/etc/grafana/grafana.ini')
    ini_dst = os.path.join(dst_root, 'etc/grafana')
    makedirs(ini_dst, uid, gid, 0o755)
    copy_files(ctx, [ini_src], ini_dst, uid=uid, gid=gid)

    # provisioning (datasources/dashboards)
    prov_src = os.path.abspath(ctx.legacy_dir + '/etc/grafana/provisioning/')
    prov_dst = os.path.join(dst_root, 'etc/grafana')
    copy_tree(ctx, [prov_src], prov_dst, uid=uid, gid=gid)

    # cert: only migrate TLS material when both halves exist
    cert = '/etc/grafana/grafana.crt'
    key = '/etc/grafana/grafana.key'
    if os.path.exists(cert) and os.path.exists(key):
        cert_src = os.path.abspath(ctx.legacy_dir + '/etc/grafana/grafana.crt')
        makedirs(os.path.join(dst_root, 'etc/grafana/certs'), uid, gid, 0o755)
        cert_dst = os.path.join(dst_root, 'etc/grafana/certs/cert_file')
        copy_files(ctx, [cert_src], cert_dst, uid=uid, gid=gid)

        key_src = os.path.abspath(ctx.legacy_dir + '/etc/grafana/grafana.key')
        key_dst = os.path.join(dst_root, 'etc/grafana/certs/cert_key')
        copy_files(ctx, [key_src], key_dst, uid=uid, gid=gid)

        # rewrite the ini so it points at the relocated cert/key
        _adjust_grafana_ini(os.path.join(ini_dst, 'grafana.ini'))
    else:
        logger.debug('Skipping ssl, missing cert {} or key {}'.format(cert, key))

    # data - possible custom dashboards/plugins
    data_src = os.path.abspath(ctx.legacy_dir + '/var/lib/grafana/')
    data_dst = os.path.join(dst_root, 'data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    # deploy the containerized replacement
    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)
9f95a23c | 6905 | |
f6b5b4d7 | 6906 | |
f67539c2 TL |
def command_adopt_alertmanager(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy host-installed alertmanager into cephadm management."""
    daemon_type = 'alertmanager'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)

    # make sure the legacy service is stopped before we move anything
    _stop_and_disable(ctx, 'prometheus-alertmanager')

    dst_root = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                             uid=uid, gid=gid)

    # config
    cfg_src = os.path.abspath(ctx.legacy_dir + '/etc/prometheus/alertmanager.yml')
    cfg_dst = os.path.join(dst_root, 'etc/alertmanager')
    makedirs(cfg_dst, uid, gid, 0o755)
    copy_files(ctx, [cfg_src], cfg_dst, uid=uid, gid=gid)

    # data
    data_src = os.path.abspath(ctx.legacy_dir + '/var/lib/prometheus/alertmanager/')
    data_dst = os.path.join(dst_root, 'etc/alertmanager/data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    # deploy the containerized replacement
    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)
801d1391 | 6935 | |
f6b5b4d7 | 6936 | |
9f95a23c TL |
6937 | def _adjust_grafana_ini(filename): |
6938 | # type: (str) -> None | |
6939 | ||
6940 | # Update cert_file, cert_key pathnames in server section | |
6941 | # ConfigParser does not preserve comments | |
6942 | try: | |
f67539c2 | 6943 | with open(filename, 'r') as grafana_ini: |
9f95a23c | 6944 | lines = grafana_ini.readlines() |
f67539c2 TL |
6945 | with open('{}.new'.format(filename), 'w') as grafana_ini: |
6946 | server_section = False | |
9f95a23c TL |
6947 | for line in lines: |
6948 | if line.startswith('['): | |
f67539c2 | 6949 | server_section = False |
9f95a23c | 6950 | if line.startswith('[server]'): |
f67539c2 | 6951 | server_section = True |
9f95a23c TL |
6952 | if server_section: |
6953 | line = re.sub(r'^cert_file.*', | |
f67539c2 | 6954 | 'cert_file = /etc/grafana/certs/cert_file', line) |
9f95a23c | 6955 | line = re.sub(r'^cert_key.*', |
f67539c2 | 6956 | 'cert_key = /etc/grafana/certs/cert_key', line) |
9f95a23c | 6957 | grafana_ini.write(line) |
f67539c2 | 6958 | os.rename('{}.new'.format(filename), filename) |
9f95a23c | 6959 | except OSError as err: |
f67539c2 | 6960 | raise Error('Cannot update {}: {}'.format(filename, err)) |
9f95a23c TL |
6961 | |
6962 | ||
f67539c2 TL |
def _stop_and_disable(ctx, unit_name):
    # type: (CephadmContext, str) -> None
    """Stop the named systemd unit if running, and disable it if enabled."""
    enabled, state, _ = check_unit(ctx, unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'disable', unit_name])
9f95a23c TL |
6973 | |
6974 | ################################## | |
6975 | ||
9f95a23c | 6976 | |
f67539c2 TL |
def command_rm_daemon(ctx):
    # type: (CephadmContext) -> None
    """Remove one daemon: its unit, its data dir, and its firewall ports."""
    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    # mon/osd removal is destructive -- insist on explicit confirmation
    if daemon_type in ['mon', 'osd'] and not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    # quietly stop, clear failure state, and disable the unit
    for verb in ('stop', 'reset-failed', 'disable'):
        call(ctx, ['systemctl', verb, unit_name],
             verbosity=CallVerbosity.DEBUG)

    data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_id)
    if daemon_type in ['mon', 'osd', 'prometheus'] and \
            not ctx.force_delete_data:
        # rename it out of the way -- do not delete
        backup_dir = os.path.join(ctx.data_dir, ctx.fsid, 'removed')
        if not os.path.exists(backup_dir):
            makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
        dirname = '%s.%s_%s' % (daemon_type, daemon_id,
                                datetime.datetime.utcnow().strftime(DATEFMT))
        os.rename(data_dir, os.path.join(backup_dir, dirname))
    else:
        call_throws(ctx, ['rm', '-rf', data_dir])

    if 'tcp_ports' in ctx and ctx.tcp_ports is not None:
        ports: List[int] = [int(p) for p in ctx.tcp_ports.split()]
        try:
            fw = Firewalld(ctx)
            fw.close_ports(ports)
            fw.apply_rules()
        except RuntimeError as e:
            # in case we cannot close the ports we will remove
            # the daemon but keep them open.
            logger.warning(f' Error when trying to close ports: {e}')
7019 | ||
7020 | ||
9f95a23c TL |
7021 | ################################## |
7022 | ||
f6b5b4d7 | 7023 | |
522d829b | 7024 | def _zap(ctx: CephadmContext, what: str) -> None: |
b3b6e05e | 7025 | mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None) |
20effc67 TL |
7026 | c = get_ceph_volume_container(ctx, |
7027 | args=['lvm', 'zap', '--destroy', what], | |
7028 | volume_mounts=mounts, | |
7029 | envs=ctx.env) | |
b3b6e05e TL |
7030 | logger.info(f'Zapping {what}...') |
7031 | out, err, code = call_throws(ctx, c.run_cmd()) | |
7032 | ||
7033 | ||
@infer_image
def _zap_osds(ctx: CephadmContext) -> None:
    """Zap every device whose ceph LVs all belong to this cluster's fsid.

    Assumes the per-fsid lock is already held by the caller.  Devices that
    are only partially owned by this cluster are skipped with a warning.
    """
    # gather ceph-volume's device inventory as JSON
    vols = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
    cv = get_ceph_volume_container(ctx,
                                   args=['inventory', '--format', 'json'],
                                   volume_mounts=vols,
                                   envs=ctx.env)
    out, err, code = call_throws(ctx, cv.run_cmd())
    if code:
        raise Error('failed to list osd inventory')
    try:
        inventory = json.loads(out)
    except ValueError as e:
        raise Error(f'Invalid JSON in ceph-volume inventory: {e}')

    for dev in inventory:
        # one flag per LV: is it a ceph device belonging to our fsid?
        matches = [lv.get('cluster_fsid') == ctx.fsid and dev.get('ceph_device') for lv in dev.get('lvs', [])]
        if any(matches) and all(matches):
            # everything on this device is ours -- safe to wipe it whole
            _zap(ctx, dev.get('path'))
        elif any(matches):
            lv_names = [lv['name'] for lv in dev.get('lvs', [])]
            # TODO: we need to map the lv_names back to device paths (the vg
            # id isn't part of the output here!)
            logger.warning(f'Not zapping LVs (not implemented): {lv_names}')
7061 | ||
7062 | ||
522d829b | 7063 | def command_zap_osds(ctx: CephadmContext) -> None: |
b3b6e05e TL |
7064 | if not ctx.force: |
7065 | raise Error('must pass --force to proceed: ' | |
7066 | 'this command may destroy precious data!') | |
7067 | ||
7068 | lock = FileLock(ctx, ctx.fsid) | |
7069 | lock.acquire() | |
7070 | ||
7071 | _zap_osds(ctx) | |
7072 | ||
7073 | ################################## | |
7074 | ||
7075 | ||
33c7a0ef TL |
def get_ceph_cluster_count(ctx: CephadmContext) -> int:
    """Count the cluster (fsid-named) directories under the cephadm data dir."""
    return sum(1 for entry in os.listdir(ctx.data_dir) if is_fsid(entry))
7078 | ||
7079 | ||
f67539c2 TL |
def command_rm_cluster(ctx):
    # type: (CephadmContext) -> None
    """Remove an entire cluster (units, data, logs, config) from this host."""
    if not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    def disable_systemd_service(unit_name: str) -> None:
        # stop, clear failed state, and disable; errors are non-fatal
        for verb in ('stop', 'reset-failed', 'disable'):
            call(ctx, ['systemctl', verb, unit_name],
                 verbosity=CallVerbosity.DEBUG)

    # stop + disable individual daemon units
    for d in list_daemons(ctx, detail=False):
        if d['fsid'] != ctx.fsid:
            continue
        if d['style'] != 'cephadm:v1':
            continue
        disable_systemd_service(get_unit_name(ctx.fsid, d['name']))

    # cluster units
    for unit_name in ['ceph-%s.target' % ctx.fsid]:
        disable_systemd_service(unit_name)

    # stop the per-cluster slice too (systemd escapes '-' as \x2d)
    slice_name = 'system-ceph\\x2d{}.slice'.format(ctx.fsid.replace('-', '\\x2d'))
    call(ctx, ['systemctl', 'stop', slice_name],
         verbosity=CallVerbosity.DEBUG)

    # osds?
    if ctx.zap_osds:
        _zap_osds(ctx)

    # rm units
    for unit_path in (ctx.unit_dir + '/ceph-%s@.service' % ctx.fsid,
                      ctx.unit_dir + '/ceph-%s.target' % ctx.fsid):
        call_throws(ctx, ['rm', '-f', unit_path])
    call_throws(ctx, ['rm', '-rf',
                      ctx.unit_dir + '/ceph-%s.target.wants' % ctx.fsid])
    # rm data
    call_throws(ctx, ['rm', '-rf', ctx.data_dir + '/' + ctx.fsid])

    if not ctx.keep_logs:
        # rm logs
        call_throws(ctx, ['rm', '-rf', ctx.log_dir + '/' + ctx.fsid])
        call_throws(ctx, ['rm', '-rf', ctx.log_dir
                          + '/*.wants/ceph-%s@*' % ctx.fsid])

    # rm logrotate config
    call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/ceph-%s' % ctx.fsid])

    # if last cluster on host remove shared files
    if get_ceph_cluster_count(ctx) == 0:
        disable_systemd_service('ceph.target')

        # rm shared ceph target files
        call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/multi-user.target.wants/ceph.target'])
        call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/ceph.target'])

        # rm cephadm logrotate config
        call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/cephadm'])

        if not ctx.keep_logs:
            # remove all cephadm logs
            for fname in glob(f'{ctx.log_dir}/cephadm.log*'):
                os.remove(fname)

    # rm sysctl settings
    sysctl_dirs: List[Path] = [Path(ctx.sysctl_dir), Path('/usr/lib/sysctl.d')]
    for sysctl_dir in sysctl_dirs:
        for p in sysctl_dir.glob(f'90-ceph-{ctx.fsid}-*.conf'):
            p.unlink()

    # cleanup remaining ceph directories
    ceph_dirs = [f'/run/ceph/{ctx.fsid}', f'/tmp/var/lib/ceph/{ctx.fsid}', f'/var/run/ceph/{ctx.fsid}']
    for dd in ceph_dirs:
        shutil.rmtree(dd, ignore_errors=True)

    # clean up config, keyring, and pub key files
    files = [CEPH_DEFAULT_CONF, CEPH_DEFAULT_PUBKEY, CEPH_DEFAULT_KEYRING]
    if os.path.exists(files[0]):
        # only remove them when the conf actually references this fsid
        valid_fsid = False
        with open(files[0]) as f:
            if ctx.fsid in f.read():
                valid_fsid = True
        if valid_fsid:
            # rm configuration files on /etc/ceph
            for path in files:
                if os.path.exists(path):
                    os.remove(path)
7176 | ||
9f95a23c TL |
7177 | ################################## |
7178 | ||
f67539c2 TL |
7179 | |
def check_time_sync(ctx, enabler=None):
    # type: (CephadmContext, Optional[Packager]) -> bool
    """Return True if any known time-sync service unit is active.

    When `enabler` is given, check_units may try to enable a unit that is
    installed but not running.
    """
    units = [
        'chrony.service',  # 18.04 (at least)
        'chronyd.service',  # el / opensuse
        'systemd-timesyncd.service',
        'ntpd.service',  # el7 (at least)
        'ntp.service',  # 18.04 (at least)
        'ntpsec.service',  # 20.04 (at least) / buster
        'openntpd.service',  # ubuntu / debian
    ]
    if check_units(ctx, units, enabler):
        return True
    logger.warning('No time sync service is running; checked for %s' % units)
    return False
7195 | ||
f6b5b4d7 | 7196 | |
f67539c2 | 7197 | def command_check_host(ctx: CephadmContext) -> None: |
1911f103 | 7198 | errors = [] |
9f95a23c TL |
7199 | commands = ['systemctl', 'lvcreate'] |
7200 | ||
f67539c2 | 7201 | try: |
a4b75251 TL |
7202 | engine = check_container_engine(ctx) |
7203 | logger.info(f'{engine} is present') | |
f67539c2 TL |
7204 | except Error as e: |
7205 | errors.append(str(e)) | |
1911f103 | 7206 | |
9f95a23c TL |
7207 | for command in commands: |
7208 | try: | |
7209 | find_program(command) | |
7210 | logger.info('%s is present' % command) | |
7211 | except ValueError: | |
1911f103 | 7212 | errors.append('%s binary does not appear to be installed' % command) |
9f95a23c TL |
7213 | |
7214 | # check for configured+running chronyd or ntp | |
f67539c2 | 7215 | if not check_time_sync(ctx): |
1911f103 | 7216 | errors.append('No time synchronization is active') |
9f95a23c | 7217 | |
f67539c2 TL |
7218 | if 'expect_hostname' in ctx and ctx.expect_hostname: |
7219 | if get_hostname().lower() != ctx.expect_hostname.lower(): | |
1911f103 | 7220 | errors.append('hostname "%s" does not match expected hostname "%s"' % ( |
f67539c2 | 7221 | get_hostname(), ctx.expect_hostname)) |
20effc67 TL |
7222 | else: |
7223 | logger.info('Hostname "%s" matches what is expected.', | |
7224 | ctx.expect_hostname) | |
9f95a23c | 7225 | |
1911f103 | 7226 | if errors: |
f67539c2 | 7227 | raise Error('\nERROR: '.join(errors)) |
1911f103 | 7228 | |
9f95a23c TL |
7229 | logger.info('Host looks OK') |
7230 | ||
7231 | ################################## | |
7232 | ||
f6b5b4d7 | 7233 | |
33c7a0ef TL |
def get_ssh_vars(ssh_user: str) -> Tuple[int, int, str]:
    """Return (uid, gid, ~/.ssh path) for the given local user account."""
    try:
        entry = pwd.getpwnam(ssh_user)
    except KeyError:
        raise Error('Cannot find uid/gid for ssh-user: %s' % (ssh_user))
    return entry.pw_uid, entry.pw_gid, os.path.join(entry.pw_dir, '.ssh')
7244 | ||
7245 | ||
def authorize_ssh_key(ssh_pub_key: str, ssh_user: str) -> bool:
    """Authorize the public key for the provided ssh user"""

    def key_in_file(path: str, key: str) -> bool:
        # is the exact key already present as a line of `path`?
        if not os.path.exists(path):
            return False
        with open(path) as fh:
            return any(line.strip() == key.strip() for line in fh.readlines())

    logger.info(f'Adding key to {ssh_user}@localhost authorized_keys...')
    if ssh_pub_key is None or ssh_pub_key.isspace():
        raise Error('Trying to authorize an empty ssh key')

    ssh_pub_key = ssh_pub_key.strip()
    ssh_uid, ssh_gid, ssh_dir = get_ssh_vars(ssh_user)
    if not os.path.exists(ssh_dir):
        makedirs(ssh_dir, ssh_uid, ssh_gid, 0o700)

    auth_keys_file = '%s/authorized_keys' % ssh_dir
    if key_in_file(auth_keys_file, ssh_pub_key):
        logger.info(f'key already in {ssh_user}@localhost authorized_keys...')
        return False

    # if the file exists, is non-empty, and lacks a trailing newline,
    # we must add one before appending our key
    add_newline = False
    if os.path.exists(auth_keys_file):
        with open(auth_keys_file, 'r') as f:
            f.seek(0, os.SEEK_END)
            if f.tell() > 0:
                f.seek(f.tell() - 1, os.SEEK_SET)  # go to last char
                if f.read() != '\n':
                    add_newline = True

    with open(auth_keys_file, 'a') as f:
        os.fchown(f.fileno(), ssh_uid, ssh_gid)  # just in case we created it
        os.fchmod(f.fileno(), 0o600)  # just in case we created it
        if add_newline:
            f.write('\n')
        f.write(ssh_pub_key + '\n')

    return True
7290 | ||
7291 | ||
def revoke_ssh_key(key: str, ssh_user: str) -> None:
    """Revoke the public key authorization for the ssh user.

    Rewrites authorized_keys without the matching key line, via a temp file
    that is moved into place only when the key was actually found.
    """
    ssh_uid, ssh_gid, ssh_dir = get_ssh_vars(ssh_user)
    auth_keys_file = '%s/authorized_keys' % ssh_dir
    deleted = False
    if os.path.exists(auth_keys_file):
        with open(auth_keys_file, 'r') as f:
            lines = f.readlines()
        fd, filename = tempfile.mkstemp()
        # mkstemp returns an open fd; close it since we re-open by name
        # below (the original leaked this descriptor)
        os.close(fd)
        with open(filename, 'w') as f:
            os.fchown(f.fileno(), ssh_uid, ssh_gid)
            os.fchmod(f.fileno(), 0o600)  # secure access to the keys file
            for line in lines:
                if line.strip() == key.strip():
                    deleted = True
                else:
                    f.write(line)

        if deleted:
            shutil.move(filename, auth_keys_file)
        else:
            # don't leave the unused temp file behind
            os.unlink(filename)
            logger.warning('Cannot find the ssh key to be deleted')
7314 | ||
7315 | ||
def check_ssh_connectivity(ctx: CephadmContext) -> None:
    """Best-effort check that passwordless-sudo ssh to this host works.

    Uses the user-supplied keys when given, otherwise generates a throwaway
    keypair; temporarily authorizes the key, attempts an ssh round trip, and
    revokes the key again if we added it.
    """

    def cmd_is_available(cmd: str) -> bool:
        if shutil.which(cmd) is None:
            logger.warning(f'Command not found: {cmd}')
            return False
        return True

    if not cmd_is_available('ssh') or not cmd_is_available('ssh-keygen'):
        logger.warning('Cannot check ssh connectivity. Skipping...')
        return

    logger.info('Verifying ssh connectivity ...')
    if ctx.ssh_private_key and ctx.ssh_public_key:
        # let's use the keys provided by the user
        priv_key_path = pathify(ctx.ssh_private_key.name)
        pub_key_path = pathify(ctx.ssh_public_key.name)
    else:
        # no custom keys, let's generate some random keys just for this check
        priv_key_path = f'/tmp/ssh_key_{uuid.uuid1()}'
        pub_key_path = f'{priv_key_path}.pub'
        keygen_cmd = ['ssh-keygen', '-q', '-t', 'rsa', '-N', '', '-C', '', '-f', priv_key_path]
        _, _, rc = call(ctx, keygen_cmd)
        if rc != 0:
            logger.warning('Cannot generate keys to check ssh connectivity.')
            return

    with open(pub_key_path, 'r') as f:
        key = f.read().strip()
    key_added = authorize_ssh_key(key, ctx.ssh_user)
    cfg_file_arg = ['-F', pathify(ctx.ssh_config.name)] if ctx.ssh_config else []
    _, _, rc = call(ctx, ['ssh', '-o StrictHostKeyChecking=no',
                          *cfg_file_arg, '-i', priv_key_path,
                          '-o PasswordAuthentication=no',
                          f'{ctx.ssh_user}@{get_hostname()}',
                          'sudo echo'])

    # we only remove the key if it's a new one. In case the user has provided
    # some already existing key then we don't alter authorized_keys file
    if key_added:
        revoke_ssh_key(key, ctx.ssh_user)

    pub_key_msg = '- The public key file configured by --ssh-public-key is valid\n' if ctx.ssh_public_key else ''
    prv_key_msg = '- The private key file configured by --ssh-private-key is valid\n' if ctx.ssh_private_key else ''
    ssh_cfg_msg = '- The ssh configuration file configured by --ssh-config is valid\n' if ctx.ssh_config else ''
    err_msg = f"""
** Please verify your user's ssh configuration and make sure:
- User {ctx.ssh_user} must have passwordless sudo access
{pub_key_msg}{prv_key_msg}{ssh_cfg_msg}
"""
    if rc != 0:
        raise Error(err_msg)
7368 | ||
7369 | ||
def command_prepare_host(ctx: CephadmContext) -> None:
    """Prepare the local host for running Ceph daemons (prepare-host).

    Ensures a container engine (podman or docker), lvm2 and a time-sync
    daemon are installed, optionally forces the expected hostname, then
    re-runs the full host check.
    """
    logger.info('Verifying podman|docker is present...')
    pkg = None  # Packager is created lazily and reused by all install steps
    try:
        check_container_engine(ctx)
    except Error as e:
        # no usable container engine: log the problem and try to install
        # one via the distro's package manager
        logger.warning(str(e))
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install_podman()

    logger.info('Verifying lvm2 is present...')
    if not find_executable('lvcreate'):
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install(['lvm2'])

    logger.info('Verifying time synchronization is in place...')
    if not check_time_sync(ctx):
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install(['chrony'])
        # check again, and this time try to enable
        # the service
        check_time_sync(ctx, enabler=pkg)

    # if requested, force the hostname: both the live hostname and the
    # persisted /etc/hostname are updated
    if 'expect_hostname' in ctx and ctx.expect_hostname and ctx.expect_hostname != get_hostname():
        logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), ctx.expect_hostname))
        call_throws(ctx, ['hostname', ctx.expect_hostname])
        with open('/etc/hostname', 'w') as f:
            f.write(ctx.expect_hostname + '\n')

    logger.info('Repeating the final host check...')
    command_check_host(ctx)
9f95a23c TL |
7404 | |
7405 | ################################## | |
7406 | ||
f6b5b4d7 | 7407 | |
9f95a23c TL |
class CustomValidation(argparse.Action):
    """argparse action that validates daemon names.

    When bound to the ``name`` destination it enforces the
    ``<daemon_type>.<daemon_id>`` format with a supported daemon type;
    for any other destination it simply stores the value.
    """

    def _check_name(self, values: str) -> None:
        """Raise ArgumentError unless *values* is a valid <type>.<id> name."""
        if '.' not in values:
            raise argparse.ArgumentError(self,
                                         'must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com')
        daemon_type, _daemon_id = values.split('.', 1)

        supported = get_supported_daemons()
        if daemon_type not in supported:
            raise argparse.ArgumentError(self,
                                         'name must declare the type of daemon e.g. '
                                         '{}'.format(', '.join(supported)))

    def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace, values: Union[str, Sequence[Any], None],
                 option_string: Optional[str] = None) -> None:
        assert isinstance(values, str)
        if self.dest == 'name':
            self._check_name(values)
        setattr(namespace, self.dest, values)
7429 | ||
7430 | ################################## | |
7431 | ||
f6b5b4d7 | 7432 | |
def get_distro():
    # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
    """Parse /etc/os-release and return (ID, VERSION_ID, VERSION_CODENAME).

    Each element is the lower-cased, unquoted value from the os-release
    file, or None when the corresponding key is absent.
    """
    distro = None
    distro_version = None
    distro_codename = None
    with open('/etc/os-release', 'r') as f:
        for line in f.readlines():
            line = line.strip()
            if '=' not in line or line.startswith('#'):
                continue
            (var, val) = line.split('=', 1)
            # strip surrounding double quotes; the length guard avoids an
            # IndexError on an empty value (a bare `KEY=` line)
            if len(val) >= 2 and val[0] == '"' and val[-1] == '"':
                val = val[1:-1]
            if var == 'ID':
                distro = val.lower()
            elif var == 'VERSION_ID':
                distro_version = val.lower()
            elif var == 'VERSION_CODENAME':
                distro_codename = val.lower()
    return distro, distro_version, distro_codename
7453 | ||
f6b5b4d7 | 7454 | |
class Packager(object):
    """Base class for distro-specific package/repository helpers.

    At most one build-selection mechanism may be supplied: a ``stable``
    named release, an exact ``version`` (x.y.z), or a dev ``branch``
    (optionally pinned to a ``commit``).
    """

    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str] = None, version: Optional[str] = None,
                 branch: Optional[str] = None, commit: Optional[str] = None):
        # enforce that only one selection mechanism (or none) was given
        assert \
            (stable and not version and not branch and not commit) or \
            (not stable and version and not branch and not commit) or \
            (not stable and not version and branch) or \
            (not stable and not version and not branch and not commit)
        self.ctx = ctx
        self.stable = stable
        self.version = version
        self.branch = branch
        self.commit = commit

    def validate(self) -> None:
        """Validate parameters before writing any state to disk."""
        pass

    def add_repo(self) -> None:
        raise NotImplementedError

    def rm_repo(self) -> None:
        raise NotImplementedError

    def install(self, ls: List[str]) -> None:
        raise NotImplementedError

    def install_podman(self) -> None:
        raise NotImplementedError

    def query_shaman(self, distro: str, distro_version: Any, branch: Optional[str], commit: Optional[str]) -> str:
        """Resolve a dev branch/commit to its chacra repo file via shaman."""
        logger.info('Fetching repo metadata from shaman and chacra...')
        shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
            distro=distro,
            distro_version=distro_version,
            branch=branch,
            sha1=commit or 'latest',
            arch=get_arch()
        )
        try:
            shaman_resp = urlopen(shaman_url)
        except HTTPError as err:
            logger.error('repository not found in shaman (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, shaman_url))
        chacra_url = ''
        try:
            # shaman redirects to the chacra-hosted repo file
            chacra_url = shaman_resp.geturl()
            chacra_resp = urlopen(chacra_url)
        except HTTPError as err:
            logger.error('repository not found in chacra (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, chacra_url))
        return chacra_resp.read().decode('utf-8')

    def repo_gpgkey(self) -> Tuple[str, str]:
        """Return (gpg key url, short key name) for the selected repo."""
        if self.ctx.gpg_url:
            return self.ctx.gpg_url, 'manual'
        if self.stable or self.version:
            return 'https://download.ceph.com/keys/release.gpg', 'release'
        return 'https://download.ceph.com/keys/autobuild.gpg', 'autobuild'

    def enable_service(self, service: str) -> None:
        """
        Start and enable the service (typically using systemd).
        """
        call_throws(self.ctx, ['systemctl', 'enable', '--now', service])
9f95a23c TL |
7523 | |
7524 | ||
class Apt(Packager):
    """Packager implementation for Debian/Ubuntu (apt-based) hosts."""

    # os-release ID -> name used in download.ceph.com repo paths
    DISTRO_NAMES = {
        'ubuntu': 'ubuntu',
        'debian': 'debian',
    }

    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
                 distro: Optional[str], distro_version: Optional[str], distro_codename: Optional[str]) -> None:
        super(Apt, self).__init__(ctx, stable=stable, version=version,
                                  branch=branch, commit=commit)
        assert distro
        self.ctx = ctx
        self.distro = self.DISTRO_NAMES[distro]
        self.distro_codename = distro_codename
        self.distro_version = distro_version

    def repo_path(self) -> str:
        """Path of the apt source list written by add_repo()."""
        return '/etc/apt/sources.list.d/ceph.list'

    def add_repo(self) -> None:
        """Install the repo GPG key and write the ceph apt source list."""

        url, name = self.repo_gpgkey()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read()
        with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'wb') as f:
            f.write(key)

        # stable/version builds come from download.ceph.com; dev builds
        # are resolved through shaman/chacra
        if self.version:
            content = 'deb %s/debian-%s/ %s main\n' % (
                self.ctx.repo_url, self.version, self.distro_codename)
        elif self.stable:
            content = 'deb %s/debian-%s/ %s main\n' % (
                self.ctx.repo_url, self.stable, self.distro_codename)
        else:
            content = self.query_shaman(self.distro, self.distro_codename, self.branch,
                                        self.commit)

        logger.info('Installing repo file at %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        self.update()

    def rm_repo(self) -> None:
        """Remove any ceph GPG keys, the source list and the kubic repo."""
        for name in ['autobuild', 'release', 'manual']:
            p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
            if os.path.exists(p):
                logger.info('Removing repo GPG key %s...' % p)
                os.unlink(p)
        if os.path.exists(self.repo_path()):
            logger.info('Removing repo at %s...' % self.repo_path())
            os.unlink(self.repo_path())

        # the kubic podman repo is only ever configured on ubuntu
        if self.distro == 'ubuntu':
            self.rm_kubic_repo()

    def install(self, ls: List[str]) -> None:
        """Install the given packages non-interactively via apt-get."""
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, ['apt-get', 'install', '-y'] + ls)

    def update(self) -> None:
        """Refresh the apt package index."""
        logger.info('Updating package list...')
        call_throws(self.ctx, ['apt-get', 'update'])

    def install_podman(self) -> None:
        """Install podman (via the kubic repo on ubuntu), else docker.io."""
        if self.distro == 'ubuntu':
            logger.info('Setting up repo for podman...')
            self.add_kubic_repo()
            self.update()

        logger.info('Attempting podman install...')
        try:
            self.install(['podman'])
        except Error:
            logger.info('Podman did not work. Falling back to docker...')
            self.install(['docker.io'])

    def kubic_repo_url(self) -> str:
        """Base URL of the openSUSE kubic repo providing podman for ubuntu."""
        return 'https://download.opensuse.org/repositories/devel:/kubic:/' \
               'libcontainers:/stable/xUbuntu_%s/' % self.distro_version

    def kubic_repo_path(self) -> str:
        return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list'

    # NOTE: 'kubric' below is a long-standing typo for 'kubic'; the method
    # names are kept as-is for compatibility with existing callers.
    def kubric_repo_gpgkey_url(self) -> str:
        return '%s/Release.key' % self.kubic_repo_url()

    def kubric_repo_gpgkey_path(self) -> str:
        return '/etc/apt/trusted.gpg.d/kubic.release.gpg'

    def add_kubic_repo(self) -> None:
        """Install the kubic repo GPG key and apt source list."""
        url = self.kubric_repo_gpgkey_url()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        # write the key to a temp file (args presumably uid/gid 0 —
        # confirm against write_tmp) and import it into the keyring
        tmp_key = write_tmp(key, 0, 0)
        keyring = self.kubric_repo_gpgkey_path()
        call_throws(self.ctx, ['apt-key', '--keyring', keyring, 'add', tmp_key.name])

        logger.info('Installing repo file at %s...' % self.kubic_repo_path())
        content = 'deb %s /\n' % self.kubic_repo_url()
        with open(self.kubic_repo_path(), 'w') as f:
            f.write(content)

    def rm_kubic_repo(self) -> None:
        """Remove the kubic keyring and apt source list, if present."""
        keyring = self.kubric_repo_gpgkey_path()
        if os.path.exists(keyring):
            logger.info('Removing repo GPG key %s...' % keyring)
            os.unlink(keyring)

        p = self.kubic_repo_path()
        if os.path.exists(p):
            logger.info('Removing repo at %s...' % p)
            os.unlink(p)
7651 | ||
f6b5b4d7 | 7652 | |
9f95a23c TL |
class YumDnf(Packager):
    """Packager for yum/dnf (RHEL family, Fedora) and tdnf (Mariner) hosts."""

    # os-release ID -> (repo path name, distro family code)
    DISTRO_NAMES = {
        'centos': ('centos', 'el'),
        'rhel': ('centos', 'el'),
        'scientific': ('centos', 'el'),
        'rocky': ('centos', 'el'),
        'almalinux': ('centos', 'el'),
        'ol': ('centos', 'el'),
        'fedora': ('fedora', 'fc'),
        'mariner': ('mariner', 'cm'),
    }

    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
                 distro: Optional[str], distro_version: Optional[str]) -> None:
        super(YumDnf, self).__init__(ctx, stable=stable, version=version,
                                     branch=branch, commit=commit)
        assert distro
        assert distro_version
        self.ctx = ctx
        self.major = int(distro_version.split('.')[0])
        self.distro_normalized = self.DISTRO_NAMES[distro][0]
        distro_family = self.DISTRO_NAMES[distro][1]  # e.g. 'el', 'fc', 'cm'
        self.distro_code = distro_family + str(self.major)
        # Select the package tool for this distro generation. NOTE: this
        # used to compare self.distro_code (e.g. 'el8', 'cm1') against the
        # bare family code ('el'/'fc'/'cm'), which could never match, so
        # 'dnf'/'tdnf' were never selected; compare the family code instead.
        if (distro_family == 'fc' and self.major >= 30) or \
           (distro_family == 'el' and self.major >= 8):
            self.tool = 'dnf'
        elif distro_family == 'cm':
            self.tool = 'tdnf'
        else:
            self.tool = 'yum'

    def custom_repo(self, **kw: Any) -> str:
        """
        Repo files need special care in that a whole line should not be present
        if there is no value for it. Because we were using `format()` we could
        not conditionally add a line for a repo file. So the end result would
        contain a key with a missing value (say if we were passing `None`).

        For example, it could look like::

            [ceph repo]
            name= ceph repo
            proxy=
            gpgcheck=

        Which breaks. This function allows us to conditionally add lines,
        preserving an order and be more careful.

        Previously, and for historical purposes, this is how the template used
        to look::

            custom_repo =
            [{repo_name}]
            name={name}
            baseurl={baseurl}
            enabled={enabled}
            gpgcheck={gpgcheck}
            type={_type}
            gpgkey={gpgkey}
            proxy={proxy}

        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self) -> str:
        """Path of the yum/dnf repo file written by add_repo()."""
        return '/etc/yum.repos.d/ceph.repo'

    def repo_baseurl(self) -> str:
        """Return the rpm repo base URL for the selected version/release."""
        assert self.stable or self.version
        if self.version:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.version,
                                     self.distro_code)
        else:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.stable,
                                     self.distro_code)

    def validate(self) -> None:
        """Check that a repo can actually be provided for this distro/selection.

        Raises Error for unsupported distro/release combinations or when
        the computed repo URL is not reachable.
        """
        if self.distro_code.startswith('fc'):
            raise Error('Ceph team does not build Fedora specific packages and therefore cannot add repos for this distro')
        if self.distro_code == 'el7':
            if self.stable and self.stable >= 'pacific':
                raise Error('Ceph does not support pacific or later for this version of this linux distro and therefore cannot add a repo for it')
            if self.version:
                # compare the major release numerically; a plain string
                # comparison ('9' >= '16' is True) is wrong whenever the
                # majors have a different number of digits
                try:
                    major = int(self.version.split('.')[0])
                except ValueError:
                    raise Error('version must be in the form x.y.z (e.g., 15.2.0)')
                if major >= 16:
                    raise Error('Ceph does not support 16.y.z or later for this version of this linux distro and therefore cannot add a repo for it')

        if self.stable or self.version:
            # we know that yum & dnf require there to be a
            # $base_url/$arch/repodata/repomd.xml so we can test if this URL
            # is gettable in order to validate the inputs
            test_url = self.repo_baseurl() + '/noarch/repodata/repomd.xml'
            try:
                urlopen(test_url)
            except HTTPError as err:
                logger.error('unable to fetch repo metadata: %r', err)
                raise Error('failed to fetch repository metadata. please check'
                            ' the provided parameters are correct and try again')

    def add_repo(self) -> None:
        """Write the ceph repo file (and enable EPEL on el* hosts)."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            # dev builds are resolved through shaman/chacra
            content = self.query_shaman(self.distro_normalized, self.major,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        if self.distro_code.startswith('el'):
            logger.info('Enabling EPEL...')
            call_throws(self.ctx, [self.tool, 'install', '-y', 'epel-release'])

    def rm_repo(self) -> None:
        """Remove the ceph repo file, if present."""
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls: List[str]) -> None:
        """Install the given packages non-interactively."""
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, [self.tool, 'install', '-y'] + ls)

    def install_podman(self) -> None:
        self.install(['podman'])
7812 | ||
7813 | ||
class Zypper(Packager):
    """Packager implementation for SUSE-family hosts, driven by zypper."""

    DISTRO_NAMES = [
        'sles',
        'opensuse-tumbleweed',
        'opensuse-leap'
    ]

    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
                 distro: Optional[str], distro_version: Optional[str]) -> None:
        super(Zypper, self).__init__(ctx, stable=stable, version=version,
                                     branch=branch, commit=commit)
        assert distro is not None
        self.ctx = ctx
        self.tool = 'zypper'
        self.distro = 'opensuse'
        # tumbleweed has no usable VERSION_ID; default to a Leap version
        self.distro_version = '15.1'
        if 'tumbleweed' not in distro and distro_version is not None:
            self.distro_version = distro_version

    def custom_repo(self, **kw: Any) -> str:
        """
        See YumDnf for format explanation.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self) -> str:
        """Path of the zypper repo file written by add_repo()."""
        return '/etc/zypp/repos.d/ceph.repo'

    def repo_baseurl(self) -> str:
        """Return the rpm repo base URL for the selected version/release.

        Fixes a copy-paste bug: the --version branch previously
        interpolated self.stable (None in that case), producing a bogus
        'rpm-None' URL.
        """
        assert self.stable or self.version
        if self.version:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url,
                                     self.version, self.distro)
        else:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url,
                                     self.stable, self.distro)

    def add_repo(self) -> None:
        """Write the ceph repo file for the requested build."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            # dev builds are resolved through shaman/chacra
            content = self.query_shaman(self.distro, self.distro_version,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self) -> None:
        """Remove the ceph repo file, if present."""
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls: List[str]) -> None:
        """Install the given packages non-interactively via zypper."""
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, [self.tool, 'in', '-y'] + ls)

    def install_podman(self) -> None:
        self.install(['podman'])
7910 | ||
7911 | ||
def create_packager(ctx: CephadmContext,
                    stable: Optional[str] = None, version: Optional[str] = None,
                    branch: Optional[str] = None, commit: Optional[str] = None) -> Packager:
    """Instantiate the Packager subclass matching the host's distribution.

    Raises Error when no packager supports the detected distro.
    """
    distro, distro_version, distro_codename = get_distro()
    selection = dict(stable=stable, version=version, branch=branch, commit=commit)
    if distro in YumDnf.DISTRO_NAMES:
        return YumDnf(ctx, distro=distro, distro_version=distro_version,
                      **selection)
    if distro in Apt.DISTRO_NAMES:
        return Apt(ctx, distro=distro, distro_version=distro_version,
                   distro_codename=distro_codename, **selection)
    if distro in Zypper.DISTRO_NAMES:
        return Zypper(ctx, distro=distro, distro_version=distro_version,
                      **selection)
    raise Error('Distro %s version %s not supported' % (distro, distro_version))
7930 | ||
7931 | ||
def command_add_repo(ctx: CephadmContext) -> None:
    """Configure a Ceph package repository on this host (add-repo)."""
    if ctx.version and ctx.release:
        raise Error('you can specify either --release or --version but not both')
    if not any([ctx.version, ctx.release, ctx.dev, ctx.dev_commit]):
        raise Error('please supply a --release, --version, --dev or --dev-commit argument')
    if ctx.version:
        try:
            (x, y, z) = ctx.version.split('.')
        except Exception:
            raise Error('version must be in the form x.y.z (e.g., 15.2.0)')
    if ctx.release:
        # normalize so e.g. 'Pacific' and 'pacific' are treated the same
        ctx.release = ctx.release.lower()

    pkg = create_packager(ctx, stable=ctx.release,
                          version=ctx.version,
                          branch=ctx.dev,
                          commit=ctx.dev_commit)
    pkg.validate()
    pkg.add_repo()
    logger.info('Completed adding repo.')
9f95a23c | 7953 | |
f6b5b4d7 | 7954 | |
def command_rm_repo(ctx: CephadmContext) -> None:
    """Remove any previously configured Ceph package repository (rm-repo)."""
    create_packager(ctx).rm_repo()
7958 | ||
f6b5b4d7 | 7959 | |
def command_install(ctx: CephadmContext) -> None:
    """Install the packages named on the command line (install)."""
    create_packager(ctx).install(ctx.packages)
9f95a23c | 7963 | |
2a845540 TL |
7964 | |
def command_rescan_disks(ctx: CephadmContext) -> str:
    """Ask every compatible SCSI HBA on this host to rescan for devices.

    Writes the kernel's wildcard scan token to each
    /sys/class/scsi_host/*/scan file concurrently and returns a
    human-readable summary of rescanned/skipped/failed adapters.
    """

    def probe_hba(scan_path: str) -> None:
        """Tell the adapter to rescan"""
        # '- - -' is the kernel wildcard for channel/target/lun
        with open(scan_path, 'w') as f:
            f.write('- - -')

    # derive the user-facing command name from the handler function name
    cmd = ctx.func.__name__.replace('command_', '')
    logger.info(f'{cmd}: starting')
    start = time.time()

    all_scan_files = glob('/sys/class/scsi_host/*/scan')
    scan_files = []
    skipped = []
    for scan_path in all_scan_files:
        adapter_name = os.path.basename(os.path.dirname(scan_path))
        proc_name = read_file([os.path.join(os.path.dirname(scan_path), 'proc_name')])
        # skip adapters that can't meaningfully rescan
        if proc_name in ['unknown', 'usb-storage']:
            skipped.append(os.path.basename(scan_path))
            logger.info(f'{cmd}: rescan skipping incompatible host adapter {adapter_name} : {proc_name}')
            continue

        scan_files.append(scan_path)

    if not scan_files:
        logger.info(f'{cmd}: no compatible HBAs found')
        return 'Ok. No compatible HBAs found'

    # probe all adapters concurrently; truthy responses are treated as
    # failure descriptions and reported back to the caller
    responses = async_run(concurrent_tasks(probe_hba, scan_files))
    failures = [r for r in responses if r]

    logger.info(f'{cmd}: Complete. {len(scan_files)} adapters rescanned, {len(failures)} failures, {len(skipped)} skipped')

    elapsed = time.time() - start
    if failures:
        plural = 's' if len(failures) > 1 else ''
        if len(failures) == len(scan_files):
            return f'Failed. All {len(scan_files)} rescan requests failed'
        else:
            return f'Partial. {len(scan_files) - len(failures)} successful, {len(failures)} failure{plural} against: {", ".join(failures)}'

    return f'Ok. {len(all_scan_files)} adapters detected: {len(scan_files)} rescanned, {len(skipped)} skipped, {len(failures)} failed ({elapsed:.2f}s)'
8007 | ||
9f95a23c TL |
8008 | ################################## |
8009 | ||
f67539c2 | 8010 | |
f91f0fd5 TL |
def get_ipv4_address(ifname):
    # type: (str) -> str
    """Return the IPv4 address of *ifname* in CIDR form (e.g. '10.1.2.3/24').

    Returns an empty string when the interface has no IPv4 address
    (or does not exist).
    """
    def _ioctl_addr(sock: socket.socket, request: int) -> str:
        # the SIOC* ioctls fill in a struct ifreq; the IPv4 address
        # occupies bytes 20..24 of the packed result
        packed = fcntl.ioctl(
            sock.fileno(),
            request,
            struct.pack('256s', bytes(ifname[:15], 'utf-8')))
        return socket.inet_ntop(socket.AF_INET, packed[20:24])

    # close the socket deterministically (it was previously left open
    # until garbage collection — a descriptor leak)
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        try:
            addr = _ioctl_addr(s, 35093)     # 0x8915 = SIOCGIFADDR
            dq_mask = _ioctl_addr(s, 35099)  # 0x891b = SIOCGIFNETMASK
        except OSError:
            # interface does not have an ipv4 address
            return ''

    # convert the dotted-quad netmask to a prefix length by counting set bits
    prefix_len = sum(bin(int(octet)).count('1')
                     for octet in dq_mask.split('.'))
    return '{}/{}'.format(addr, prefix_len)
8033 | ||
8034 | ||
def get_ipv6_address(ifname):
    # type: (str) -> str
    """Return the IPv6 address of *ifname* as '<addr>/<scope>'.

    Reads /proc/net/if_inet6; returns an empty string when the file is
    missing or the interface has no IPv6 address.
    """
    if not os.path.exists('/proc/net/if_inet6'):
        return ''

    # based on docs @ https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/ch11s04.html
    # field 0 is the raw ipv6 address, field 2 the scope, the last field
    # is the interface name
    for entry in read_file(['/proc/net/if_inet6']).splitlines():
        fields = entry.split()
        if fields[-1] != ifname:
            continue
        raw = fields[0]
        # re-insert the ':' separators every four hex digits
        grouped = ':'.join(raw[pos:pos + 4] for pos in range(0, len(raw), 4))
        # apply naming rules using ipaddress module
        ipv6 = ipaddress.ip_address(grouped)
        return '{}/{}'.format(str(ipv6), int(fields[2], 16))
    return ''
8053 | ||
8054 | ||
def bytes_to_human(num, mode='decimal'):
    # type: (float, str) -> str
    """Convert a bytes value into it's human-readable form.

    :param num: number, in bytes, to convert
    :param mode: Either decimal (default) or binary to determine divisor
    :returns: string representing the bytes value in a more readable format
    """
    if mode == 'binary':
        units = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
        step = 1024.0
        top_unit = 'YiB'
    else:
        # any other mode value falls back to decimal units
        units = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
        step = 1000.0
        top_unit = 'YB'

    for suffix in units:
        if abs(num) < step:
            return '%3.1f%s' % (num, suffix)
        num /= step
    # fell through every unit: report in the largest one
    return '%.1f%s' % (num, top_unit)
f91f0fd5 TL |
8077 | |
8078 | ||
def read_file(path_list, file_name=''):
    # type: (List[str], str) -> str
    """Returns the content of the first file found within the `path_list`

    :param path_list: list of file paths to search
    :param file_name: optional file_name to be applied to a file path
    :returns: content of the file or 'Unknown'
    """
    for base in path_list:
        candidate = os.path.join(base, file_name) if file_name else base
        if not os.path.exists(candidate):
            continue
        with open(candidate, 'r') as f:
            try:
                return f.read().strip()
            except OSError:
                # sysfs may populate the file, but for devices like
                # virtio reads can fail
                return 'Unknown'
    return 'Unknown'
f91f0fd5 TL |
8103 | |
8104 | ################################## | |
f67539c2 TL |
8105 | |
8106 | ||
f91f0fd5 TL |
class HostFacts():
    """Collect host-level metadata (CPU, memory, storage, network, OS and
    security details) by reading /proc and /sys directly.

    Used by `cephadm gather-facts`; `dump()` serializes every public,
    JSON-friendly attribute and property of this object.
    """

    # Candidate directories for DMI (BIOS/vendor) data in sysfs.
    _dmi_path_list = ['/sys/class/dmi/id']
    # Candidate directories holding one entry per network interface.
    _nic_path_list = ['/sys/class/net']
    # Presence of this directory is treated as AppArmor being installed.
    _apparmor_path_list = ['/etc/apparmor']
    # Map raw sysfs vendor strings to friendlier names; virtio block
    # devices expose their PCI vendor id ('0x1af4') instead of a name.
    _disk_vendor_workarounds = {
        '0x1af4': 'Virtio Block Device'
    }
    # Block device name prefixes to skip (optical, compressed swap, device-mapper).
    _excluded_block_devices = ('sr', 'zram', 'dm-')

    def __init__(self, ctx: CephadmContext):
        self.ctx: CephadmContext = ctx
        self.cpu_model: str = 'Unknown'
        self.cpu_count: int = 0  # number of distinct physical ids (sockets)
        self.cpu_cores: int = 0
        self.cpu_threads: int = 0
        self.interfaces: Dict[str, Any] = {}

        # Cache /proc/meminfo once; _get_mem_data() scans these lines.
        self._meminfo: List[str] = read_file(['/proc/meminfo']).splitlines()
        self._get_cpuinfo()
        self._process_nics()
        self.arch: str = platform.processor()
        self.kernel: str = platform.release()

    def _get_cpuinfo(self):
        # type: () -> None
        """Determine cpu information via /proc/cpuinfo"""
        raw = read_file(['/proc/cpuinfo'])
        output = raw.splitlines()
        cpu_set = set()

        for line in output:
            field = [f.strip() for f in line.split(':')]
            if 'model name' in line:
                self.cpu_model = field[1]
            if 'physical id' in line:
                # one stanza per logical CPU; the set de-duplicates
                # entries so we end up counting physical sockets
                cpu_set.add(field[1])
            if 'siblings' in line:
                self.cpu_threads = int(field[1].strip())
            if 'cpu cores' in line:
                self.cpu_cores = int(field[1].strip())
            pass
        self.cpu_count = len(cpu_set)

    def _get_block_devs(self):
        # type: () -> List[str]
        """Determine the list of block devices by looking at /sys/block"""
        return [dev for dev in os.listdir('/sys/block')
                if not dev.startswith(HostFacts._excluded_block_devices)]

    def _get_devs_by_type(self, rota='0'):
        # type: (str) -> List[str]
        """Filter block devices by a given rotational attribute (0=flash, 1=spinner)"""
        devs = list()
        for blk_dev in self._get_block_devs():
            rot_path = '/sys/block/{}/queue/rotational'.format(blk_dev)
            rot_value = read_file([rot_path])
            if rot_value == rota:
                devs.append(blk_dev)
        return devs

    @property
    def operating_system(self):
        # type: () -> str
        """Determine OS version from /etc/os-release (NAME + VERSION)."""
        raw_info = read_file(['/etc/os-release'])
        os_release = raw_info.splitlines()
        rel_str = 'Unknown'
        rel_dict = dict()

        for line in os_release:
            if '=' in line:
                var_name, var_value = line.split('=')
                rel_dict[var_name] = var_value.strip('"')

        # Would normally use PRETTY_NAME, but NAME and VERSION are more
        # consistent
        if all(_v in rel_dict for _v in ['NAME', 'VERSION']):
            rel_str = '{} {}'.format(rel_dict['NAME'], rel_dict['VERSION'])
        return rel_str

    @property
    def hostname(self):
        # type: () -> str
        """Return the hostname"""
        return platform.node()

    @property
    def subscribed(self):
        # type: () -> str
        """Highlevel check to see if the host is subscribed to receive updates/support"""
        def _red_hat():
            # type: () -> str
            # RHEL 7 and RHEL 8
            entitlements_dir = '/etc/pki/entitlement'
            if os.path.exists(entitlements_dir):
                pems = glob('{}/*.pem'.format(entitlements_dir))
                if len(pems) >= 2:
                    # presumably a key + cert pem pair indicates an
                    # active subscription — TODO confirm the threshold
                    return 'Yes'

            return 'No'

        os_name = self.operating_system
        if os_name.upper().startswith('RED HAT'):
            return _red_hat()

        # non-Red-Hat distros: no subscription concept handled here
        return 'Unknown'

    @property
    def hdd_count(self):
        # type: () -> int
        """Return a count of HDDs (spinners)"""
        return len(self._get_devs_by_type(rota='1'))

    def _get_capacity(self, dev):
        # type: (str) -> int
        """Determine the size of a given device (in bytes)."""
        # sysfs 'size' is in 512-byte-style sectors; multiply by the
        # logical block size to get bytes
        size_path = os.path.join('/sys/block', dev, 'size')
        size_blocks = int(read_file([size_path]))
        blk_path = os.path.join('/sys/block', dev, 'queue', 'logical_block_size')
        blk_count = int(read_file([blk_path]))
        return size_blocks * blk_count

    def _get_capacity_by_type(self, rota='0'):
        # type: (str) -> int
        """Return the total capacity of a category of device (flash or hdd)"""
        devs = self._get_devs_by_type(rota=rota)
        capacity = 0
        for dev in devs:
            capacity += self._get_capacity(dev)
        return capacity

    def _dev_list(self, dev_list):
        # type: (List[str]) -> List[Dict[str, object]]
        """Return a 'pretty' name list for each device in the `dev_list`"""
        disk_list = list()

        for dev in dev_list:
            disk_model = read_file(['/sys/block/{}/device/model'.format(dev)]).strip()
            disk_rev = read_file(['/sys/block/{}/device/rev'.format(dev)]).strip()
            disk_wwid = read_file(['/sys/block/{}/device/wwid'.format(dev)]).strip()
            vendor = read_file(['/sys/block/{}/device/vendor'.format(dev)]).strip()
            # translate known odd vendor ids (e.g. virtio) to real names
            disk_vendor = HostFacts._disk_vendor_workarounds.get(vendor, vendor)
            disk_size_bytes = self._get_capacity(dev)
            disk_list.append({
                'description': '{} {} ({})'.format(disk_vendor, disk_model, bytes_to_human(disk_size_bytes)),
                'vendor': disk_vendor,
                'model': disk_model,
                'rev': disk_rev,
                'wwid': disk_wwid,
                'dev_name': dev,
                'disk_size_bytes': disk_size_bytes,
            })
        return disk_list

    @property
    def hdd_list(self):
        # type: () -> List[Dict[str, object]]
        """Return a list of devices that are HDDs (spinners)"""
        devs = self._get_devs_by_type(rota='1')
        return self._dev_list(devs)

    @property
    def flash_list(self):
        # type: () -> List[Dict[str, object]]
        """Return a list of devices that are flash based (SSD, NVMe)"""
        devs = self._get_devs_by_type(rota='0')
        return self._dev_list(devs)

    @property
    def hdd_capacity_bytes(self):
        # type: () -> int
        """Return the total capacity for all HDD devices (bytes)"""
        return self._get_capacity_by_type(rota='1')

    @property
    def hdd_capacity(self):
        # type: () -> str
        """Return the total capacity for all HDD devices (human readable format)"""
        return bytes_to_human(self.hdd_capacity_bytes)

    @property
    def cpu_load(self):
        # type: () -> Dict[str, float]
        """Return the cpu load average data for the host"""
        raw = read_file(['/proc/loadavg']).strip()
        data = raw.split()
        return {
            '1min': float(data[0]),
            '5min': float(data[1]),
            '15min': float(data[2]),
        }

    @property
    def flash_count(self):
        # type: () -> int
        """Return the number of flash devices in the system (SSD, NVMe)"""
        return len(self._get_devs_by_type(rota='0'))

    @property
    def flash_capacity_bytes(self):
        # type: () -> int
        """Return the total capacity for all flash devices (bytes)"""
        return self._get_capacity_by_type(rota='0')

    @property
    def flash_capacity(self):
        # type: () -> str
        """Return the total capacity for all Flash devices (human readable format)"""
        return bytes_to_human(self.flash_capacity_bytes)

    def _process_nics(self):
        # type: () -> None
        """Look at the NIC devices and extract network related metadata"""
        # from https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
        hw_lookup = {
            '1': 'ethernet',
            '32': 'infiniband',
            '772': 'loopback',
        }

        for nic_path in HostFacts._nic_path_list:
            if not os.path.exists(nic_path):
                continue
            for iface in os.listdir(nic_path):

                # bridge/bonding are recognised by their marker subdirs;
                # everything else falls back to the ARP hardware type code
                if os.path.exists(os.path.join(nic_path, iface, 'bridge')):
                    nic_type = 'bridge'
                elif os.path.exists(os.path.join(nic_path, iface, 'bonding')):
                    nic_type = 'bonding'
                else:
                    nic_type = hw_lookup.get(read_file([os.path.join(nic_path, iface, 'type')]), 'Unknown')

                if nic_type == 'loopback':  # skip loopback devices
                    continue

                # lower_*/upper_* symlinks express stacking (e.g. bond members)
                lower_devs_list = [os.path.basename(link.replace('lower_', '')) for link in glob(os.path.join(nic_path, iface, 'lower_*'))]
                upper_devs_list = [os.path.basename(link.replace('upper_', '')) for link in glob(os.path.join(nic_path, iface, 'upper_*'))]

                try:
                    mtu = int(read_file([os.path.join(nic_path, iface, 'mtu')]))
                except ValueError:
                    mtu = 0

                operstate = read_file([os.path.join(nic_path, iface, 'operstate')])
                try:
                    speed = int(read_file([os.path.join(nic_path, iface, 'speed')]))
                except (OSError, ValueError):
                    # OSError : device doesn't support the ethtool get_link_ksettings
                    # ValueError : raised when the read fails, and returns Unknown
                    #
                    # Either way, we show a -1 when speed isn't available
                    speed = -1

                # a 'device' link means real hardware backs this interface
                dev_link = os.path.join(nic_path, iface, 'device')
                if os.path.exists(dev_link):
                    iftype = 'physical'
                    driver_path = os.path.join(dev_link, 'driver')
                    if os.path.exists(driver_path):
                        driver = os.path.basename(os.path.realpath(driver_path))
                    else:
                        driver = 'Unknown'

                else:
                    iftype = 'logical'
                    driver = ''

                self.interfaces[iface] = {
                    'mtu': mtu,
                    'upper_devs_list': upper_devs_list,
                    'lower_devs_list': lower_devs_list,
                    'operstate': operstate,
                    'iftype': iftype,
                    'nic_type': nic_type,
                    'driver': driver,
                    'speed': speed,
                    'ipv4_address': get_ipv4_address(iface),
                    'ipv6_address': get_ipv6_address(iface),
                }

    @property
    def nic_count(self):
        # type: () -> int
        """Return a total count of all physical NICs detected in the host"""
        phys_devs = []
        for iface in self.interfaces:
            if self.interfaces[iface]['iftype'] == 'physical':
                phys_devs.append(iface)
        return len(phys_devs)

    def _get_mem_data(self, field_name):
        # type: (str) -> int
        """Return the value (in kB, as reported) of a /proc/meminfo field, or 0."""
        for line in self._meminfo:
            if line.startswith(field_name):
                _d = line.split()
                return int(_d[1])
        return 0

    @property
    def memory_total_kb(self):
        # type: () -> int
        """Determine the memory installed (kb)"""
        return self._get_mem_data('MemTotal')

    @property
    def memory_free_kb(self):
        # type: () -> int
        """Determine the memory free (not cache, immediately usable)"""
        return self._get_mem_data('MemFree')

    @property
    def memory_available_kb(self):
        # type: () -> int
        """Determine the memory available to new applications without swapping"""
        return self._get_mem_data('MemAvailable')

    @property
    def vendor(self):
        # type: () -> str
        """Determine server vendor from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, 'sys_vendor')

    @property
    def model(self):
        # type: () -> str
        """Determine server model information from DMI data in sysfs"""
        family = read_file(HostFacts._dmi_path_list, 'product_family')
        product = read_file(HostFacts._dmi_path_list, 'product_name')
        if family == 'Unknown' and product:
            return '{}'.format(product)

        return '{} ({})'.format(family, product)

    @property
    def bios_version(self):
        # type: () -> str
        """Determine server BIOS version from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, 'bios_version')

    @property
    def bios_date(self):
        # type: () -> str
        """Determine server BIOS date from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, 'bios_date')

    @property
    def timestamp(self):
        # type: () -> float
        """Return the current time as Epoch seconds"""
        return time.time()

    @property
    def system_uptime(self):
        # type: () -> float
        """Return the system uptime (in secs)"""
        raw_time = read_file(['/proc/uptime'])
        up_secs, _ = raw_time.split()
        return float(up_secs)

    @property
    def kernel_security(self):
        # type: () -> Dict[str, str]
        """Determine the security features enabled in the kernel - SELinux, AppArmor"""
        def _fetch_selinux() -> Dict[str, str]:
            """Get the selinux status via the `sestatus` CLI."""
            security = {}
            try:
                out, err, code = call(self.ctx, ['sestatus'],
                                      verbosity=CallVerbosity.QUIET)
                security['type'] = 'SELinux'
                status, mode, policy = '', '', ''
                for line in out.split('\n'):
                    if line.startswith('SELinux status:'):
                        k, v = line.split(':')
                        status = v.strip()
                    elif line.startswith('Current mode:'):
                        k, v = line.split(':')
                        mode = v.strip()
                    elif line.startswith('Loaded policy name:'):
                        k, v = line.split(':')
                        policy = v.strip()
                if status == 'disabled':
                    security['description'] = 'SELinux: Disabled'
                else:
                    security['description'] = 'SELinux: Enabled({}, {})'.format(mode, policy)
            except Exception as e:
                logger.info('unable to get selinux status: %s' % e)
            return security

        def _fetch_apparmor() -> Dict[str, str]:
            """Read the apparmor profiles directly, returning an overview of AppArmor status"""
            security = {}
            for apparmor_path in HostFacts._apparmor_path_list:
                if os.path.exists(apparmor_path):
                    security['type'] = 'AppArmor'
                    security['description'] = 'AppArmor: Enabled'
                    try:
                        profiles = read_file(['/sys/kernel/security/apparmor/profiles'])
                        if len(profiles) == 0:
                            return {}
                    except OSError:
                        pass
                    else:
                        # count profiles per enforcement mode, e.g. "name (enforce)"
                        summary = {}  # type: Dict[str, int]
                        for line in profiles.split('\n'):
                            item, mode = line.split(' ')
                            mode = mode.strip('()')
                            if mode in summary:
                                summary[mode] += 1
                            else:
                                # NOTE(review): first profile of each mode is
                                # counted as 0 — looks like an off-by-one; confirm
                                summary[mode] = 0
                        summary_str = ','.join(['{} {}'.format(v, k) for k, v in summary.items()])
                        security = {**security, **summary}  # type: ignore
                        security['description'] += '({})'.format(summary_str)

                    return security
            return {}

        ret = {}
        if os.path.exists('/sys/kernel/security/lsm'):
            lsm = read_file(['/sys/kernel/security/lsm']).strip()
            if 'selinux' in lsm:
                ret = _fetch_selinux()
            elif 'apparmor' in lsm:
                ret = _fetch_apparmor()
            else:
                return {
                    'type': 'Unknown',
                    'description': 'Linux Security Module framework is active, but is not using SELinux or AppArmor'
                }

        if ret:
            return ret

        return {
            'type': 'None',
            'description': 'Linux Security Module framework is not available'
        }

    @property
    def selinux_enabled(self) -> bool:
        # NOTE: each access of kernel_security re-runs the detection
        # (it is a property), so this evaluates it twice
        return (self.kernel_security['type'] == 'SELinux') and \
               (self.kernel_security['description'] != 'SELinux: Disabled')

    @property
    def kernel_parameters(self):
        # type: () -> Dict[str, str]
        """Get kernel parameters required/used in Ceph clusters"""

        k_param = {}
        out, _, _ = call_throws(self.ctx, ['sysctl', '-a'], verbosity=CallVerbosity.SILENT)
        if out:
            param_list = out.split('\n')
            param_dict = {param.split(' = ')[0]: param.split(' = ')[-1] for param in param_list}

            # return only desired parameters
            if 'net.ipv4.ip_nonlocal_bind' in param_dict:
                k_param['net.ipv4.ip_nonlocal_bind'] = param_dict['net.ipv4.ip_nonlocal_bind']

        return k_param

    @staticmethod
    def _process_net_data(tcp_file: str, protocol: str = 'tcp') -> List[int]:
        """Parse a /proc/net/{tcp,udp}* table and return listening port numbers."""
        listening_ports = []
        # Connections state documentation
        # tcp - https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/net/tcp_states.h
        # udp - uses 07 (TCP_CLOSE or UNCONN, since udp is stateless. test with netcat -ul <port>)
        listening_state = {
            'tcp': '0A',
            'udp': '07'
        }

        if protocol not in listening_state.keys():
            return []

        if os.path.exists(tcp_file):
            with open(tcp_file) as f:
                tcp_data = f.readlines()[1:]  # skip the header row

            for con in tcp_data:
                con_info = con.strip().split()
                if con_info[3] == listening_state[protocol]:
                    # local address column is hex '<addr>:<port>'
                    local_port = int(con_info[1].split(':')[1], 16)
                    listening_ports.append(local_port)

        return listening_ports

    @property
    def tcp_ports_used(self) -> List[int]:
        return HostFacts._process_net_data('/proc/net/tcp')

    @property
    def tcp6_ports_used(self) -> List[int]:
        return HostFacts._process_net_data('/proc/net/tcp6')

    @property
    def udp_ports_used(self) -> List[int]:
        return HostFacts._process_net_data('/proc/net/udp', 'udp')

    @property
    def udp6_ports_used(self) -> List[int]:
        return HostFacts._process_net_data('/proc/net/udp6', 'udp')

    def dump(self):
        # type: () -> str
        """Return the attributes of this HostFacts object as json"""
        # only public, JSON-serializable members are emitted
        data = {
            k: getattr(self, k) for k in dir(self)
            if not k.startswith('_')
            and isinstance(getattr(self, k), (float, int, str, list, dict, tuple))
        }
        return json.dumps(data, indent=2, sort_keys=True)
8618 | ||
8619 | ################################## | |
8620 | ||
f67539c2 | 8621 | |
def command_gather_facts(ctx: CephadmContext) -> None:
    """Collect host-related metadata and print it as JSON for the caller."""
    facts = HostFacts(ctx)
    print(facts.dump())
8626 | ||
f67539c2 TL |
8627 | |
8628 | ################################## | |
8629 | ||
8630 | ||
def systemd_target_state(ctx: CephadmContext, target_name: str, subsystem: str = 'ceph') -> bool:
    """Return True when `target_name` is wanted by the subsystem's systemd target.

    Checks for the wants-symlink under `<unit_dir>/<subsystem>.target.wants/`.
    """
    # TODO: UNITTEST
    wants_dir = os.path.join(ctx.unit_dir, f'{subsystem}.target.wants')
    return os.path.exists(os.path.join(wants_dir, target_name))
8640 | ||
8641 | ||
a4b75251 TL |
8642 | def target_exists(ctx: CephadmContext) -> bool: |
8643 | return os.path.exists(ctx.unit_dir + '/ceph.target') | |
8644 | ||
8645 | ||
@infer_fsid
def command_maintenance(ctx: CephadmContext) -> str:
    """Enter or exit host maintenance mode for a cluster.

    Entering maintenance disables and stops the cluster's systemd target;
    exiting re-enables and starts it.

    :param ctx: context carrying ``fsid`` and ``maintenance_action``
                ('enter' or anything else, treated as exit)
    :returns: human-readable status string prefixed 'success', 'failed'
              or 'skipped'
    :raises Error: when no fsid was supplied or inferred
    """
    if not ctx.fsid:
        raise Error('failed - must pass --fsid to specify cluster')

    target = f'ceph-{ctx.fsid}.target'

    if ctx.maintenance_action.lower() == 'enter':
        logger.info('Requested to place host into maintenance')
        if systemd_target_state(ctx, target):
            _out, _err, code = call(ctx,
                                    ['systemctl', 'disable', target],
                                    verbosity=CallVerbosity.DEBUG)
            if code:
                logger.error(f'Failed to disable the {target} target')
                return 'failed - to disable the target'
            else:
                # stopping a target waits by default
                _out, _err, code = call(ctx,
                                        ['systemctl', 'stop', target],
                                        verbosity=CallVerbosity.DEBUG)
                if code:
                    logger.error(f'Failed to stop the {target} target')
                    # bug fix: previously returned the 'disable' failure
                    # message here, misreporting which step failed
                    return 'failed - to stop the target'
                else:
                    return f'success - systemd target {target} disabled'

        else:
            return 'skipped - target already disabled'

    else:
        logger.info('Requested to exit maintenance state')
        # if we've never deployed a daemon on this host there will be no systemd
        # target to disable so attempting a disable will fail. We still need to
        # return success here or host will be permanently stuck in maintenance mode
        # as no daemons can be deployed so no systemd target will ever exist to disable.
        if not target_exists(ctx):
            return 'skipped - systemd target not present on this host. Host removed from maintenance mode.'
        # exit maintenance request
        if not systemd_target_state(ctx, target):
            _out, _err, code = call(ctx,
                                    ['systemctl', 'enable', target],
                                    verbosity=CallVerbosity.DEBUG)
            if code:
                logger.error(f'Failed to enable the {target} target')
                return 'failed - unable to enable the target'
            else:
                # starting a target waits by default
                _out, _err, code = call(ctx,
                                        ['systemctl', 'start', target],
                                        verbosity=CallVerbosity.DEBUG)
                if code:
                    logger.error(f'Failed to start the {target} target')
                    return 'failed - unable to start the target'
                else:
                    return f'success - systemd target {target} enabled and started'
    # target already enabled when exiting maintenance: nothing to do
    return f'success - systemd target {target} enabled and started'
f91f0fd5 TL |
8703 | |
8704 | ################################## | |
8705 | ||
f6b5b4d7 | 8706 | |
9f95a23c TL |
8707 | def _get_parser(): |
8708 | # type: () -> argparse.ArgumentParser | |
8709 | parser = argparse.ArgumentParser( | |
8710 | description='Bootstrap Ceph daemons with systemd and containers.', | |
8711 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) | |
8712 | parser.add_argument( | |
8713 | '--image', | |
8714 | help='container image. Can also be set via the "CEPHADM_IMAGE" ' | |
8715 | 'env var') | |
8716 | parser.add_argument( | |
8717 | '--docker', | |
8718 | action='store_true', | |
8719 | help='use docker instead of podman') | |
8720 | parser.add_argument( | |
8721 | '--data-dir', | |
8722 | default=DATA_DIR, | |
8723 | help='base directory for daemon data') | |
8724 | parser.add_argument( | |
8725 | '--log-dir', | |
8726 | default=LOG_DIR, | |
8727 | help='base directory for daemon logs') | |
8728 | parser.add_argument( | |
8729 | '--logrotate-dir', | |
8730 | default=LOGROTATE_DIR, | |
8731 | help='location of logrotate configuration files') | |
b3b6e05e TL |
8732 | parser.add_argument( |
8733 | '--sysctl-dir', | |
8734 | default=SYSCTL_DIR, | |
8735 | help='location of sysctl configuration files') | |
9f95a23c TL |
8736 | parser.add_argument( |
8737 | '--unit-dir', | |
8738 | default=UNIT_DIR, | |
8739 | help='base directory for systemd units') | |
8740 | parser.add_argument( | |
8741 | '--verbose', '-v', | |
8742 | action='store_true', | |
8743 | help='Show debug-level log messages') | |
8744 | parser.add_argument( | |
8745 | '--timeout', | |
8746 | type=int, | |
8747 | default=DEFAULT_TIMEOUT, | |
8748 | help='timeout in seconds') | |
8749 | parser.add_argument( | |
8750 | '--retry', | |
8751 | type=int, | |
8752 | default=DEFAULT_RETRY, | |
8753 | help='max number of retries') | |
e306af50 TL |
8754 | parser.add_argument( |
8755 | '--env', '-e', | |
8756 | action='append', | |
8757 | default=[], | |
8758 | help='set environment variable') | |
f67539c2 TL |
8759 | parser.add_argument( |
8760 | '--no-container-init', | |
8761 | action='store_true', | |
8762 | default=not CONTAINER_INIT, | |
8763 | help='Do not run podman/docker with `--init`') | |
9f95a23c TL |
8764 | |
8765 | subparsers = parser.add_subparsers(help='sub-command') | |
8766 | ||
8767 | parser_version = subparsers.add_parser( | |
8768 | 'version', help='get ceph version from container') | |
8769 | parser_version.set_defaults(func=command_version) | |
8770 | ||
8771 | parser_pull = subparsers.add_parser( | |
33c7a0ef | 8772 | 'pull', help='pull the default container image') |
9f95a23c | 8773 | parser_pull.set_defaults(func=command_pull) |
a4b75251 TL |
8774 | parser_pull.add_argument( |
8775 | '--insecure', | |
8776 | action='store_true', | |
8777 | help=argparse.SUPPRESS, | |
8778 | ) | |
9f95a23c TL |
8779 | |
8780 | parser_inspect_image = subparsers.add_parser( | |
8781 | 'inspect-image', help='inspect local container image') | |
8782 | parser_inspect_image.set_defaults(func=command_inspect_image) | |
8783 | ||
8784 | parser_ls = subparsers.add_parser( | |
8785 | 'ls', help='list daemon instances on this host') | |
8786 | parser_ls.set_defaults(func=command_ls) | |
8787 | parser_ls.add_argument( | |
8788 | '--no-detail', | |
8789 | action='store_true', | |
8790 | help='Do not include daemon status') | |
8791 | parser_ls.add_argument( | |
8792 | '--legacy-dir', | |
8793 | default='/', | |
8794 | help='base directory for legacy daemon data') | |
8795 | ||
8796 | parser_list_networks = subparsers.add_parser( | |
8797 | 'list-networks', help='list IP networks') | |
8798 | parser_list_networks.set_defaults(func=command_list_networks) | |
8799 | ||
8800 | parser_adopt = subparsers.add_parser( | |
8801 | 'adopt', help='adopt daemon deployed with a different tool') | |
8802 | parser_adopt.set_defaults(func=command_adopt) | |
8803 | parser_adopt.add_argument( | |
8804 | '--name', '-n', | |
8805 | required=True, | |
8806 | help='daemon name (type.id)') | |
8807 | parser_adopt.add_argument( | |
8808 | '--style', | |
8809 | required=True, | |
8810 | help='deployment style (legacy, ...)') | |
8811 | parser_adopt.add_argument( | |
8812 | '--cluster', | |
8813 | default='ceph', | |
8814 | help='cluster name') | |
8815 | parser_adopt.add_argument( | |
8816 | '--legacy-dir', | |
8817 | default='/', | |
8818 | help='base directory for legacy daemon data') | |
8819 | parser_adopt.add_argument( | |
8820 | '--config-json', | |
8821 | help='Additional configuration information in JSON format') | |
8822 | parser_adopt.add_argument( | |
8823 | '--skip-firewalld', | |
8824 | action='store_true', | |
8825 | help='Do not configure firewalld') | |
8826 | parser_adopt.add_argument( | |
8827 | '--skip-pull', | |
8828 | action='store_true', | |
33c7a0ef | 8829 | help='do not pull the default image before adopting') |
1911f103 TL |
8830 | parser_adopt.add_argument( |
8831 | '--force-start', | |
8832 | action='store_true', | |
f67539c2 | 8833 | help='start newly adoped daemon, even if it was not running previously') |
f91f0fd5 TL |
8834 | parser_adopt.add_argument( |
8835 | '--container-init', | |
8836 | action='store_true', | |
f67539c2 TL |
8837 | default=CONTAINER_INIT, |
8838 | help=argparse.SUPPRESS) | |
9f95a23c TL |
8839 | |
8840 | parser_rm_daemon = subparsers.add_parser( | |
8841 | 'rm-daemon', help='remove daemon instance') | |
8842 | parser_rm_daemon.set_defaults(func=command_rm_daemon) | |
8843 | parser_rm_daemon.add_argument( | |
8844 | '--name', '-n', | |
8845 | required=True, | |
8846 | action=CustomValidation, | |
8847 | help='daemon name (type.id)') | |
33c7a0ef TL |
8848 | parser_rm_daemon.add_argument( |
8849 | '--tcp-ports', | |
8850 | help='List of tcp ports to close in the host firewall') | |
9f95a23c TL |
8851 | parser_rm_daemon.add_argument( |
8852 | '--fsid', | |
8853 | required=True, | |
8854 | help='cluster FSID') | |
8855 | parser_rm_daemon.add_argument( | |
8856 | '--force', | |
8857 | action='store_true', | |
8858 | help='proceed, even though this may destroy valuable data') | |
8859 | parser_rm_daemon.add_argument( | |
8860 | '--force-delete-data', | |
8861 | action='store_true', | |
8862 | help='delete valuable daemon data instead of making a backup') | |
8863 | ||
8864 | parser_rm_cluster = subparsers.add_parser( | |
8865 | 'rm-cluster', help='remove all daemons for a cluster') | |
8866 | parser_rm_cluster.set_defaults(func=command_rm_cluster) | |
8867 | parser_rm_cluster.add_argument( | |
8868 | '--fsid', | |
8869 | required=True, | |
8870 | help='cluster FSID') | |
8871 | parser_rm_cluster.add_argument( | |
8872 | '--force', | |
8873 | action='store_true', | |
8874 | help='proceed, even though this may destroy valuable data') | |
f67539c2 TL |
8875 | parser_rm_cluster.add_argument( |
8876 | '--keep-logs', | |
8877 | action='store_true', | |
8878 | help='do not remove log files') | |
b3b6e05e TL |
8879 | parser_rm_cluster.add_argument( |
8880 | '--zap-osds', | |
8881 | action='store_true', | |
8882 | help='zap OSD devices for this cluster') | |
9f95a23c TL |
8883 | |
8884 | parser_run = subparsers.add_parser( | |
8885 | 'run', help='run a ceph daemon, in a container, in the foreground') | |
8886 | parser_run.set_defaults(func=command_run) | |
8887 | parser_run.add_argument( | |
8888 | '--name', '-n', | |
8889 | required=True, | |
8890 | help='daemon name (type.id)') | |
8891 | parser_run.add_argument( | |
8892 | '--fsid', | |
8893 | required=True, | |
8894 | help='cluster FSID') | |
8895 | ||
8896 | parser_shell = subparsers.add_parser( | |
8897 | 'shell', help='run an interactive shell inside a daemon container') | |
8898 | parser_shell.set_defaults(func=command_shell) | |
20effc67 TL |
8899 | parser_shell.add_argument( |
8900 | '--shared_ceph_folder', | |
8901 | metavar='CEPH_SOURCE_FOLDER', | |
8902 | help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder') | |
9f95a23c TL |
8903 | parser_shell.add_argument( |
8904 | '--fsid', | |
8905 | help='cluster FSID') | |
8906 | parser_shell.add_argument( | |
8907 | '--name', '-n', | |
8908 | help='daemon name (type.id)') | |
8909 | parser_shell.add_argument( | |
8910 | '--config', '-c', | |
8911 | help='ceph.conf to pass through to the container') | |
8912 | parser_shell.add_argument( | |
8913 | '--keyring', '-k', | |
8914 | help='ceph.keyring to pass through to the container') | |
e306af50 TL |
8915 | parser_shell.add_argument( |
8916 | '--mount', '-m', | |
f67539c2 TL |
8917 | help=('mount a file or directory in the container. ' |
8918 | 'Support multiple mounts. ' | |
8919 | 'ie: `--mount /foo /bar:/bar`. ' | |
8920 | 'When no destination is passed, default is /mnt'), | |
8921 | nargs='+') | |
9f95a23c TL |
8922 | parser_shell.add_argument( |
8923 | '--env', '-e', | |
8924 | action='append', | |
8925 | default=[], | |
8926 | help='set environment variable') | |
b3b6e05e TL |
8927 | parser_shell.add_argument( |
8928 | '--volume', '-v', | |
8929 | action='append', | |
8930 | default=[], | |
8931 | help='set environment variable') | |
9f95a23c | 8932 | parser_shell.add_argument( |
e306af50 | 8933 | 'command', nargs=argparse.REMAINDER, |
9f95a23c | 8934 | help='command (optional)') |
b3b6e05e TL |
8935 | parser_shell.add_argument( |
8936 | '--no-hosts', | |
8937 | action='store_true', | |
8938 | help='dont pass /etc/hosts through to the container') | |
9f95a23c TL |
8939 | |
8940 | parser_enter = subparsers.add_parser( | |
8941 | 'enter', help='run an interactive shell inside a running daemon container') | |
8942 | parser_enter.set_defaults(func=command_enter) | |
8943 | parser_enter.add_argument( | |
8944 | '--fsid', | |
8945 | help='cluster FSID') | |
8946 | parser_enter.add_argument( | |
8947 | '--name', '-n', | |
8948 | required=True, | |
8949 | help='daemon name (type.id)') | |
8950 | parser_enter.add_argument( | |
e306af50 | 8951 | 'command', nargs=argparse.REMAINDER, |
9f95a23c TL |
8952 | help='command') |
8953 | ||
8954 | parser_ceph_volume = subparsers.add_parser( | |
8955 | 'ceph-volume', help='run ceph-volume inside a container') | |
8956 | parser_ceph_volume.set_defaults(func=command_ceph_volume) | |
20effc67 TL |
8957 | parser_ceph_volume.add_argument( |
8958 | '--shared_ceph_folder', | |
8959 | metavar='CEPH_SOURCE_FOLDER', | |
8960 | help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder') | |
9f95a23c TL |
8961 | parser_ceph_volume.add_argument( |
8962 | '--fsid', | |
8963 | help='cluster FSID') | |
8964 | parser_ceph_volume.add_argument( | |
8965 | '--config-json', | |
20effc67 | 8966 | help='JSON file with config and (client.bootstrap-osd) key') |
801d1391 TL |
8967 | parser_ceph_volume.add_argument( |
8968 | '--config', '-c', | |
8969 | help='ceph conf file') | |
8970 | parser_ceph_volume.add_argument( | |
8971 | '--keyring', '-k', | |
8972 | help='ceph.keyring to pass through to the container') | |
9f95a23c | 8973 | parser_ceph_volume.add_argument( |
e306af50 | 8974 | 'command', nargs=argparse.REMAINDER, |
9f95a23c TL |
8975 | help='command') |
8976 | ||
b3b6e05e TL |
8977 | parser_zap_osds = subparsers.add_parser( |
8978 | 'zap-osds', help='zap all OSDs associated with a particular fsid') | |
8979 | parser_zap_osds.set_defaults(func=command_zap_osds) | |
8980 | parser_zap_osds.add_argument( | |
8981 | '--fsid', | |
8982 | required=True, | |
8983 | help='cluster FSID') | |
8984 | parser_zap_osds.add_argument( | |
8985 | '--force', | |
8986 | action='store_true', | |
8987 | help='proceed, even though this may destroy valuable data') | |
8988 | ||
9f95a23c | 8989 | parser_unit = subparsers.add_parser( |
f67539c2 | 8990 | 'unit', help="operate on the daemon's systemd unit") |
9f95a23c TL |
8991 | parser_unit.set_defaults(func=command_unit) |
8992 | parser_unit.add_argument( | |
8993 | 'command', | |
8994 | help='systemd command (start, stop, restart, enable, disable, ...)') | |
8995 | parser_unit.add_argument( | |
8996 | '--fsid', | |
8997 | help='cluster FSID') | |
8998 | parser_unit.add_argument( | |
8999 | '--name', '-n', | |
9000 | required=True, | |
9001 | help='daemon name (type.id)') | |
9002 | ||
9003 | parser_logs = subparsers.add_parser( | |
9004 | 'logs', help='print journald logs for a daemon container') | |
9005 | parser_logs.set_defaults(func=command_logs) | |
9006 | parser_logs.add_argument( | |
9007 | '--fsid', | |
9008 | help='cluster FSID') | |
9009 | parser_logs.add_argument( | |
9010 | '--name', '-n', | |
9011 | required=True, | |
9012 | help='daemon name (type.id)') | |
9013 | parser_logs.add_argument( | |
9014 | 'command', nargs='*', | |
9015 | help='additional journalctl args') | |
9016 | ||
9017 | parser_bootstrap = subparsers.add_parser( | |
9018 | 'bootstrap', help='bootstrap a cluster (mon + mgr daemons)') | |
9019 | parser_bootstrap.set_defaults(func=command_bootstrap) | |
9020 | parser_bootstrap.add_argument( | |
9021 | '--config', '-c', | |
9022 | help='ceph conf file to incorporate') | |
9023 | parser_bootstrap.add_argument( | |
9024 | '--mon-id', | |
9025 | required=False, | |
9026 | help='mon id (default: local hostname)') | |
33c7a0ef TL |
9027 | group = parser_bootstrap.add_mutually_exclusive_group() |
9028 | group.add_argument( | |
9f95a23c TL |
9029 | '--mon-addrv', |
9030 | help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])') | |
33c7a0ef | 9031 | group.add_argument( |
9f95a23c TL |
9032 | '--mon-ip', |
9033 | help='mon IP') | |
9034 | parser_bootstrap.add_argument( | |
9035 | '--mgr-id', | |
9036 | required=False, | |
9037 | help='mgr id (default: randomly generated)') | |
9038 | parser_bootstrap.add_argument( | |
9039 | '--fsid', | |
9040 | help='cluster FSID') | |
9041 | parser_bootstrap.add_argument( | |
9042 | '--output-dir', | |
9043 | default='/etc/ceph', | |
9044 | help='directory to write config, keyring, and pub key files') | |
9045 | parser_bootstrap.add_argument( | |
9046 | '--output-keyring', | |
9047 | help='location to write keyring file with new cluster admin and mon keys') | |
9048 | parser_bootstrap.add_argument( | |
9049 | '--output-config', | |
9050 | help='location to write conf file to connect to new cluster') | |
9051 | parser_bootstrap.add_argument( | |
9052 | '--output-pub-ssh-key', | |
f67539c2 | 9053 | help="location to write the cluster's public SSH key") |
b3b6e05e TL |
9054 | parser_bootstrap.add_argument( |
9055 | '--skip-admin-label', | |
9056 | action='store_true', | |
9057 | help='do not create admin label for ceph.conf and client.admin keyring distribution') | |
9f95a23c TL |
9058 | parser_bootstrap.add_argument( |
9059 | '--skip-ssh', | |
9060 | action='store_true', | |
9061 | help='skip setup of ssh key on local host') | |
9062 | parser_bootstrap.add_argument( | |
9063 | '--initial-dashboard-user', | |
9064 | default='admin', | |
9065 | help='Initial user for the dashboard') | |
9066 | parser_bootstrap.add_argument( | |
9067 | '--initial-dashboard-password', | |
9068 | help='Initial password for the initial dashboard user') | |
f6b5b4d7 TL |
9069 | parser_bootstrap.add_argument( |
9070 | '--ssl-dashboard-port', | |
9071 | type=int, | |
f67539c2 | 9072 | default=8443, |
f6b5b4d7 | 9073 | help='Port number used to connect with dashboard using SSL') |
9f95a23c TL |
9074 | parser_bootstrap.add_argument( |
9075 | '--dashboard-key', | |
e306af50 | 9076 | type=argparse.FileType('r'), |
9f95a23c TL |
9077 | help='Dashboard key') |
9078 | parser_bootstrap.add_argument( | |
9079 | '--dashboard-crt', | |
e306af50 | 9080 | type=argparse.FileType('r'), |
9f95a23c TL |
9081 | help='Dashboard certificate') |
9082 | ||
e306af50 TL |
9083 | parser_bootstrap.add_argument( |
9084 | '--ssh-config', | |
9085 | type=argparse.FileType('r'), | |
9086 | help='SSH config') | |
9087 | parser_bootstrap.add_argument( | |
9088 | '--ssh-private-key', | |
9089 | type=argparse.FileType('r'), | |
9090 | help='SSH private key') | |
9091 | parser_bootstrap.add_argument( | |
9092 | '--ssh-public-key', | |
9093 | type=argparse.FileType('r'), | |
9094 | help='SSH public key') | |
f6b5b4d7 TL |
9095 | parser_bootstrap.add_argument( |
9096 | '--ssh-user', | |
9097 | default='root', | |
9098 | help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users') | |
9f95a23c TL |
9099 | parser_bootstrap.add_argument( |
9100 | '--skip-mon-network', | |
9101 | action='store_true', | |
9102 | help='set mon public_network based on bootstrap mon ip') | |
9103 | parser_bootstrap.add_argument( | |
9104 | '--skip-dashboard', | |
9105 | action='store_true', | |
9106 | help='do not enable the Ceph Dashboard') | |
9107 | parser_bootstrap.add_argument( | |
9108 | '--dashboard-password-noupdate', | |
9109 | action='store_true', | |
9110 | help='stop forced dashboard password change') | |
9111 | parser_bootstrap.add_argument( | |
9112 | '--no-minimize-config', | |
9113 | action='store_true', | |
9114 | help='do not assimilate and minimize the config file') | |
9115 | parser_bootstrap.add_argument( | |
9116 | '--skip-ping-check', | |
9117 | action='store_true', | |
9118 | help='do not verify that mon IP is pingable') | |
9119 | parser_bootstrap.add_argument( | |
9120 | '--skip-pull', | |
9121 | action='store_true', | |
33c7a0ef | 9122 | help='do not pull the default image before bootstrapping') |
9f95a23c TL |
9123 | parser_bootstrap.add_argument( |
9124 | '--skip-firewalld', | |
9125 | action='store_true', | |
9126 | help='Do not configure firewalld') | |
9127 | parser_bootstrap.add_argument( | |
9128 | '--allow-overwrite', | |
9129 | action='store_true', | |
9130 | help='allow overwrite of existing --output-* config/keyring/ssh files') | |
9131 | parser_bootstrap.add_argument( | |
9132 | '--allow-fqdn-hostname', | |
9133 | action='store_true', | |
9134 | help='allow hostname that is fully-qualified (contains ".")') | |
f67539c2 TL |
9135 | parser_bootstrap.add_argument( |
9136 | '--allow-mismatched-release', | |
9137 | action='store_true', | |
9138 | help="allow bootstrap of ceph that doesn't match this version of cephadm") | |
9f95a23c TL |
9139 | parser_bootstrap.add_argument( |
9140 | '--skip-prepare-host', | |
9141 | action='store_true', | |
9142 | help='Do not prepare host') | |
9143 | parser_bootstrap.add_argument( | |
9144 | '--orphan-initial-daemons', | |
9145 | action='store_true', | |
f67539c2 | 9146 | help='Set mon and mgr service to `unmanaged`, Do not create the crash service') |
9f95a23c TL |
9147 | parser_bootstrap.add_argument( |
9148 | '--skip-monitoring-stack', | |
9149 | action='store_true', | |
9150 | help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)') | |
2a845540 TL |
9151 | parser_bootstrap.add_argument( |
9152 | '--with-centralized-logging', | |
9153 | action='store_true', | |
9154 | help='Automatically provision centralized logging (promtail, loki)') | |
e306af50 TL |
9155 | parser_bootstrap.add_argument( |
9156 | '--apply-spec', | |
9157 | help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)') | |
e306af50 TL |
9158 | parser_bootstrap.add_argument( |
9159 | '--shared_ceph_folder', | |
9160 | metavar='CEPH_SOURCE_FOLDER', | |
9161 | help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder') | |
9f95a23c | 9162 | |
f6b5b4d7 TL |
9163 | parser_bootstrap.add_argument( |
9164 | '--registry-url', | |
9165 | help='url for custom registry') | |
9166 | parser_bootstrap.add_argument( | |
9167 | '--registry-username', | |
9168 | help='username for custom registry') | |
9169 | parser_bootstrap.add_argument( | |
9170 | '--registry-password', | |
9171 | help='password for custom registry') | |
9172 | parser_bootstrap.add_argument( | |
9173 | '--registry-json', | |
9174 | help='json file with custom registry login info (URL, Username, Password)') | |
f91f0fd5 TL |
9175 | parser_bootstrap.add_argument( |
9176 | '--container-init', | |
9177 | action='store_true', | |
f67539c2 TL |
9178 | default=CONTAINER_INIT, |
9179 | help=argparse.SUPPRESS) | |
f67539c2 TL |
9180 | parser_bootstrap.add_argument( |
9181 | '--cluster-network', | |
9182 | help='subnet to use for cluster replication, recovery and heartbeats (in CIDR notation network/mask)') | |
b3b6e05e TL |
9183 | parser_bootstrap.add_argument( |
9184 | '--single-host-defaults', | |
9185 | action='store_true', | |
9186 | help='adjust configuration defaults to suit a single-host cluster') | |
522d829b TL |
9187 | parser_bootstrap.add_argument( |
9188 | '--log-to-file', | |
9189 | action='store_true', | |
9190 | help='configure cluster to log to traditional log files in /var/log/ceph/$fsid') | |
f6b5b4d7 | 9191 | |
9f95a23c TL |
9192 | parser_deploy = subparsers.add_parser( |
9193 | 'deploy', help='deploy a daemon') | |
9194 | parser_deploy.set_defaults(func=command_deploy) | |
9195 | parser_deploy.add_argument( | |
9196 | '--name', | |
9197 | required=True, | |
9198 | action=CustomValidation, | |
9199 | help='daemon name (type.id)') | |
9200 | parser_deploy.add_argument( | |
9201 | '--fsid', | |
9202 | required=True, | |
9203 | help='cluster FSID') | |
9204 | parser_deploy.add_argument( | |
9205 | '--config', '-c', | |
9206 | help='config file for new daemon') | |
9207 | parser_deploy.add_argument( | |
9208 | '--config-json', | |
9209 | help='Additional configuration information in JSON format') | |
9210 | parser_deploy.add_argument( | |
9211 | '--keyring', | |
9212 | help='keyring for new daemon') | |
9213 | parser_deploy.add_argument( | |
9214 | '--key', | |
9215 | help='key for new daemon') | |
9216 | parser_deploy.add_argument( | |
9217 | '--osd-fsid', | |
9218 | help='OSD uuid, if creating an OSD container') | |
9219 | parser_deploy.add_argument( | |
9220 | '--skip-firewalld', | |
9221 | action='store_true', | |
9222 | help='Do not configure firewalld') | |
f6b5b4d7 TL |
9223 | parser_deploy.add_argument( |
9224 | '--tcp-ports', | |
9225 | help='List of tcp ports to open in the host firewall') | |
9f95a23c TL |
9226 | parser_deploy.add_argument( |
9227 | '--reconfig', | |
9228 | action='store_true', | |
9229 | help='Reconfigure a previously deployed daemon') | |
9230 | parser_deploy.add_argument( | |
9231 | '--allow-ptrace', | |
9232 | action='store_true', | |
9233 | help='Allow SYS_PTRACE on daemon container') | |
f91f0fd5 TL |
9234 | parser_deploy.add_argument( |
9235 | '--container-init', | |
9236 | action='store_true', | |
f67539c2 TL |
9237 | default=CONTAINER_INIT, |
9238 | help=argparse.SUPPRESS) | |
9239 | parser_deploy.add_argument( | |
9240 | '--memory-request', | |
9241 | help='Container memory request/target' | |
9242 | ) | |
9243 | parser_deploy.add_argument( | |
9244 | '--memory-limit', | |
9245 | help='Container memory hard limit' | |
9246 | ) | |
9247 | parser_deploy.add_argument( | |
9248 | '--meta-json', | |
9249 | help='JSON dict of additional metadata' | |
9250 | ) | |
20effc67 TL |
9251 | parser_deploy.add_argument( |
9252 | '--extra-container-args', | |
9253 | action='append', | |
9254 | default=[], | |
9255 | help='Additional container arguments to apply to deamon' | |
9256 | ) | |
9f95a23c TL |
9257 | |
9258 | parser_check_host = subparsers.add_parser( | |
9259 | 'check-host', help='check host configuration') | |
9260 | parser_check_host.set_defaults(func=command_check_host) | |
9261 | parser_check_host.add_argument( | |
9262 | '--expect-hostname', | |
9263 | help='Check that hostname matches an expected value') | |
9264 | ||
9265 | parser_prepare_host = subparsers.add_parser( | |
9266 | 'prepare-host', help='prepare a host for cephadm use') | |
9267 | parser_prepare_host.set_defaults(func=command_prepare_host) | |
9268 | parser_prepare_host.add_argument( | |
9269 | '--expect-hostname', | |
9270 | help='Set hostname') | |
9271 | ||
9272 | parser_add_repo = subparsers.add_parser( | |
9273 | 'add-repo', help='configure package repository') | |
9274 | parser_add_repo.set_defaults(func=command_add_repo) | |
9275 | parser_add_repo.add_argument( | |
9276 | '--release', | |
1911f103 | 9277 | help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE)) |
9f95a23c TL |
9278 | parser_add_repo.add_argument( |
9279 | '--version', | |
9280 | help='use specific upstream version (x.y.z)') | |
9281 | parser_add_repo.add_argument( | |
9282 | '--dev', | |
9283 | help='use specified bleeding edge build from git branch or tag') | |
9284 | parser_add_repo.add_argument( | |
9285 | '--dev-commit', | |
9286 | help='use specified bleeding edge build from git commit') | |
9287 | parser_add_repo.add_argument( | |
9288 | '--gpg-url', | |
9289 | help='specify alternative GPG key location') | |
9290 | parser_add_repo.add_argument( | |
9291 | '--repo-url', | |
9292 | default='https://download.ceph.com', | |
9293 | help='specify alternative repo location') | |
9294 | # TODO: proxy? | |
9295 | ||
9296 | parser_rm_repo = subparsers.add_parser( | |
9297 | 'rm-repo', help='remove package repository configuration') | |
9298 | parser_rm_repo.set_defaults(func=command_rm_repo) | |
9299 | ||
9300 | parser_install = subparsers.add_parser( | |
9301 | 'install', help='install ceph package(s)') | |
9302 | parser_install.set_defaults(func=command_install) | |
9303 | parser_install.add_argument( | |
9304 | 'packages', nargs='*', | |
9305 | default=['cephadm'], | |
9306 | help='packages') | |
9307 | ||
f6b5b4d7 TL |
9308 | parser_registry_login = subparsers.add_parser( |
9309 | 'registry-login', help='log host into authenticated registry') | |
9310 | parser_registry_login.set_defaults(func=command_registry_login) | |
9311 | parser_registry_login.add_argument( | |
9312 | '--registry-url', | |
9313 | help='url for custom registry') | |
9314 | parser_registry_login.add_argument( | |
9315 | '--registry-username', | |
9316 | help='username for custom registry') | |
9317 | parser_registry_login.add_argument( | |
9318 | '--registry-password', | |
9319 | help='password for custom registry') | |
9320 | parser_registry_login.add_argument( | |
9321 | '--registry-json', | |
9322 | help='json file with custom registry login info (URL, Username, Password)') | |
9323 | parser_registry_login.add_argument( | |
9324 | '--fsid', | |
9325 | help='cluster FSID') | |
9326 | ||
f91f0fd5 TL |
9327 | parser_gather_facts = subparsers.add_parser( |
9328 | 'gather-facts', help='gather and return host related information (JSON format)') | |
9329 | parser_gather_facts.set_defaults(func=command_gather_facts) | |
9330 | ||
f67539c2 TL |
9331 | parser_maintenance = subparsers.add_parser( |
9332 | 'host-maintenance', help='Manage the maintenance state of a host') | |
9333 | parser_maintenance.add_argument( | |
9334 | '--fsid', | |
9335 | help='cluster FSID') | |
9336 | parser_maintenance.add_argument( | |
9337 | 'maintenance_action', | |
9338 | type=str, | |
9339 | choices=['enter', 'exit'], | |
9340 | help='Maintenance action - enter maintenance, or exit maintenance') | |
9341 | parser_maintenance.set_defaults(func=command_maintenance) | |
9342 | ||
20effc67 TL |
9343 | parser_agent = subparsers.add_parser( |
9344 | 'agent', help='start cephadm agent') | |
9345 | parser_agent.set_defaults(func=command_agent) | |
9346 | parser_agent.add_argument( | |
9347 | '--fsid', | |
9348 | required=True, | |
9349 | help='cluster FSID') | |
9350 | parser_agent.add_argument( | |
9351 | '--daemon-id', | |
9352 | help='daemon id for agent') | |
9353 | ||
2a845540 TL |
9354 | parser_disk_rescan = subparsers.add_parser( |
9355 | 'disk-rescan', help='rescan all HBAs to detect new/removed devices') | |
9356 | parser_disk_rescan.set_defaults(func=command_rescan_disks) | |
9357 | ||
9f95a23c TL |
9358 | return parser |
9359 | ||
f6b5b4d7 | 9360 | |
def _parse_args(av: List[str]) -> argparse.Namespace:
    """Parse an argv-style list into the cephadm argument namespace.

    Beyond plain argparse parsing this strips a leading ``--`` separator
    from the passthrough ``command`` list and reconciles the deprecated
    ``--container-init`` flag with ``--no-container-init`` so the two
    namespace attributes always end up mutually exclusive.
    """
    parser = _get_parser()

    parsed = parser.parse_args(av)
    # Drop a leading '--' separator from the passthrough command, if present.
    if 'command' in parsed and parsed.command and parsed.command[0] == '--':
        del parsed.command[0]

    # workaround argparse to deprecate the subparser `--container-init` flag
    # container_init and no_container_init must always be mutually exclusive
    init_flags = ('--container-init', '--no-container-init')
    if all(flag in av for flag in init_flags):
        parser.error('argument %s: not allowed with argument %s' % init_flags)
    elif '--container-init' in av:
        parsed.no_container_init = not parsed.container_init
    else:
        parsed.container_init = not parsed.no_container_init
    assert parsed.container_init is not parsed.no_container_init

    return parsed
9f95a23c | 9380 | |
f6b5b4d7 | 9381 | |
def cephadm_init_ctx(args: List[str]) -> CephadmContext:
    """Return a fresh CephadmContext populated from the given argv list."""
    context = CephadmContext()
    context.set_args(_parse_args(args))
    return context
9386 | ||
9387 | ||
20effc67 TL |
def cephadm_init_logging(ctx: CephadmContext, args: List[str]) -> None:
    """Configure the logging for cephadm as well as updating the system
    to have the expected log dir and logrotate configuration.
    """
    logging.addLevelName(QUIET_LOG_LEVEL, 'QUIET')
    global logger
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    # bootstrap and rm-cluster talk to the user on the terminal, so they
    # get the interactive logging configuration; everything else logs
    # through the standard configuration.
    interactive = any(op in args for op in ('bootstrap', 'rm-cluster'))
    dictConfig(interactive_logging_config if interactive else logging_config)

    logger = logging.getLogger()
    logger.setLevel(QUIET_LOG_LEVEL)

    # Install a logrotate policy for /var/log/ceph/cephadm.log on first use.
    logrotate_conf = ctx.logrotate_dir + '/cephadm'
    if not os.path.exists(logrotate_conf):
        with open(logrotate_conf, 'w') as f:
            f.write("""# created by cephadm
/var/log/ceph/cephadm.log {
    rotate 7
    daily
    compress
    missingok
    notifempty
    su root root
}
""")

    if ctx.verbose:
        # Verbose mode lowers the terminal/file handlers to the QUIET level
        # so debug output becomes visible.
        for handler in logger.handlers:
            if handler.name in ('console', 'log_file', 'console_stdout'):
                handler.setLevel(QUIET_LOG_LEVEL)
    logger.debug('%s\ncephadm %s' % ('-' * 80, args))
f67539c2 TL |
9423 | |
9424 | ||
20effc67 TL |
def cephadm_require_root() -> None:
    """Exit if the process is not running as root."""
    if os.geteuid() == 0:
        return
    sys.stderr.write('ERROR: cephadm should be run as root\n')
    sys.exit(1)
9430 | ||
20effc67 TL |
9431 | |
def main() -> None:
    """Top-level cephadm entry point.

    Parses argv, requires root, configures logging, detects the container
    engine and dispatches to the selected subcommand handler, exiting the
    process with the handler's return code (0 when the handler returns a
    falsy value).
    """
    # Fix: the original initialized av to [] and then immediately rebound
    # it to sys.argv[1:] — a dead store; assign once instead.
    av: List[str] = sys.argv[1:]

    ctx = cephadm_init_ctx(av)
    if not ctx.has_function():
        sys.stderr.write('No command specified; pass -h or --help for usage\n')
        sys.exit(1)

    cephadm_require_root()
    cephadm_init_logging(ctx, av)
    try:
        # podman or docker?
        ctx.container_engine = find_container_engine(ctx)
        # These commands must work before any container engine is
        # installed, so only the remaining commands get the engine check.
        if ctx.func not in [
            command_check_host,
            command_prepare_host,
            command_add_repo,
            command_rm_repo,
            command_install,
        ]:
            check_container_engine(ctx)
        # command handler
        r = ctx.func(ctx)
    except Error as e:
        # In verbose mode surface the full traceback; otherwise log and
        # exit with a generic failure code.
        if ctx.verbose:
            raise
        logger.error('ERROR: %s' % e)
        sys.exit(1)
    if not r:
        r = 0
    sys.exit(r)
f67539c2 TL |
9465 | |
9466 | ||
9467 | if __name__ == '__main__': | |
9468 | main() |