]>
Commit | Line | Data |
---|---|---|
9f95a23c TL |
1 | #!/usr/bin/python3 |
2 | ||
f67539c2 TL |
3 | import asyncio |
4 | import asyncio.subprocess | |
5 | import argparse | |
6 | import datetime | |
7 | import fcntl | |
8 | import ipaddress | |
20effc67 | 9 | import io |
f67539c2 TL |
10 | import json |
11 | import logging | |
12 | from logging.config import dictConfig | |
13 | import os | |
14 | import platform | |
15 | import pwd | |
16 | import random | |
17 | import shlex | |
18 | import shutil | |
19 | import socket | |
20 | import string | |
21 | import subprocess | |
22 | import sys | |
23 | import tempfile | |
24 | import time | |
25 | import errno | |
26 | import struct | |
f67539c2 TL |
27 | import ssl |
28 | from enum import Enum | |
a4b75251 | 29 | from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, Callable, IO, Sequence, TypeVar, cast, Set, Iterable |
f67539c2 TL |
30 | |
31 | import re | |
32 | import uuid | |
33 | ||
34 | from configparser import ConfigParser | |
20effc67 | 35 | from contextlib import redirect_stdout |
f67539c2 TL |
36 | from functools import wraps |
37 | from glob import glob | |
38 | from io import StringIO | |
20effc67 TL |
39 | from threading import Thread, Event |
40 | from urllib.error import HTTPError, URLError | |
41 | from urllib.request import urlopen, Request | |
f67539c2 TL |
42 | from pathlib import Path |
43 | ||
522d829b TL |
FuncT = TypeVar('FuncT', bound=Callable)

# Default container images -----------------------------------------------------
DEFAULT_IMAGE = 'quay.io/ceph/ceph:v17'
DEFAULT_IMAGE_IS_MASTER = False
DEFAULT_IMAGE_RELEASE = 'quincy'
DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.33.4'
DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.4.0'
DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.4.0'
DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.3.1'
DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.23.0'
DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/ceph-grafana:8.3.5'
DEFAULT_HAPROXY_IMAGE = 'docker.io/library/haproxy:2.3'
DEFAULT_KEEPALIVED_IMAGE = 'docker.io/arcts/keepalived'
DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1'
DEFAULT_REGISTRY = 'docker.io'   # normalize unqualified digests to this
# ------------------------------------------------------------------------------

LATEST_STABLE_RELEASE = 'quincy'

# Host filesystem layout used by cephadm for daemon data, logs and units.
DATA_DIR = '/var/lib/ceph'
LOG_DIR = '/var/log/ceph'
LOCK_DIR = '/run/cephadm'
LOGROTATE_DIR = '/etc/logrotate.d'
SYSCTL_DIR = '/etc/sysctl.d'
UNIT_DIR = '/etc/systemd/system'

# Names/paths of the cluster config artifacts written at bootstrap.
CEPH_CONF_DIR = 'config'
CEPH_CONF = 'ceph.conf'
CEPH_PUBKEY = 'ceph.pub'
CEPH_KEYRING = 'ceph.client.admin.keyring'
CEPH_DEFAULT_CONF = f'/etc/ceph/{CEPH_CONF}'
CEPH_DEFAULT_KEYRING = f'/etc/ceph/{CEPH_KEYRING}'
CEPH_DEFAULT_PUBKEY = f'/etc/ceph/{CEPH_PUBKEY}'

LOG_DIR_MODE = 0o770
DATA_DIR_MODE = 0o700
CONTAINER_INIT = True
MIN_PODMAN_VERSION = (2, 0, 2)
CGROUPS_SPLIT_PODMAN_VERSION = (2, 1, 0)
CUSTOM_PS1 = r'[ceph: \u@\h \W]\$ '
DEFAULT_TIMEOUT = None  # in seconds
DEFAULT_RETRY = 15
DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ'

# Module-level logger; initialized later via dictConfig before use.
logger: logging.Logger = None  # type: ignore

"""
You can invoke cephadm in two ways:

1. The normal way, at the command line.

2. By piping the script to the python3 binary. In this latter case, you should
   prepend one or more lines to the beginning of the script.

   For arguments,

       injected_argv = [...]

   e.g.,

       injected_argv = ['ls']

   For reading stdin from the '--config-json -' argument,

       injected_stdin = '...'
"""
# Cache for stdin content when the script is driven via '--config-json -'.
cached_stdin = None
f91f0fd5 | 109 | |
f67539c2 | 110 | ################################## |
9f95a23c | 111 | |
9f95a23c | 112 | |
33c7a0ef TL |
class EndPoint:
    """EndPoint representing an ip:port format"""

    def __init__(self, ip: str, port: int) -> None:
        self.ip = ip
        self.port = port

    def __str__(self) -> str:
        # Render the canonical "ip:port" form.
        return '{}:{}'.format(self.ip, self.port)

    def __repr__(self) -> str:
        # repr intentionally matches str for compact logging.
        return str(self)
125 | ||
126 | ||
class ContainerInfo:
    """Value object describing one running container's identity and version."""

    def __init__(self, container_id: str,
                 image_name: str,
                 image_id: str,
                 start: str,
                 version: str) -> None:
        self.container_id = container_id
        self.image_name = image_name
        self.image_id = image_id
        self.start = start
        self.version = version

    def __eq__(self, other: Any) -> bool:
        # Field-wise equality; defer to the other operand for foreign types.
        if not isinstance(other, ContainerInfo):
            return NotImplemented
        mine = (self.container_id, self.image_name, self.image_id,
                self.start, self.version)
        theirs = (other.container_id, other.image_name, other.image_id,
                  other.start, other.version)
        return mine == theirs
147 | ||
148 | ||
class BaseConfig:
    """Mutable bag of cephadm settings, pre-populated with module defaults."""

    def __init__(self) -> None:
        self.image: str = ''
        self.docker: bool = False
        self.data_dir: str = DATA_DIR
        self.log_dir: str = LOG_DIR
        self.logrotate_dir: str = LOGROTATE_DIR
        self.sysctl_dir: str = SYSCTL_DIR
        self.unit_dir: str = UNIT_DIR
        self.verbose: bool = False
        self.timeout: Optional[int] = DEFAULT_TIMEOUT
        self.retry: int = DEFAULT_RETRY
        self.env: List[str] = []
        self.memory_request: Optional[int] = None
        self.memory_limit: Optional[int] = None
        self.log_to_journald: Optional[bool] = None

        self.container_init: bool = CONTAINER_INIT
        self.container_engine: Optional[ContainerEngine] = None

    def set_from_args(self, args: argparse.Namespace) -> None:
        """Overwrite any attribute whose name appears in the parsed args."""
        for key, value in vars(args).items():
            if hasattr(self, key):
                setattr(self, key, value)
175 | ||
176 | ||
class CephadmContext:
    """Single object the rest of cephadm passes around: a merged view over
    the BaseConfig defaults (_conf) and the parsed argparse namespace (_args).

    Attribute reads and writes are proxied: _conf is consulted first, then
    _args, then normal object attribute handling.
    """

    def __init__(self) -> None:
        # Write straight into __dict__ so our own __setattr__ (which
        # dereferences _conf/_args) is not triggered before they exist.
        self.__dict__['_args'] = None
        self.__dict__['_conf'] = BaseConfig()

    def set_args(self, args: argparse.Namespace) -> None:
        # Fold matching values into the config, then keep the raw namespace
        # so non-config args remain reachable.
        self._conf.set_from_args(args)
        self._args = args

    def has_function(self) -> bool:
        # True when argparse attached a subcommand handler ('func').
        return 'func' in self._args

    def __contains__(self, name: str) -> bool:
        return hasattr(self, name)

    def __getattr__(self, name: str) -> Any:
        # Called only when normal lookup fails: try _conf, then _args;
        # otherwise raise AttributeError via the default machinery.
        if '_conf' in self.__dict__ and hasattr(self._conf, name):
            return getattr(self._conf, name)
        elif '_args' in self.__dict__ and hasattr(self._args, name):
            return getattr(self._args, name)
        else:
            return super().__getattribute__(name)

    def __setattr__(self, name: str, value: Any) -> None:
        # Route writes to whichever holder already defines the name,
        # falling back to a plain instance attribute.
        if hasattr(self._conf, name):
            setattr(self._conf, name, value)
        elif hasattr(self._args, name):
            setattr(self._args, name, value)
        else:
            super().__setattr__(name, value)
208 | ||
209 | ||
class ContainerEngine:
    """Base class for a supported container engine binary (podman/docker).

    Concrete subclasses define a plain class attribute ``EXE`` naming the
    executable; instantiation resolves its absolute path via find_program.
    """

    def __init__(self) -> None:
        self.path = find_program(self.EXE)

    @property
    def EXE(self) -> str:
        # NOTE: the previous `@classmethod` + `@property` stacking was
        # deprecated in Python 3.11 and removed in 3.13. A plain property
        # behaves the same here: subclasses shadow it with their EXE class
        # attribute, and the bare base class still raises on access.
        raise NotImplementedError()

    def __str__(self) -> str:
        return f'{self.EXE} ({self.path})'
221 | ||
f67539c2 TL |
222 | |
class Podman(ContainerEngine):
    """Podman engine wrapper; caches the client version after get_version()."""

    EXE = 'podman'

    def __init__(self) -> None:
        super().__init__()
        # Populated lazily by get_version(); None until then.
        self._version: Optional[Tuple[int, ...]] = None

    @property
    def version(self) -> Tuple[int, ...]:
        # Guard against reading the version before it was probed.
        if self._version is None:
            raise RuntimeError('Please call `get_version` first')
        return self._version

    def get_version(self, ctx: CephadmContext) -> None:
        """Probe the podman client version and cache it as an int tuple."""
        cmd = [self.path, 'version', '--format', '{{.Client.Version}}']
        out, _, _ = call_throws(ctx, cmd)
        self._version = _parse_podman_version(out)

    def __str__(self) -> str:
        version = '.'.join(str(part) for part in self.version)
        return f'{self.EXE} ({self.path}) version {version}'
243 | ||
f67539c2 TL |
244 | |
class Docker(ContainerEngine):
    """Docker engine; EXE shadows the base-class accessor."""
    EXE = 'docker'


CONTAINER_PREFERENCE = (Podman, Docker)  # prefer podman to docker
9f95a23c | 250 | |
9f95a23c | 251 | |
33c7a0ef TL |
# During normal cephadm operations (cephadm ls, gather-facts, etc ) we use:
# stdout: for JSON output only
# stderr: for error, debug, info, etc
logging_config = {
    'version': 1,
    'disable_existing_loggers': True,
    'formatters': {
        'cephadm': {
            'format': '%(asctime)s %(thread)x %(levelname)s %(message)s'
        },
    },
    'handlers': {
        # No formatter: raw messages go to the handler's default stream
        # (stderr for logging.StreamHandler), keeping stdout JSON-clean.
        'console': {
            'level': 'INFO',
            'class': 'logging.StreamHandler',
        },
        # WatchedFileHandler reopens the file if logrotate moves it.
        'log_file': {
            'level': 'DEBUG',
            'class': 'logging.handlers.WatchedFileHandler',
            'formatter': 'cephadm',
            'filename': '%s/cephadm.log' % LOG_DIR,
        }
    },
    'loggers': {
        # Root logger: everything flows to both handlers.
        '': {
            'level': 'DEBUG',
            'handlers': ['console', 'log_file'],
        }
    }
}
e306af50 | 282 | |
f67539c2 | 283 | |
33c7a0ef TL |
class ExcludeErrorsFilter(logging.Filter):
    def filter(self, record: logging.LogRecord) -> bool:
        """Only lets through log messages with log level below WARNING ."""
        return not (record.levelno >= logging.WARNING)
288 | ||
289 | ||
# When cephadm is used as standard binary (bootstrap, rm-cluster, etc) we use:
# stdout: for debug and info
# stderr: for errors and warnings
interactive_logging_config = {
    'version': 1,
    'filters': {
        # Instantiated by dictConfig via the '()' factory key.
        'exclude_errors': {
            '()': ExcludeErrorsFilter
        }
    },
    'disable_existing_loggers': True,
    'formatters': {
        'cephadm': {
            'format': '%(asctime)s %(thread)x %(levelname)s %(message)s'
        },
    },
    'handlers': {
        # INFO/DEBUG to stdout only; the filter drops WARNING and above
        # so they are not duplicated on both streams.
        'console_stdout': {
            'level': 'INFO',
            'class': 'logging.StreamHandler',
            'filters': ['exclude_errors'],
            'stream': sys.stdout
        },
        'console_stderr': {
            'level': 'WARNING',
            'class': 'logging.StreamHandler',
            'stream': sys.stderr
        },
        # WatchedFileHandler reopens the file if logrotate moves it.
        'log_file': {
            'level': 'DEBUG',
            'class': 'logging.handlers.WatchedFileHandler',
            'formatter': 'cephadm',
            'filename': '%s/cephadm.log' % LOG_DIR,
        }
    },
    'loggers': {
        '': {
            'level': 'DEBUG',
            'handlers': ['console_stdout', 'console_stderr', 'log_file'],
        }
    }
}
332 | ||
333 | ||
e306af50 TL |
class termcolor:
    """ANSI escape sequences used to colorize terminal output."""
    yellow = '\033[93m'
    red = '\033[31m'
    end = '\033[0m'  # reset all attributes
338 | ||
f6b5b4d7 | 339 | |
9f95a23c TL |
class Error(Exception):
    """Base class for all cephadm errors."""
    pass
342 | ||
f6b5b4d7 | 343 | |
9f95a23c TL |
class TimeoutExpired(Error):
    """Raised when a timed operation exceeds its deadline."""
    pass
346 | ||
33c7a0ef TL |
347 | |
class UnauthorizedRegistryError(Error):
    """Raised when a container registry rejects our credentials."""
    pass
350 | ||
9f95a23c TL |
351 | ################################## |
352 | ||
f6b5b4d7 | 353 | |
class Ceph(object):
    # Daemon types that run from the main ceph container image.
    daemons = ('mon', 'mgr', 'osd', 'mds', 'rgw', 'rbd-mirror',
               'crash', 'cephfs-mirror')
9f95a23c TL |
357 | |
358 | ################################## | |
359 | ||
f6b5b4d7 | 360 | |
b3b6e05e TL |
class OSD(object):
    @staticmethod
    def get_sysctl_settings() -> List[str]:
        """Sysctl lines recommended on hosts running OSD daemons."""
        settings = [
            '# allow a large number of OSDs',
            'fs.aio-max-nr = 1048576',
            'kernel.pid_max = 4194304',
        ]
        return settings
369 | ||
20effc67 | 370 | |
b3b6e05e TL |
371 | ################################## |
372 | ||
373 | ||
20effc67 TL |
class SNMPGateway:
    """Defines an SNMP gateway between Prometheus and SNMP monitoring Frameworks"""
    daemon_type = 'snmp-gateway'
    SUPPORTED_VERSIONS = ['V2c', 'V3']
    default_image = DEFAULT_SNMP_GATEWAY_IMAGE
    DEFAULT_PORT = 9464
    env_filename = 'snmp-gateway.conf'

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str,
                 daemon_id: Union[int, str],
                 config_json: Dict[str, Any],
                 image: Optional[str] = None) -> None:
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image or SNMPGateway.default_image

        # ownership for files written under the daemon's data dir
        self.uid = config_json.get('uid', 0)
        self.gid = config_json.get('gid', 0)

        self.destination = config_json.get('destination', '')
        self.snmp_version = config_json.get('snmp_version', 'V2c')
        self.snmp_community = config_json.get('snmp_community', 'public')
        self.log_level = config_json.get('log_level', 'info')
        # V3-only credentials; empty strings when running V2c
        self.snmp_v3_auth_username = config_json.get('snmp_v3_auth_username', '')
        self.snmp_v3_auth_password = config_json.get('snmp_v3_auth_password', '')
        self.snmp_v3_auth_protocol = config_json.get('snmp_v3_auth_protocol', '')
        self.snmp_v3_priv_protocol = config_json.get('snmp_v3_priv_protocol', '')
        self.snmp_v3_priv_password = config_json.get('snmp_v3_priv_password', '')
        self.snmp_v3_engine_id = config_json.get('snmp_v3_engine_id', '')

        self.validate()

    @classmethod
    def init(cls, ctx: CephadmContext, fsid: str,
             daemon_id: Union[int, str]) -> 'SNMPGateway':
        """Alternate constructor: pull settings from ctx.config_json."""
        assert ctx.config_json
        return cls(ctx, fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    @staticmethod
    def get_version(ctx: CephadmContext, fsid: str, daemon_id: str) -> Optional[str]:
        """Return the version of the notifier from its http endpoint

        Returns None when the unit metadata, port, endpoint or version
        banner cannot be read.
        """
        path = os.path.join(ctx.data_dir, fsid, f'snmp-gateway.{daemon_id}', 'unit.meta')
        try:
            with open(path, 'r') as env:
                metadata = json.loads(env.read())
        except (OSError, json.JSONDecodeError):
            return None

        ports = metadata.get('ports', [])
        if not ports:
            return None

        try:
            with urlopen(f'http://127.0.0.1:{ports[0]}/') as r:
                html = r.read().decode('utf-8').split('\n')
        except (HTTPError, URLError):
            return None

        for h in html:
            stripped = h.strip()
            if stripped.startswith(('<pre>', '<PRE>')) and \
               stripped.endswith(('</pre>', '</PRE>')):
                # <pre>(version=1.2.1, branch=HEAD, revision=7...
                return stripped.split(',')[0].split('version=')[1]

        return None

    @property
    def port(self) -> int:
        """First assigned TCP port, or DEFAULT_PORT when none were given."""
        # The previous nested `len(...) > 0` check was redundant: a truthy
        # tcp_ports string is necessarily non-empty.
        if not self.ctx.tcp_ports:
            return self.DEFAULT_PORT
        return int(self.ctx.tcp_ports.split()[0])

    def get_daemon_args(self) -> List[str]:
        """Build the snmp-notifier command line for this configuration."""
        v3_args = []
        base_args = [
            f'--web.listen-address=:{self.port}',
            f'--snmp.destination={self.destination}',
            f'--snmp.version={self.snmp_version}',
            f'--log.level={self.log_level}',
            '--snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl'
        ]

        if self.snmp_version == 'V3':
            # common auth settings
            v3_args.extend([
                '--snmp.authentication-enabled',
                f'--snmp.authentication-protocol={self.snmp_v3_auth_protocol}',
                f'--snmp.security-engine-id={self.snmp_v3_engine_id}'
            ])
            # authPriv setting is applied if we have a privacy protocol setting
            if self.snmp_v3_priv_protocol:
                v3_args.extend([
                    '--snmp.private-enabled',
                    f'--snmp.private-protocol={self.snmp_v3_priv_protocol}'
                ])

        return base_args + v3_args

    @property
    def data_dir(self) -> str:
        # e.g. /var/lib/ceph/<fsid>/snmp-gateway.<id>
        return os.path.join(self.ctx.data_dir, self.ctx.fsid, f'{self.daemon_type}.{self.daemon_id}')

    @property
    def conf_file_path(self) -> str:
        return os.path.join(self.data_dir, self.env_filename)

    def create_daemon_conf(self) -> None:
        """Creates the environment file holding 'secrets' passed to the snmp-notifier daemon"""
        # 0o600 at creation time so secrets are never world-readable.
        with open(os.open(self.conf_file_path, os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
            if self.snmp_version == 'V2c':
                f.write(f'SNMP_NOTIFIER_COMMUNITY={self.snmp_community}\n')
            else:
                f.write(f'SNMP_NOTIFIER_AUTH_USERNAME={self.snmp_v3_auth_username}\n')
                f.write(f'SNMP_NOTIFIER_AUTH_PASSWORD={self.snmp_v3_auth_password}\n')
                if self.snmp_v3_priv_password:
                    f.write(f'SNMP_NOTIFIER_PRIV_PASSWORD={self.snmp_v3_priv_password}\n')

    def validate(self) -> None:
        """Validate the settings

        Raises:
            Error: if the fsid doesn't look like an fsid
            Error: if the snmp version is not supported
            Error: destination IP and port address missing
        """
        if not is_fsid(self.fsid):
            raise Error(f'not a valid fsid: {self.fsid}')

        if self.snmp_version not in SNMPGateway.SUPPORTED_VERSIONS:
            raise Error(f'not a valid snmp version: {self.snmp_version}')

        if not self.destination:
            raise Error('config is missing destination attribute(<ip>:<port>) of the target SNMP listener')
516 | ||
517 | ||
518 | ################################## | |
9f95a23c TL |
class Monitoring(object):
    """Define the configs for the monitoring containers"""

    # Host ports each monitoring daemon listens on.
    port_map = {
        'prometheus': [9095],  # Avoid default 9090, due to conflict with cockpit UI
        'node-exporter': [9100],
        'grafana': [3000],
        'alertmanager': [9093, 9094],
        'loki': [3100],
        'promtail': [9080]
    }

    # Per-component image, resource hints, daemon args and the files/args
    # expected from config-json.
    components = {
        'prometheus': {
            'image': DEFAULT_PROMETHEUS_IMAGE,
            'cpus': '2',
            'memory': '4GB',
            'args': [
                '--config.file=/etc/prometheus/prometheus.yml',
                '--storage.tsdb.path=/prometheus',
            ],
            'config-json-files': [
                'prometheus.yml',
            ],
        },
        'loki': {
            'image': DEFAULT_LOKI_IMAGE,
            'cpus': '1',
            'memory': '1GB',
            'args': [
                '--config.file=/etc/loki/loki.yml',
            ],
            'config-json-files': [
                'loki.yml'
            ],
        },
        'promtail': {
            'image': DEFAULT_PROMTAIL_IMAGE,
            'cpus': '1',
            'memory': '1GB',
            'args': [
                '--config.file=/etc/promtail/promtail.yml',
            ],
            'config-json-files': [
                'promtail.yml',
            ],
        },
        'node-exporter': {
            'image': DEFAULT_NODE_EXPORTER_IMAGE,
            'cpus': '1',
            'memory': '1GB',
            'args': [
                '--no-collector.timex',
            ],
        },
        'grafana': {
            'image': DEFAULT_GRAFANA_IMAGE,
            'cpus': '2',
            'memory': '4GB',
            'args': [],
            'config-json-files': [
                'grafana.ini',
                'provisioning/datasources/ceph-dashboard.yml',
                'certs/cert_file',
                'certs/cert_key',
            ],
        },
        'alertmanager': {
            'image': DEFAULT_ALERT_MANAGER_IMAGE,
            'cpus': '2',
            'memory': '2GB',
            'args': [
                # second alertmanager port is the cluster/mesh listener
                '--cluster.listen-address=:{}'.format(port_map['alertmanager'][1]),
            ],
            'config-json-files': [
                'alertmanager.yml',
            ],
            'config-json-args': [
                'peers',
            ],
        },
    }  # type: ignore

    @staticmethod
    def get_version(ctx, container_id, daemon_type):
        # type: (CephadmContext, str, str) -> str
        """
        :param: daemon_type Either "prometheus", "alertmanager", "loki", "promtail" or "node-exporter"
        """
        assert daemon_type in ('prometheus', 'alertmanager', 'node-exporter', 'loki', 'promtail')
        cmd = daemon_type.replace('-', '_')
        code = -1
        err = ''
        version = ''
        if daemon_type == 'alertmanager':
            # the binary name differs between packagings; try both
            for cmd in ['alertmanager', 'prometheus-alertmanager']:
                _, err, code = call(ctx, [
                    ctx.container_engine.path, 'exec', container_id, cmd,
                    '--version'
                ], verbosity=CallVerbosity.DEBUG)
                if code == 0:
                    break
            cmd = 'alertmanager'  # reset cmd for version extraction
        else:
            _, err, code = call(ctx, [
                ctx.container_engine.path, 'exec', container_id, cmd, '--version'
            ], verbosity=CallVerbosity.DEBUG)
        # these daemons print "<name>, version X.Y.Z ..." on stderr
        if code == 0 and \
                err.startswith('%s, version ' % cmd):
            version = err.split(' ')[2]
        return version
630 | ||
9f95a23c TL |
631 | ################################## |
632 | ||
f6b5b4d7 | 633 | |
f67539c2 TL |
def populate_files(config_dir, config_files, uid, gid):
    # type: (str, Dict, int, int) -> None
    """create config files for different services"""
    for fname in config_files:
        target = os.path.join(config_dir, fname)
        content = dict_get_join(config_files, fname)
        logger.info('Write file: %s' % (target))
        with open(target, 'w', encoding='utf-8') as fh:
            # restrict to owner before any content lands on disk
            os.fchown(fh.fileno(), uid, gid)
            os.fchmod(fh.fileno(), 0o600)
            fh.write(content)
645 | ||
646 | ||
9f95a23c TL |
class NFSGanesha(object):
    """Defines a NFS-Ganesha container"""

    daemon_type = 'nfs'
    entrypoint = '/usr/bin/ganesha.nfsd'
    # run in foreground, log to stderr
    daemon_args = ['-F', '-L', 'STDERR']

    required_files = ['ganesha.conf']

    port_map = {
        'nfs': 2049,
    }

    def __init__(self,
                 ctx,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.pool = dict_get(config_json, 'pool', require=True)
        self.namespace = dict_get(config_json, 'namespace')
        self.userid = dict_get(config_json, 'userid')
        self.extra_args = dict_get(config_json, 'extra_args', [])
        self.files = dict_get(config_json, 'files', {})
        self.rgw = dict_get(config_json, 'rgw', {})

        # validate the supplied args
        self.validate()

    @classmethod
    def init(cls, ctx, fsid, daemon_id):
        # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha
        # Alternate constructor: read config-json from the CLI context.
        return cls(ctx, fsid, daemon_id, get_parm(ctx.config_json), ctx.image)

    def get_container_mounts(self, data_dir):
        # type: (str) -> Dict[str, str]
        # Host path under data_dir -> in-container mount point
        # (':z' requests an SELinux relabel by the container engine).
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z'
        if self.rgw:
            cluster = self.rgw.get('cluster', 'ceph')
            rgw_user = self.rgw.get('user', 'admin')
            mounts[os.path.join(data_dir, 'keyring.rgw')] = \
                '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user)
        return mounts

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        envs = [
            'CEPH_CONF=%s' % (CEPH_DEFAULT_CONF)
        ]
        return envs

    @staticmethod
    def get_version(ctx, container_id):
        # type: (CephadmContext, str) -> Optional[str]
        # Query the running container's ganesha binary for its release.
        version = None
        out, err, code = call(ctx,
                              [ctx.container_engine.path, 'exec', container_id,
                               NFSGanesha.entrypoint, '-v'],
                              verbosity=CallVerbosity.DEBUG)
        if code == 0:
            match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
            if match:
                version = match.group(1)
        return version

    def validate(self):
        # type: () -> None
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

        # check for an RGW config
        if self.rgw:
            if not self.rgw.get('keyring'):
                raise Error('RGW keyring is missing')
            if not self.rgw.get('user'):
                raise Error('RGW user is missing')

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def get_daemon_args(self):
        # type: () -> List[str]
        return self.daemon_args + self.extra_args

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ganesha config...')

        # create the ganesha conf dir
        config_dir = os.path.join(data_dir, 'etc/ganesha')
        makedirs(config_dir, uid, gid, 0o755)

        # populate files from the config-json
        populate_files(config_dir, self.files, uid, gid)

        # write the RGW keyring
        if self.rgw:
            keyring_path = os.path.join(data_dir, 'keyring.rgw')
            with open(keyring_path, 'w') as f:
                os.fchmod(f.fileno(), 0o600)
                os.fchown(f.fileno(), uid, gid)
                f.write(self.rgw.get('keyring', ''))
782 | ||
9f95a23c TL |
783 | ################################## |
784 | ||
f6b5b4d7 | 785 | |
1911f103 TL |
786 | class CephIscsi(object): |
787 | """Defines a Ceph-Iscsi container""" | |
788 | ||
789 | daemon_type = 'iscsi' | |
790 | entrypoint = '/usr/bin/rbd-target-api' | |
791 | ||
792 | required_files = ['iscsi-gateway.cfg'] | |
793 | ||
794 | def __init__(self, | |
f67539c2 | 795 | ctx, |
1911f103 TL |
796 | fsid, |
797 | daemon_id, | |
798 | config_json, | |
799 | image=DEFAULT_IMAGE): | |
f67539c2 TL |
800 | # type: (CephadmContext, str, Union[int, str], Dict, str) -> None |
801 | self.ctx = ctx | |
1911f103 TL |
802 | self.fsid = fsid |
803 | self.daemon_id = daemon_id | |
804 | self.image = image | |
805 | ||
1911f103 | 806 | # config-json options |
f91f0fd5 | 807 | self.files = dict_get(config_json, 'files', {}) |
1911f103 TL |
808 | |
809 | # validate the supplied args | |
810 | self.validate() | |
811 | ||
812 | @classmethod | |
f67539c2 TL |
813 | def init(cls, ctx, fsid, daemon_id): |
814 | # type: (CephadmContext, str, Union[int, str]) -> CephIscsi | |
815 | return cls(ctx, fsid, daemon_id, | |
816 | get_parm(ctx.config_json), ctx.image) | |
1911f103 TL |
817 | |
818 | @staticmethod | |
819 | def get_container_mounts(data_dir, log_dir): | |
820 | # type: (str, str) -> Dict[str, str] | |
821 | mounts = dict() | |
822 | mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z' | |
823 | mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z' | |
824 | mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z' | |
f91f0fd5 | 825 | mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config' |
a4b75251 | 826 | mounts[log_dir] = '/var/log:z' |
f91f0fd5 | 827 | mounts['/dev'] = '/dev' |
1911f103 TL |
828 | return mounts |
829 | ||
f6b5b4d7 TL |
830 | @staticmethod |
831 | def get_container_binds(): | |
832 | # type: () -> List[List[str]] | |
833 | binds = [] | |
834 | lib_modules = ['type=bind', | |
835 | 'source=/lib/modules', | |
836 | 'destination=/lib/modules', | |
837 | 'ro=true'] | |
838 | binds.append(lib_modules) | |
839 | return binds | |
840 | ||
1911f103 | 841 | @staticmethod |
f67539c2 TL |
842 | def get_version(ctx, container_id): |
843 | # type: (CephadmContext, str) -> Optional[str] | |
1911f103 | 844 | version = None |
f67539c2 TL |
845 | out, err, code = call(ctx, |
846 | [ctx.container_engine.path, 'exec', container_id, | |
847 | '/usr/bin/python3', '-c', "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"], | |
848 | verbosity=CallVerbosity.DEBUG) | |
1911f103 | 849 | if code == 0: |
f6b5b4d7 | 850 | version = out.strip() |
1911f103 TL |
851 | return version |
852 | ||
853 | def validate(self): | |
e306af50 | 854 | # type: () -> None |
1911f103 TL |
855 | if not is_fsid(self.fsid): |
856 | raise Error('not an fsid: %s' % self.fsid) | |
857 | if not self.daemon_id: | |
858 | raise Error('invalid daemon_id: %s' % self.daemon_id) | |
859 | if not self.image: | |
860 | raise Error('invalid image: %s' % self.image) | |
861 | ||
862 | # check for the required files | |
863 | if self.required_files: | |
864 | for fname in self.required_files: | |
865 | if fname not in self.files: | |
866 | raise Error('required file missing from config-json: %s' % fname) | |
867 | ||
868 | def get_daemon_name(self): | |
869 | # type: () -> str | |
870 | return '%s.%s' % (self.daemon_type, self.daemon_id) | |
871 | ||
872 | def get_container_name(self, desc=None): | |
873 | # type: (Optional[str]) -> str | |
874 | cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name()) | |
875 | if desc: | |
876 | cname = '%s-%s' % (cname, desc) | |
877 | return cname | |
878 | ||
1911f103 TL |
879 | def create_daemon_dirs(self, data_dir, uid, gid): |
880 | # type: (str, int, int) -> None | |
881 | """Create files under the container data dir""" | |
882 | if not os.path.isdir(data_dir): | |
883 | raise OSError('data_dir is not a directory: %s' % (data_dir)) | |
884 | ||
885 | logger.info('Creating ceph-iscsi config...') | |
886 | configfs_dir = os.path.join(data_dir, 'configfs') | |
887 | makedirs(configfs_dir, uid, gid, 0o755) | |
888 | ||
889 | # populate files from the config-json | |
f67539c2 | 890 | populate_files(data_dir, self.files, uid, gid) |
1911f103 TL |
891 | |
892 | @staticmethod | |
893 | def configfs_mount_umount(data_dir, mount=True): | |
e306af50 | 894 | # type: (str, bool) -> List[str] |
1911f103 TL |
895 | mount_path = os.path.join(data_dir, 'configfs') |
896 | if mount: | |
f67539c2 TL |
897 | cmd = 'if ! grep -qs {0} /proc/mounts; then ' \ |
898 | 'mount -t configfs none {0}; fi'.format(mount_path) | |
1911f103 | 899 | else: |
f67539c2 TL |
900 | cmd = 'if grep -qs {0} /proc/mounts; then ' \ |
901 | 'umount {0}; fi'.format(mount_path) | |
1911f103 TL |
902 | return cmd.split() |
903 | ||
f6b5b4d7 TL |
904 | def get_tcmu_runner_container(self): |
905 | # type: () -> CephContainer | |
f67539c2 TL |
906 | tcmu_container = get_container(self.ctx, self.fsid, self.daemon_type, self.daemon_id) |
907 | tcmu_container.entrypoint = '/usr/bin/tcmu-runner' | |
f6b5b4d7 | 908 | tcmu_container.cname = self.get_container_name(desc='tcmu') |
f91f0fd5 TL |
909 | # remove extra container args for tcmu container. |
910 | # extra args could cause issue with forking service type | |
911 | tcmu_container.container_args = [] | |
f6b5b4d7 TL |
912 | return tcmu_container |
913 | ||
1911f103 TL |
914 | ################################## |
915 | ||
f6b5b4d7 | 916 | |
f67539c2 TL |
class HAproxy(object):
    """HAproxy load-balancer daemon deployed as a container."""

    daemon_type = 'haproxy'
    required_files = ['haproxy.cfg']
    default_image = DEFAULT_HAPROXY_IMAGE

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # files shipped via the config-json blob
        self.files = dict_get(config_json, 'files', {})

        # fail early on bad identifiers or missing required files
        self.validate()

    @classmethod
    def init(cls, ctx: CephadmContext,
             fsid: str, daemon_id: Union[int, str]) -> 'HAproxy':
        """Construct from the CLI context's config-json and image."""
        return cls(ctx, fsid, daemon_id, get_parm(ctx.config_json), ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        # HAproxy keeps its config in a dedicated subdirectory
        haproxy_dir = os.path.join(data_dir, 'haproxy')
        if not os.path.isdir(haproxy_dir):
            makedirs(haproxy_dir, uid, gid, DATA_DIR_MODE)

        populate_files(haproxy_dir, self.files, uid, gid)

    def get_daemon_args(self) -> List[str]:
        """Command line for the haproxy process inside the container."""
        return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg']

    def validate(self):
        # type: () -> None
        """Raise Error on a bad fsid/daemon_id/image or missing required file."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        for fname in self.required_files or []:
            if fname not in self.files:
                raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        return '{}.{}'.format(self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        base = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        return '%s-%s' % (base, desc) if desc else base

    def extract_uid_gid_haproxy(self) -> Tuple[int, int]:
        # better directory for this?
        return extract_uid_gid(self.ctx, file_path='/var/lib')

    @staticmethod
    def get_container_mounts(data_dir: str) -> Dict[str, str]:
        """Mount the haproxy config subdir at its expected in-container path."""
        return {os.path.join(data_dir, 'haproxy'): '/var/lib/haproxy'}

    @staticmethod
    def get_sysctl_settings() -> List[str]:
        return [
            '# IP forwarding',
            'net.ipv4.ip_forward = 1',
        ]
1000 | ||
f67539c2 TL |
1001 | ################################## |
1002 | ||
1003 | ||
class Keepalived(object):
    """Keepalived VRRP daemon deployed as a container."""

    daemon_type = 'keepalived'
    required_files = ['keepalived.conf']
    default_image = DEFAULT_KEEPALIVED_IMAGE

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # files shipped via the config-json blob
        self.files = dict_get(config_json, 'files', {})

        # fail early on bad identifiers or missing required files
        self.validate()

    @classmethod
    def init(cls, ctx: CephadmContext, fsid: str,
             daemon_id: Union[int, str]) -> 'Keepalived':
        """Construct from the CLI context's config-json and image."""
        return cls(ctx, fsid, daemon_id, get_parm(ctx.config_json), ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        # create additional directories in data dir for keepalived to use
        sub_dir = os.path.join(data_dir, 'keepalived')
        if not os.path.isdir(sub_dir):
            makedirs(sub_dir, uid, gid, DATA_DIR_MODE)

        # populate files from the config-json (written into data_dir itself)
        populate_files(data_dir, self.files, uid, gid)

    def validate(self):
        # type: () -> None
        """Raise Error on a bad fsid/daemon_id/image or missing required file."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        for fname in self.required_files or []:
            if fname not in self.files:
                raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        return '{}.{}'.format(self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        base = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        return '%s-%s' % (base, desc) if desc else base

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        """Environment expected by the keepalived container image."""
        return [
            'KEEPALIVED_AUTOCONF=false',
            'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf',
            'KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf',
            'KEEPALIVED_DEBUG=false'
        ]

    @staticmethod
    def get_sysctl_settings() -> List[str]:
        return [
            '# IP forwarding and non-local bind',
            'net.ipv4.ip_forward = 1',
            'net.ipv4.ip_nonlocal_bind = 1',
        ]

    def extract_uid_gid_keepalived(self) -> Tuple[int, int]:
        # better directory for this?
        return extract_uid_gid(self.ctx, file_path='/var/lib')

    @staticmethod
    def get_container_mounts(data_dir: str) -> Dict[str, str]:
        """Mount the rendered keepalived.conf at its expected path."""
        return {
            os.path.join(data_dir, 'keepalived.conf'):
                '/etc/keepalived/keepalived.conf'
        }
1096 | ||
1097 | ################################## | |
1098 | ||
1099 | ||
f91f0fd5 TL |
class CustomContainer(object):
    """A user-defined ("custom") container deployed by cephadm."""

    daemon_type = 'container'

    def __init__(self,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # everything below is driven by the config-json blob
        self.entrypoint = dict_get(config_json, 'entrypoint')
        self.uid = dict_get(config_json, 'uid', 65534)  # nobody
        self.gid = dict_get(config_json, 'gid', 65534)  # nobody
        self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
        self.args = dict_get(config_json, 'args', [])
        self.envs = dict_get(config_json, 'envs', [])
        self.privileged = dict_get(config_json, 'privileged', False)
        self.bind_mounts = dict_get(config_json, 'bind_mounts', [])
        self.ports = dict_get(config_json, 'ports', [])
        self.dirs = dict_get(config_json, 'dirs', [])
        self.files = dict_get(config_json, 'files', {})

    @classmethod
    def init(cls, ctx: CephadmContext,
             fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer':
        """Construct from the CLI context's config-json and image."""
        return cls(fsid, daemon_id, get_parm(ctx.config_json), ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """
        Create dirs/files below the container data directory.
        """
        logger.info('Creating custom container configuration '
                    'dirs/files in {} ...'.format(data_dir))

        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % data_dir)

        for rel_dir in self.dirs:
            logger.info('Creating directory: {}'.format(rel_dir))
            makedirs(os.path.join(data_dir, rel_dir.strip('/')),
                     uid, gid, 0o755)

        for rel_path in self.files:
            logger.info('Creating file: {}'.format(rel_path))
            content = dict_get_join(self.files, rel_path)
            abs_path = os.path.join(data_dir, rel_path.strip('/'))
            # owner/mode set on the open fd so the file is never world-readable
            with open(abs_path, 'w', encoding='utf-8') as f:
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)
                f.write(content)

    def get_daemon_args(self) -> List[str]:
        return []

    def get_container_args(self) -> List[str]:
        return self.args

    def get_container_envs(self) -> List[str]:
        return self.envs

    def get_container_mounts(self, data_dir: str) -> Dict[str, str]:
        """
        Get the volume mounts. Relative source paths will be located below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`; absolute ones are kept
        as given (os.path.join ignores the prefix for absolute paths).
        """
        return {
            os.path.join(data_dir, src): dst
            for src, dst in self.volume_mounts.items()
        }

    def get_container_binds(self, data_dir: str) -> List[List[str]]:
        """
        Get the bind mounts. Any relative `source=...` path is re-rooted
        below `/var/lib/ceph/<cluster-fsid>/<daemon-name>`; absolute sources
        are left untouched by os.path.join.
        """
        rewritten = self.bind_mounts.copy()
        for bind in rewritten:
            for i, token in enumerate(bind):
                m = re.match(r'^source=(.+)$', token)
                if m:
                    bind[i] = 'source={}'.format(
                        os.path.join(data_dir, m.group(1)))
        return rewritten
1212 | ||
1213 | ################################## | |
1214 | ||
1215 | ||
f67539c2 TL |
def touch(file_path: str, uid: Optional[int] = None, gid: Optional[int] = None) -> None:
    """Create *file_path* (or update its mtime) and optionally chown it.

    Ownership is changed only when BOTH uid and gid are supplied.

    :param file_path: path of the file to create/touch.
    :param uid: numeric owner to apply, or None to leave ownership alone.
    :param gid: numeric group to apply, or None to leave ownership alone.
    """
    Path(file_path).touch()
    # 'is not None' (not truthiness): uid/gid 0 is root and must still be
    # honored; the old 'if uid and gid' silently skipped chown for root.
    if uid is not None and gid is not None:
        os.chown(file_path, uid, gid)
1220 | ||
1221 | ||
1222 | ################################## | |
1223 | ||
1224 | ||
f91f0fd5 TL |
def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
    """
    Helper function to get a key from a dictionary.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :param default: The default value in case the key does not
        exist. Default is `None`.
    :param require: Set to `True` if the key is required. An
        exception will be raised if the key does not exist in
        the given dictionary.
    :return: Returns the value of the given key.
    :raises: :exc:`self.Error` if the given key does not exist
        and `require` is set to `True`.
    """
    # 'key not in d' is the idiomatic membership test; the previous
    # 'key not in d.keys()' built an unnecessary view for the same result.
    if require and key not in d:
        raise Error('{} missing from dict'.format(key))
    return d.get(key, default)  # type: ignore
f91f0fd5 TL |
1242 | |
1243 | ################################## | |
1244 | ||
1245 | ||
def dict_get_join(d: Dict, key: str) -> Any:
    """
    Fetch *key* from *d*, flattening list values into a single string.

    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :return: The value as-is, except a `list`, whose items are stringified
        and joined with line breaks. Missing keys yield `None`.
    """
    value = d.get(key)
    if not isinstance(value, list):
        return value
    return '\n'.join(str(item) for item in value)
1260 | ||
1261 | ################################## | |
1262 | ||
1263 | ||
9f95a23c | 1264 | def get_supported_daemons(): |
e306af50 | 1265 | # type: () -> List[str] |
9f95a23c TL |
1266 | supported_daemons = list(Ceph.daemons) |
1267 | supported_daemons.extend(Monitoring.components) | |
1268 | supported_daemons.append(NFSGanesha.daemon_type) | |
1911f103 | 1269 | supported_daemons.append(CephIscsi.daemon_type) |
f91f0fd5 | 1270 | supported_daemons.append(CustomContainer.daemon_type) |
f67539c2 TL |
1271 | supported_daemons.append(HAproxy.daemon_type) |
1272 | supported_daemons.append(Keepalived.daemon_type) | |
20effc67 TL |
1273 | supported_daemons.append(CephadmAgent.daemon_type) |
1274 | supported_daemons.append(SNMPGateway.daemon_type) | |
9f95a23c TL |
1275 | assert len(supported_daemons) == len(set(supported_daemons)) |
1276 | return supported_daemons | |
1277 | ||
1278 | ################################## | |
1279 | ||
f6b5b4d7 | 1280 | |
f67539c2 TL |
class PortOccupiedError(Error):
    """Raised by attempt_bind() when binding fails with EADDRINUSE."""
    pass
1283 | ||
1284 | ||
def attempt_bind(ctx, s, address, port):
    # type: (CephadmContext, socket.socket, str, int) -> None
    """Try to bind *s* to address:port; the socket is always closed afterwards.

    Raises PortOccupiedError when the port is already in use, and Error for
    any other failure.
    """
    try:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((address, port))
    except OSError as e:
        if e.errno != errno.EADDRINUSE:
            raise Error(e)
        msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
        logger.warning(msg)
        raise PortOccupiedError(msg)
    except Exception as e:
        raise Error(e)
    finally:
        s.close()
1301 | ||
f6b5b4d7 | 1302 | |
f67539c2 TL |
def port_in_use(ctx, port_num):
    # type: (CephadmContext, int) -> bool
    """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
    logger.info('Verifying port %d ...' % port_num)

    def _check(af: socket.AddressFamily, address: str) -> bool:
        try:
            sock = socket.socket(af, socket.SOCK_STREAM)
            attempt_bind(ctx, sock, address, port_num)
        except PortOccupiedError:
            return True
        except OSError as e:
            # EAFNOSUPPORT / EADDRNOTAVAIL mean this address family is
            # disabled on the host, which is not an error for this check.
            if e.errno not in (errno.EAFNOSUPPORT, errno.EADDRNOTAVAIL):
                raise e
            return False
        return False

    candidates = ((socket.AF_INET, '0.0.0.0'),
                  (socket.AF_INET6, '::'))
    return any(_check(af, addr) for af, addr in candidates)
9f95a23c | 1327 | |
f6b5b4d7 | 1328 | |
33c7a0ef TL |
def check_ip_port(ctx, ep):
    # type: (CephadmContext, EndPoint) -> None
    """Verify that ep.ip:ep.port is bindable, unless ping checks are disabled."""
    if ctx.skip_ping_check:
        return
    logger.info(f'Verifying IP {ep.ip} port {ep.port} ...')
    if is_ipv6(ep.ip):
        sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
        addr = unwrap_ipv6(ep.ip)
    else:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        addr = ep.ip
    attempt_bind(ctx, sock, addr, ep.port)
9f95a23c TL |
1340 | |
1341 | ################################## | |
1342 | ||
f67539c2 | 1343 | |
9f95a23c TL |
1344 | # this is an abbreviated version of |
1345 | # https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py | |
1346 | # that drops all of the compatibility (this is Unix/Linux only). | |
1347 | ||
9f95a23c TL |
class Timeout(TimeoutError):
    """
    Raised when the lock could not be acquired in *timeout*
    seconds.
    """

    def __init__(self, lock_file: str) -> None:
        # path of the lock file we failed to acquire
        self.lock_file = lock_file

    def __str__(self) -> str:
        return "The file lock '{}' could not be acquired.".format(self.lock_file)
1365 | ||
1366 | ||
1367 | class _Acquire_ReturnProxy(object): | |
522d829b | 1368 | def __init__(self, lock: 'FileLock') -> None: |
9f95a23c TL |
1369 | self.lock = lock |
1370 | return None | |
1371 | ||
522d829b | 1372 | def __enter__(self) -> 'FileLock': |
9f95a23c TL |
1373 | return self.lock |
1374 | ||
522d829b | 1375 | def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: |
9f95a23c TL |
1376 | self.lock.release() |
1377 | return None | |
1378 | ||
1379 | ||
class FileLock(object):
    """A re-entrant advisory file lock based on fcntl.flock().

    Lock files live under LOCK_DIR as ``<name>.lock``. Nested acquire()
    calls are counted; the flock is only dropped when the counter reaches
    zero (or release(force=True) / garbage collection).
    """

    def __init__(self, ctx: CephadmContext, name: str, timeout: int = -1) -> None:
        if not os.path.exists(LOCK_DIR):
            os.mkdir(LOCK_DIR, 0o700)
        self._lock_file = os.path.join(LOCK_DIR, name + '.lock')
        self.ctx = ctx

        # The file descriptor for the *_lock_file* as it is returned by the
        # os.open() function.
        # This file lock is only NOT None, if the object currently holds the
        # lock.
        self._lock_file_fd: Optional[int] = None
        # default timeout (seconds) for acquire(); negative means wait forever
        self.timeout = timeout
        # The lock counter is used for implementing the nested locking
        # mechanism. Whenever the lock is acquired, the counter is increased and
        # the lock is only released, when this value is 0 again.
        self._lock_counter = 0
        return None

    @property
    def is_locked(self) -> bool:
        # True only while this object holds the flock (i.e. the fd is open).
        return self._lock_file_fd is not None

    def acquire(self, timeout: Optional[int] = None, poll_intervall: float = 0.05) -> _Acquire_ReturnProxy:
        """
        Acquires the file lock or fails with a :exc:`Timeout` error.
        .. code-block:: python
            # You can use this method in the context manager (recommended)
            with lock.acquire():
                pass
            # Or use an equivalent try-finally construct:
            lock.acquire()
            try:
                pass
            finally:
                lock.release()
        :arg float timeout:
            The maximum time waited for the file lock.
            If ``timeout < 0``, there is no timeout and this method will
            block until the lock could be acquired.
            If ``timeout`` is None, the default :attr:`~timeout` is used.
        :arg float poll_intervall:
            We check once in *poll_intervall* seconds if we can acquire the
            file lock.
        :raises Timeout:
            if the lock could not be acquired in *timeout* seconds.
        .. versionchanged:: 2.0.0
            This method returns now a *proxy* object instead of *self*,
            so that it can be used in a with statement without side effects.
        """

        # Use the default timeout, if no timeout is provided.
        if timeout is None:
            timeout = self.timeout

        # Increment the number right at the beginning.
        # We can still undo it, if something fails.
        self._lock_counter += 1

        lock_id = id(self)
        lock_filename = self._lock_file
        start_time = time.time()
        try:
            while True:
                if not self.is_locked:
                    logger.debug('Acquiring lock %s on %s', lock_id,
                                 lock_filename)
                    self._acquire()

                if self.is_locked:
                    logger.debug('Lock %s acquired on %s', lock_id,
                                 lock_filename)
                    break
                elif timeout >= 0 and time.time() - start_time > timeout:
                    logger.warning('Timeout acquiring lock %s on %s', lock_id,
                                   lock_filename)
                    raise Timeout(self._lock_file)
                else:
                    logger.debug(
                        'Lock %s not acquired on %s, waiting %s seconds ...',
                        lock_id, lock_filename, poll_intervall
                    )
                    time.sleep(poll_intervall)
        except Exception:
            # Something did go wrong, so decrement the counter.
            self._lock_counter = max(0, self._lock_counter - 1)

            raise
        return _Acquire_ReturnProxy(lock=self)

    def release(self, force: bool = False) -> None:
        """
        Releases the file lock.
        Please note, that the lock is only completly released, if the lock
        counter is 0.
        Also note, that the lock file itself is not automatically deleted.
        :arg bool force:
            If true, the lock counter is ignored and the lock is released in
            every case.
        """
        if self.is_locked:
            self._lock_counter -= 1

            if self._lock_counter == 0 or force:
                # lock_id = id(self)
                # lock_filename = self._lock_file

                # Can't log in shutdown:
                #   File "/usr/lib64/python3.9/logging/__init__.py", line 1175, in _open
                #   NameError: name 'open' is not defined
                # logger.debug('Releasing lock %s on %s', lock_id, lock_filename)
                self._release()
                self._lock_counter = 0
                # logger.debug('Lock %s released on %s', lock_id, lock_filename)

        return None

    def __enter__(self) -> 'FileLock':
        self.acquire()
        return self

    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
        self.release()
        return None

    def __del__(self) -> None:
        # Force-release on garbage collection so a dropped FileLock cannot
        # leave the flock held for the life of the process.
        self.release(force=True)
        return None

    def _acquire(self) -> None:
        # Open (creating/truncating) the lock file and try a non-blocking
        # exclusive flock; on contention we leave _lock_file_fd as None and
        # let acquire()'s polling loop retry.
        open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
        fd = os.open(self._lock_file, open_mode)

        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except (IOError, OSError):
            os.close(fd)
        else:
            self._lock_file_fd = fd
        return None

    def _release(self) -> None:
        # Do not remove the lockfile:
        #
        # https://github.com/benediktschmitt/py-filelock/issues/31
        # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
        fd = self._lock_file_fd
        self._lock_file_fd = None
        fcntl.flock(fd, fcntl.LOCK_UN)  # type: ignore
        os.close(fd)  # type: ignore
        return None
1531 | ||
1532 | ||
1533 | ################################## | |
1534 | # Popen wrappers, lifted from ceph-volume | |
1535 | ||
adb31ebb TL |
class CallVerbosity(Enum):
    """How chatty call() is about a subprocess's stdout/stderr."""

    # emit nothing at all
    SILENT = 0
    # log stdout/stderr to logger.debug
    DEBUG = 1
    # On a non-zero exit status, it will forcefully set
    # logging ON for the terminal
    VERBOSE_ON_FAILURE = 2
    # log at info (instead of debug) level.
    VERBOSE = 3
1545 | ||
1546 | ||
f67539c2 TL |
# Python < 3.8 has no ThreadedChildWatcher; backport it so subprocesses can
# be awaited from non-main threads (see the note above set_child_watcher()).
if sys.version_info < (3, 8):
    import itertools
    import threading
    import warnings
    from asyncio import events

    class ThreadedChildWatcher(asyncio.AbstractChildWatcher):
        """Threaded child watcher implementation.
        The watcher uses a thread per process
        for waiting for the process finish.
        It doesn't require subscription on POSIX signal
        but a thread creation is not free.
        The watcher has O(1) complexity, its performance doesn't depend
        on amount of spawn processes.
        """

        def __init__(self) -> None:
            # monotonically increasing suffix for waitpid thread names
            self._pid_counter = itertools.count(0)
            # pid -> waiter thread
            self._threads: Dict[Any, Any] = {}

        def is_active(self) -> bool:
            return True

        def close(self) -> None:
            self._join_threads()

        def _join_threads(self) -> None:
            """Internal: Join all non-daemon threads"""
            threads = [thread for thread in list(self._threads.values())
                       if thread.is_alive() and not thread.daemon]
            for thread in threads:
                thread.join()

        def __enter__(self) -> Any:
            return self

        def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
            pass

        def __del__(self, _warn: Any = warnings.warn) -> None:
            threads = [thread for thread in list(self._threads.values())
                       if thread.is_alive()]
            if threads:
                _warn(f'{self.__class__} has registered but not finished child processes',
                      ResourceWarning,
                      source=self)

        def add_child_handler(self, pid: Any, callback: Any, *args: Any) -> None:
            # one dedicated thread blocks in waitpid() per child process
            loop = events.get_event_loop()
            thread = threading.Thread(target=self._do_waitpid,
                                      name=f'waitpid-{next(self._pid_counter)}',
                                      args=(loop, pid, callback, args),
                                      daemon=True)
            self._threads[pid] = thread
            thread.start()

        def remove_child_handler(self, pid: Any) -> bool:
            # asyncio never calls remove_child_handler() !!!
            # The method is no-op but is implemented because
            # abstract base class requires it
            return True

        def attach_loop(self, loop: Any) -> None:
            pass

        def _do_waitpid(self, loop: Any, expected_pid: Any, callback: Any, args: Any) -> None:
            assert expected_pid > 0

            try:
                pid, status = os.waitpid(expected_pid, 0)
            except ChildProcessError:
                # The child process is already reaped
                # (may happen if waitpid() is called elsewhere).
                pid = expected_pid
                returncode = 255
                logger.warning(
                    'Unknown child process pid %d, will report returncode 255',
                    pid)
            else:
                if os.WIFEXITED(status):
                    returncode = os.WEXITSTATUS(status)
                elif os.WIFSIGNALED(status):
                    returncode = -os.WTERMSIG(status)
                else:
                    raise ValueError(f'unknown wait status {status}')
                if loop.get_debug():
                    logger.debug('process %s exited with returncode %s',
                                 expected_pid, returncode)

            if loop.is_closed():
                logger.warning('Loop %r that handles pid %r is closed', loop, pid)
            else:
                # hand the exit status back to the event loop's thread
                loop.call_soon_threadsafe(callback, pid, returncode, *args)

            self._threads.pop(expected_pid)

    # unlike SafeChildWatcher which handles SIGCHLD in the main thread,
    # ThreadedChildWatcher runs in a separate thread, hence allows us to
    # run create_subprocess_exec() in non-main thread, see
    # https://bugs.python.org/issue35621
    asyncio.set_child_watcher(ThreadedChildWatcher())
1648 | ||
1649 | ||
try:
    from asyncio import run as async_run  # type: ignore[attr-defined]
except ImportError:
    # Python < 3.7 fallback: asyncio.run() does not exist, so emulate its
    # semantics — dedicated loop, asyncgen shutdown, then teardown.
    def async_run(coro):  # type: ignore
        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)
            return loop.run_until_complete(coro)
        finally:
            try:
                loop.run_until_complete(loop.shutdown_asyncgens())
            finally:
                asyncio.set_event_loop(None)
                loop.close()
1664 | ||
1665 | ||
def call(ctx: CephadmContext,
         command: List[str],
         desc: Optional[str] = None,
         verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
         timeout: Optional[int] = DEFAULT_TIMEOUT,
         **kwargs: Any) -> Tuple[str, str, int]:
    """
    Wrap subprocess.Popen to

    - log stdout/stderr to a logger,
    - decode utf-8
    - cleanly return out, err, returncode

    :param ctx: cephadm context (supplies the default timeout)
    :param command: command and arguments, exec'd without a shell
    :param desc: log-line prefix (defaults to the command name)
    :param verbosity: how stdout/stderr lines are logged
    :param timeout: timeout in seconds; returncode 124 is reported on expiry
    """

    prefix = command[0] if desc is None else desc
    if prefix:
        prefix += ': '
    timeout = timeout or ctx.timeout

    async def tee(reader: asyncio.StreamReader) -> str:
        # collect the stream while logging each line as it arrives
        collected = StringIO()
        async for line in reader:
            message = line.decode('utf-8')
            collected.write(message)
            if verbosity == CallVerbosity.VERBOSE:
                logger.info(prefix + message.rstrip())
            elif verbosity != CallVerbosity.SILENT:
                logger.debug(prefix + message.rstrip())
        return collected.getvalue()

    async def run_with_timeout() -> Tuple[str, str, int]:
        process = await asyncio.create_subprocess_exec(
            *command,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=os.environ.copy())
        assert process.stdout
        assert process.stderr
        try:
            # BUGFIX: apply the timeout to draining the output streams as
            # well.  Previously only process.wait() was wrapped, so a child
            # that hung without closing its pipes blocked gather() forever
            # and the timeout never fired.
            stdout, stderr = await asyncio.wait_for(
                asyncio.gather(tee(process.stdout),
                               tee(process.stderr)),
                timeout)
            returncode = await process.wait()
        except asyncio.TimeoutError:
            logger.info(prefix + f'timeout after {timeout} seconds')
            # BUGFIX: terminate the child instead of leaking it; tolerate
            # the race where it exits just before we kill it.
            try:
                process.kill()
            except ProcessLookupError:
                pass
            return '', '', 124
        else:
            return stdout, stderr, returncode

    stdout, stderr, returncode = async_run(run_with_timeout())
    if returncode != 0 and verbosity == CallVerbosity.VERBOSE_ON_FAILURE:
        logger.info('Non-zero exit code %d from %s',
                    returncode, ' '.join(command))
        for line in stdout.splitlines():
            logger.info(prefix + 'stdout ' + line)
        for line in stderr.splitlines():
            logger.info(prefix + 'stderr ' + line)
    return stdout, stderr, returncode
def call_throws(
        ctx: CephadmContext,
        command: List[str],
        desc: Optional[str] = None,
        verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
        timeout: Optional[int] = DEFAULT_TIMEOUT,
        **kwargs: Any) -> Tuple[str, str, int]:
    """Run ``command`` via call() and raise RuntimeError on failure.

    When an output stream is short enough to be readable (at most two
    lines), it is included in the exception message.
    """
    out, err, ret = call(ctx, command, desc, verbosity, timeout, **kwargs)
    if not ret:
        return out, err, ret
    for stream in (out, err):
        if stream.strip() and len(stream.splitlines()) <= 2:  # readable message?
            raise RuntimeError(f'Failed command: {" ".join(command)}: {stream}')
    raise RuntimeError('Failed command: %s' % ' '.join(command))
def call_timeout(ctx, command, timeout):
    # type: (CephadmContext, List[str], int) -> int
    """Run ``command`` directly (no shell), enforcing ``timeout`` seconds.

    :return: the process exit code
    :raises TimeoutExpired: if the command does not finish in time
    """
    logger.debug('Running command (timeout=%s): %s'
                 % (timeout, ' '.join(command)))
    try:
        return subprocess.call(command, timeout=timeout, env=os.environ.copy())
    except subprocess.TimeoutExpired:
        msg = 'Command `%s` timed out after %s seconds' % (command, timeout)
        logger.debug(msg)
        raise TimeoutExpired(msg)
1759 | ################################## | |
1760 | ||
f6b5b4d7 | 1761 | |
def json_loads_retry(cli_func: Callable[[], str]) -> Any:
    """Parse JSON produced by ``cli_func``, retrying on invalid output.

    Sleeps 1, 4 and 4 seconds between attempts; the final attempt may
    raise json.JSONDecodeError.
    """
    for delay in (1, 4, 4):
        try:
            return json.loads(cli_func())
        except json.JSONDecodeError:
            logger.debug('Invalid JSON. Retrying in %s seconds...' % delay)
            time.sleep(delay)
    return json.loads(cli_func())
def is_available(ctx, what, func):
    # type: (CephadmContext, str, Callable[[], bool]) -> None
    """
    Wait for a service to become available

    :param what: the name of the service
    :param func: the callable object that determines availability
    :raises Error: when still unavailable after ctx.retry attempts
    """
    retry = ctx.retry
    logger.info('Waiting for %s...' % what)
    num = 1
    while True:
        if func():
            logger.info('%s is available'
                        % what)
            break
        elif num > retry:
            raise Error('%s not available after %s tries'
                        % (what, retry))

        logger.info('%s not available, waiting (%s/%s)...'
                    % (what, num, retry))

        num += 1
        # fixed 2-second pause between probes
        time.sleep(2)
def read_config(fn):
    # type: (Optional[str]) -> ConfigParser
    """Return a ConfigParser loaded from ``fn`` (empty when no path given)."""
    parser = ConfigParser()
    if fn:
        parser.read(fn)
    return parser
f6b5b4d7 | 1806 | |
def pathify(p):
    # type: (str) -> str
    """Expand '~' in ``p`` and return its absolute form."""
    return os.path.abspath(os.path.expanduser(p))
f6b5b4d7 | 1812 | |
def get_file_timestamp(fn):
    # type: (str) -> Optional[str]
    """Return the mtime of ``fn`` as a UTC DATEFMT string, or None on error."""
    try:
        mtime = os.path.getmtime(fn)
        when = datetime.datetime.fromtimestamp(
            mtime, tz=datetime.timezone.utc)
        return when.strftime(DATEFMT)
    except Exception:
        return None
f6b5b4d7 | 1823 | |
def try_convert_datetime(s):
    # type: (str) -> Optional[str]
    """Best-effort parse of a container-runtime timestamp to DATEFMT (UTC).

    Returns None when no known format matches.
    """
    # This is super irritating because
    #  1) podman and docker use different formats
    #  2) python's strptime can't parse either one
    #
    # I've seen:
    #  docker 18.09.7:  2020-03-03T09:21:43.636153304Z
    #  podman 1.7.0:    2020-03-03T15:52:30.136257504-06:00
    #                   2020-03-03 15:52:30.136257504 -0600 CST
    # (In the podman case, there is a different string format for
    # 'inspect' and 'inspect --format {{.Created}}'!!)

    # In *all* cases, the 9 digit second precision is too much for
    # python's strptime.  Shorten it to 6 digits.
    p = re.compile(r'(\.[\d]{6})[\d]*')
    s = p.sub(r'\1', s)

    # replace trailing Z with -0000, since (on python 3.6.8) it won't parse
    if s and s[-1] == 'Z':
        s = s[:-1] + '-0000'

    # cut off the redundant 'CST' part that strptime can't parse, if
    # present.
    v = s.split(' ')
    s = ' '.join(v[0:3])

    # try parsing with several format strings
    fmts = [
        '%Y-%m-%dT%H:%M:%S.%f%z',
        '%Y-%m-%d %H:%M:%S.%f %z',
    ]
    for f in fmts:
        try:
            # return timestamp normalized to UTC, rendered as DATEFMT.
            return datetime.datetime.strptime(s, f).astimezone(tz=datetime.timezone.utc).strftime(DATEFMT)
        except ValueError:
            pass
    return None
f6b5b4d7 | 1864 | |
f67539c2 | 1865 | def _parse_podman_version(version_str): |
9f95a23c | 1866 | # type: (str) -> Tuple[int, ...] |
522d829b | 1867 | def to_int(val: str, org_e: Optional[Exception] = None) -> int: |
9f95a23c TL |
1868 | if not val and org_e: |
1869 | raise org_e | |
1870 | try: | |
1871 | return int(val) | |
1872 | except ValueError as e: | |
1873 | return to_int(val[0:-1], org_e or e) | |
1874 | ||
1875 | return tuple(map(to_int, version_str.split('.'))) | |
1876 | ||
1877 | ||
def get_hostname():
    # type: () -> str
    """Return this machine's (short) hostname."""
    return socket.gethostname()
f6b5b4d7 | 1882 | |
9f95a23c TL |
def get_fqdn():
    # type: () -> str
    """Return the FQDN, falling back to the short hostname."""
    return socket.getfqdn() or socket.gethostname()
f6b5b4d7 | 1887 | |
9f95a23c TL |
def get_arch():
    # type: () -> str
    """Return the machine hardware name (e.g. 'x86_64')."""
    return platform.uname().machine
f6b5b4d7 | 1892 | |
9f95a23c TL |
def generate_service_id():
    # type: () -> str
    """Return '<hostname>.<6 random lowercase letters>'."""
    suffix = ''.join(random.choice(string.ascii_lowercase) for _ in range(6))
    return '%s.%s' % (get_hostname(), suffix)
f6b5b4d7 | 1898 | |
9f95a23c TL |
def generate_password():
    # type: () -> str
    """Return a random 10-character string of lowercase letters and digits."""
    alphabet = string.ascii_lowercase + string.digits
    return ''.join(random.choice(alphabet) for _ in range(10))
f6b5b4d7 | 1904 | |
9f95a23c TL |
def normalize_container_id(i):
    # type: (str) -> str
    """Strip a leading 'sha256:' prefix from a container id.

    docker adds the prefix while podman does not; both always use
    sha256, so drop it for consistency.
    """
    prefix = 'sha256:'
    return i[len(prefix):] if i.startswith(prefix) else i
f6b5b4d7 | 1916 | |
9f95a23c TL |
def make_fsid():
    # type: () -> str
    """Generate a new cluster fsid (time-based UUID rendered as a string)."""
    return str(uuid.uuid1())
f6b5b4d7 | 1921 | |
9f95a23c TL |
def is_fsid(s):
    # type: (str) -> bool
    """Return True when ``s`` parses as a UUID."""
    try:
        uuid.UUID(s)
        return True
    except ValueError:
        return False
f6b5b4d7 | 1930 | |
522d829b TL |
def validate_fsid(func: FuncT) -> FuncT:
    """Decorator: raise Error if ctx.fsid is set but is not a valid UUID."""
    @wraps(func)
    def _validate_fsid(ctx: CephadmContext) -> Any:
        if 'fsid' in ctx and ctx.fsid:
            if not is_fsid(ctx.fsid):
                raise Error('not an fsid: %s' % ctx.fsid)
        return func(ctx)
    return cast(FuncT, _validate_fsid)
def infer_fsid(func: FuncT) -> FuncT:
    """
    If we only find a single fsid in /var/lib/ceph/*, use that
    """
    @infer_config
    @wraps(func)
    def _infer_fsid(ctx: CephadmContext) -> Any:
        if 'fsid' in ctx and ctx.fsid:
            logger.debug('Using specified fsid: %s' % ctx.fsid)
            return func(ctx)

        fsids = set()

        # a config file may pin the fsid explicitly
        cp = read_config(ctx.config)
        if cp.has_option('global', 'fsid'):
            fsids.add(cp.get('global', 'fsid'))

        # otherwise collect candidate fsids from deployed daemons
        daemon_list = list_daemons(ctx, detail=False)
        for daemon in daemon_list:
            if not is_fsid(daemon['fsid']):
                # 'unknown' fsid
                continue
            elif 'name' not in ctx or not ctx.name:
                # ctx.name not specified
                fsids.add(daemon['fsid'])
            elif daemon['name'] == ctx.name:
                # ctx.name is a match
                fsids.add(daemon['fsid'])
        fsids = sorted(fsids)

        if not fsids:
            # some commands do not always require an fsid
            pass
        elif len(fsids) == 1:
            logger.info('Inferring fsid %s' % fsids[0])
            ctx.fsid = fsids[0]
        else:
            # ambiguous: refuse to guess between multiple clusters
            raise Error('Cannot infer an fsid, one must be specified (using --fsid): %s' % fsids)
        return func(ctx)

    return cast(FuncT, _infer_fsid)
f6b5b4d7 | 1983 | |
def infer_config(func: FuncT) -> FuncT:
    """
    Infer the cluster configuration using the following priority order:
     1- if the user has provided custom conf file (-c option) use it
     2- otherwise if daemon --name has been provided use daemon conf
     3- otherwise find the mon daemon conf file and use it (if v1)
     4- otherwise if {ctx.data_dir}/{fsid}/{CEPH_CONF_DIR} dir exists use it
     5- finally: fallback to the default file /etc/ceph/ceph.conf
    """
    @wraps(func)
    def _infer_config(ctx: CephadmContext) -> Any:

        def config_path(daemon_type: str, daemon_name: str) -> str:
            # path of the 'config' file inside the daemon's data dir
            data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_name)
            return os.path.join(data_dir, 'config')

        def get_mon_daemon_name(fsid: str) -> Optional[str]:
            # pick a cephadm:v1-style mon of this cluster that has a config
            daemon_list = list_daemons(ctx, detail=False)
            for daemon in daemon_list:
                if (
                    daemon.get('name', '').startswith('mon.')
                    and daemon.get('fsid', '') == fsid
                    and daemon.get('style', '') == 'cephadm:v1'
                    and os.path.exists(config_path('mon', daemon['name'].split('.', 1)[1]))
                ):
                    return daemon['name']
            return None

        ctx.config = ctx.config if 'config' in ctx else None
        # check if user has provided conf by using -c option
        if ctx.config and (ctx.config != CEPH_DEFAULT_CONF):
            logger.debug(f'Using specified config: {ctx.config}')
            return func(ctx)

        if 'fsid' in ctx and ctx.fsid:
            name = ctx.name if ('name' in ctx and ctx.name) else get_mon_daemon_name(ctx.fsid)
            if name is not None:
                # daemon name has been specified (or inferred from mon), let's use its conf
                ctx.config = config_path(name.split('.', 1)[0], name.split('.', 1)[1])
            else:
                # no daemon, in case the cluster has a config dir then use it
                ceph_conf = f'{ctx.data_dir}/{ctx.fsid}/{CEPH_CONF_DIR}/{CEPH_CONF}'
                if os.path.exists(ceph_conf):
                    ctx.config = ceph_conf

        if ctx.config:
            logger.info(f'Inferring config {ctx.config}')
        elif os.path.exists(CEPH_DEFAULT_CONF):
            logger.debug(f'Using default config {CEPH_DEFAULT_CONF}')
            ctx.config = CEPH_DEFAULT_CONF
        return func(ctx)

    return cast(FuncT, _infer_config)
f6b5b4d7 | 2038 | |
def _get_default_image(ctx: CephadmContext) -> str:
    """Return DEFAULT_IMAGE, warning when it is a development (master) build."""
    if DEFAULT_IMAGE_IS_MASTER:
        warn = """This is a development version of cephadm.
For information regarding the latest stable release:
    https://docs.ceph.com/docs/{}/cephadm/install
""".format(LATEST_STABLE_RELEASE)
        for line in warn.splitlines():
            # yellow so the dev-build warning stands out on a tty
            logger.warning('{}{}{}'.format(termcolor.yellow, line, termcolor.end))
    return DEFAULT_IMAGE
f6b5b4d7 | 2049 | |
def infer_image(func: FuncT) -> FuncT:
    """
    Use the most recent ceph image

    Precedence: --image flag, CEPHADM_IMAGE env var, a locally available
    ceph image, then the built-in default.
    """
    @wraps(func)
    def _infer_image(ctx: CephadmContext) -> Any:
        if not ctx.image:
            ctx.image = os.environ.get('CEPHADM_IMAGE')
        if not ctx.image:
            ctx.image = infer_local_ceph_image(ctx, ctx.container_engine.path)
        if not ctx.image:
            ctx.image = _get_default_image(ctx)
        return func(ctx)

    return cast(FuncT, _infer_image)
f6b5b4d7 | 2066 | |
def default_image(func: FuncT) -> FuncT:
    """Decorator: fill in ctx.image from the daemon type, environment, or default."""
    @wraps(func)
    def _default_image(ctx: CephadmContext) -> Any:
        if not ctx.image:
            # monitoring / ingress / snmp daemons ship in their own images
            if 'name' in ctx and ctx.name:
                type_ = ctx.name.split('.', 1)[0]
                if type_ in Monitoring.components:
                    ctx.image = Monitoring.components[type_]['image']
                if type_ == 'haproxy':
                    ctx.image = HAproxy.default_image
                if type_ == 'keepalived':
                    ctx.image = Keepalived.default_image
                if type_ == SNMPGateway.daemon_type:
                    ctx.image = SNMPGateway.default_image
            if not ctx.image:
                ctx.image = os.environ.get('CEPHADM_IMAGE')
            if not ctx.image:
                ctx.image = _get_default_image(ctx)

        return func(ctx)

    return cast(FuncT, _default_image)
f6b5b4d7 | 2090 | |
33c7a0ef TL |
def get_container_info(ctx: CephadmContext, daemon_filter: str, by_name: bool) -> Optional[ContainerInfo]:
    """
    :param ctx: Cephadm context
    :param daemon_filter: daemon name or type
    :param by_name: must be set to True if daemon name is provided
    :return: Container information or None
    """
    def daemon_name_or_type(daemon: Dict[str, str]) -> str:
        # key used to compare against daemon_filter
        return daemon['name'] if by_name else daemon['name'].split('.', 1)[0]

    if by_name and '.' not in daemon_filter:
        logger.warning(f'Trying to get container info using invalid daemon name {daemon_filter}')
        return None
    daemons = list_daemons(ctx, detail=False)
    # restrict to this cluster's daemons that match the filter
    matching_daemons = [d for d in daemons if daemon_name_or_type(d) == daemon_filter and d['fsid'] == ctx.fsid]
    if matching_daemons:
        d_type, d_id = matching_daemons[0]['name'].split('.', 1)
        out, _, code = get_container_stats(ctx, ctx.container_engine.path, ctx.fsid, d_type, d_id)
        if not code:
            # stats come back as a single comma-separated line
            (container_id, image_name, image_id, start, version) = out.strip().split(',')
            return ContainerInfo(container_id, image_name, image_id, start, version)
    return None
2114 | ||
def infer_local_ceph_image(ctx: CephadmContext, container_path: str) -> Optional[str]:
    """
    Infer the local ceph image based on the following priority criteria:
      1- the image specified by --image arg (if provided).
      2- the same image as the daemon container specified by --name arg (if provided).
      3- image used by any ceph container running on the host. In this case we use daemon types.
      4- if no container is found then we use the most ceph recent image on the host.

    Note: any selected container must have the same fsid inferred previously.

    :return: The most recent local ceph image (already pulled)
    """
    # '|' special character is used to separate the output fields into:
    #  - Repository@digest
    #  - Image Id
    #  - Image Tag
    #  - Image creation date
    out, _, _ = call_throws(ctx,
                            [container_path, 'images',
                             '--filter', 'label=ceph=True',
                             '--filter', 'dangling=false',
                             '--format', '{{.Repository}}@{{.Digest}}|{{.ID}}|{{.Tag}}|{{.CreatedAt}}'])

    # find a running/deployed container whose image should take precedence
    container_info = None
    daemon_name = ctx.name if ('name' in ctx and ctx.name and '.' in ctx.name) else None
    daemons_ls = [daemon_name] if daemon_name is not None else Ceph.daemons  # daemon types: 'mon', 'mgr', etc
    for daemon in daemons_ls:
        container_info = get_container_info(ctx, daemon, daemon_name is not None)
        if container_info is not None:
            logger.debug(f"Using container info for daemon '{daemon}'")
            break

    for image in out.splitlines():
        if image and not image.isspace():
            (digest, image_id, tag, created_date) = image.lstrip().split('|')
            if container_info is not None and image_id not in container_info.image_id:
                # skip images that don't match the selected container
                continue
            if digest and not digest.endswith('@'):
                logger.info(f"Using ceph image with id '{image_id}' and tag '{tag}' created on {created_date}\n{digest}")
                return digest
    return None
f6b5b4d7 | 2157 | |
def write_tmp(s, uid, gid):
    # type: (str, int, int) -> IO[str]
    """Write ``s`` to a NamedTemporaryFile owned by uid:gid and return it.

    The file is removed when the returned handle is closed.
    """
    handle = tempfile.NamedTemporaryFile(mode='w',
                                         prefix='ceph-tmp')
    os.fchown(handle.fileno(), uid, gid)
    handle.write(s)
    handle.flush()
    return handle
f6b5b4d7 | 2168 | |
9f95a23c TL |
def makedirs(dir, uid, gid, mode):
    # type: (str, int, int, int) -> None
    """Create ``dir`` if needed, then force its ownership and mode."""
    if os.path.exists(dir):
        os.chmod(dir, mode)
    else:
        os.makedirs(dir, mode=mode)
    os.chown(dir, uid, gid)
    os.chmod(dir, mode)  # the mode given to makedirs is masked by umask...
f6b5b4d7 | 2178 | |
def get_data_dir(fsid, data_dir, t, n):
    # type: (str, str, str, Union[int, str]) -> str
    """Return the data directory for daemon ``t.n`` of cluster ``fsid``."""
    return os.path.join(data_dir, fsid, '%s.%s' % (t, n))
f6b5b4d7 | 2183 | |
def get_log_dir(fsid, log_dir):
    # type: (str, str) -> str
    """Return the per-cluster log directory for ``fsid``."""
    return os.path.join(log_dir, fsid)
f6b5b4d7 | 2188 | |
def make_data_dir_base(fsid, data_dir, uid, gid):
    # type: (str, str, int, int) -> str
    """Create the per-cluster base dir plus crash/ and crash/posted/."""
    base = os.path.join(data_dir, fsid)
    makedirs(base, uid, gid, DATA_DIR_MODE)
    crash = os.path.join(base, 'crash')
    makedirs(crash, uid, gid, DATA_DIR_MODE)
    makedirs(os.path.join(crash, 'posted'), uid, gid, DATA_DIR_MODE)
    return base
f6b5b4d7 | 2198 | |
def make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=None, gid=None):
    # type: (CephadmContext, str, str, Union[int, str], Optional[int], Optional[int]) -> str
    """Create (and return) a daemon's data dir, resolving uid/gid if unset."""
    if uid is None or gid is None:
        uid, gid = extract_uid_gid(ctx)
    make_data_dir_base(fsid, ctx.data_dir, uid, gid)
    path = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    makedirs(path, uid, gid, DATA_DIR_MODE)
    return path
f6b5b4d7 | 2208 | |
def make_log_dir(ctx, fsid, uid=None, gid=None):
    # type: (CephadmContext, str, Optional[int], Optional[int]) -> str
    """Create (and return) the cluster's log dir, resolving uid/gid if unset."""
    if uid is None or gid is None:
        uid, gid = extract_uid_gid(ctx)
    path = get_log_dir(fsid, ctx.log_dir)
    makedirs(path, uid, gid, LOG_DIR_MODE)
    return path
f6b5b4d7 | 2217 | |
def make_var_run(ctx, fsid, uid, gid):
    # type: (CephadmContext, str, int, int) -> None
    """Create /var/run/ceph/<fsid> with mode 0770, owned by uid:gid."""
    # 'install -d' creates the directory with ownership/mode in one step
    call_throws(ctx, ['install', '-d', '-m0770', '-o', str(uid), '-g', str(gid),
                      '/var/run/ceph/%s' % fsid])
f6b5b4d7 | 2223 | |
def copy_tree(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy a directory tree from src to dst

    Each destination tree is removed first, then re-chowned to uid:gid
    (resolved via extract_uid_gid() when not given).
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_dir in src:
        dst_dir = dst
        if os.path.isdir(dst):
            # copy into dst under the same basename
            dst_dir = os.path.join(dst, os.path.basename(src_dir))

        logger.debug('copy directory `%s` -> `%s`' % (src_dir, dst_dir))
        shutil.rmtree(dst_dir, ignore_errors=True)
        shutil.copytree(src_dir, dst_dir)  # dirs_exist_ok needs python 3.8

        for dirpath, dirnames, filenames in os.walk(dst_dir):
            logger.debug('chown %s:%s `%s`' % (uid, gid, dirpath))
            os.chown(dirpath, uid, gid)
            for filename in filenames:
                logger.debug('chown %s:%s `%s`' % (uid, gid, filename))
                os.chown(os.path.join(dirpath, filename), uid, gid)
2248 | ||
def copy_files(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy files from src to dst, chowning each copy to uid:gid
    (resolved via extract_uid_gid() when not given).
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_file in src:
        # when dst is a directory, copy into it under the same basename
        if os.path.isdir(dst):
            target = os.path.join(dst, os.path.basename(src_file))
        else:
            target = dst

        logger.debug('copy file `%s` -> `%s`' % (src_file, target))
        shutil.copyfile(src_file, target)

        logger.debug('chown %s:%s `%s`' % (uid, gid, target))
        os.chown(target, uid, gid)
f6b5b4d7 | 2268 | |
def move_files(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """
    Move files from src to dst

    Symlinks are recreated rather than followed; regular files are
    chowned to uid:gid (resolved via extract_uid_gid() when not given).
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_file in src:
        dst_file = dst
        if os.path.isdir(dst):
            # move into dst under the same basename
            dst_file = os.path.join(dst, os.path.basename(src_file))

        if os.path.islink(src_file):
            # shutil.move() in py2 does not handle symlinks correctly
            src_rl = os.readlink(src_file)
            logger.debug("symlink '%s' -> '%s'" % (dst_file, src_rl))
            os.symlink(src_rl, dst_file)
            os.unlink(src_file)
        else:
            logger.debug("move file '%s' -> '%s'" % (src_file, dst_file))
            shutil.move(src_file, dst_file)
            logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
            os.chown(dst_file, uid, gid)
f6b5b4d7 | 2294 | |
33c7a0ef TL |
def recursive_chown(path: str, uid: int, gid: int) -> None:
    """Chown ``path`` and every directory/file beneath it to uid:gid."""
    for root, _dirs, files in os.walk(path):
        os.chown(root, uid, gid)
        for name in files:
            os.chown(os.path.join(root, name), uid, gid)
2301 | ||
f67539c2 | 2302 | # copied from distutils |
522d829b | 2303 | def find_executable(executable: str, path: Optional[str] = None) -> Optional[str]: |
9f95a23c TL |
2304 | """Tries to find 'executable' in the directories listed in 'path'. |
2305 | A string listing directories separated by 'os.pathsep'; defaults to | |
2306 | os.environ['PATH']. Returns the complete filename or None if not found. | |
2307 | """ | |
2308 | _, ext = os.path.splitext(executable) | |
2309 | if (sys.platform == 'win32') and (ext != '.exe'): | |
2310 | executable = executable + '.exe' | |
2311 | ||
2312 | if os.path.isfile(executable): | |
2313 | return executable | |
2314 | ||
2315 | if path is None: | |
2316 | path = os.environ.get('PATH', None) | |
2317 | if path is None: | |
2318 | try: | |
f67539c2 | 2319 | path = os.confstr('CS_PATH') |
9f95a23c TL |
2320 | except (AttributeError, ValueError): |
2321 | # os.confstr() or CS_PATH is not available | |
2322 | path = os.defpath | |
2323 | # bpo-35755: Don't use os.defpath if the PATH environment variable is | |
2324 | # set to an empty string | |
2325 | ||
2326 | # PATH='' doesn't match, whereas PATH=':' looks in the current directory | |
2327 | if not path: | |
2328 | return None | |
2329 | ||
2330 | paths = path.split(os.pathsep) | |
2331 | for p in paths: | |
2332 | f = os.path.join(p, executable) | |
2333 | if os.path.isfile(f): | |
2334 | # the file exists, we have a shot at spawn working | |
2335 | return f | |
2336 | return None | |
2337 | ||
f6b5b4d7 | 2338 | |
9f95a23c TL |
def find_program(filename):
    # type: (str) -> str
    """Like find_executable(), but raise ValueError when not found."""
    located = find_executable(filename)
    if located is None:
        raise ValueError('%s not found' % filename)
    return located
f6b5b4d7 | 2346 | |
def find_container_engine(ctx: CephadmContext) -> Optional[ContainerEngine]:
    """Pick a container engine: Docker when --docker, else first available.

    Returns None when no engine binary can be found.
    """
    if ctx.docker:
        return Docker()
    for engine_cls in CONTAINER_PREFERENCE:
        try:
            return engine_cls()
        except Exception:
            # engine binary not present; try the next candidate
            pass
    return None
2358 | ||
def check_container_engine(ctx: CephadmContext) -> ContainerEngine:
    """Validate ctx.container_engine and return it.

    :raises Error: when no supported engine binary was found, or when
                   podman is older than MIN_PODMAN_VERSION
    """
    engine = ctx.container_engine
    if not isinstance(engine, CONTAINER_PREFERENCE):
        # See https://github.com/python/mypy/issues/8993
        exes: List[str] = [i.EXE for i in CONTAINER_PREFERENCE]  # type: ignore
        raise Error('No container engine binary found ({}). Try run `apt/dnf/yum/zypper install <container engine>`'.format(' or '.join(exes)))
    elif isinstance(engine, Podman):
        # podman only: enforce the minimum supported version
        engine.get_version(ctx)
        if engine.version < MIN_PODMAN_VERSION:
            raise Error('podman version %d.%d.%d or later is required' % MIN_PODMAN_VERSION)
    return engine
9f95a23c TL |
def get_unit_name(fsid, daemon_type, daemon_id=None):
    # type: (str, str, Optional[Union[int, str]]) -> str
    """Return the systemd unit name for a daemon (accepts type, or type + id)."""
    if daemon_id is None:
        return 'ceph-%s@%s' % (fsid, daemon_type)
    return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
f6b5b4d7 | 2380 | |
def get_unit_name_by_daemon_name(ctx: CephadmContext, fsid: str, name: str) -> str:
    """Look up the systemd unit for daemon ``name`` via its description.

    :raises Error: when the description carries no systemd unit
    """
    daemon = get_daemon_description(ctx, fsid, name)
    if 'systemd_unit' not in daemon:
        raise Error('Failed to get unit name for {}'.format(daemon))
    return daemon['systemd_unit']
f6b5b4d7 | 2388 | |
f67539c2 TL |
def check_unit(ctx, unit_name):
    # type: (CephadmContext, str) -> Tuple[bool, str, bool]
    """Query systemd for a unit's status.

    :return: (enabled, state, installed) where state is one of
             'running', 'stopped', 'error' or 'unknown'
    """
    # NOTE: we ignore the exit code here because systemctl outputs
    # various exit codes based on the state of the service, but the
    # string result is more explicit (and sufficient).
    enabled = False
    installed = False
    try:
        out, err, code = call(ctx, ['systemctl', 'is-enabled', unit_name],
                              verbosity=CallVerbosity.DEBUG)
        if code == 0:
            enabled = True
            installed = True
        elif 'disabled' in out:
            # 'disabled' implies the unit file is at least installed
            installed = True
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        enabled = False
        installed = False

    state = 'unknown'
    try:
        out, err, code = call(ctx, ['systemctl', 'is-active', unit_name],
                              verbosity=CallVerbosity.DEBUG)
        out = out.strip()
        if out in ['active']:
            state = 'running'
        elif out in ['inactive']:
            state = 'stopped'
        elif out in ['failed', 'auto-restart']:
            state = 'error'
        else:
            state = 'unknown'
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        state = 'unknown'
    return (enabled, state, installed)
f6b5b4d7 | 2427 | |
f67539c2 TL |
def check_units(ctx, units, enabler=None):
    # type: (CephadmContext, List[str], Optional[Packager]) -> bool
    """Return True as soon as one of *units* is enabled and running.

    When none qualify and an *enabler* is supplied, enable every unit
    that is at least installed, then return False.
    """
    for unit in units:
        enabled, state, installed = check_unit(ctx, unit)
        if enabled and state == 'running':
            logger.info('Unit %s is enabled and running' % unit)
            return True
        if enabler is not None and installed:
            logger.info('Enabling unit %s' % unit)
            enabler.enable_service(unit)
    return False
2440 | ||
f6b5b4d7 | 2441 | |
def is_container_running(ctx: CephadmContext, c: 'CephContainer') -> bool:
    """Report whether the daemon behind *c* has a running container."""
    daemon_type = ctx.name.split('.', 1)[0]
    if daemon_type in ('agent', 'cephadm-exporter'):
        # these are non-containerized daemon types
        return False
    return bool(get_running_container_name(ctx, c))
2447 | ||
2448 | ||
def get_running_container_name(ctx: CephadmContext, c: 'CephContainer') -> Optional[str]:
    """Return whichever of the container's names has a running container.

    Checks the current name first, then the legacy name; None when
    neither container reports the 'running' state.
    """
    engine_path = ctx.container_engine.path
    for candidate in (c.cname, c.old_cname):
        out, err, ret = call(ctx, [
            engine_path, 'container', 'inspect',
            '--format', '{{.State.Status}}', candidate
        ])
        if out.strip() == 'running':
            return candidate
    return None
f67539c2 TL |
2458 | |
2459 | ||
def get_legacy_config_fsid(cluster, legacy_dir=None):
    # type: (str, Optional[str]) -> Optional[str]
    """Read the fsid from a legacy /etc/ceph/<cluster>.conf, if present.

    When *legacy_dir* is given, the config path is resolved beneath it
    (used when inspecting an alternate root). Returns None when the
    file is missing or holds no [global] fsid option.
    """
    config_file = '/etc/ceph/%s.conf' % cluster
    if legacy_dir is not None:
        config_file = os.path.abspath(legacy_dir + config_file)

    if not os.path.exists(config_file):
        return None
    config = read_config(config_file)
    if config.has_section('global') and config.has_option('global', 'fsid'):
        return config.get('global', 'fsid')
    return None
2471 | ||
f6b5b4d7 | 2472 | |
f67539c2 TL |
def get_legacy_daemon_fsid(ctx, cluster,
                           daemon_type, daemon_id, legacy_dir=None):
    # type: (CephadmContext, str, str, Union[int, str], Optional[str]) -> Optional[str]
    """Determine the fsid of a pre-cephadm ('legacy') daemon.

    OSDs record their fsid in a ceph_fsid file inside the daemon data
    directory; try that first, then fall back to the fsid from the
    cluster's config file. May return None.
    """
    fsid = None
    if daemon_type == 'osd':
        fsid_file = os.path.join(ctx.data_dir,
                                 daemon_type,
                                 'ceph-%s' % daemon_id,
                                 'ceph_fsid')
        if legacy_dir is not None:
            fsid_file = os.path.abspath(legacy_dir + fsid_file)
        try:
            with open(fsid_file, 'r') as f:
                fsid = f.read().strip()
        except IOError:
            pass
    return fsid or get_legacy_config_fsid(cluster, legacy_dir=legacy_dir)
2492 | ||
f6b5b4d7 | 2493 | |
20effc67 TL |
def should_log_to_journald(ctx: CephadmContext) -> bool:
    """Decide whether daemons should log to journald.

    An explicit log-to-journald setting on the context wins; otherwise
    default to journald only under podman versions new enough for
    --cgroups=split.
    """
    if ctx.log_to_journald is not None:
        return ctx.log_to_journald
    engine = ctx.container_engine
    if not isinstance(engine, Podman):
        return False
    return engine.version >= CGROUPS_SPLIT_PODMAN_VERSION
2499 | ||
2500 | ||
f67539c2 TL |
def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
    # type: (CephadmContext, str, str, Union[int, str]) -> List[str]
    """Build the extra command-line arguments for a daemon's process.

    Ceph daemons get user/logging flags, monitoring components get
    their bind address and config-file flags, and the remaining daemon
    types delegate to their class helpers.
    """
    r = list()  # type: List[str]

    if daemon_type in Ceph.daemons and daemon_type != 'crash':
        # run as the ceph user and route logs away from files
        r += [
            '--setuser', 'ceph',
            '--setgroup', 'ceph',
            '--default-log-to-file=false',
        ]
        log_to_journald = should_log_to_journald(ctx)
        if log_to_journald:
            r += [
                '--default-log-to-journald=true',
                '--default-log-to-stderr=false',
            ]
        else:
            r += [
                '--default-log-to-stderr=true',
                '--default-log-stderr-prefix=debug ',
            ]
        if daemon_type == 'mon':
            # the mon additionally has a cluster log; same routing rules
            r += [
                '--default-mon-cluster-log-to-file=false',
            ]
            if log_to_journald:
                r += [
                    '--default-mon-cluster-log-to-journald=true',
                    '--default-mon-cluster-log-to-stderr=false',
                ]
            else:
                r += ['--default-mon-cluster-log-to-stderr=true']
    elif daemon_type in Monitoring.components:
        metadata = Monitoring.components[daemon_type]
        r += metadata.get('args', list())
        # set ip and port to bind to for nodeexporter,alertmanager,prometheus
        if daemon_type not in ['grafana', 'loki', 'promtail']:
            ip = ''
            port = Monitoring.port_map[daemon_type][0]
            # deployment metadata (from the orchestrator) may override
            # the bind ip and port
            if 'meta_json' in ctx and ctx.meta_json:
                meta = json.loads(ctx.meta_json) or {}
                if 'ip' in meta and meta['ip']:
                    ip = meta['ip']
                if 'ports' in meta and meta['ports']:
                    port = meta['ports'][0]
            r += [f'--web.listen-address={ip}:{port}']
            if daemon_type == 'prometheus':
                scheme = 'http'
                host = get_fqdn()
                r += [f'--web.external-url={scheme}://{host}:{port}']
        if daemon_type == 'alertmanager':
            config = get_parm(ctx.config_json)
            peers = config.get('peers', list())  # type: ignore
            for peer in peers:
                r += ['--cluster.peer={}'.format(peer)]
            # some alertmanager, by default, look elsewhere for a config
            r += ['--config.file=/etc/alertmanager/alertmanager.yml']
        if daemon_type == 'loki':
            r += ['--config.file=/etc/loki/loki.yml']
        if daemon_type == 'promtail':
            r += ['--config.file=/etc/promtail/promtail.yml']
        if daemon_type == 'node-exporter':
            # host filesystems are bind-mounted under these paths by
            # get_container_mounts
            r += ['--path.procfs=/host/proc',
                  '--path.sysfs=/host/sys',
                  '--path.rootfs=/rootfs']
    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
        r += nfs_ganesha.get_daemon_args()
    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, fsid, daemon_id)
        r += haproxy.get_daemon_args()
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        r.extend(cc.get_daemon_args())
    elif daemon_type == SNMPGateway.daemon_type:
        sc = SNMPGateway.init(ctx, fsid, daemon_id)
        r.extend(sc.get_daemon_args())

    return r
2580 | ||
f6b5b4d7 | 2581 | |
def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid,
                       config=None, keyring=None):
    # type: (CephadmContext, str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None
    """Create the on-host directory tree and config files for one daemon.

    Builds the daemon data dir (owned uid:gid), writes the provided
    ceph config/keyring with 0600 permissions, then performs
    per-daemon-type setup: monitoring components get their etc/<name>
    and data subdirectories plus files from the config-json 'files'
    map; the other special daemon types delegate to their class helpers.
    """
    data_dir = make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=uid, gid=gid)

    if daemon_type in Ceph.daemons:
        make_log_dir(ctx, fsid, uid=uid, gid=gid)

    if config:
        config_path = os.path.join(data_dir, 'config')
        with open(config_path, 'w') as f:
            # chown/chmod before writing so the content is never
            # readable by other users
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config)

    if keyring:
        keyring_path = os.path.join(data_dir, 'keyring')
        with open(keyring_path, 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write(keyring)

    if daemon_type in Monitoring.components.keys():
        config_json: Dict[str, Any] = dict()
        if 'config_json' in ctx:
            config_json = get_parm(ctx.config_json)

        # Set up directories specific to the monitoring component
        config_dir = ''
        data_dir_root = ''
        if daemon_type == 'prometheus':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/prometheus'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'alerting'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
            recursive_chown(os.path.join(data_dir_root, 'etc'), uid, gid)
            recursive_chown(os.path.join(data_dir_root, 'data'), uid, gid)
        elif daemon_type == 'grafana':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/grafana'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
            # pre-create the sqlite db so the bind mount in
            # get_container_mounts has a file to target
            touch(os.path.join(data_dir_root, 'data', 'grafana.db'), uid, gid)
        elif daemon_type == 'alertmanager':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/alertmanager'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755)
        elif daemon_type == 'promtail':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/promtail'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
        elif daemon_type == 'loki':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/loki'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)

        # populate the config directory for the component from the config-json
        if 'files' in config_json:
            for fname in config_json['files']:
                content = dict_get_join(config_json['files'], fname)
                # absolute names are placed relative to the data dir
                # root; relative names go under the component config dir
                if os.path.isabs(fname):
                    fpath = os.path.join(data_dir_root, fname.lstrip(os.path.sep))
                else:
                    fpath = os.path.join(data_dir_root, config_dir, fname)
                with open(fpath, 'w', encoding='utf-8') as f:
                    os.fchown(f.fileno(), uid, gid)
                    os.fchmod(f.fileno(), 0o600)
                    f.write(content)

    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
        nfs_ganesha.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CephIscsi.daemon_type:
        ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
        ceph_iscsi.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, fsid, daemon_id)
        haproxy.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == Keepalived.daemon_type:
        keepalived = Keepalived.init(ctx, fsid, daemon_id)
        keepalived.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        cc.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == SNMPGateway.daemon_type:
        sg = SNMPGateway.init(ctx, fsid, daemon_id)
        sg.create_daemon_conf()
2685 | ||
f6b5b4d7 | 2686 | |
9f95a23c TL |
def get_parm(option):
    # type: (str) -> Dict[str, str]
    """Parse a JSON config option that may be '-', inline JSON, or a path.

    '-' reads stdin (cached across calls in the module-level
    cached_stdin); a brace-wrapped string is parsed directly; anything
    else must be a path to a JSON file. Returns an empty dict for an
    empty option and raises Error on a missing file or invalid JSON.
    """
    if not option:
        return dict()

    global cached_stdin
    if option == '-':
        # stdin can only be consumed once; cache it for repeat calls
        if cached_stdin is None:
            cached_stdin = sys.stdin.read()
        j = cached_stdin
    elif option.startswith('{') and option.endswith('}'):
        # inline json string
        j = option
    elif os.path.exists(option):
        # json file
        with open(option, 'r') as f:
            j = f.read()
    else:
        raise Error('Config file {} not found'.format(option))

    try:
        js = json.loads(j)
    except ValueError as e:
        raise Error('Invalid JSON in {}: {}'.format(option, e))
    return js
2717 | ||
f6b5b4d7 | 2718 | |
f67539c2 TL |
def get_config_and_keyring(ctx):
    # type: (CephadmContext) -> Tuple[Optional[str], Optional[str]]
    """Resolve ceph config and keyring contents for this invocation.

    --config-json wins outright when it supplies both pieces;
    otherwise fall back to --config for the config and --key /
    --keyring for the keyring. Either element may be None.
    """
    def _read(path):
        # type: (str) -> str
        # missing files surface as a cephadm Error, not a traceback
        try:
            with open(path, 'r') as f:
                return f.read()
        except FileNotFoundError as e:
            raise Error(e)

    config = None
    keyring = None

    if 'config_json' in ctx and ctx.config_json:
        parsed = get_parm(ctx.config_json)
        config = parsed.get('config')
        keyring = parsed.get('keyring')
        if config and keyring:
            return config, keyring

    if 'config' in ctx and ctx.config:
        config = _read(ctx.config)

    if 'key' in ctx and ctx.key:
        keyring = '[%s]\n\tkey = %s\n' % (ctx.name, ctx.key)
    elif 'keyring' in ctx and ctx.keyring:
        keyring = _read(ctx.keyring)

    return config, keyring
2748 | ||
2749 | ||
f67539c2 TL |
def get_container_binds(ctx, fsid, daemon_type, daemon_id):
    # type: (CephadmContext, str, str, Union[int, str, None]) -> List[List[str]]
    """Collect extra bind-mount specs required by this daemon type.

    Only iscsi and custom containers contribute binds; everything else
    gets an empty list.
    """
    if daemon_type == CephIscsi.daemon_type:
        return list(CephIscsi.get_container_binds())
    if daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        return list(cc.get_container_binds(data_dir))
    return []
2763 | ||
9f95a23c | 2764 | |
f67539c2 | 2765 | def get_container_mounts(ctx, fsid, daemon_type, daemon_id, |
9f95a23c | 2766 | no_config=False): |
f67539c2 | 2767 | # type: (CephadmContext, str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str] |
9f95a23c TL |
2768 | mounts = dict() |
2769 | ||
2770 | if daemon_type in Ceph.daemons: | |
2771 | if fsid: | |
f67539c2 | 2772 | run_path = os.path.join('/var/run/ceph', fsid) |
9f95a23c TL |
2773 | if os.path.exists(run_path): |
2774 | mounts[run_path] = '/var/run/ceph:z' | |
f67539c2 | 2775 | log_dir = get_log_dir(fsid, ctx.log_dir) |
9f95a23c TL |
2776 | mounts[log_dir] = '/var/log/ceph:z' |
2777 | crash_dir = '/var/lib/ceph/%s/crash' % fsid | |
2778 | if os.path.exists(crash_dir): | |
2779 | mounts[crash_dir] = '/var/lib/ceph/crash:z' | |
20effc67 TL |
2780 | if daemon_type != 'crash' and should_log_to_journald(ctx): |
2781 | journald_sock_dir = '/run/systemd/journal' | |
2782 | mounts[journald_sock_dir] = journald_sock_dir | |
9f95a23c TL |
2783 | |
2784 | if daemon_type in Ceph.daemons and daemon_id: | |
f67539c2 | 2785 | data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) |
9f95a23c TL |
2786 | if daemon_type == 'rgw': |
2787 | cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id) | |
2788 | else: | |
2789 | cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id) | |
2790 | if daemon_type != 'crash': | |
2791 | mounts[data_dir] = cdata_dir + ':z' | |
2792 | if not no_config: | |
2793 | mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z' | |
f67539c2 | 2794 | if daemon_type in ['rbd-mirror', 'cephfs-mirror', 'crash']: |
9f95a23c TL |
2795 | # these do not search for their keyrings in a data directory |
2796 | mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id) | |
2797 | ||
b3b6e05e | 2798 | if daemon_type in ['mon', 'osd', 'clusterless-ceph-volume']: |
9f95a23c TL |
2799 | mounts['/dev'] = '/dev' # FIXME: narrow this down? |
2800 | mounts['/run/udev'] = '/run/udev' | |
b3b6e05e | 2801 | if daemon_type in ['osd', 'clusterless-ceph-volume']: |
9f95a23c | 2802 | mounts['/sys'] = '/sys' # for numa.cc, pick_address, cgroups, ... |
b3b6e05e TL |
2803 | mounts['/run/lvm'] = '/run/lvm' |
2804 | mounts['/run/lock/lvm'] = '/run/lock/lvm' | |
2805 | if daemon_type == 'osd': | |
f67539c2 TL |
2806 | # selinux-policy in the container may not match the host. |
2807 | if HostFacts(ctx).selinux_enabled: | |
2808 | selinux_folder = '/var/lib/ceph/%s/selinux' % fsid | |
2809 | if not os.path.exists(selinux_folder): | |
2810 | os.makedirs(selinux_folder, mode=0o755) | |
2811 | mounts[selinux_folder] = '/sys/fs/selinux:ro' | |
20effc67 | 2812 | mounts['/'] = '/rootfs' |
9f95a23c | 2813 | |
e306af50 | 2814 | try: |
f67539c2 TL |
2815 | if ctx.shared_ceph_folder: # make easy manager modules/ceph-volume development |
2816 | ceph_folder = pathify(ctx.shared_ceph_folder) | |
e306af50 TL |
2817 | if os.path.exists(ceph_folder): |
2818 | mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume' | |
522d829b | 2819 | mounts[ceph_folder + '/src/cephadm/cephadm'] = '/usr/sbin/cephadm' |
e306af50 TL |
2820 | mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr' |
2821 | mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph' | |
20effc67 TL |
2822 | mounts[ceph_folder + '/monitoring/ceph-mixin/dashboards_out'] = '/etc/grafana/dashboards/ceph-dashboard' |
2823 | mounts[ceph_folder + '/monitoring/ceph-mixin/prometheus_alerts.yml'] = '/etc/prometheus/ceph/ceph_default_alerts.yml' | |
e306af50 TL |
2824 | else: |
2825 | logger.error('{}{}{}'.format(termcolor.red, | |
f67539c2 TL |
2826 | 'Ceph shared source folder does not exist.', |
2827 | termcolor.end)) | |
e306af50 TL |
2828 | except AttributeError: |
2829 | pass | |
2830 | ||
9f95a23c | 2831 | if daemon_type in Monitoring.components and daemon_id: |
f67539c2 | 2832 | data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) |
33c7a0ef | 2833 | log_dir = get_log_dir(fsid, ctx.log_dir) |
9f95a23c TL |
2834 | if daemon_type == 'prometheus': |
2835 | mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z' | |
2836 | mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z' | |
33c7a0ef TL |
2837 | elif daemon_type == 'loki': |
2838 | mounts[os.path.join(data_dir, 'etc/loki')] = '/etc/loki:Z' | |
2839 | mounts[os.path.join(data_dir, 'data')] = '/loki:Z' | |
2840 | elif daemon_type == 'promtail': | |
2841 | mounts[os.path.join(data_dir, 'etc/promtail')] = '/etc/promtail:Z' | |
2842 | mounts[log_dir] = '/var/log/ceph:z' | |
2843 | mounts[os.path.join(data_dir, 'data')] = '/promtail:Z' | |
9f95a23c TL |
2844 | elif daemon_type == 'node-exporter': |
2845 | mounts['/proc'] = '/host/proc:ro' | |
2846 | mounts['/sys'] = '/host/sys:ro' | |
2847 | mounts['/'] = '/rootfs:ro' | |
f67539c2 | 2848 | elif daemon_type == 'grafana': |
9f95a23c TL |
2849 | mounts[os.path.join(data_dir, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z' |
2850 | mounts[os.path.join(data_dir, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z' | |
2851 | mounts[os.path.join(data_dir, 'etc/grafana/certs')] = '/etc/grafana/certs:Z' | |
f67539c2 | 2852 | mounts[os.path.join(data_dir, 'data/grafana.db')] = '/var/lib/grafana/grafana.db:Z' |
9f95a23c | 2853 | elif daemon_type == 'alertmanager': |
f6b5b4d7 | 2854 | mounts[os.path.join(data_dir, 'etc/alertmanager')] = '/etc/alertmanager:Z' |
9f95a23c TL |
2855 | |
2856 | if daemon_type == NFSGanesha.daemon_type: | |
2857 | assert daemon_id | |
f67539c2 TL |
2858 | data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) |
2859 | nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id) | |
f91f0fd5 | 2860 | mounts.update(nfs_ganesha.get_container_mounts(data_dir)) |
9f95a23c | 2861 | |
f67539c2 TL |
2862 | if daemon_type == HAproxy.daemon_type: |
2863 | assert daemon_id | |
2864 | data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) | |
2865 | mounts.update(HAproxy.get_container_mounts(data_dir)) | |
2866 | ||
1911f103 TL |
2867 | if daemon_type == CephIscsi.daemon_type: |
2868 | assert daemon_id | |
f67539c2 TL |
2869 | data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) |
2870 | log_dir = get_log_dir(fsid, ctx.log_dir) | |
1911f103 TL |
2871 | mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir)) |
2872 | ||
f67539c2 TL |
2873 | if daemon_type == Keepalived.daemon_type: |
2874 | assert daemon_id | |
2875 | data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) | |
2876 | mounts.update(Keepalived.get_container_mounts(data_dir)) | |
2877 | ||
f91f0fd5 TL |
2878 | if daemon_type == CustomContainer.daemon_type: |
2879 | assert daemon_id | |
f67539c2 TL |
2880 | cc = CustomContainer.init(ctx, fsid, daemon_id) |
2881 | data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) | |
f91f0fd5 TL |
2882 | mounts.update(cc.get_container_mounts(data_dir)) |
2883 | ||
9f95a23c TL |
2884 | return mounts |
2885 | ||
f6b5b4d7 | 2886 | |
20effc67 TL |
def get_ceph_volume_container(ctx: CephadmContext,
                              privileged: bool = True,
                              cname: str = '',
                              volume_mounts: Optional[Dict[str, str]] = None,
                              bind_mounts: Optional[List[List[str]]] = None,
                              args: Optional[List[str]] = None,
                              envs: Optional[List[str]] = None) -> 'CephContainer':
    """Build a CephContainer configured to run ceph-volume.

    Always injects CEPH_VOLUME_SKIP_RESTORECON/CEPH_VOLUME_DEBUG into
    the environment and applies the context's memory request/limit.

    Fix: volume_mounts/args previously used mutable default arguments
    ({} and []), which are shared across calls; they are now None
    sentinels normalized to fresh objects per call. Callers passing
    values explicitly are unaffected.
    """
    if volume_mounts is None:
        volume_mounts = {}
    if args is None:
        args = []
    if envs is None:
        envs = []
    # NOTE(review): these appends mutate a caller-provided *envs* list;
    # preserved as-is in case existing callers rely on it.
    envs.append('CEPH_VOLUME_SKIP_RESTORECON=yes')
    envs.append('CEPH_VOLUME_DEBUG=1')

    return CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/sbin/ceph-volume',
        args=args,
        volume_mounts=volume_mounts,
        bind_mounts=bind_mounts,
        envs=envs,
        privileged=privileged,
        cname=cname,
        memory_request=ctx.memory_request,
        memory_limit=ctx.memory_limit,
    )
2912 | ||
2913 | ||
f67539c2 TL |
def get_container(ctx: CephadmContext,
                  fsid: str, daemon_type: str, daemon_id: Union[int, str],
                  privileged: bool = False,
                  ptrace: bool = False,
                  container_args: Optional[List[str]] = None) -> 'CephContainer':
    """Construct the CephContainer used to run one daemon.

    Chooses entrypoint, entity name, environment, privileges and
    engine-specific flags per daemon type, then assembles everything
    (args, mounts, binds) via CephContainer.for_daemon.
    """
    entrypoint: str = ''
    name: str = ''
    ceph_args: List[str] = []
    envs: List[str] = []
    host_network: bool = True

    if daemon_type in Ceph.daemons:
        envs.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728')
    if container_args is None:
        container_args = []
    if daemon_type in ['mon', 'osd']:
        # mon and osd need privileged in order for libudev to query devices
        privileged = True
    if daemon_type == 'rgw':
        entrypoint = '/usr/bin/radosgw'
        name = 'client.rgw.%s' % daemon_id
    elif daemon_type == 'rbd-mirror':
        entrypoint = '/usr/bin/rbd-mirror'
        name = 'client.rbd-mirror.%s' % daemon_id
    elif daemon_type == 'cephfs-mirror':
        entrypoint = '/usr/bin/cephfs-mirror'
        name = 'client.cephfs-mirror.%s' % daemon_id
    elif daemon_type == 'crash':
        entrypoint = '/usr/bin/ceph-crash'
        name = 'client.crash.%s' % daemon_id
    elif daemon_type in ['mon', 'mgr', 'mds', 'osd']:
        entrypoint = '/usr/bin/ceph-' + daemon_type
        name = '%s.%s' % (daemon_type, daemon_id)
    elif daemon_type in Monitoring.components:
        # monitoring images define their own entrypoint
        entrypoint = ''
    elif daemon_type == NFSGanesha.daemon_type:
        entrypoint = NFSGanesha.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        envs.extend(NFSGanesha.get_container_envs())
    elif daemon_type == HAproxy.daemon_type:
        name = '%s.%s' % (daemon_type, daemon_id)
        container_args.extend(['--user=root'])  # haproxy 2.4 defaults to a different user
    elif daemon_type == Keepalived.daemon_type:
        name = '%s.%s' % (daemon_type, daemon_id)
        envs.extend(Keepalived.get_container_envs())
        # keepalived manages VIPs, so it needs raw network capabilities
        container_args.extend(['--cap-add=NET_ADMIN', '--cap-add=NET_RAW'])
    elif daemon_type == CephIscsi.daemon_type:
        entrypoint = CephIscsi.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        # So the container can modprobe iscsi_target_mod and have write perms
        # to configfs we need to make this a privileged container.
        privileged = True
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        entrypoint = cc.entrypoint
        host_network = False
        envs.extend(cc.get_container_envs())
        container_args.extend(cc.get_container_args())

    if daemon_type in Monitoring.components:
        uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
        monitoring_args = [
            '--user',
            str(uid),
            # FIXME: disable cpu/memory limits for the time being (not supported
            # by ubuntu 18.04 kernel!)
        ]
        container_args.extend(monitoring_args)
        if daemon_type == 'node-exporter':
            # in order to support setting '--path.procfs=/host/proc','--path.sysfs=/host/sys',
            # '--path.rootfs=/rootfs' for node-exporter we need to disable selinux separation
            # between the node-exporter container and the host to avoid selinux denials
            container_args.extend(['--security-opt', 'label=disable'])
    elif daemon_type == 'crash':
        ceph_args = ['-n', name]
    elif daemon_type in Ceph.daemons:
        ceph_args = ['-n', name, '-f']
    elif daemon_type == SNMPGateway.daemon_type:
        sg = SNMPGateway.init(ctx, fsid, daemon_id)
        container_args.append(
            f'--env-file={sg.conf_file_path}'
        )

    # if using podman, set -d, --conmon-pidfile & --cidfile flags
    # so service can have Type=Forking
    if isinstance(ctx.container_engine, Podman):
        runtime_dir = '/run'
        container_args.extend([
            '-d', '--log-driver', 'journald',
            '--conmon-pidfile',
            runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, daemon_id),
            '--cidfile',
            runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, daemon_id),
        ])
        if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION:
            container_args.append('--cgroups=split')

    return CephContainer.for_daemon(
        ctx,
        fsid=fsid,
        daemon_type=daemon_type,
        daemon_id=str(daemon_id),
        entrypoint=entrypoint,
        args=ceph_args + get_daemon_args(ctx, fsid, daemon_type, daemon_id),
        container_args=container_args,
        volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
        bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
        envs=envs,
        privileged=privileged,
        ptrace=ptrace,
        host_network=host_network,
    )
3026 | ||
f6b5b4d7 | 3027 | |
f67539c2 TL |
def extract_uid_gid(ctx, img='', file_path='/var/lib/ceph'):
    # type: (CephadmContext, str, Union[str, List[str]]) -> Tuple[int, int]
    """Find the numeric uid/gid owning *file_path* inside the image.

    file_path may be a single path or a list of candidates; the first
    one stat(1) succeeds on wins. Raises Error when every candidate
    fails, RuntimeError when no candidates were given at all.
    """
    if not img:
        img = ctx.image

    paths = [file_path] if isinstance(file_path, str) else file_path

    last_failure: Optional[Tuple[str, RuntimeError]] = None
    for candidate in paths:
        try:
            out = CephContainer(
                ctx,
                image=img,
                entrypoint='stat',
                args=['-c', '%u %g', candidate]
            ).run()
        except RuntimeError as e:
            last_failure = (candidate, e)
            continue
        uid, gid = out.split(' ')
        return int(uid), int(gid)
    if last_failure:
        raise Error(f'Failed to extract uid/gid for path {last_failure[0]}: {last_failure[1]}')

    raise RuntimeError('uid/gid not found')
3057 | ||
9f95a23c | 3058 | |
f67539c2 | 3059 | def deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid, |
9f95a23c TL |
3060 | config=None, keyring=None, |
3061 | osd_fsid=None, | |
f6b5b4d7 TL |
3062 | reconfig=False, |
3063 | ports=None): | |
f67539c2 | 3064 | # type: (CephadmContext, str, str, Union[int, str], Optional[CephContainer], int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None |
f6b5b4d7 TL |
3065 | |
3066 | ports = ports or [] | |
f67539c2 | 3067 | if any([port_in_use(ctx, port) for port in ports]): |
b3b6e05e TL |
3068 | if daemon_type == 'mgr': |
3069 | # non-fatal for mgr when we are in mgr_standby_modules=false, but we can't | |
3070 | # tell whether that is the case here. | |
3071 | logger.warning( | |
3072 | f"ceph-mgr TCP port(s) {','.join(map(str, ports))} already in use" | |
3073 | ) | |
3074 | else: | |
3075 | raise Error("TCP Port(s) '{}' required for {} already in use".format(','.join(map(str, ports)), daemon_type)) | |
f6b5b4d7 | 3076 | |
f67539c2 | 3077 | data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) |
9f95a23c TL |
3078 | if reconfig and not os.path.exists(data_dir): |
3079 | raise Error('cannot reconfig, data path %s does not exist' % data_dir) | |
3080 | if daemon_type == 'mon' and not os.path.exists(data_dir): | |
3081 | assert config | |
3082 | assert keyring | |
3083 | # tmp keyring file | |
3084 | tmp_keyring = write_tmp(keyring, uid, gid) | |
3085 | ||
3086 | # tmp config file | |
3087 | tmp_config = write_tmp(config, uid, gid) | |
3088 | ||
3089 | # --mkfs | |
f67539c2 TL |
3090 | create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid) |
3091 | mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', daemon_id) | |
3092 | log_dir = get_log_dir(fsid, ctx.log_dir) | |
3093 | CephContainer( | |
3094 | ctx, | |
3095 | image=ctx.image, | |
9f95a23c | 3096 | entrypoint='/usr/bin/ceph-mon', |
f67539c2 TL |
3097 | args=[ |
3098 | '--mkfs', | |
3099 | '-i', str(daemon_id), | |
3100 | '--fsid', fsid, | |
3101 | '-c', '/tmp/config', | |
3102 | '--keyring', '/tmp/keyring', | |
3103 | ] + get_daemon_args(ctx, fsid, 'mon', daemon_id), | |
9f95a23c TL |
3104 | volume_mounts={ |
3105 | log_dir: '/var/log/ceph:z', | |
3106 | mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id), | |
3107 | tmp_keyring.name: '/tmp/keyring:z', | |
3108 | tmp_config.name: '/tmp/config:z', | |
3109 | }, | |
3110 | ).run() | |
3111 | ||
3112 | # write conf | |
3113 | with open(mon_dir + '/config', 'w') as f: | |
3114 | os.fchown(f.fileno(), uid, gid) | |
3115 | os.fchmod(f.fileno(), 0o600) | |
3116 | f.write(config) | |
3117 | else: | |
3118 | # dirs, conf, keyring | |
3119 | create_daemon_dirs( | |
f67539c2 | 3120 | ctx, |
9f95a23c TL |
3121 | fsid, daemon_type, daemon_id, |
3122 | uid, gid, | |
3123 | config, keyring) | |
3124 | ||
3125 | if not reconfig: | |
20effc67 | 3126 | if daemon_type == CephadmAgent.daemon_type: |
f67539c2 TL |
3127 | if ctx.config_json == '-': |
3128 | config_js = get_parm('-') | |
3129 | else: | |
3130 | config_js = get_parm(ctx.config_json) | |
3131 | assert isinstance(config_js, dict) | |
3132 | ||
20effc67 TL |
3133 | cephadm_agent = CephadmAgent(ctx, fsid, daemon_id) |
3134 | cephadm_agent.deploy_daemon_unit(config_js) | |
f67539c2 TL |
3135 | else: |
3136 | if c: | |
3137 | deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, | |
3138 | c, osd_fsid=osd_fsid, ports=ports) | |
3139 | else: | |
3140 | raise RuntimeError('attempting to deploy a daemon without a container image') | |
9f95a23c TL |
3141 | |
3142 | if not os.path.exists(data_dir + '/unit.created'): | |
3143 | with open(data_dir + '/unit.created', 'w') as f: | |
3144 | os.fchmod(f.fileno(), 0o600) | |
3145 | os.fchown(f.fileno(), uid, gid) | |
3146 | f.write('mtime is time the daemon deployment was created\n') | |
3147 | ||
3148 | with open(data_dir + '/unit.configured', 'w') as f: | |
3149 | f.write('mtime is time we were last configured\n') | |
3150 | os.fchmod(f.fileno(), 0o600) | |
3151 | os.fchown(f.fileno(), uid, gid) | |
3152 | ||
f67539c2 | 3153 | update_firewalld(ctx, daemon_type) |
9f95a23c | 3154 | |
f6b5b4d7 TL |
3155 | # Open ports explicitly required for the daemon |
3156 | if ports: | |
f67539c2 | 3157 | fw = Firewalld(ctx) |
f6b5b4d7 TL |
3158 | fw.open_ports(ports) |
3159 | fw.apply_rules() | |
3160 | ||
9f95a23c TL |
3161 | if reconfig and daemon_type not in Ceph.daemons: |
3162 | # ceph daemons do not need a restart; others (presumably) do to pick | |
3163 | # up the new config | |
f67539c2 TL |
3164 | call_throws(ctx, ['systemctl', 'reset-failed', |
3165 | get_unit_name(fsid, daemon_type, daemon_id)]) | |
3166 | call_throws(ctx, ['systemctl', 'restart', | |
3167 | get_unit_name(fsid, daemon_type, daemon_id)]) | |
3168 | ||
9f95a23c | 3169 | |
f67539c2 TL |
def _write_container_cmd_to_bash(ctx, file_obj, container, comment=None, background=False):
    # type: (CephadmContext, IO[str], CephContainer, Optional[str], Optional[bool]) -> None
    """Write the shell lines that remove any stale container and then run it.

    The lines are appended to an open unit.run-style script.  Removal lines
    are prefixed with '!' so a missing container does not abort the script.
    """
    if comment:
        # Sometimes adding a comment, especially if there are multiple containers in one
        # unit file, makes it easier to read and grok.
        file_obj.write('# ' + comment + '\n')
    # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually.
    # Fix: quote the args here too — the storage variants and the run command
    # below already use shlex.quote; shlex.quote is a no-op for safe names, so
    # this only changes output if a name ever contains shell metacharacters.
    file_obj.write('! ' + ' '.join(shlex.quote(a) for a in container.rm_cmd(old_cname=True)) + ' 2> /dev/null\n')
    file_obj.write('! ' + ' '.join(shlex.quote(a) for a in container.rm_cmd()) + ' 2> /dev/null\n')
    # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
    if isinstance(ctx.container_engine, Podman):
        file_obj.write(
            '! '
            + ' '.join(shlex.quote(a) for a in container.rm_cmd(storage=True))
            + ' 2> /dev/null\n')
        file_obj.write(
            '! '
            + ' '.join(shlex.quote(a) for a in container.rm_cmd(old_cname=True, storage=True))
            + ' 2> /dev/null\n')

    # container run command (optionally backgrounded, e.g. tcmu-runner)
    file_obj.write(
        ' '.join(shlex.quote(a) for a in container.run_cmd())
        + (' &' if background else '') + '\n')
3194 | ||
3195 | ||
522d829b TL |
def clean_cgroup(ctx: CephadmContext, fsid: str, unit_name: str) -> None:
    """Remove leftover cgroup directories of a previously stopped unit.

    systemd may fail to clean them up, which makes the next
    "systemctl start" fail.  See https://tracker.ceph.com/issues/50998
    """
    cgroup_v2_root = Path('/sys/fs/cgroup')
    if not (cgroup_v2_root / 'system.slice').exists():
        # Only the unified (cgroup v2) hierarchy is affected; nothing to do.
        return

    # systemd escapes '-' as '\x2d' in slice names
    escaped = fsid.replace('-', '\\x2d')
    slice_name = 'system-ceph\\x2d{}.slice'.format(escaped)
    unit_cg = cgroup_v2_root / 'system.slice' / slice_name / f'{unit_name}.service'
    if not unit_cg.exists():
        return

    def _rm_tree(path: Path) -> None:
        # depth-first: cgroup directories can only be removed via rmdir
        for child in path.iterdir():
            if child.is_dir():
                _rm_tree(child)
        path.rmdir()

    try:
        _rm_tree(unit_cg)
    except OSError:
        logger.warning(f'Failed to trim old cgroups {unit_cg}')
3219 | ||
3220 | ||
f67539c2 TL |
def deploy_daemon_units(
    ctx: CephadmContext,
    fsid: str,
    uid: int,
    gid: int,
    daemon_type: str,
    daemon_id: Union[int, str],
    c: 'CephContainer',
    enable: bool = True,
    start: bool = True,
    osd_fsid: Optional[str] = None,
    ports: Optional[List[int]] = None,
) -> None:
    """Generate the per-daemon scripts and systemd unit, then start the unit.

    Writes unit.run / unit.meta / unit.poststop / unit.stop / unit.image into
    the daemon's data dir (each via a .new file + rename so readers never see
    a partial file), installs sysctl settings and the base systemd units, and
    finally enables/starts the per-daemon service.
    """
    # cmd
    data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    with open(data_dir + '/unit.run.new', 'w') as f, \
            open(data_dir + '/unit.meta.new', 'w') as metaf:
        f.write('set -e\n')

        if daemon_type in Ceph.daemons:
            # ceph daemons need /var/run/ceph/<fsid> for admin sockets etc.
            install_path = find_program('install')
            f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))

        # pre-start cmd(s)
        if daemon_type == 'osd':
            # osds have a pre-start step
            assert osd_fsid
            simple_fn = os.path.join('/etc/ceph/osd',
                                     '%s-%s.json.adopted-by-cephadm' % (daemon_id, osd_fsid))
            if os.path.exists(simple_fn):
                f.write('# Simple OSDs need chown on startup:\n')
                for n in ['block', 'block.db', 'block.wal']:
                    p = os.path.join(data_dir, n)
                    f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
            else:
                # if ceph-volume does not support 'ceph-volume activate', we must
                # do 'ceph-volume lvm activate'.  Probe by running activate with
                # a bogus option and inspecting the parser error.
                test_cv = get_ceph_volume_container(
                    ctx,
                    args=['activate', '--bad-option'],
                    volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                    bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                    cname='ceph-%s-%s.%s-activate-test' % (fsid, daemon_type, daemon_id),
                )
                out, err, ret = call(ctx, test_cv.run_cmd(), verbosity=CallVerbosity.SILENT)
                # bad: ceph-volume: error: unrecognized arguments: activate --bad-option
                # good: ceph-volume: error: unrecognized arguments: --bad-option
                if 'unrecognized arguments: activate' in err:
                    # older ceph-volume without top-level activate or --no-tmpfs
                    cmd = [
                        'lvm', 'activate',
                        str(daemon_id), osd_fsid,
                        '--no-systemd',
                    ]
                else:
                    cmd = [
                        'activate',
                        '--osd-id', str(daemon_id),
                        '--osd-uuid', osd_fsid,
                        '--no-systemd',
                        '--no-tmpfs',
                    ]

                prestart = get_ceph_volume_container(
                    ctx,
                    args=cmd,
                    volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                    bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                    cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
                )
                if 'cluster' in ctx and ctx.cluster:
                    # ctx.cluster is only set during adoption of a daemon from a cluster
                    # with a custom name (not "ceph"). The initial activate command the first
                    # time we start the new cephadm based systemd unit for this osd must account
                    # for this by mounting to the correct data dir in the container. Otherwise
                    # necessary files from the old data dir of the daemon won't be copied over
                    # to the new data dir on the host. After the first start (e.g. on any redeploys)
                    # this is no longer necessary as we will have these files in the data dir on the host
                    if data_dir in prestart.volume_mounts:
                        prestart.volume_mounts[data_dir] = f'/var/lib/ceph/osd/{ctx.cluster}-{daemon_id}'
                _write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate')
        elif daemon_type == CephIscsi.daemon_type:
            # iscsi needs configfs mounted and a companion tcmu-runner container
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
            ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            _write_container_cmd_to_bash(ctx, f, tcmu_container, 'iscsi tcmu-runner container', background=True)

        _write_container_cmd_to_bash(ctx, f, c, '%s.%s' % (daemon_type, str(daemon_id)))

        # some metadata about the deploy
        meta: Dict[str, Any] = {}
        if 'meta_json' in ctx and ctx.meta_json:
            meta = json.loads(ctx.meta_json) or {}
        meta.update({
            'memory_request': int(ctx.memory_request) if ctx.memory_request else None,
            'memory_limit': int(ctx.memory_limit) if ctx.memory_limit else None,
        })
        if not meta.get('ports'):
            meta['ports'] = ports
        metaf.write(json.dumps(meta, indent=4) + '\n')

        os.fchmod(f.fileno(), 0o600)
        os.fchmod(metaf.fileno(), 0o600)
        # atomically replace the old scripts
        os.rename(data_dir + '/unit.run.new',
                  data_dir + '/unit.run')
        os.rename(data_dir + '/unit.meta.new',
                  data_dir + '/unit.meta')

    # post-stop command(s)
    with open(data_dir + '/unit.poststop.new', 'w') as f:
        if daemon_type == 'osd':
            assert osd_fsid
            poststop = get_ceph_volume_container(
                ctx,
                args=[
                    'lvm', 'deactivate',
                    str(daemon_id), osd_fsid,
                ],
                volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type,
                                                    daemon_id),
            )
            _write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd')
        elif daemon_type == CephIscsi.daemon_type:
            # make sure we also stop the tcmu container
            ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            f.write('! ' + ' '.join(tcmu_container.stop_cmd()) + '\n')
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
        os.fchmod(f.fileno(), 0o600)
        os.rename(data_dir + '/unit.poststop.new',
                  data_dir + '/unit.poststop')

    # stop command(s)
    with open(data_dir + '/unit.stop.new', 'w') as f:
        # following generated script basically checks if the container exists
        # before stopping it. Exit code will be success either if it doesn't
        # exist or if it exists and is stopped successfully.
        container_exists = f'{ctx.container_engine.path} inspect %s &>/dev/null'
        f.write(f'! {container_exists % c.old_cname} || {" ".join(c.stop_cmd(old_cname=True))} \n')
        f.write(f'! {container_exists % c.cname} || {" ".join(c.stop_cmd())} \n')

        os.fchmod(f.fileno(), 0o600)
        os.rename(data_dir + '/unit.stop.new',
                  data_dir + '/unit.stop')

    if c:
        # record which image this daemon was deployed from
        with open(data_dir + '/unit.image.new', 'w') as f:
            f.write(c.image + '\n')
            os.fchmod(f.fileno(), 0o600)
            os.rename(data_dir + '/unit.image.new',
                      data_dir + '/unit.image')

    # sysctl
    install_sysctl(ctx, fsid, daemon_type)

    # systemd
    install_base_units(ctx, fsid)
    unit = get_unit_file(ctx, fsid)
    unit_file = 'ceph-%s@.service' % (fsid)
    with open(ctx.unit_dir + '/' + unit_file + '.new', 'w') as f:
        f.write(unit)
        os.rename(ctx.unit_dir + '/' + unit_file + '.new',
                  ctx.unit_dir + '/' + unit_file)
    call_throws(ctx, ['systemctl', 'daemon-reload'])

    unit_name = get_unit_name(fsid, daemon_type, daemon_id)
    # best-effort stop/reset-failed of any prior instance before (re)starting
    call(ctx, ['systemctl', 'stop', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'reset-failed', unit_name],
         verbosity=CallVerbosity.DEBUG)
    if enable:
        call_throws(ctx, ['systemctl', 'enable', unit_name])
    if start:
        clean_cgroup(ctx, fsid, unit_name)
        call_throws(ctx, ['systemctl', 'start', unit_name])
9f95a23c | 3398 | |
f6b5b4d7 TL |
3399 | |
class Firewalld(object):
    """Thin wrapper around firewall-cmd for managing ceph service ports.

    Every mutating operation degrades to a logged no-op when firewalld is
    absent, disabled, or not running (i.e. self.available is False).
    """

    def __init__(self, ctx):
        # type: (CephadmContext) -> None
        self.ctx = ctx
        self.available = self.check()

    def check(self):
        # type: () -> bool
        """Detect a usable firewalld; caches the firewall-cmd path in self.cmd."""
        self.cmd = find_executable('firewall-cmd')
        if not self.cmd:
            logger.debug('firewalld does not appear to be present')
            return False
        enabled, state, _ = check_unit(self.ctx, 'firewalld.service')
        if not enabled:
            logger.debug('firewalld.service is not enabled')
            return False
        if state != 'running':
            logger.debug('firewalld.service is not running')
            return False
        logger.info('firewalld ready')
        return True

    def enable_service_for(self, daemon_type):
        # type: (str) -> None
        """Permanently enable the firewalld service matching daemon_type."""
        if not self.available:
            logger.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type)
            return

        if daemon_type == 'mon':
            svc = 'ceph-mon'
        elif daemon_type in ('mgr', 'mds', 'osd'):
            svc = 'ceph'
        elif daemon_type == NFSGanesha.daemon_type:
            svc = 'nfs'
        else:
            # no firewalld service maps to this daemon type
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        _, err, rc = call(self.ctx, [self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG)
        if not rc:
            logger.debug('firewalld service %s is enabled in current zone' % svc)
            return
        logger.info('Enabling firewalld service %s in current zone...' % svc)
        _, err, rc = call(self.ctx, [self.cmd, '--permanent', '--add-service', svc])
        if rc:
            raise RuntimeError(
                'unable to add service %s to current zone: %s' % (svc, err))

    def open_ports(self, fw_ports):
        # type: (List[int]) -> None
        """Permanently open each TCP port in fw_ports in the current zone."""
        if not self.available:
            logger.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports)
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        for port in fw_ports:
            tcp_port = str(port) + '/tcp'
            _, err, rc = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
            if not rc:
                logger.debug('firewalld port %s is enabled in current zone' % tcp_port)
                continue
            logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
            _, err, rc = call(self.ctx, [self.cmd, '--permanent', '--add-port', tcp_port])
            if rc:
                raise RuntimeError('unable to add port %s to current zone: %s' %
                                   (tcp_port, err))

    def close_ports(self, fw_ports):
        # type: (List[int]) -> None
        """Permanently close each TCP port in fw_ports in the current zone."""
        if not self.available:
            logger.debug('Not possible to close ports <%s>. firewalld.service is not available' % fw_ports)
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        for port in fw_ports:
            tcp_port = str(port) + '/tcp'
            _, err, rc = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
            if rc:
                logger.info(f'firewalld port {tcp_port} already closed')
                continue
            logger.info('Disabling port %s in current zone...' % tcp_port)
            _, err, rc = call(self.ctx, [self.cmd, '--permanent', '--remove-port', tcp_port])
            if rc:
                raise RuntimeError('unable to remove port %s from current zone: %s' %
                                   (tcp_port, err))
            logger.info(f'Port {tcp_port} disabled')

    def apply_rules(self):
        # type: () -> None
        """Reload firewalld so --permanent changes take effect immediately."""
        if not self.available:
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        call_throws(self.ctx, [self.cmd, '--reload'])
f6b5b4d7 | 3504 | |
f67539c2 TL |
3505 | |
def update_firewalld(ctx, daemon_type):
    # type: (CephadmContext, str) -> None
    """Enable the firewalld service for daemon_type, unless --skip-firewalld."""
    if 'skip_firewalld' in ctx and ctx.skip_firewalld:
        return
    fw = Firewalld(ctx)
    fw.enable_service_for(daemon_type)
    fw.apply_rules()
f6b5b4d7 | 3512 | |
f6b5b4d7 | 3513 | |
b3b6e05e TL |
def install_sysctl(ctx: CephadmContext, fsid: str, daemon_type: str) -> None:
    """Write a per-daemon-type sysctl conf file and ask the kernel to apply it.

    Only osd/haproxy/keepalived contribute sysctl settings; for any other
    daemon type this is a no-op.
    """
    if daemon_type == 'osd':
        settings = OSD.get_sysctl_settings()
    elif daemon_type == 'haproxy':
        settings = HAproxy.get_sysctl_settings()
    elif daemon_type == 'keepalived':
        settings = Keepalived.get_sysctl_settings()
    else:
        settings = None

    if not settings:
        return

    sysctl_dir = Path(ctx.sysctl_dir)
    sysctl_dir.mkdir(mode=0o755, exist_ok=True)
    conf = sysctl_dir.joinpath(f'90-ceph-{fsid}-{daemon_type}.conf')
    # header + settings + trailing newline, same layout as before
    body = '\n'.join(['# created by cephadm', '', *settings, ''])
    with open(conf, 'w') as f:
        f.write(body)
    call_throws(ctx, ['sysctl', '--system'])
9f95a23c | 3543 | |
f67539c2 | 3544 | |
33c7a0ef TL |
def migrate_sysctl_dir(ctx: CephadmContext, fsid: str) -> None:
    """
    Cephadm once used '/usr/lib/sysctl.d' for storing sysctl configuration.
    This moves it to '/etc/sysctl.d'.
    """
    deprecated_location: str = '/usr/lib/sysctl.d'
    old_confs: List[str] = glob(f'{deprecated_location}/90-ceph-{fsid}-*.conf')
    if not old_confs:
        return

    total: int = len(old_confs)
    remaining: int = total
    logger.info(f'Found sysctl {remaining} files in deprecated location {deprecated_location}. Starting Migration.')
    for conf in old_confs:
        try:
            shutil.move(conf, ctx.sysctl_dir)
            remaining -= 1
        except shutil.Error as err:
            if str(err).endswith('already exists'):
                # already migrated previously; drop the stale copy
                logger.warning(f'Destination file already exists. Deleting {conf}.')
                try:
                    os.unlink(conf)
                    remaining -= 1
                except OSError as del_err:
                    logger.warning(f'Could not remove {conf}: {del_err}.')
            else:
                logger.warning(f'Could not move {conf} from {deprecated_location} to {ctx.sysctl_dir}: {err}')

    # Log successful migration
    if remaining == 0:
        logger.info(f'Successfully migrated sysctl config to {ctx.sysctl_dir}.')
        return

    # Log partially successful / unsuccessful migration
    if remaining < total:
        status: str = f'partially successful (failed {remaining}/{total})'
    else:
        status = 'unsuccessful'
    logger.warning(f'Migration of sysctl configuration {status}. You may want to perform a migration manually.')
3584 | ||
3585 | ||
f67539c2 TL |
def install_base_units(ctx, fsid):
    # type: (CephadmContext, str) -> None
    """
    Set up ceph.target and ceph-$fsid.target units.
    """
    # global unit -- written via .new + rename so readers never see a partial file
    existed = os.path.exists(ctx.unit_dir + '/ceph.target')
    with open(ctx.unit_dir + '/ceph.target.new', 'w') as f:
        f.write('[Unit]\n'
                'Description=All Ceph clusters and services\n'
                '\n'
                '[Install]\n'
                'WantedBy=multi-user.target\n')
    os.rename(ctx.unit_dir + '/ceph.target.new',
              ctx.unit_dir + '/ceph.target')
    if not existed:
        # we disable before enable in case a different ceph.target
        # (from the traditional package) is present; while newer
        # systemd is smart enough to disable the old
        # (/lib/systemd/...) and enable the new (/etc/systemd/...),
        # some older versions of systemd error out with EEXIST.
        call_throws(ctx, ['systemctl', 'disable', 'ceph.target'])
        call_throws(ctx, ['systemctl', 'enable', 'ceph.target'])
        call_throws(ctx, ['systemctl', 'start', 'ceph.target'])

    # cluster unit -- one target per fsid, nested under ceph.target
    existed = os.path.exists(ctx.unit_dir + '/ceph-%s.target' % fsid)
    with open(ctx.unit_dir + '/ceph-%s.target.new' % fsid, 'w') as f:
        f.write(
            '[Unit]\n'
            'Description=Ceph cluster {fsid}\n'
            'PartOf=ceph.target\n'
            'Before=ceph.target\n'
            '\n'
            '[Install]\n'
            'WantedBy=multi-user.target ceph.target\n'.format(
                fsid=fsid)
        )
    os.rename(ctx.unit_dir + '/ceph-%s.target.new' % fsid,
              ctx.unit_dir + '/ceph-%s.target' % fsid)
    if not existed:
        call_throws(ctx, ['systemctl', 'enable', 'ceph-%s.target' % fsid])
        call_throws(ctx, ['systemctl', 'start', 'ceph-%s.target' % fsid])

    # logrotate for the cluster
    with open(ctx.logrotate_dir + '/ceph-%s' % fsid, 'w') as f:
        """
        This is a bit sloppy in that the killall/pkill will touch all ceph daemons
        in all containers, but I don't see an elegant way to send SIGHUP *just* to
        the daemons for this cluster. (1) systemd kill -s will get the signal to
        podman, but podman will exit. (2) podman kill will get the signal to the
        first child (bash), but that isn't the ceph daemon. This is simpler and
        should be harmless.
        """
        f.write("""# created by cephadm
/var/log/ceph/%s/*.log {
    rotate 7
    daily
    compress
    sharedscripts
    postrotate
        killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror cephfs-mirror || pkill -1 -x 'ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror|cephfs-mirror' || true
    endscript
    missingok
    notifempty
    su root root
}
""" % fsid)
3654 | ||
f6b5b4d7 | 3655 | |
f67539c2 TL |
def get_unit_file(ctx, fsid):
    # type: (CephadmContext, str) -> str
    """Render the ceph-<fsid>@.service systemd unit template for this host."""
    engine = ctx.container_engine

    extra_args = ''
    if isinstance(engine, Podman):
        # podman forks: track the pid file ourselves and clear stale
        # pid/cid files around start/stop
        extra_args = ('ExecStartPre=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
                      'ExecStopPost=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
                      'Type=forking\n'
                      'PIDFile=%t/%n-pid\n')
        if engine.version >= CGROUPS_SPLIT_PODMAN_VERSION:
            extra_args += 'Delegate=yes\n'

    # with docker we additionally depend on (and order after) docker.service
    is_docker = isinstance(engine, Docker)

    unit_template = """# generated by cephadm
[Unit]
Description=Ceph %i for {fsid}

# According to:
# http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
# these can be removed once ceph-mon will dynamically change network
# configuration.
After=network-online.target local-fs.target time-sync.target{docker_after}
Wants=network-online.target local-fs.target time-sync.target
{docker_requires}
PartOf=ceph-{fsid}.target
Before=ceph-{fsid}.target

[Service]
LimitNOFILE=1048576
LimitNPROC=1048576
EnvironmentFile=-/etc/environment
ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
ExecStop=-/bin/bash -c 'bash {data_dir}/{fsid}/%i/unit.stop'
ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
KillMode=none
Restart=on-failure
RestartSec=10s
TimeoutStartSec=120
TimeoutStopSec=120
StartLimitInterval=30min
StartLimitBurst=5
{extra_args}
[Install]
WantedBy=ceph-{fsid}.target
"""
    return unit_template.format(
        fsid=fsid,
        data_dir=ctx.data_dir,
        extra_args=extra_args,
        docker_after=' docker.service' if is_docker else '',
        docker_requires='Requires=docker.service\n' if is_docker else '')
3708 | ||
3709 | ################################## | |
3710 | ||
f6b5b4d7 | 3711 | |
9f95a23c TL |
3712 | class CephContainer: |
3713 | def __init__(self, | |
f67539c2 | 3714 | ctx: CephadmContext, |
f91f0fd5 TL |
3715 | image: str, |
3716 | entrypoint: str, | |
3717 | args: List[str] = [], | |
3718 | volume_mounts: Dict[str, str] = {}, | |
3719 | cname: str = '', | |
3720 | container_args: List[str] = [], | |
3721 | envs: Optional[List[str]] = None, | |
3722 | privileged: bool = False, | |
3723 | ptrace: bool = False, | |
3724 | bind_mounts: Optional[List[List[str]]] = None, | |
f67539c2 | 3725 | init: Optional[bool] = None, |
f91f0fd5 | 3726 | host_network: bool = True, |
f67539c2 TL |
3727 | memory_request: Optional[str] = None, |
3728 | memory_limit: Optional[str] = None, | |
f91f0fd5 | 3729 | ) -> None: |
f67539c2 | 3730 | self.ctx = ctx |
9f95a23c TL |
3731 | self.image = image |
3732 | self.entrypoint = entrypoint | |
3733 | self.args = args | |
3734 | self.volume_mounts = volume_mounts | |
522d829b | 3735 | self._cname = cname |
9f95a23c TL |
3736 | self.container_args = container_args |
3737 | self.envs = envs | |
3738 | self.privileged = privileged | |
3739 | self.ptrace = ptrace | |
f6b5b4d7 | 3740 | self.bind_mounts = bind_mounts if bind_mounts else [] |
f67539c2 | 3741 | self.init = init if init else ctx.container_init |
f91f0fd5 | 3742 | self.host_network = host_network |
f67539c2 TL |
3743 | self.memory_request = memory_request |
3744 | self.memory_limit = memory_limit | |
9f95a23c | 3745 | |
522d829b TL |
3746 | @classmethod |
3747 | def for_daemon(cls, | |
3748 | ctx: CephadmContext, | |
3749 | fsid: str, | |
3750 | daemon_type: str, | |
3751 | daemon_id: str, | |
3752 | entrypoint: str, | |
3753 | args: List[str] = [], | |
3754 | volume_mounts: Dict[str, str] = {}, | |
3755 | container_args: List[str] = [], | |
3756 | envs: Optional[List[str]] = None, | |
3757 | privileged: bool = False, | |
3758 | ptrace: bool = False, | |
3759 | bind_mounts: Optional[List[List[str]]] = None, | |
3760 | init: Optional[bool] = None, | |
3761 | host_network: bool = True, | |
3762 | memory_request: Optional[str] = None, | |
3763 | memory_limit: Optional[str] = None, | |
3764 | ) -> 'CephContainer': | |
3765 | return cls( | |
3766 | ctx, | |
3767 | image=ctx.image, | |
3768 | entrypoint=entrypoint, | |
3769 | args=args, | |
3770 | volume_mounts=volume_mounts, | |
3771 | cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id), | |
3772 | container_args=container_args, | |
3773 | envs=envs, | |
3774 | privileged=privileged, | |
3775 | ptrace=ptrace, | |
3776 | bind_mounts=bind_mounts, | |
3777 | init=init, | |
3778 | host_network=host_network, | |
3779 | memory_request=memory_request, | |
3780 | memory_limit=memory_limit, | |
3781 | ) | |
3782 | ||
3783 | @property | |
3784 | def cname(self) -> str: | |
3785 | """ | |
3786 | podman adds the current container name to the /etc/hosts | |
3787 | file. Turns out, python's `socket.getfqdn()` differs from | |
3788 | `hostname -f`, when we have the container names containing | |
3789 | dots in it.: | |
3790 | ||
3791 | # podman run --name foo.bar.baz.com ceph/ceph /bin/bash | |
3792 | [root@sebastians-laptop /]# cat /etc/hosts | |
3793 | 127.0.0.1 localhost | |
3794 | ::1 localhost | |
3795 | 127.0.1.1 sebastians-laptop foo.bar.baz.com | |
3796 | [root@sebastians-laptop /]# hostname -f | |
3797 | sebastians-laptop | |
3798 | [root@sebastians-laptop /]# python3 -c 'import socket; print(socket.getfqdn())' | |
3799 | foo.bar.baz.com | |
3800 | ||
3801 | Fascinatingly, this doesn't happen when using dashes. | |
3802 | """ | |
3803 | return self._cname.replace('.', '-') | |
3804 | ||
    @cname.setter
    def cname(self, val: str) -> None:
        # store the raw (possibly dotted) name; the getter sanitizes it
        self._cname = val
3808 | ||
    @property
    def old_cname(self) -> str:
        # the unsanitized name exactly as passed in (may contain dots);
        # used to match containers created before the dot->dash rename
        return self._cname
3812 | ||
f91f0fd5 TL |
3813 | def run_cmd(self) -> List[str]: |
3814 | cmd_args: List[str] = [ | |
f67539c2 | 3815 | str(self.ctx.container_engine.path), |
f91f0fd5 TL |
3816 | 'run', |
3817 | '--rm', | |
3818 | '--ipc=host', | |
b3b6e05e TL |
3819 | # some containers (ahem, haproxy) override this, but we want a fast |
3820 | # shutdown always (and, more importantly, a successful exit even if we | |
3821 | # fall back to SIGKILL). | |
3822 | '--stop-signal=SIGTERM', | |
f91f0fd5 | 3823 | ] |
f67539c2 TL |
3824 | |
3825 | if isinstance(self.ctx.container_engine, Podman): | |
f67539c2 TL |
3826 | if os.path.exists('/etc/ceph/podman-auth.json'): |
3827 | cmd_args.append('--authfile=/etc/ceph/podman-auth.json') | |
3828 | ||
f91f0fd5 TL |
3829 | envs: List[str] = [ |
3830 | '-e', 'CONTAINER_IMAGE=%s' % self.image, | |
3831 | '-e', 'NODE_NAME=%s' % get_hostname(), | |
3832 | ] | |
3833 | vols: List[str] = [] | |
3834 | binds: List[str] = [] | |
9f95a23c | 3835 | |
f67539c2 TL |
3836 | if self.memory_request: |
3837 | cmd_args.extend(['-e', 'POD_MEMORY_REQUEST', str(self.memory_request)]) | |
3838 | if self.memory_limit: | |
3839 | cmd_args.extend(['-e', 'POD_MEMORY_LIMIT', str(self.memory_limit)]) | |
3840 | cmd_args.extend(['--memory', str(self.memory_limit)]) | |
3841 | ||
f91f0fd5 TL |
3842 | if self.host_network: |
3843 | cmd_args.append('--net=host') | |
3844 | if self.entrypoint: | |
3845 | cmd_args.extend(['--entrypoint', self.entrypoint]) | |
9f95a23c | 3846 | if self.privileged: |
f91f0fd5 TL |
3847 | cmd_args.extend([ |
3848 | '--privileged', | |
3849 | # let OSD etc read block devs that haven't been chowned | |
3850 | '--group-add=disk']) | |
3851 | if self.ptrace and not self.privileged: | |
3852 | # if privileged, the SYS_PTRACE cap is already added | |
3853 | # in addition, --cap-add and --privileged are mutually | |
3854 | # exclusive since podman >= 2.0 | |
3855 | cmd_args.append('--cap-add=SYS_PTRACE') | |
3856 | if self.init: | |
3857 | cmd_args.append('--init') | |
f67539c2 | 3858 | envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1'] |
f91f0fd5 TL |
3859 | if self.cname: |
3860 | cmd_args.extend(['--name', self.cname]) | |
3861 | if self.envs: | |
3862 | for env in self.envs: | |
3863 | envs.extend(['-e', env]) | |
3864 | ||
9f95a23c TL |
3865 | vols = sum( |
3866 | [['-v', '%s:%s' % (host_dir, container_dir)] | |
3867 | for host_dir, container_dir in self.volume_mounts.items()], []) | |
f6b5b4d7 | 3868 | binds = sum([['--mount', '{}'.format(','.join(bind))] |
f91f0fd5 TL |
3869 | for bind in self.bind_mounts], []) |
3870 | ||
f67539c2 TL |
3871 | return \ |
3872 | cmd_args + self.container_args + \ | |
3873 | envs + vols + binds + \ | |
3874 | [self.image] + self.args # type: ignore | |
f91f0fd5 TL |
3875 | |
3876 | def shell_cmd(self, cmd: List[str]) -> List[str]: | |
3877 | cmd_args: List[str] = [ | |
f67539c2 | 3878 | str(self.ctx.container_engine.path), |
9f95a23c TL |
3879 | 'run', |
3880 | '--rm', | |
e306af50 | 3881 | '--ipc=host', |
f91f0fd5 TL |
3882 | ] |
3883 | envs: List[str] = [ | |
3884 | '-e', 'CONTAINER_IMAGE=%s' % self.image, | |
3885 | '-e', 'NODE_NAME=%s' % get_hostname(), | |
3886 | ] | |
3887 | vols: List[str] = [] | |
3888 | binds: List[str] = [] | |
9f95a23c | 3889 | |
f91f0fd5 TL |
3890 | if self.host_network: |
3891 | cmd_args.append('--net=host') | |
b3b6e05e TL |
3892 | if self.ctx.no_hosts: |
3893 | cmd_args.append('--no-hosts') | |
9f95a23c | 3894 | if self.privileged: |
f91f0fd5 TL |
3895 | cmd_args.extend([ |
3896 | '--privileged', | |
3897 | # let OSD etc read block devs that haven't been chowned | |
3898 | '--group-add=disk', | |
3899 | ]) | |
f67539c2 TL |
3900 | if self.init: |
3901 | cmd_args.append('--init') | |
3902 | envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1'] | |
f91f0fd5 TL |
3903 | if self.envs: |
3904 | for env in self.envs: | |
3905 | envs.extend(['-e', env]) | |
3906 | ||
9f95a23c TL |
3907 | vols = sum( |
3908 | [['-v', '%s:%s' % (host_dir, container_dir)] | |
3909 | for host_dir, container_dir in self.volume_mounts.items()], []) | |
f6b5b4d7 TL |
3910 | binds = sum([['--mount', '{}'.format(','.join(bind))] |
3911 | for bind in self.bind_mounts], []) | |
f91f0fd5 TL |
3912 | |
3913 | return cmd_args + self.container_args + envs + vols + binds + [ | |
9f95a23c | 3914 | '--entrypoint', cmd[0], |
f91f0fd5 | 3915 | self.image, |
9f95a23c TL |
3916 | ] + cmd[1:] |
3917 | ||
3918 | def exec_cmd(self, cmd): | |
3919 | # type: (List[str]) -> List[str] | |
522d829b TL |
3920 | cname = get_running_container_name(self.ctx, self) |
3921 | if not cname: | |
3922 | raise Error('unable to find container "{}"'.format(self.cname)) | |
9f95a23c | 3923 | return [ |
f67539c2 | 3924 | str(self.ctx.container_engine.path), |
9f95a23c TL |
3925 | 'exec', |
3926 | ] + self.container_args + [ | |
3927 | self.cname, | |
3928 | ] + cmd | |
3929 | ||
522d829b | 3930 | def rm_cmd(self, old_cname: bool = False, storage: bool = False) -> List[str]: |
f6b5b4d7 | 3931 | ret = [ |
f67539c2 | 3932 | str(self.ctx.container_engine.path), |
f6b5b4d7 TL |
3933 | 'rm', '-f', |
3934 | ] | |
3935 | if storage: | |
3936 | ret.append('--storage') | |
522d829b TL |
3937 | if old_cname: |
3938 | ret.append(self.old_cname) | |
3939 | else: | |
3940 | ret.append(self.cname) | |
f6b5b4d7 TL |
3941 | return ret |
3942 | ||
522d829b | 3943 | def stop_cmd(self, old_cname: bool = False) -> List[str]: |
f6b5b4d7 | 3944 | ret = [ |
f67539c2 | 3945 | str(self.ctx.container_engine.path), |
522d829b | 3946 | 'stop', self.old_cname if old_cname else self.cname, |
f6b5b4d7 TL |
3947 | ] |
3948 | return ret | |
3949 | ||
9f95a23c TL |
3950 | def run(self, timeout=DEFAULT_TIMEOUT): |
3951 | # type: (Optional[int]) -> str | |
f67539c2 TL |
3952 | out, _, _ = call_throws(self.ctx, self.run_cmd(), |
3953 | desc=self.entrypoint, timeout=timeout) | |
9f95a23c TL |
3954 | return out |
3955 | ||
20effc67 TL |
3956 | |
3957 | ##################################### | |
3958 | ||
class MgrListener(Thread):
    """Background thread accepting mutually-authenticated TLS connections
    from the cephadm mgr module.

    The mgr pushes length-prefixed JSON control messages (ack counters
    and refreshed config files) which are dispatched to the owning
    CephadmAgent via handle_json_payload().
    """

    def __init__(self, agent: 'CephadmAgent') -> None:
        self.agent = agent
        self.stop = False  # set by shutdown() to end the accept loop
        super(MgrListener, self).__init__(target=self.run)

    def run(self) -> None:
        """Accept loop: handle one mgr connection at a time until stopped."""
        listenSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        listenSocket.bind(('0.0.0.0', int(self.agent.listener_port)))
        # time out accept() regularly so self.stop is noticed promptly
        listenSocket.settimeout(60)
        listenSocket.listen(1)
        # mutual TLS: require a client cert signed by our cluster CA
        ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
        ssl_ctx.verify_mode = ssl.CERT_REQUIRED
        ssl_ctx.load_cert_chain(self.agent.listener_cert_path, self.agent.listener_key_path)
        ssl_ctx.load_verify_locations(self.agent.ca_path)
        secureListenSocket = ssl_ctx.wrap_socket(listenSocket, server_side=True)
        while not self.stop:
            try:
                try:
                    conn, _ = secureListenSocket.accept()
                except socket.timeout:
                    continue
                try:
                    # messages are prefixed by a 10-byte decimal length field
                    length: int = int(conn.recv(10).decode())
                except Exception as e:
                    err_str = f'Failed to extract length of payload from message: {e}'
                    conn.send(err_str.encode())
                    logger.error(err_str)
                    # BUGFIX: without a valid length we cannot read the payload.
                    # Previously execution fell through to conn.recv(length)
                    # with `length` unbound, raising a NameError.
                    continue
                while True:
                    payload = conn.recv(length).decode()
                    if not payload:
                        break
                    try:
                        data: Dict[Any, Any] = json.loads(payload)
                        self.handle_json_payload(data)
                    except Exception as e:
                        err_str = f'Failed to extract json payload from message: {e}'
                        conn.send(err_str.encode())
                        logger.error(err_str)
                    else:
                        conn.send(b'ACK')
                        if 'config' in data:
                            self.agent.wakeup()
                        self.agent.ls_gatherer.wakeup()
                        self.agent.volume_gatherer.wakeup()
                        logger.debug(f'Got mgr message {data}')
            except Exception as e:
                logger.error(f'Mgr Listener encountered exception: {e}')

    def shutdown(self) -> None:
        """Ask the accept loop to exit (takes effect within one timeout)."""
        self.stop = True

    def handle_json_payload(self, data: Dict[Any, Any]) -> None:
        """Record the mgr's ack counter and install any pushed config files."""
        self.agent.ack = int(data['counter'])
        if 'config' in data:
            logger.info('Received new config from mgr')
            config = data['config']
            for filename in config:
                if filename in self.agent.required_files:
                    file_path = os.path.join(self.agent.daemon_dir, filename)
                    # write to a temp name, then rename for an atomic replace
                    with open(os.open(file_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
                        f.write(config[filename])
                        os.rename(file_path + '.new', file_path)
            self.agent.pull_conf_settings()
            self.agent.wakeup()
4024 | ||
4025 | ||
class CephadmAgent():
    """Long-lived per-host agent that reports host and daemon state to the mgr.

    The agent's main loop periodically POSTs a JSON blob (daemon ls, host
    facts, networks, ceph-volume inventory) to the cephadm mgr module over
    HTTPS. Data collection is offloaded to two AgentGatherer threads, and
    a MgrListener thread lets the mgr push config updates and wake the
    agent out of its sleep.
    """

    daemon_type = 'agent'
    default_port = 8498
    # seconds between reports to the mgr; overwritten by 'refresh_period'
    # from agent.json in pull_conf_settings()
    loop_interval = 30
    stop = False

    # config files the mgr must supply when deploying the agent
    required_files = [
        'agent.json',
        'keyring',
        'root_cert.pem',
        'listener.crt',
        'listener.key',
    ]

    def __init__(self, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str] = ''):
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        # first port tried for the MgrListener; run() probes upward from here
        self.starting_port = 14873
        self.target_ip = ''
        self.target_port = ''
        self.host = ''
        self.daemon_dir = os.path.join(ctx.data_dir, self.fsid, f'{self.daemon_type}.{self.daemon_id}')
        self.config_path = os.path.join(self.daemon_dir, 'agent.json')
        self.keyring_path = os.path.join(self.daemon_dir, 'keyring')
        self.ca_path = os.path.join(self.daemon_dir, 'root_cert.pem')
        self.listener_cert_path = os.path.join(self.daemon_dir, 'listener.crt')
        self.listener_key_path = os.path.join(self.daemon_dir, 'listener.key')
        self.listener_port = ''
        # counter acknowledged by the mgr; updated via MgrListener messages
        self.ack = 1
        self.event = Event()
        self.mgr_listener = MgrListener(self)
        self.ls_gatherer = AgentGatherer(self, lambda: self._get_ls(), 'Ls')
        self.volume_gatherer = AgentGatherer(self, lambda: self._ceph_volume(enhanced=False), 'Volume')
        self.device_enhanced_scan = False
        # ring buffer of the last 3 loop durations, used to pace the loop
        self.recent_iteration_run_times: List[float] = [0.0, 0.0, 0.0]
        self.recent_iteration_index: int = 0
        self.cached_ls_values: Dict[str, Dict[str, str]] = {}

    def validate(self, config: Dict[str, str] = {}) -> None:
        """Raise Error if any required config file is missing from config."""
        # check for the required files
        # NOTE(review): mutable default argument; harmless here (read-only)
        for fname in self.required_files:
            if fname not in config:
                raise Error('required file missing from config: %s' % fname)

    def deploy_daemon_unit(self, config: Dict[str, str] = {}) -> None:
        """Write the agent's config files and systemd unit, then (re)start it.

        Raises Error when config is empty or a required file is missing.
        """
        if not config:
            raise Error('Agent needs a config')
        assert isinstance(config, dict)
        self.validate(config)

        # Create the required config files in the daemons dir, with restricted permissions
        for filename in config:
            if filename in self.required_files:
                file_path = os.path.join(self.daemon_dir, filename)
                # write to a temp name, then rename for an atomic replace
                with open(os.open(file_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
                    f.write(config[filename])
                    os.rename(file_path + '.new', file_path)

        unit_run_path = os.path.join(self.daemon_dir, 'unit.run')
        with open(os.open(unit_run_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
            f.write(self.unit_run())
            os.rename(unit_run_path + '.new', unit_run_path)

        # persist deployment metadata (if the caller supplied any) alongside
        meta: Dict[str, Any] = {}
        meta_file_path = os.path.join(self.daemon_dir, 'unit.meta')
        if 'meta_json' in self.ctx and self.ctx.meta_json:
            meta = json.loads(self.ctx.meta_json) or {}
        with open(os.open(meta_file_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
            f.write(json.dumps(meta, indent=4) + '\n')
            os.rename(meta_file_path + '.new', meta_file_path)

        unit_file_path = os.path.join(self.ctx.unit_dir, self.unit_name())
        with open(os.open(unit_file_path + '.new', os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
            f.write(self.unit_file())
            os.rename(unit_file_path + '.new', unit_file_path)

        # stop/reset-failed are best-effort (plain call); reload+enable must succeed
        call_throws(self.ctx, ['systemctl', 'daemon-reload'])
        call(self.ctx, ['systemctl', 'stop', self.unit_name()],
             verbosity=CallVerbosity.DEBUG)
        call(self.ctx, ['systemctl', 'reset-failed', self.unit_name()],
             verbosity=CallVerbosity.DEBUG)
        call_throws(self.ctx, ['systemctl', 'enable', '--now', self.unit_name()])

    def unit_name(self) -> str:
        """Systemd unit file name for this agent instance."""
        return '{}.service'.format(get_unit_name(self.fsid, self.daemon_type, self.daemon_id))

    def unit_run(self) -> str:
        """Shell snippet that re-invokes this cephadm binary in agent mode."""
        py3 = shutil.which('python3')
        binary_path = os.path.realpath(sys.argv[0])
        return ('set -e\n' + f'{py3} {binary_path} agent --fsid {self.fsid} --daemon-id {self.daemon_id} &\n')

    def unit_file(self) -> str:
        """Systemd unit file contents for this agent instance."""
        return """#generated by cephadm
[Unit]
Description=cephadm agent for cluster {fsid}

PartOf=ceph-{fsid}.target
Before=ceph-{fsid}.target

[Service]
Type=forking
ExecStart=/bin/bash {data_dir}/unit.run
Restart=on-failure
RestartSec=10s

[Install]
WantedBy=ceph-{fsid}.target
""".format(
            fsid=self.fsid,
            data_dir=self.daemon_dir
        )

    def shutdown(self) -> None:
        """Stop the main loop and the mgr listener thread."""
        self.stop = True
        if self.mgr_listener.is_alive():
            self.mgr_listener.shutdown()

    def wakeup(self) -> None:
        """Interrupt the main loop's sleep so it reports immediately."""
        self.event.set()

    def pull_conf_settings(self) -> None:
        """(Re)load agent.json and the keyring from the daemon directory.

        Shuts the agent down and raises Error when either file is
        missing or malformed.
        """
        try:
            with open(self.config_path, 'r') as f:
                config = json.load(f)
                self.target_ip = config['target_ip']
                self.target_port = config['target_port']
                self.loop_interval = int(config['refresh_period'])
                self.starting_port = int(config['listener_port'])
                self.host = config['host']
                use_lsm = config['device_enhanced_scan']
        except Exception as e:
            self.shutdown()
            raise Error(f'Failed to get agent target ip and port from config: {e}')

        try:
            with open(self.keyring_path, 'r') as f:
                self.keyring = f.read()
        except Exception as e:
            self.shutdown()
            raise Error(f'Failed to get agent keyring: {e}')

        assert self.target_ip and self.target_port

        self.device_enhanced_scan = False
        # NOTE(review): assumes device_enhanced_scan is serialized as the
        # string 'true'/'false'; a bare bool here would break .lower() — confirm
        if use_lsm.lower() == 'true':
            self.device_enhanced_scan = True
        self.volume_gatherer.update_func(lambda: self._ceph_volume(enhanced=self.device_enhanced_scan))

    def run(self) -> None:
        """Main loop: pick a listener port, start helper threads, and report
        collected state to the mgr every loop_interval seconds."""
        self.pull_conf_settings()

        try:
            for _ in range(1001):
                if not port_in_use(self.ctx, self.starting_port):
                    self.listener_port = str(self.starting_port)
                    break
                self.starting_port += 1
            if not self.listener_port:
                # NOTE(review): the loop probes 1001 ports but the message
                # says 1000 — off-by-one in the wording
                raise Error(f'All 1000 ports starting at {str(self.starting_port - 1001)} taken.')
        except Exception as e:
            raise Error(f'Failed to pick port for agent to listen on: {e}')

        if not self.mgr_listener.is_alive():
            self.mgr_listener.start()

        if not self.ls_gatherer.is_alive():
            self.ls_gatherer.start()

        if not self.volume_gatherer.is_alive():
            self.volume_gatherer.start()

        # verify the mgr's server certificate against our cluster CA
        ssl_ctx = ssl.create_default_context()
        ssl_ctx.check_hostname = True
        ssl_ctx.verify_mode = ssl.CERT_REQUIRED
        ssl_ctx.load_verify_locations(self.ca_path)

        while not self.stop:
            start_time = time.monotonic()
            ack = self.ack

            # part of the networks info is returned as a set which is not JSON
            # serializable. The set must be converted to a list
            networks = list_networks(self.ctx)
            networks_list = {}
            for key in networks.keys():
                for k, v in networks[key].items():
                    # NOTE(review): this rebinds networks_list[key] on every
                    # inner iteration, so only the last interface per network
                    # survives — looks unintended; confirm against the mgr side
                    networks_list[key] = {k: list(v)}

            # gatherer data is only trusted if its ack matches ours;
            # otherwise send an empty placeholder
            data = json.dumps({'host': self.host,
                               'ls': (self.ls_gatherer.data if self.ack == self.ls_gatherer.ack
                                      and self.ls_gatherer.data is not None else []),
                               'networks': networks_list,
                               'facts': HostFacts(self.ctx).dump(),
                               'volume': (self.volume_gatherer.data if self.ack == self.volume_gatherer.ack
                                          and self.volume_gatherer.data is not None else ''),
                               'ack': str(ack),
                               'keyring': self.keyring,
                               'port': self.listener_port})
            data = data.encode('ascii')

            url = f'https://{self.target_ip}:{self.target_port}/data'
            try:
                req = Request(url, data, {'Content-Type': 'application/json'})
                send_time = time.monotonic()
                with urlopen(req, context=ssl_ctx) as response:
                    response_str = response.read()
                    response_json = json.loads(response_str)
                    total_request_time = datetime.timedelta(seconds=(time.monotonic() - send_time)).total_seconds()
                    logger.info(f'Received mgr response: "{response_json["result"]}" {total_request_time} seconds after sending request.')
            except Exception as e:
                logger.error(f'Failed to send metadata to mgr: {e}')

            # shorten the sleep by the average recent iteration duration so
            # reports stay roughly loop_interval apart
            end_time = time.monotonic()
            run_time = datetime.timedelta(seconds=(end_time - start_time))
            self.recent_iteration_run_times[self.recent_iteration_index] = run_time.total_seconds()
            self.recent_iteration_index = (self.recent_iteration_index + 1) % 3
            # NOTE(review): divides by the count of non-zero samples; if all
            # recorded durations are 0.0 this raises ZeroDivisionError — confirm
            run_time_average = sum(self.recent_iteration_run_times, 0.0) / len([t for t in self.recent_iteration_run_times if t])

            self.event.wait(max(self.loop_interval - int(run_time_average), 0))
            self.event.clear()

    def _ceph_volume(self, enhanced: bool = False) -> Tuple[str, bool]:
        """Run `ceph-volume inventory` in-process and return (json_stdout, False).

        Raises when ceph-volume produces no output. The bool in the tuple
        is the AgentGatherer 'changed' flag (inventory is always resent).
        """
        self.ctx.command = 'inventory --format=json'.split()
        if enhanced:
            self.ctx.command.append('--with-lsm')
        self.ctx.fsid = self.fsid

        # capture command_ceph_volume's print() output
        stream = io.StringIO()
        with redirect_stdout(stream):
            command_ceph_volume(self.ctx)

        stdout = stream.getvalue()

        if stdout:
            return (stdout, False)
        else:
            raise Exception('ceph-volume returned empty value')

    def _daemon_ls_subset(self) -> Dict[str, Dict[str, Any]]:
        # gets a subset of ls info quickly. The results of this will tell us if our
        # cached info is still good or if we need to run the full ls again.
        # for legacy containers, we just grab the full info. For cephadmv1 containers,
        # we only grab enabled, state, mem_usage and container id. If container id has
        # not changed for any daemon, we assume our cached info is good.
        daemons: Dict[str, Dict[str, Any]] = {}
        data_dir = self.ctx.data_dir
        seen_memusage = {}  # type: Dict[str, int]
        out, err, code = call(
            self.ctx,
            [self.ctx.container_engine.path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
            verbosity=CallVerbosity.DEBUG
        )
        seen_memusage_cid_len, seen_memusage = _parse_mem_usage(code, out)
        # we need a mapping from container names to ids. Later we will convert daemon
        # names to container names to get daemons container id to see if it has changed
        out, err, code = call(
            self.ctx,
            [self.ctx.container_engine.path, 'ps', '--format', '{{.ID}},{{.Names}}', '--no-trunc'],
            verbosity=CallVerbosity.DEBUG
        )
        name_id_mapping: Dict[str, str] = self._parse_container_id_name(code, out)
        for i in os.listdir(data_dir):
            if i in ['mon', 'osd', 'mds', 'mgr']:
                # pre-cephadm ("legacy") daemon layout: <type>/<cluster>-<id>
                daemon_type = i
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '-' not in j:
                        continue
                    (cluster, daemon_id) = j.split('-', 1)
                    legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
                    (enabled, state, _) = check_unit(self.ctx, legacy_unit_name)
                    daemons[f'{daemon_type}.{daemon_id}'] = {
                        'style': 'legacy',
                        'name': '%s.%s' % (daemon_type, daemon_id),
                        'fsid': self.ctx.fsid if self.ctx.fsid is not None else 'unknown',
                        'systemd_unit': legacy_unit_name,
                        'enabled': 'true' if enabled else 'false',
                        'state': state,
                    }
            elif is_fsid(i):
                # cephadm v1 layout: <fsid>/<type>.<id>/
                fsid = str(i)  # convince mypy that fsid is a str here
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)):
                        (daemon_type, daemon_id) = j.split('.', 1)
                        unit_name = get_unit_name(fsid, daemon_type, daemon_id)
                        (enabled, state, _) = check_unit(self.ctx, unit_name)
                        daemons[j] = {
                            'style': 'cephadm:v1',
                            'systemd_unit': unit_name,
                            'enabled': 'true' if enabled else 'false',
                            'state': state,
                        }
                        # resolve the daemon's container id via either the
                        # new (dashed) or old (dotted) container name
                        c = CephContainer.for_daemon(self.ctx, self.ctx.fsid, daemon_type, daemon_id, 'bash')
                        container_id: Optional[str] = None
                        for name in (c.cname, c.old_cname):
                            if name in name_id_mapping:
                                container_id = name_id_mapping[name]
                                break
                        daemons[j]['container_id'] = container_id
                        if container_id:
                            daemons[j]['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len])
        return daemons

    def _parse_container_id_name(self, code: int, out: str) -> Dict[str, str]:
        # map container names to ids from ps output
        name_id_mapping = {}  # type: Dict[str, str]
        if not code:
            for line in out.splitlines():
                id, name = line.split(',')
                name_id_mapping[name] = id
        return name_id_mapping

    def _get_ls(self) -> Tuple[List[Dict[str, str]], bool]:
        """Return (daemon ls, changed) — using cached full-ls data when the
        quick subset scan shows nothing material has changed."""
        if not self.cached_ls_values:
            logger.info('No cached ls output. Running full daemon ls')
            ls = list_daemons(self.ctx)
            for d in ls:
                self.cached_ls_values[d['name']] = d
            return (ls, True)
        else:
            ls_subset = self._daemon_ls_subset()
            need_full_ls = False
            state_change = False
            if set(self.cached_ls_values.keys()) != set(ls_subset.keys()):
                # case for a new daemon in ls or an old daemon no longer appearing.
                # If that happens we need a full ls
                logger.info('Change detected in state of daemons. Running full daemon ls')
                ls = list_daemons(self.ctx)
                for d in ls:
                    self.cached_ls_values[d['name']] = d
                return (ls, True)
            for daemon, info in self.cached_ls_values.items():
                if info['style'] == 'legacy':
                    # for legacy containers, ls_subset just grabs all the info
                    self.cached_ls_values[daemon] = ls_subset[daemon]
                else:
                    if info['container_id'] != ls_subset[daemon]['container_id']:
                        # case for container id having changed. We need full ls as
                        # info we didn't grab like version and start time could have changed
                        need_full_ls = True
                        break

                    # want to know if a daemons state change because in those cases we want
                    # to report back quicker
                    if (
                        self.cached_ls_values[daemon]['enabled'] != ls_subset[daemon]['enabled']
                        or self.cached_ls_values[daemon]['state'] != ls_subset[daemon]['state']
                    ):
                        state_change = True
                    # if we reach here, container id matched. Update the few values we do track
                    # from ls subset: state, enabled, memory_usage.
                    self.cached_ls_values[daemon]['enabled'] = ls_subset[daemon]['enabled']
                    self.cached_ls_values[daemon]['state'] = ls_subset[daemon]['state']
                    if 'memory_usage' in ls_subset[daemon]:
                        self.cached_ls_values[daemon]['memory_usage'] = ls_subset[daemon]['memory_usage']
            if need_full_ls:
                logger.info('Change detected in state of daemons. Running full daemon ls')
                ls = list_daemons(self.ctx)
                for d in ls:
                    self.cached_ls_values[d['name']] = d
                return (ls, True)
            else:
                ls = [info for daemon, info in self.cached_ls_values.items()]
                return (ls, state_change)
4391 | ||
4392 | ||
class AgentGatherer(Thread):
    """Background thread that periodically runs a data-gathering callable
    for the agent.

    ``func`` must return a ``(data, changed)`` tuple; when the data
    changed or the agent's ack counter moved, the owning agent is woken
    so it reports to the mgr sooner.
    """

    def __init__(self, agent: 'CephadmAgent', func: Callable, gatherer_type: str = 'Unnamed', initial_ack: int = 0) -> None:
        self.agent = agent
        self.func = func
        self.gatherer_type = gatherer_type  # label used in log messages
        self.ack = initial_ack
        self.event = Event()
        self.data: Any = None  # latest result of func(), or None on failure
        self.stop = False
        # ring buffer of the last 3 iteration durations, used to shorten
        # the sleep so iterations stay roughly loop_interval apart
        self.recent_iteration_run_times: List[float] = [0.0, 0.0, 0.0]
        self.recent_iteration_index: int = 0
        super(AgentGatherer, self).__init__(target=self.run)

    def run(self) -> None:
        """Gather loop: run func, wake the agent on change, then sleep."""
        while not self.stop:
            try:
                start_time = time.monotonic()

                ack = self.agent.ack
                change = False
                try:
                    self.data, change = self.func()
                except Exception as e:
                    logger.error(f'{self.gatherer_type} Gatherer encountered exception gathering data: {e}')
                    self.data = None
                if ack != self.ack or change:
                    # the mgr acked something new or our data changed;
                    # have the agent report right away
                    self.ack = ack
                    self.agent.wakeup()

                end_time = time.monotonic()
                run_time = datetime.timedelta(seconds=(end_time - start_time))
                self.recent_iteration_run_times[self.recent_iteration_index] = run_time.total_seconds()
                self.recent_iteration_index = (self.recent_iteration_index + 1) % 3
                # average over iterations that took measurable time.
                # BUGFIX: guard the divisor — when every recorded duration is
                # 0.0 the old code raised ZeroDivisionError (swallowed by the
                # outer except, but spamming the log and skipping the sleep).
                completed_times = [t for t in self.recent_iteration_run_times if t]
                run_time_average = (sum(completed_times) / len(completed_times)) if completed_times else 0.0

                self.event.wait(max(self.agent.loop_interval - int(run_time_average), 0))
                self.event.clear()
            except Exception as e:
                logger.error(f'{self.gatherer_type} Gatherer encountered exception: {e}')

    def shutdown(self) -> None:
        """Ask the gather loop to exit after the current iteration."""
        self.stop = True

    def wakeup(self) -> None:
        """Interrupt the current sleep so the next iteration starts now."""
        self.event.set()

    def update_func(self, func: Callable) -> None:
        """Swap in a new gathering callable (used when config changes)."""
        self.func = func
4441 | ||
4442 | ||
def command_agent(ctx: CephadmContext) -> None:
    """Entry point for `cephadm agent`: run the host agent until stopped.

    Raises Error when the agent's daemon directory is absent (i.e. the
    agent was never deployed on this host).
    """
    agent = CephadmAgent(ctx, ctx.fsid, ctx.daemon_id)

    daemon_dir = agent.daemon_dir
    if not os.path.isdir(daemon_dir):
        raise Error(f'Agent daemon directory {daemon_dir} does not exist. Perhaps agent was never deployed?')

    agent.run()
4450 | ||
4451 | ||
9f95a23c TL |
4452 | ################################## |
4453 | ||
f6b5b4d7 | 4454 | |
9f95a23c | 4455 | @infer_image |
f67539c2 TL |
4456 | def command_version(ctx): |
4457 | # type: (CephadmContext) -> int | |
4458 | c = CephContainer(ctx, ctx.image, 'ceph', ['--version']) | |
4459 | out, err, ret = call(ctx, c.run_cmd(), desc=c.entrypoint) | |
4460 | if not ret: | |
4461 | print(out.strip()) | |
4462 | return ret | |
9f95a23c TL |
4463 | |
4464 | ################################## | |
4465 | ||
f6b5b4d7 | 4466 | |
33c7a0ef | 4467 | @default_image |
f67539c2 TL |
4468 | def command_pull(ctx): |
4469 | # type: (CephadmContext) -> int | |
f6b5b4d7 | 4470 | |
33c7a0ef TL |
4471 | try: |
4472 | _pull_image(ctx, ctx.image, ctx.insecure) | |
4473 | except UnauthorizedRegistryError: | |
4474 | err_str = 'Failed to pull container image. Check that host(s) are logged into the registry' | |
4475 | logger.debug(f'Pulling image for `command_pull` failed: {err_str}') | |
4476 | raise Error(err_str) | |
f67539c2 | 4477 | return command_inspect_image(ctx) |
9f95a23c | 4478 | |
f6b5b4d7 | 4479 | |
a4b75251 TL |
def _pull_image(ctx, image, insecure=False):
    # type: (CephadmContext, str, bool) -> None
    """Pull a container image, retrying known-transient failures.

    Makes up to three attempts, sleeping 1 s and then 4 s between them.
    Raises UnauthorizedRegistryError on auth failures, Error on any other
    non-transient failure or once the attempts are exhausted.
    """
    logger.info('Pulling container image %s...' % image)

    # substrings of engine errors known to be transient and worth retrying
    ignorelist = [
        'error creating read-write layer with ID',
        'net/http: TLS handshake timeout',
        'Digest did not match, expected',
    ]

    cmd = [ctx.container_engine.path, 'pull', image]
    if isinstance(ctx.container_engine, Podman):
        if insecure:
            cmd.append('--tls-verify=false')

        if os.path.exists('/etc/ceph/podman-auth.json'):
            cmd.append('--authfile=/etc/ceph/podman-auth.json')
    cmd_str = ' '.join(cmd)

    sleep_schedule = [1, 4, 25]
    for attempt, sleep_secs in enumerate(sleep_schedule, start=1):
        out, err, ret = call(ctx, cmd)
        if not ret:
            return

        if 'unauthorized' in err:
            raise UnauthorizedRegistryError()

        if not any(pattern in err for pattern in ignorelist):
            # non-transient failure; retrying will not help
            raise Error('Failed command: %s' % cmd_str)

        if attempt == len(sleep_schedule):
            # FIX: last attempt failed — give up now instead of sleeping
            # another 25s before raising, as the previous code did.
            break

        logger.info('`%s` failed transiently. Retrying. waiting %s seconds...' % (cmd_str, sleep_secs))
        time.sleep(sleep_secs)

    raise Error('Failed command: %s: maximum retries reached' % cmd_str)
f67539c2 | 4514 | |
9f95a23c TL |
4515 | ################################## |
4516 | ||
f6b5b4d7 | 4517 | |
9f95a23c | 4518 | @infer_image |
f67539c2 TL |
4519 | def command_inspect_image(ctx): |
4520 | # type: (CephadmContext) -> int | |
4521 | out, err, ret = call_throws(ctx, [ | |
4522 | ctx.container_engine.path, 'inspect', | |
cd265ab1 | 4523 | '--format', '{{.ID}},{{.RepoDigests}}', |
f67539c2 | 4524 | ctx.image]) |
9f95a23c TL |
4525 | if ret: |
4526 | return errno.ENOENT | |
f67539c2 | 4527 | info_from = get_image_info_from_inspect(out.strip(), ctx.image) |
f91f0fd5 | 4528 | |
f67539c2 | 4529 | ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip() |
f91f0fd5 TL |
4530 | info_from['ceph_version'] = ver |
4531 | ||
4532 | print(json.dumps(info_from, indent=4, sort_keys=True)) | |
4533 | return 0 | |
4534 | ||
4535 | ||
def normalize_image_digest(digest: str) -> str:
    """Prepend DEFAULT_REGISTRY to well-known bare Ceph image short names.

    A short name such as 'ceph/ceph' becomes '<DEFAULT_REGISTRY>/ceph/ceph';
    anything else (already registry-qualified, or not a known Ceph short
    name) is returned unchanged.

    Note: the previous doctests called this function with a second
    'default registry' argument that the signature no longer accepts, so
    they raised TypeError under doctest; they are rewritten for the
    single-argument signature.

    No change:
    >>> normalize_image_digest('quay.ceph.io/ceph/ceph')
    'quay.ceph.io/ceph/ceph'

    >>> normalize_image_digest('docker.io/ubuntu')
    'docker.io/ubuntu'

    >>> normalize_image_digest('localhost/ceph')
    'localhost/ceph'
    """
    known_shortnames = [
        'ceph/ceph',
        'ceph/daemon',
        'ceph/daemon-base',
    ]
    for image in known_shortnames:
        if digest.startswith(image):
            return f'{DEFAULT_REGISTRY}/{digest}'
    return digest
4561 | ||
4562 | ||
def get_image_info_from_inspect(out, image):
    # type: (str, str) -> Dict[str, Union[str, List[str]]]
    """Turn '<id>,[digest ...]' inspect output into an info dict.

    :param out: '{{.ID}},{{.RepoDigests}}'-formatted container inspect output
    :param image: image name, used only for the error message
    :raises Error: when *out* is empty
    """
    # Check for empty output BEFORE unpacking: the original split first,
    # so an empty string raised ValueError instead of the intended Error.
    if not out:
        raise Error('inspect {}: empty result'.format(image))
    image_id, digests = out.split(',', 1)
    r = {
        'image_id': normalize_container_id(image_id)
    }  # type: Dict[str, Union[str, List[str]]]
    if digests:
        # digests looks like '[repo@sha256:... repo2@sha256:...]' - strip brackets
        r['repo_digests'] = list(map(normalize_image_digest, digests[1:-1].split(' ')))
    return r
4574 | ||
9f95a23c TL |
4575 | ################################## |
4576 | ||
f91f0fd5 | 4577 | |
f67539c2 TL |
def check_subnet(subnets: str) -> Tuple[int, List[int], str]:
    """Determine whether the given string is a valid subnet

    :param subnets: subnet string, a single definition or comma separated list of CIDR subnets
    :returns: return code, IP version list of the subnets and msg describing any errors validation errors
    """
    rc = 0
    versions = set()
    errors = []
    for subnet in (chunk.strip() for chunk in subnets.split(',')):
        # every entry must look like address/netmask before we even parse it
        if not re.search(r'\/\d+$', subnet):
            rc = 1
            errors.append(f'{subnet} is not in CIDR format (address/netmask)')
            continue
        try:
            versions.add(ipaddress.ip_network(subnet).version)
        except ValueError as e:
            rc = 1
            errors.append(f'{subnet} invalid: {str(e)}')

    return rc, list(versions), ', '.join(errors)
4604 | ||
4605 | ||
f6b5b4d7 TL |
def unwrap_ipv6(address):
    # type: (str) -> str
    """Strip surrounding square brackets from a bracketed IPv6 literal, if any."""
    is_bracketed = address.startswith('[') and address.endswith(']')
    return address[1:-1] if is_bracketed else address
4611 | ||
4612 | ||
f91f0fd5 TL |
def wrap_ipv6(address):
    # type: (str) -> str
    """Wrap a bare IPv6 literal in square brackets; return anything else as-is.

    We cannot assume the input is an IPv6 address at all: an already-wrapped
    address or a hostname fails to parse and triggers ValueError, in which
    case we leave it untouched.
    """
    try:
        needs_brackets = ipaddress.ip_address(address).version == 6
    except ValueError:
        needs_brackets = False
    return f'[{address}]' if needs_brackets else address
4626 | ||
4627 | ||
f6b5b4d7 TL |
def is_ipv6(address):
    # type: (str) -> bool
    """Return True when *address* (brackets allowed) parses as IPv6."""
    candidate = unwrap_ipv6(address)
    try:
        parsed = ipaddress.ip_address(candidate)
    except ValueError:
        logger.warning('Address: {} is not a valid IP address'.format(candidate))
        return False
    return parsed.version == 6
4636 | ||
4637 | ||
33c7a0ef TL |
def ip_in_subnets(ip_addr: str, subnets: str) -> bool:
    """Determine if the ip_addr belongs to any of the subnets list.

    :param ip_addr: IP address (IPv6 may be bracket-wrapped)
    :param subnets: comma separated list of CIDR subnets
    """
    # Unwrapping and parsing the address is independent of the subnet being
    # tested, so do it once instead of once per subnet (the original redid
    # this loop-invariant work on every iteration).
    unwrapped = unwrap_ipv6(ip_addr) if is_ipv6(ip_addr) else ip_addr
    ip_address = ipaddress.ip_address(unwrapped)
    for subnet in subnets.split(','):
        if ip_address in ipaddress.ip_network(subnet.strip()):
            return True
    return False
4646 | ||
4647 | ||
def parse_mon_addrv(addrv_arg: str) -> List[EndPoint]:
    """Parse mon-addrv param into a list of mon end points.

    Accepts the bracketed form '[v2:ip:port,v1:ip:port]'; every entry must
    carry an explicit port.

    :raises Error: on unbracketed/empty input or a missing port number
    """
    r = re.compile(r':(\d+)$')
    addrv_args = []
    addr_arg = addrv_arg
    # guard empty input explicitly so it raises Error, not IndexError;
    # also fixes the 'backets' typo in the user-facing message
    if not addr_arg or addr_arg[0] != '[' or addr_arg[-1] != ']':
        raise Error(f'--mon-addrv value {addr_arg} must use square brackets')

    for addr in addr_arg[1:-1].split(','):
        hasport = r.findall(addr)
        if not hasport:
            raise Error(f'--mon-addrv value {addr_arg} must include port number')
        port_str = hasport[0]
        addr = re.sub(r'^v\d+:', '', addr)  # strip off v1: or v2: prefix
        base_ip = addr[0:-(len(port_str)) - 1]
        addrv_args.append(EndPoint(base_ip, int(port_str)))

    return addrv_args
4666 | ||
4667 | ||
def parse_mon_ip(mon_ip: str) -> List[EndPoint]:
    """Parse mon-ip param into a list of mon end points."""
    port_re = re.compile(r':(\d+)$')
    matches = port_re.findall(mon_ip)
    if matches:
        # an explicit port was supplied: strip ':<port>' to get the bare IP
        port_str = matches[0]
        return [EndPoint(mon_ip[0:-(len(port_str)) - 1], int(port_str))]
    # No port provided: use fixed ports for ceph monitor
    return [EndPoint(mon_ip, 3300), EndPoint(mon_ip, 6789)]
4683 | ||
4684 | ||
def build_addrv_params(addrv: List[EndPoint]) -> str:
    """Convert mon end-points (ip:port) into the format: [v[1|2]:ip:port1]"""
    if len(addrv) > 2:
        raise Error('Detected a local mon-addrv list with more than 2 entries.')
    port_to_ver: Dict[int, str] = {6789: 'v1', 3300: 'v2'}
    entries: List[str] = []
    for ep in addrv:
        try:
            ver = port_to_ver[ep.port]
        except KeyError:
            ver = 'v2'  # default mon protocol version if port is not provided
            logger.warning(f'Using msgr2 protocol for unrecognized port {ep}')
        entries.append(f'{ver}:{ep.ip}:{ep.port}')

    return '[{0}]'.format(','.join(entries))
4701 | ||
4702 | ||
def get_public_net_from_cfg(ctx: CephadmContext) -> Optional[str]:
    """Get mon public network from configuration file.

    Returns the validated 'global/public_network' value, or None when the
    option is absent.

    :raises Error: when the configured network is malformed, when none of the
        configured subnets is present on this host, or when the supplied
        --mon-ip / --mon-addrv does not fall inside any configured subnet.
    """
    cp = read_config(ctx.config)
    if not cp.has_option('global', 'public_network'):
        return None

    # Ensure all public CIDR networks are valid
    public_network = cp.get('global', 'public_network')
    rc, _, err_msg = check_subnet(public_network)
    if rc:
        raise Error(f'Invalid public_network {public_network} parameter: {err_msg}')

    # Ensure all public CIDR networks are configured locally.
    # At least ONE configured subnet must exist locally; the rest only warn.
    configured_subnets = set([x.strip() for x in public_network.split(',')])
    local_subnets = set([x[0] for x in list_networks(ctx).items()])
    valid_public_net = False
    for net in configured_subnets:
        if net in local_subnets:
            valid_public_net = True
        else:
            logger.warning(f'The public CIDR network {net} (from -c conf file) is not configured locally.')
    if not valid_public_net:
        raise Error(f'None of the public CIDR network(s) {configured_subnets} (from -c conf file) is configured locally.')

    # Ensure public_network is compatible with the provided mon-ip (or mon-addrv)
    if ctx.mon_ip:
        if not ip_in_subnets(ctx.mon_ip, public_network):
            raise Error(f'The provided --mon-ip {ctx.mon_ip} does not belong to any public_network(s) {public_network}')
    elif ctx.mon_addrv:
        addrv_args = parse_mon_addrv(ctx.mon_addrv)
        for addrv in addrv_args:
            if not ip_in_subnets(addrv.ip, public_network):
                raise Error(f'The provided --mon-addrv {addrv.ip} ip does not belong to any public_network(s) {public_network}')

    logger.debug(f'Using mon public network from configuration file {public_network}')
    return public_network
4739 | ||
4740 | ||
def infer_mon_network(ctx: CephadmContext, mon_eps: List[EndPoint]) -> Optional[str]:
    """Infer mon public network from local network.

    Scans every locally configured network and collects those that contain
    at least one of the given mon endpoints.

    :returns: comma separated list of matching CIDR networks (duplicates removed)
    :raises Error: when no local network contains any mon endpoint
    """
    # Make sure IP is configured locally, and then figure out the CIDR network
    mon_networks = []
    for net, ifaces in list_networks(ctx).items():
        # build local_ips list for the specified network
        # (annotation fixed: the list holds parsed address objects, not str)
        local_ips: List[Union[ipaddress.IPv4Address, ipaddress.IPv6Address]] = []
        for _, ls in ifaces.items():
            local_ips.extend([ipaddress.ip_address(ip) for ip in ls])

        # check if any of mon ips belong to this net
        for mon_ep in mon_eps:
            try:
                if ipaddress.ip_address(unwrap_ipv6(mon_ep.ip)) in local_ips:
                    mon_networks.append(net)
                    logger.info(f'Mon IP `{mon_ep.ip}` is in CIDR network `{net}`')
            except ValueError as e:
                logger.warning(f'Cannot infer CIDR network for mon IP `{mon_ep.ip}` : {e}')

    if not mon_networks:
        raise Error('Cannot infer CIDR network. Pass --skip-mon-network to configure it later')
    else:
        logger.debug(f'Inferred mon public CIDR from local network configuration {mon_networks}')

    mon_networks = list(set(mon_networks))  # remove duplicates
    return ','.join(mon_networks)
4767 | ||
4768 | ||
def prepare_mon_addresses(ctx: CephadmContext) -> Tuple[str, bool, Optional[str]]:
    """Get mon public network configuration.

    :returns: (mon addrv string, whether IPv6 is in use, mon public network
        or None when --skip-mon-network was given)
    :raises Error: when neither --mon-ip nor --mon-addrv was supplied
    """
    ipv6 = False
    addrv_args: List[EndPoint] = []
    mon_addrv: str = ''  # i.e: [v2:192.168.100.1:3300,v1:192.168.100.1:6789]

    if ctx.mon_ip:
        ipv6 = is_ipv6(ctx.mon_ip)
        if ipv6:
            # bracket-wrap the bare IPv6 literal before further parsing
            ctx.mon_ip = wrap_ipv6(ctx.mon_ip)
        addrv_args = parse_mon_ip(ctx.mon_ip)
        mon_addrv = build_addrv_params(addrv_args)
    elif ctx.mon_addrv:
        # heuristic: a second '[' (beyond the outer wrapper) implies
        # bracketed IPv6 entries inside the list
        ipv6 = ctx.mon_addrv.count('[') > 1
        addrv_args = parse_mon_addrv(ctx.mon_addrv)
        mon_addrv = ctx.mon_addrv
    else:
        raise Error('must specify --mon-ip or --mon-addrv')

    if addrv_args:
        # verify each endpoint is locally bindable / not already in use
        for end_point in addrv_args:
            check_ip_port(ctx, end_point)

    logger.debug(f'Base mon IP(s) is {addrv_args}, mon addrv is {mon_addrv}')
    mon_network = None
    if not ctx.skip_mon_network:
        # conf-file public_network wins; otherwise infer from local interfaces
        mon_network = get_public_net_from_cfg(ctx) or infer_mon_network(ctx, addrv_args)

    return (mon_addrv, ipv6, mon_network)
9f95a23c | 4798 | |
f6b5b4d7 | 4799 | |
def prepare_cluster_network(ctx: CephadmContext) -> Tuple[str, bool]:
    """Resolve the OSD cluster network (CLI flag or conf file) and whether it uses IPv6.

    The cluster network may not exist on this node, so all we can do is
    validate that the given value is a valid IPv4 or IPv6 subnet list.
    """
    ipv6_cluster_network = False
    cp = read_config(ctx.config)
    cluster_network = ctx.cluster_network
    if cluster_network is None and cp.has_option('global', 'cluster_network'):
        cluster_network = cp.get('global', 'cluster_network')

    if not cluster_network:
        logger.info('Internal network (--cluster-network) has not '
                    'been provided, OSD replication will default to '
                    'the public_network')
        return cluster_network, ipv6_cluster_network

    # warn (but do not fail) about subnets that are not present on this host
    local_subnets = {entry[0] for entry in list_networks(ctx).items()}
    cluster_nets = {part.strip() for part in cluster_network.split(',')}
    for net in cluster_nets:
        if net not in local_subnets:
            logger.warning(f'The cluster CIDR network {net} is not configured locally.')

    rc, versions, err_msg = check_subnet(cluster_network)
    if rc:
        raise Error(f'Invalid --cluster-network parameter: {err_msg}')
    ipv6_cluster_network = 6 in versions

    return cluster_network, ipv6_cluster_network
4826 | ||
4827 | ||
def create_initial_keys(
    ctx: CephadmContext,
    uid: int, gid: int,
    mgr_id: str
) -> Tuple[str, str, str, Any, Any]:  # type: ignore
    """Generate the mon/admin/mgr keys and write the bootstrap keyring files.

    Keys are generated by running ceph-authtool inside the container image.

    :returns: (mon_key, mgr_key, admin_key, bootstrap keyring tempfile,
        admin keyring tempfile) -- the two tempfiles are owned by uid/gid
    """
    _image = ctx.image

    # create some initial keys
    logger.info('Creating initial keys...')
    mon_key = CephContainer(
        ctx,
        image=_image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()
    admin_key = CephContainer(
        ctx,
        image=_image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()
    mgr_key = CephContainer(
        ctx,
        image=_image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()

    # keyring granting the bootstrap mon, admin client and first mgr
    # their respective capabilities
    keyring = ('[mon.]\n'
               '\tkey = %s\n'
               '\tcaps mon = allow *\n'
               '[client.admin]\n'
               '\tkey = %s\n'
               '\tcaps mon = allow *\n'
               '\tcaps mds = allow *\n'
               '\tcaps mgr = allow *\n'
               '\tcaps osd = allow *\n'
               '[mgr.%s]\n'
               '\tkey = %s\n'
               '\tcaps mon = profile mgr\n'
               '\tcaps mds = allow *\n'
               '\tcaps osd = allow *\n'
               % (mon_key, admin_key, mgr_id, mgr_key))

    admin_keyring = write_tmp('[client.admin]\n'
                              '\tkey = ' + admin_key + '\n',
                              uid, gid)

    # tmp keyring file
    bootstrap_keyring = write_tmp(keyring, uid, gid)
    return (mon_key, mgr_key, admin_key,
            bootstrap_keyring, admin_keyring)
4881 | ||
9f95a23c | 4882 | |
f67539c2 TL |
def create_initial_monmap(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str,
    mon_id: str, mon_addr: str
) -> Any:
    """Create the initial monmap in a tempfile via monmaptool in the container.

    :returns: the open tempfile holding the monmap, chowned to uid/gid
    """
    logger.info('Creating initial monmap...')
    # empty placeholder file, bind-mounted into the container for monmaptool
    monmap = write_tmp('', 0, 0)
    out = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/monmaptool',
        args=[
            '--create',
            '--clobber',
            '--fsid', fsid,
            '--addv', mon_id, mon_addr,
            '/tmp/monmap'
        ],
        volume_mounts={
            monmap.name: '/tmp/monmap:z',
        },
    ).run()
    logger.debug(f'monmaptool for {mon_id} {mon_addr} on {out}')

    # pass monmap file to ceph user for use by ceph-mon --mkfs below
    os.fchown(monmap.fileno(), uid, gid)
    return monmap
9f95a23c | 4911 | |
f67539c2 TL |
4912 | |
def prepare_create_mon(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mon_id: str,
    bootstrap_keyring_path: str,
    monmap_path: str
) -> Tuple[str, str]:
    """Run 'ceph-mon --mkfs' for the bootstrap monitor.

    Creates the daemon directories, then initializes the mon data dir from
    the bootstrap keyring and initial monmap.

    :returns: (mon data dir, cluster log dir) on the host
    """
    logger.info('Creating mon...')
    create_daemon_dirs(ctx, fsid, 'mon', mon_id, uid, gid)
    mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', mon_id)
    log_dir = get_log_dir(fsid, ctx.log_dir)
    out = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/ceph-mon',
        args=[
            '--mkfs',
            '-i', mon_id,
            '--fsid', fsid,
            '-c', '/dev/null',
            '--monmap', '/tmp/monmap',
            '--keyring', '/tmp/keyring',
        ] + get_daemon_args(ctx, fsid, 'mon', mon_id),
        volume_mounts={
            log_dir: '/var/log/ceph:z',
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
            bootstrap_keyring_path: '/tmp/keyring:z',
            monmap_path: '/tmp/monmap:z',
        },
    ).run()
    logger.debug(f'create mon.{mon_id} on {out}')
    return (mon_dir, log_dir)
4945 | ||
4946 | ||
def create_mon(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mon_id: str
) -> None:
    """Deploy the bootstrap monitor daemon mon.<mon_id> for cluster *fsid*."""
    container = get_container(ctx, fsid, 'mon', mon_id)
    ctx.meta_json = json.dumps({'service_name': 'mon'})
    deploy_daemon(
        ctx, fsid, 'mon', mon_id, container, uid, gid,
        config=None, keyring=None)
4956 | ||
9f95a23c | 4957 | |
f67539c2 TL |
def wait_for_mon(
    ctx: CephadmContext,
    mon_id: str, mon_dir: str,
    admin_keyring_path: str, config_path: str
) -> None:
    """Block until the freshly deployed monitor answers 'ceph status'."""
    logger.info('Waiting for mon to start...')
    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/ceph',
        args=[
            'status'],
        volume_mounts={
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
            admin_keyring_path: '/etc/ceph/ceph.client.admin.keyring:z',
            config_path: '/etc/ceph/ceph.conf:z',
        },
    )

    # wait for the service to become available
    def is_mon_available():
        # type: () -> bool
        # a single probe: 'ceph status' succeeding means the mon is up
        timeout = ctx.timeout if ctx.timeout else 60  # seconds
        out, err, ret = call(ctx, c.run_cmd(),
                             desc=c.entrypoint,
                             timeout=timeout)
        return ret == 0

    is_available(ctx, 'mon', is_mon_available)
4987 | ||
4988 | ||
def create_mgr(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mgr_id: str, mgr_key: str,
    config: str, clifunc: Callable
) -> None:
    """Deploy the first mgr daemon and block until the mgrmap reports it available.

    :param clifunc: callable used to run 'ceph' CLI commands for the probe
    """
    logger.info('Creating mgr...')
    mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key)
    mgr_c = get_container(ctx, fsid, 'mgr', mgr_id)
    # Note:the default port used by the Prometheus node exporter is opened in fw
    ctx.meta_json = json.dumps({'service_name': 'mgr'})
    deploy_daemon(ctx, fsid, 'mgr', mgr_id, mgr_c, uid, gid,
                  config=config, keyring=mgr_keyring, ports=[9283])

    # wait for the service to become available
    logger.info('Waiting for mgr to start...')

    def is_mgr_available():
        # type: () -> bool
        timeout = ctx.timeout if ctx.timeout else 60  # seconds
        try:
            out = clifunc(['status', '-f', 'json-pretty'], timeout=timeout)
            j = json.loads(out)
            return j.get('mgrmap', {}).get('available', False)
        except Exception as e:
            # best effort probe: any failure just means "not yet"
            logger.debug('status failed: %s' % e)
            return False
    is_available(ctx, 'mgr', is_mgr_available)
5017 | ||
5018 | ||
def prepare_ssh(
    ctx: CephadmContext,
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:
    """Configure cephadm's SSH access and apply the initial service specs.

    Installs (or generates) the SSH identity into the cephadm mgr module,
    authorizes the public key for ctx.ssh_user, registers this host with
    the orchestrator, and applies the default mon/mgr/crash/monitoring
    service specs.

    :param cli: callable used to run 'ceph' CLI commands
    :param wait_for_mgr_restart: unused here; kept for signature parity with
        the sibling prepare_* helpers
    :raises Error: when adding this host to the orchestrator fails
    """
    cli(['cephadm', 'set-user', ctx.ssh_user])

    if ctx.ssh_config:
        logger.info('Using provided ssh config...')
        mounts = {
            pathify(ctx.ssh_config.name): '/tmp/cephadm-ssh-config:z',
        }
        cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts=mounts)

    if ctx.ssh_private_key and ctx.ssh_public_key:
        logger.info('Using provided ssh keys...')
        mounts = {
            pathify(ctx.ssh_private_key.name): '/tmp/cephadm-ssh-key:z',
            pathify(ctx.ssh_public_key.name): '/tmp/cephadm-ssh-key.pub:z'
        }
        cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts)
        cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts=mounts)
        ssh_pub = cli(['cephadm', 'get-pub-key'])
    else:
        logger.info('Generating ssh key...')
        cli(['cephadm', 'generate-key'])
        ssh_pub = cli(['cephadm', 'get-pub-key'])
    with open(ctx.output_pub_ssh_key, 'w') as f:
        f.write(ssh_pub)
    logger.info('Wrote public SSH key to %s' % ctx.output_pub_ssh_key)

    # add the key to the ssh user's authorized_keys so the mgr can log in
    authorize_ssh_key(ssh_pub, ctx.ssh_user)

    host = get_hostname()
    logger.info('Adding host %s...' % host)
    try:
        args = ['orch', 'host', 'add', host]
        # register the bootstrap address explicitly when we know it
        if ctx.mon_ip:
            args.append(unwrap_ipv6(ctx.mon_ip))
        elif ctx.mon_addrv:
            addrv_args = parse_mon_addrv(ctx.mon_addrv)
            args.append(unwrap_ipv6(addrv_args[0].ip))
        cli(args)
    except RuntimeError as e:
        raise Error('Failed to add host <%s>: %s' % (host, e))

    for t in ['mon', 'mgr']:
        if not ctx.orphan_initial_daemons:
            logger.info('Deploying %s service with default placement...' % t)
            cli(['orch', 'apply', t])
        else:
            logger.info('Deploying unmanaged %s service...' % t)
            cli(['orch', 'apply', t, '--unmanaged'])

    if not ctx.orphan_initial_daemons:
        logger.info('Deploying crash service with default placement...')
        cli(['orch', 'apply', 'crash'])

    if not ctx.skip_monitoring_stack:
        for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
            logger.info('Deploying %s service with default placement...' % t)
            cli(['orch', 'apply', t])
5081 | ||
5082 | ||
def enable_cephadm_mgr_module(
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:
    """Enable the cephadm mgr module and select it as the orchestrator backend."""
    logger.info('Enabling cephadm module...')
    cli(['mgr', 'module', 'enable', 'cephadm'])
    # enabling a module restarts the mgr; wait before issuing 'orch' commands
    wait_for_mgr_restart()
    logger.info('Setting orchestrator backend to cephadm...')
    cli(['orch', 'set', 'backend', 'cephadm'])
5092 | ||
5093 | ||
def prepare_dashboard(
    ctx: CephadmContext,
    uid: int, gid: int,
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:
    """Enable the dashboard module, install its TLS cert, and create the admin user.

    :param cli: callable used to run 'ceph' CLI commands
    :param wait_for_mgr_restart: called after enabling the module, since the
        mgr restarts on module enablement
    """
    # Configure SSL port (cephadm only allows to configure dashboard SSL port)
    # if the user does not want to use SSL he can change this setting once the cluster is up
    cli(['config', 'set', 'mgr', 'mgr/dashboard/ssl_server_port', str(ctx.ssl_dashboard_port)])

    # configuring dashboard parameters
    logger.info('Enabling the dashboard module...')
    cli(['mgr', 'module', 'enable', 'dashboard'])
    wait_for_mgr_restart()

    # dashboard crt and key
    if ctx.dashboard_key and ctx.dashboard_crt:
        logger.info('Using provided dashboard certificate...')
        mounts = {
            pathify(ctx.dashboard_crt.name): '/tmp/dashboard.crt:z',
            pathify(ctx.dashboard_key.name): '/tmp/dashboard.key:z'
        }
        cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts)
        cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts)
    else:
        logger.info('Generating a dashboard self-signed certificate...')
        cli(['dashboard', 'create-self-signed-cert'])

    logger.info('Creating initial admin user...')
    password = ctx.initial_dashboard_password or generate_password()
    # password is passed to the CLI through a bind-mounted tempfile, never argv
    tmp_password_file = write_tmp(password, uid, gid)
    cmd = ['dashboard', 'ac-user-create', ctx.initial_dashboard_user, '-i', '/tmp/dashboard.pw', 'administrator', '--force-password']
    if not ctx.dashboard_password_noupdate:
        cmd.append('--pwd-update-required')
    cli(cmd, extra_mounts={pathify(tmp_password_file.name): '/tmp/dashboard.pw:z'})
    logger.info('Fetching dashboard port number...')
    out = cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
    port = int(out)

    # Open dashboard port
    if not ('skip_firewalld' in ctx and ctx.skip_firewalld):
        fw = Firewalld(ctx)
        fw.open_ports([port])
        fw.apply_rules()

    logger.info('Ceph Dashboard is now available at:\n\n'
                '\t URL: https://%s:%s/\n'
                '\t User: %s\n'
                '\tPassword: %s\n' % (
                    get_fqdn(), port,
                    ctx.initial_dashboard_user,
                    password))
5146 | ||
5147 | ||
def prepare_bootstrap_config(
    ctx: CephadmContext,
    fsid: str, mon_addr: str, image: str

) -> str:
    """Build the bootstrap ceph.conf text from the user config plus defaults.

    Starts from ctx.config (if any), injects fsid/mon_host/container_image,
    hardens global-id reclaim, applies single-host and log-to-file tweaks,
    and performs a registry login when registry credentials were given.

    :returns: the rendered configuration as a string
    """
    cp = read_config(ctx.config)
    if not cp.has_section('global'):
        cp.add_section('global')
    cp.set('global', 'fsid', fsid)
    cp.set('global', 'mon_host', mon_addr)
    cp.set('global', 'container_image', image)

    if not cp.has_section('mon'):
        cp.add_section('mon')
    # respect either spelling (underscores or spaces) if the user set it
    if (
            not cp.has_option('mon', 'auth_allow_insecure_global_id_reclaim')
            and not cp.has_option('mon', 'auth allow insecure global id reclaim')
    ):
        cp.set('mon', 'auth_allow_insecure_global_id_reclaim', 'false')

    if ctx.single_host_defaults:
        logger.info('Adjusting default settings to suit single-host cluster...')
        # replicate across osds, not hosts
        if (
                not cp.has_option('global', 'osd_crush_chooseleaf_type')
                and not cp.has_option('global', 'osd crush chooseleaf type')
        ):
            cp.set('global', 'osd_crush_chooseleaf_type', '0')
        # replica 2x
        if (
                not cp.has_option('global', 'osd_pool_default_size')
                and not cp.has_option('global', 'osd pool default size')
        ):
            cp.set('global', 'osd_pool_default_size', '2')
        # disable mgr standby modules (so we can colocate multiple mgrs on one host)
        if not cp.has_section('mgr'):
            cp.add_section('mgr')
        if (
                not cp.has_option('mgr', 'mgr_standby_modules')
                and not cp.has_option('mgr', 'mgr standby modules')
        ):
            cp.set('mgr', 'mgr_standby_modules', 'false')
    if ctx.log_to_file:
        # route all daemon and cluster logs to files instead of stderr/journald
        cp.set('global', 'log_to_file', 'true')
        cp.set('global', 'log_to_stderr', 'false')
        cp.set('global', 'log_to_journald', 'false')
        cp.set('global', 'mon_cluster_log_to_file', 'true')
        cp.set('global', 'mon_cluster_log_to_stderr', 'false')
        cp.set('global', 'mon_cluster_log_to_journald', 'false')

    cpf = StringIO()
    cp.write(cpf)
    config = cpf.getvalue()

    if ctx.registry_json or ctx.registry_url:
        command_registry_login(ctx)

    return config
5207 | ||
5208 | ||
def finish_bootstrap_config(
    ctx: CephadmContext,
    fsid: str,
    config: str,
    mon_id: str, mon_dir: str,
    mon_network: Optional[str], ipv6: bool,
    cli: Callable,
    cluster_network: Optional[str], ipv6_cluster_network: bool

) -> None:
    """Minimize/assimilate the bootstrap config, set networks, write ctx.output_config.

    Unless --no-minimize-config was given: assimilates the generated conf
    into the mon store, regenerates a minimal conf, and restarts the mon so
    it picks the minimal conf up.
    """
    if not ctx.no_minimize_config:
        logger.info('Assimilating anything we can from ceph.conf...')
        cli([
            'config', 'assimilate-conf',
            '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
        ], {
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
        })
        logger.info('Generating new minimal ceph.conf...')
        cli([
            'config', 'generate-minimal-conf',
            '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
        ], {
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
        })
        # re-read our minimized config
        with open(mon_dir + '/config', 'r') as f:
            config = f.read()
        logger.info('Restarting the monitor...')
        call_throws(ctx, [
            'systemctl',
            'restart',
            get_unit_name(fsid, 'mon', mon_id)
        ])
    elif 'image' in ctx and ctx.image:
        # we still want to assimilate the given container image if provided
        cli(['config', 'set', 'global', 'container_image', f'{ctx.image}'])

    if mon_network:
        logger.info(f'Setting mon public_network to {mon_network}')
        cli(['config', 'set', 'mon', 'public_network', mon_network])

    if cluster_network:
        logger.info(f'Setting cluster_network to {cluster_network}')
        cli(['config', 'set', 'global', 'cluster_network', cluster_network])

    if ipv6 or ipv6_cluster_network:
        logger.info('Enabling IPv6 (ms_bind_ipv6) binding')
        cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])

    with open(ctx.output_config, 'w') as f:
        f.write(config)
    logger.info('Wrote config to %s' % ctx.output_config)
    pass
5263 | ||
5264 | ||
a4b75251 TL |
5265 | # funcs to process spec file for apply spec |
5266 | def _parse_yaml_docs(f: Iterable[str]) -> List[List[str]]: | |
5267 | docs = [] | |
5268 | current_doc = [] # type: List[str] | |
5269 | for line in f: | |
33c7a0ef | 5270 | if re.search(r'^---\s+', line): |
a4b75251 TL |
5271 | if current_doc: |
5272 | docs.append(current_doc) | |
5273 | current_doc = [] | |
5274 | else: | |
5275 | current_doc.append(line.rstrip()) | |
5276 | if current_doc: | |
5277 | docs.append(current_doc) | |
5278 | return docs | |
5279 | ||
5280 | ||
5281 | def _parse_yaml_obj(doc: List[str]) -> Dict[str, str]: | |
5282 | # note: this only parses the first layer of yaml | |
5283 | obj = {} # type: Dict[str, str] | |
5284 | current_key = '' | |
5285 | for line in doc: | |
5286 | if line.startswith(' '): | |
5287 | obj[current_key] += line.strip() | |
5288 | elif line.endswith(':'): | |
5289 | current_key = line.strip(':') | |
5290 | obj[current_key] = '' | |
5291 | else: | |
5292 | current_key, val = line.split(':') | |
5293 | obj[current_key] = val.strip() | |
5294 | return obj | |
5295 | ||
5296 | ||
def parse_yaml_objs(f: Iterable[str]) -> List[Dict[str, str]]:
    """Parse every yaml document in *f* into a flat key/value dict."""
    return [_parse_yaml_obj(doc) for doc in _parse_yaml_docs(f)]
5302 | ||
5303 | ||
def _distribute_ssh_keys(ctx: CephadmContext, host_spec: Dict[str, str], bootstrap_hostname: str) -> int:
    """Copy the cluster's ssh public key to the host described by *host_spec*.

    Used while applying a spec file during bootstrap.  Returns 0 on success
    (or when the target is the bootstrap host itself), 1 on failure.
    """
    # prefer a user-supplied public key, fall back to the default pubkey path
    ssh_key = ctx.ssh_public_key.name if ctx.ssh_public_key else CEPH_DEFAULT_PUBKEY

    if bootstrap_hostname == host_spec['hostname']:
        # the bootstrap host already has the key in place
        return 0

    addr = host_spec.get('addr', host_spec['hostname'])
    cmd = ['sudo', '-u', ctx.ssh_user, 'ssh-copy-id', '-f', '-i', ssh_key,
           '-o StrictHostKeyChecking=no', '%s@%s' % (ctx.ssh_user, addr)]
    out, err, code = call(ctx, cmd)
    if code:
        logger.info('\nCopying ssh key to host %s at address %s failed!\n' % (host_spec['hostname'], addr))
        return 1
    logger.info('Added ssh key to host %s at address %s\n' % (host_spec['hostname'], addr))
    return 0
5322 | ||
5323 | ||
33c7a0ef TL |
def save_cluster_config(ctx: CephadmContext, uid: int, gid: int, fsid: str) -> None:
    """Save cluster configuration to the per fsid directory """
    def _copy_if_set(src: str, dst: str) -> None:
        # src may be empty when the corresponding output file was not produced
        if src:
            shutil.copyfile(src, dst)

    conf_dir = f'{ctx.data_dir}/{fsid}/{CEPH_CONF_DIR}'
    makedirs(conf_dir, uid, gid, DATA_DIR_MODE)
    if not os.path.exists(conf_dir):
        logger.warning(f'Cannot create cluster configuration directory {conf_dir}')
        return

    logger.info(f'Saving cluster configuration to {conf_dir} directory')
    _copy_if_set(ctx.output_config, os.path.join(conf_dir, CEPH_CONF))
    _copy_if_set(ctx.output_keyring, os.path.join(conf_dir, CEPH_KEYRING))
    # ctx.output_pub_ssh_key may not exist if user has provided custom ssh keys
    if os.path.exists(ctx.output_pub_ssh_key):
        _copy_if_set(ctx.output_pub_ssh_key, os.path.join(conf_dir, CEPH_PUBKEY))
5341 | ||
5342 | ||
f67539c2 TL |
@default_image
def command_bootstrap(ctx):
    # type: (CephadmContext) -> int
    """Bootstrap a new Ceph cluster on this host.

    Sequence: resolve output paths -> validate options -> prepare host ->
    pick fsid/mon/mgr ids -> pull image -> create initial keys, monmap and
    mon -> write conf/keyring outputs -> create mgr -> enable cephadm mgr
    module, ssh, dashboard -> optionally apply a service spec -> final
    config tweaks and user-facing hints.  Returns 0 on success.
    """

    # default the three output files into ctx.output_dir if not given
    if not ctx.output_config:
        ctx.output_config = os.path.join(ctx.output_dir, CEPH_CONF)
    if not ctx.output_keyring:
        ctx.output_keyring = os.path.join(ctx.output_dir, CEPH_KEYRING)
    if not ctx.output_pub_ssh_key:
        ctx.output_pub_ssh_key = os.path.join(ctx.output_dir, CEPH_PUBKEY)

    # custom ssh keys must be supplied as a pair
    if bool(ctx.ssh_private_key) is not bool(ctx.ssh_public_key):
        raise Error('--ssh-private-key and --ssh-public-key must be provided together or not at all.')

    if ctx.fsid:
        data_dir_base = os.path.join(ctx.data_dir, ctx.fsid)
        if os.path.exists(data_dir_base):
            raise Error(f"A cluster with the same fsid '{ctx.fsid}' already exists.")
        else:
            logger.warning('Specifying an fsid for your cluster offers no advantages and may increase the likelihood of fsid conflicts.')

    # verify output files
    for f in [ctx.output_config, ctx.output_keyring,
              ctx.output_pub_ssh_key]:
        if not ctx.allow_overwrite:
            if os.path.exists(f):
                raise Error('%s already exists; delete or pass '
                            '--allow-overwrite to overwrite' % f)
        dirname = os.path.dirname(f)
        if dirname and not os.path.exists(dirname):
            fname = os.path.basename(f)
            logger.info(f'Creating directory {dirname} for {fname}')
            try:
                # use makedirs to create intermediate missing dirs
                os.makedirs(dirname, 0o755)
            except PermissionError:
                raise Error(f'Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.')

    # any user-supplied conf is re-assimilated later, after the mgr exists
    (user_conf, _) = get_config_and_keyring(ctx)

    if ctx.ssh_user != 'root':
        check_ssh_connectivity(ctx)

    if not ctx.skip_prepare_host:
        command_prepare_host(ctx)
    else:
        logger.info('Skip prepare_host')

    # initial vars
    fsid = ctx.fsid or make_fsid()
    if not is_fsid(fsid):
        raise Error('not an fsid: %s' % fsid)
    logger.info('Cluster fsid: %s' % fsid)

    hostname = get_hostname()
    if '.' in hostname and not ctx.allow_fqdn_hostname:
        raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0]))
    mon_id = ctx.mon_id or hostname
    mgr_id = ctx.mgr_id or generate_service_id()

    lock = FileLock(ctx, fsid)
    lock.acquire()

    (addr_arg, ipv6, mon_network) = prepare_mon_addresses(ctx)
    cluster_network, ipv6_cluster_network = prepare_cluster_network(ctx)

    config = prepare_bootstrap_config(ctx, fsid, addr_arg, ctx.image)

    if not ctx.skip_pull:
        try:
            _pull_image(ctx, ctx.image)
        except UnauthorizedRegistryError:
            err_str = 'Failed to pull container image. Check that correct registry credentials are provided in bootstrap by --registry-url, --registry-username, --registry-password, or supply --registry-json with credentials'
            logger.debug(f'Pulling image for bootstrap on {hostname} failed: {err_str}')
            raise Error(err_str)

    image_ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
    logger.info(f'Ceph version: {image_ver}')

    # refuse to bootstrap a release this cephadm does not know about,
    # unless explicitly overridden
    if not ctx.allow_mismatched_release:
        image_release = image_ver.split()[4]
        if image_release not in \
                [DEFAULT_IMAGE_RELEASE, LATEST_STABLE_RELEASE]:
            raise Error(
                f'Container release {image_release} != cephadm release {DEFAULT_IMAGE_RELEASE};'
                ' please use matching version of cephadm (pass --allow-mismatched-release to continue anyway)'
            )

    logger.info('Extracting ceph user uid/gid from container image...')
    (uid, gid) = extract_uid_gid(ctx)

    # create some initial keys
    (mon_key, mgr_key, admin_key, bootstrap_keyring, admin_keyring) = create_initial_keys(ctx, uid, gid, mgr_id)

    monmap = create_initial_monmap(ctx, uid, gid, fsid, mon_id, addr_arg)
    (mon_dir, log_dir) = prepare_create_mon(ctx, uid, gid, fsid, mon_id,
                                            bootstrap_keyring.name, monmap.name)

    # mon config is root-read-only and owned by the ceph uid/gid
    with open(mon_dir + '/config', 'w') as f:
        os.fchown(f.fileno(), uid, gid)
        os.fchmod(f.fileno(), 0o600)
        f.write(config)

    make_var_run(ctx, fsid, uid, gid)
    create_mon(ctx, uid, gid, fsid, mon_id)

    # config to issue various CLI commands
    tmp_config = write_tmp(config, uid, gid)

    # a CLI helper to reduce our typing
    def cli(cmd, extra_mounts={}, timeout=DEFAULT_TIMEOUT):
        # type: (List[str], Dict[str, str], Optional[int]) -> str
        # runs 'ceph <cmd>' inside the image with admin keyring + tmp conf
        mounts = {
            log_dir: '/var/log/ceph:z',
            admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
            tmp_config.name: '/etc/ceph/ceph.conf:z',
        }
        for k, v in extra_mounts.items():
            mounts[k] = v
        timeout = timeout or ctx.timeout
        return CephContainer(
            ctx,
            image=ctx.image,
            entrypoint='/usr/bin/ceph',
            args=cmd,
            volume_mounts=mounts,
        ).run(timeout=timeout)

    wait_for_mon(ctx, mon_id, mon_dir, admin_keyring.name, tmp_config.name)

    finish_bootstrap_config(ctx, fsid, config, mon_id, mon_dir,
                            mon_network, ipv6, cli,
                            cluster_network, ipv6_cluster_network)

    # output files
    with open(ctx.output_keyring, 'w') as f:
        os.fchmod(f.fileno(), 0o600)
        f.write('[client.admin]\n'
                '\tkey = ' + admin_key + '\n')
    logger.info('Wrote keyring to %s' % ctx.output_keyring)

    # create mgr
    create_mgr(ctx, uid, gid, fsid, mgr_id, mgr_key, config, cli)

    if user_conf:
        # user given config settings were already assimilated earlier
        # but if the given settings contained any attributes in
        # the mgr (e.g. mgr/cephadm/container_image_prometheus)
        # they don't seem to be stored if there isn't a mgr yet.
        # Since re-assimilating the same conf settings should be
        # idempotent we can just do it again here.
        with tempfile.NamedTemporaryFile(buffering=0) as tmp:
            tmp.write(user_conf.encode('utf-8'))
            cli(['config', 'assimilate-conf',
                 '-i', '/var/lib/ceph/user.conf'],
                {tmp.name: '/var/lib/ceph/user.conf:z'})

    # wait for mgr to restart (after enabling a module)
    def wait_for_mgr_restart() -> None:
        # first get latest mgrmap epoch from the mon. try newer 'mgr
        # stat' command first, then fall back to 'mgr dump' if
        # necessary
        try:
            j = json_loads_retry(lambda: cli(['mgr', 'stat']))
        except Exception:
            j = json_loads_retry(lambda: cli(['mgr', 'dump']))
        epoch = j['epoch']

        # wait for mgr to have it
        logger.info('Waiting for the mgr to restart...')

        def mgr_has_latest_epoch():
            # type: () -> bool
            try:
                out = cli(['tell', 'mgr', 'mgr_status'])
                j = json.loads(out)
                return j['mgrmap_epoch'] >= epoch
            except Exception as e:
                logger.debug('tell mgr mgr_status failed: %s' % e)
                return False
        is_available(ctx, 'mgr epoch %d' % epoch, mgr_has_latest_epoch)

    enable_cephadm_mgr_module(cli, wait_for_mgr_restart)

    # ssh
    if not ctx.skip_ssh:
        prepare_ssh(ctx, cli, wait_for_mgr_restart)

    # persist registry credentials for the orchestrator, if all were given
    if ctx.registry_url and ctx.registry_username and ctx.registry_password:
        registry_credentials = {'url': ctx.registry_url, 'username': ctx.registry_username, 'password': ctx.registry_password}
        cli(['config-key', 'set', 'mgr/cephadm/registry_credentials', json.dumps(registry_credentials)])

    cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(ctx.container_init), '--force'])

    if not ctx.skip_dashboard:
        prepare_dashboard(ctx, uid, gid, cli, wait_for_mgr_restart)

    if ctx.output_config == CEPH_DEFAULT_CONF and not ctx.skip_admin_label and not ctx.no_minimize_config:
        logger.info('Enabling client.admin keyring and conf on hosts with "admin" label')
        try:
            cli(['orch', 'client-keyring', 'set', 'client.admin', 'label:_admin'])
            cli(['orch', 'host', 'label', 'add', get_hostname(), '_admin'])
        except Exception:
            logger.info('Unable to set up "admin" label; assuming older version of Ceph')

    if ctx.apply_spec:
        logger.info('Applying %s to cluster' % ctx.apply_spec)
        # copy ssh key to hosts in spec file
        with open(ctx.apply_spec) as f:
            try:
                for spec in parse_yaml_objs(f):
                    if spec.get('service_type') == 'host':
                        _distribute_ssh_keys(ctx, spec, hostname)
            except ValueError:
                logger.info('Unable to parse %s succesfully' % ctx.apply_spec)

        mounts = {}
        mounts[pathify(ctx.apply_spec)] = '/tmp/spec.yml:ro'
        try:
            out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
            logger.info(out)
        except Exception:
            logger.info('\nApplying %s to cluster failed!\n' % ctx.apply_spec)

    save_cluster_config(ctx, uid, gid, fsid)

    # enable autotune for osd_memory_target
    logger.info('Enabling autotune for osd_memory_target')
    cli(['config', 'set', 'osd', 'osd_memory_target_autotune', 'true'])

    # Notify the Dashboard to show the 'Expand cluster' page on first log in.
    cli(['config-key', 'set', 'mgr/dashboard/cluster/status', 'INSTALLED'])

    logger.info('You can access the Ceph CLI as following in case of multi-cluster or non-default config:\n\n'
                '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
                    sys.argv[0],
                    fsid,
                    ctx.output_config,
                    ctx.output_keyring))

    logger.info('Or, if you are only running a single cluster on this host:\n\n\tsudo %s shell \n' % (sys.argv[0]))

    logger.info('Please consider enabling telemetry to help improve Ceph:\n\n'
                '\tceph telemetry on\n\n'
                'For more information see:\n\n'
                '\thttps://docs.ceph.com/docs/master/mgr/telemetry/\n')
    logger.info('Bootstrap complete.')
    return 0
5591 | ||
5592 | ################################## | |
5593 | ||
f67539c2 | 5594 | |
522d829b | 5595 | def command_registry_login(ctx: CephadmContext) -> int: |
f67539c2 TL |
5596 | if ctx.registry_json: |
5597 | logger.info('Pulling custom registry login info from %s.' % ctx.registry_json) | |
5598 | d = get_parm(ctx.registry_json) | |
f6b5b4d7 | 5599 | if d.get('url') and d.get('username') and d.get('password'): |
f67539c2 TL |
5600 | ctx.registry_url = d.get('url') |
5601 | ctx.registry_username = d.get('username') | |
5602 | ctx.registry_password = d.get('password') | |
5603 | registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password) | |
f6b5b4d7 | 5604 | else: |
f67539c2 TL |
5605 | raise Error('json provided for custom registry login did not include all necessary fields. ' |
5606 | 'Please setup json file as\n' | |
5607 | '{\n' | |
5608 | ' "url": "REGISTRY_URL",\n' | |
5609 | ' "username": "REGISTRY_USERNAME",\n' | |
5610 | ' "password": "REGISTRY_PASSWORD"\n' | |
5611 | '}\n') | |
5612 | elif ctx.registry_url and ctx.registry_username and ctx.registry_password: | |
5613 | registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password) | |
f6b5b4d7 | 5614 | else: |
f67539c2 TL |
5615 | raise Error('Invalid custom registry arguments received. To login to a custom registry include ' |
5616 | '--registry-url, --registry-username and --registry-password ' | |
5617 | 'options or --registry-json option') | |
f6b5b4d7 TL |
5618 | return 0 |
5619 | ||
f67539c2 | 5620 | |
522d829b | 5621 | def registry_login(ctx: CephadmContext, url: Optional[str], username: Optional[str], password: Optional[str]) -> None: |
f67539c2 | 5622 | logger.info('Logging into custom registry.') |
f6b5b4d7 | 5623 | try: |
f67539c2 TL |
5624 | engine = ctx.container_engine |
5625 | cmd = [engine.path, 'login', | |
5626 | '-u', username, '-p', password, | |
5627 | url] | |
5628 | if isinstance(engine, Podman): | |
5629 | cmd.append('--authfile=/etc/ceph/podman-auth.json') | |
5630 | out, _, _ = call_throws(ctx, cmd) | |
5631 | if isinstance(engine, Podman): | |
5632 | os.chmod('/etc/ceph/podman-auth.json', 0o600) | |
5633 | except Exception: | |
5634 | raise Error('Failed to login to custom registry @ %s as %s with given password' % (ctx.registry_url, ctx.registry_username)) | |
f6b5b4d7 TL |
5635 | |
5636 | ################################## | |
5637 | ||
5638 | ||
f67539c2 TL |
def extract_uid_gid_monitoring(ctx, daemon_type):
    # type: (CephadmContext, str) -> Tuple[int, int]
    """Return the (uid, gid) a monitoring daemon runs as inside its image."""

    # node-exporter runs as the fixed 'nobody' uid/gid
    if daemon_type == 'node-exporter':
        return 65534, 65534

    # for the others, probe ownership of a well-known path inside the image
    probe_path = {
        'prometheus': '/etc/prometheus',
        'grafana': '/var/lib/grafana',
        'loki': '/etc/loki',
        'promtail': '/etc/promtail',
        'alertmanager': ['/etc/alertmanager', '/etc/prometheus'],
    }
    if daemon_type not in probe_path:
        raise Error('{} not implemented yet'.format(daemon_type))
    uid, gid = extract_uid_gid(ctx, file_path=probe_path[daemon_type])
    return uid, gid
5657 | ||
5658 | ||
20effc67 TL |
def get_container_with_extra_args(ctx: CephadmContext,
                                  fsid: str, daemon_type: str, daemon_id: Union[int, str],
                                  privileged: bool = False,
                                  ptrace: bool = False,
                                  container_args: Optional[List[str]] = None) -> 'CephContainer':
    """Wrapper around get_container that appends any user-supplied
    extra podman/docker container arguments from the context.

    Used when deploying daemons that may carry additional engine args.
    """
    ctr = get_container(ctx, fsid, daemon_type, daemon_id, privileged, ptrace, container_args)
    extra = ctx.extra_container_args if 'extra_container_args' in ctx else None
    if extra:
        ctr.container_args.extend(extra)
    return ctr
5670 | ||
5671 | ||
@default_image
def command_deploy(ctx):
    # type: (CephadmContext) -> None
    """Deploy, redeploy or reconfigure a single daemon on this host.

    ctx.name is '<daemon_type>.<daemon_id>'.  Dispatches on daemon type:
    core ceph daemons, monitoring stack, nfs-ganesha, iscsi, haproxy,
    keepalived, custom containers, the cephadm agent and snmp-gateway.
    """
    daemon_type, daemon_id = ctx.name.split('.', 1)

    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    if daemon_type not in get_supported_daemons():
        raise Error('daemon type %s not recognized' % daemon_type)

    # a daemon whose systemd unit or container is already running is a
    # redeploy rather than a fresh deploy
    redeploy = False
    unit_name = get_unit_name(ctx.fsid, daemon_type, daemon_id)
    (_, state, _) = check_unit(ctx, unit_name)
    if state == 'running' or is_container_running(ctx, CephContainer.for_daemon(ctx, ctx.fsid, daemon_type, daemon_id, 'bash')):
        redeploy = True

    if ctx.reconfig:
        logger.info('%s daemon %s ...' % ('Reconfig', ctx.name))
    elif redeploy:
        logger.info('%s daemon %s ...' % ('Redeploy', ctx.name))
    else:
        logger.info('%s daemon %s ...' % ('Deploy', ctx.name))

    # Migrate sysctl conf files from /usr/lib to /etc
    migrate_sysctl_dir(ctx, ctx.fsid)

    # Get and check ports explicitly required to be opened
    daemon_ports = []  # type: List[int]

    # only check port in use if not reconfig or redeploy since service
    # we are redeploying/reconfiguring will already be using the port
    if not ctx.reconfig and not redeploy:
        if ctx.tcp_ports:
            daemon_ports = list(map(int, ctx.tcp_ports.split()))

    if daemon_type in Ceph.daemons:
        config, keyring = get_config_and_keyring(ctx)
        uid, gid = extract_uid_gid(ctx)
        make_var_run(ctx, ctx.fsid, uid, gid)

        c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id,
                                          ptrace=ctx.allow_ptrace)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      osd_fsid=ctx.osd_fsid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type in Monitoring.components:
        # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
        # Default Checks
        # make sure provided config-json is sufficient
        config = get_parm(ctx.config_json)  # type: ignore
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())
        required_args = Monitoring.components[daemon_type].get('config-json-args', list())
        if required_files:
            if not config or not all(c in config.get('files', {}).keys() for c in required_files):  # type: ignore
                raise Error('{} deployment requires config-json which must '
                            'contain file content for {}'.format(daemon_type.capitalize(), ', '.join(required_files)))
        if required_args:
            if not config or not all(c in config.keys() for c in required_args):  # type: ignore
                raise Error('{} deployment requires config-json which must '
                            'contain arg for {}'.format(daemon_type.capitalize(), ', '.join(required_args)))

        uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
        c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == NFSGanesha.daemon_type:
        # fall back to the daemon's default port map on fresh deploys
        if not ctx.reconfig and not redeploy and not daemon_ports:
            daemon_ports = list(NFSGanesha.port_map.values())

        config, keyring = get_config_and_keyring(ctx)
        # TODO: extract ganesha uid/gid (997, 994) ?
        uid, gid = extract_uid_gid(ctx)
        c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CephIscsi.daemon_type:
        config, keyring = get_config_and_keyring(ctx)
        uid, gid = extract_uid_gid(ctx)
        c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, ctx.fsid, daemon_id)
        uid, gid = haproxy.extract_uid_gid_haproxy()
        c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == Keepalived.daemon_type:
        keepalived = Keepalived.init(ctx, ctx.fsid, daemon_id)
        uid, gid = keepalived.extract_uid_gid_keepalived()
        c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, ctx.fsid, daemon_id)
        if not ctx.reconfig and not redeploy:
            daemon_ports.extend(cc.ports)
        c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id,
                                          privileged=cc.privileged,
                                          ptrace=ctx.allow_ptrace)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c,
                      uid=cc.uid, gid=cc.gid, config=None,
                      keyring=None, reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CephadmAgent.daemon_type:
        # get current user gid and uid
        uid = os.getuid()
        gid = os.getgid()
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, None,
                      uid, gid, ports=daemon_ports)

    elif daemon_type == SNMPGateway.daemon_type:
        sc = SNMPGateway.init(ctx, ctx.fsid, daemon_id)
        c = get_container_with_extra_args(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c,
                      sc.uid, sc.gid,
                      ports=daemon_ports)

    else:
        raise Error('daemon type {} not implemented in command_deploy function'
                    .format(daemon_type))
9f95a23c TL |
5810 | |
5811 | ################################## | |
5812 | ||
f6b5b4d7 | 5813 | |
@infer_image
def command_run(ctx):
    # type: (CephadmContext) -> int
    """Run a daemon's container in the foreground; return its exit code."""
    daemon_type, daemon_id = ctx.name.split('.', 1)
    ctr = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
    return call_timeout(ctx, ctr.run_cmd(), ctx.timeout)
9f95a23c TL |
5821 | |
5822 | ################################## | |
5823 | ||
f6b5b4d7 | 5824 | |
@infer_fsid
@infer_config
@infer_image
@validate_fsid
def command_shell(ctx):
    # type: (CephadmContext) -> int
    """Launch an interactive shell (or run ctx.command) inside a ceph
    container with the cluster's conf, keyring and daemon mounts wired in.

    Returns the exit code of the containerized command.
    """
    cp = read_config(ctx.config)
    if cp.has_option('global', 'fsid') and \
            cp.get('global', 'fsid') != ctx.fsid:
        raise Error('fsid does not match ceph.conf')

    # pick which daemon's mount set to use; a bare type (no '.') means
    # "that type, any id", and no name at all defaults to osd
    if ctx.name:
        if '.' in ctx.name:
            (daemon_type, daemon_id) = ctx.name.split('.', 1)
        else:
            daemon_type = ctx.name
            daemon_id = None
    else:
        daemon_type = 'osd'  # get the most mounts
        daemon_id = None

    if ctx.fsid and daemon_type in Ceph.daemons:
        make_log_dir(ctx, ctx.fsid)

    if daemon_id and not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    # in case a dedicated keyring for the specified fsid is found we use it.
    # Otherwise, use /etc/ceph files by default, if present.  We do this instead of
    # making these defaults in the arg parser because we don't want an error
    # if they don't exist.
    if not ctx.keyring:
        keyring_file = f'{ctx.data_dir}/{ctx.fsid}/{CEPH_CONF_DIR}/{CEPH_KEYRING}'
        if os.path.exists(keyring_file):
            ctx.keyring = keyring_file
        elif os.path.exists(CEPH_DEFAULT_KEYRING):
            ctx.keyring = CEPH_DEFAULT_KEYRING

    container_args: List[str] = ['-i']
    mounts = get_container_mounts(ctx, ctx.fsid, daemon_type, daemon_id,
                                  no_config=True if ctx.config else False)
    binds = get_container_binds(ctx, ctx.fsid, daemon_type, daemon_id)
    if ctx.config:
        mounts[pathify(ctx.config)] = '/etc/ceph/ceph.conf:z'
    if ctx.keyring:
        mounts[pathify(ctx.keyring)] = '/etc/ceph/ceph.keyring:z'
    if ctx.mount:
        # each --mount is 'src[:dst[:options]]'; bare src lands in /mnt
        for _mount in ctx.mount:
            split_src_dst = _mount.split(':')
            mount = pathify(split_src_dst[0])
            filename = os.path.basename(split_src_dst[0])
            if len(split_src_dst) > 1:
                dst = split_src_dst[1]
                if len(split_src_dst) == 3:
                    dst = '{}:{}'.format(dst, split_src_dst[2])
                mounts[mount] = dst
            else:
                mounts[mount] = '/mnt/{}'.format(filename)
    if ctx.command:
        command = ctx.command
    else:
        command = ['bash']
    container_args += [
        '-t',
        '-e', 'LANG=C',
        '-e', 'PS1=%s' % CUSTOM_PS1,
    ]
    if ctx.fsid:
        # give the shell a persistent /root seeded from /etc/skel
        home = os.path.join(ctx.data_dir, ctx.fsid, 'home')
        if not os.path.exists(home):
            logger.debug('Creating root home at %s' % home)
            makedirs(home, 0, 0, 0o660)
            if os.path.exists('/etc/skel'):
                for f in os.listdir('/etc/skel'):
                    if f.startswith('.bash'):
                        shutil.copyfile(os.path.join('/etc/skel', f),
                                        os.path.join(home, f))
        mounts[home] = '/root'

    # additional user-requested volumes ('host:container')
    for i in ctx.volume:
        a, b = i.split(':', 1)
        mounts[a] = b

    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='doesnotmatter',
        args=[],
        container_args=container_args,
        volume_mounts=mounts,
        bind_mounts=binds,
        envs=ctx.env,
        privileged=True)
    command = c.shell_cmd(command)

    return call_timeout(ctx, command, ctx.timeout)
9f95a23c TL |
5921 | |
5922 | ################################## | |
5923 | ||
f6b5b4d7 | 5924 | |
@infer_fsid
def command_enter(ctx):
    # type: (CephadmContext) -> int
    """Exec a command (default: sh) inside a running daemon's container."""
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')
    daemon_type, daemon_id = ctx.name.split('.', 1)
    command = ctx.command if ctx.command else ['sh']
    container_args = [
        '-i',
        '-t',
        '-e', 'LANG=C',
        '-e', 'PS1=%s' % CUSTOM_PS1,
    ]  # type: List[str]
    ctr = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='doesnotmatter',
        container_args=container_args,
        cname='ceph-%s-%s.%s' % (ctx.fsid, daemon_type, daemon_id),
    )
    return call_timeout(ctx, ctr.exec_cmd(command), ctx.timeout)
9f95a23c TL |
5950 | |
5951 | ################################## | |
5952 | ||
f6b5b4d7 | 5953 | |
9f95a23c TL |
@infer_fsid
@infer_image
@validate_fsid
def command_ceph_volume(ctx):
    # type: (CephadmContext) -> None
    """Run a ceph-volume command inside a privileged container, mounting
    temporary copies of the cluster config and bootstrap-osd keyring."""
    cp = read_config(ctx.config)
    if cp.has_option('global', 'fsid') and cp.get('global', 'fsid') != ctx.fsid:
        raise Error('fsid does not match ceph.conf')

    if ctx.fsid:
        make_log_dir(ctx, ctx.fsid)

        lock = FileLock(ctx, ctx.fsid)
        lock.acquire()

    (uid, gid) = (0, 0)  # ceph-volume runs as root
    mounts = get_container_mounts(ctx, ctx.fsid, 'osd', None)

    # keep references to the NamedTemporaryFiles so they live for the
    # duration of the container run
    tmp_config = None
    tmp_keyring = None

    (config, keyring) = get_config_and_keyring(ctx)

    if config:
        tmp_config = write_tmp(config, uid, gid)
        mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'
    if keyring:
        tmp_keyring = write_tmp(keyring, uid, gid)
        mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'

    c = get_ceph_volume_container(
        ctx,
        envs=ctx.env,
        args=ctx.command,
        volume_mounts=mounts,
    )

    out, err, code = call_throws(ctx, c.run_cmd())
    if not code:
        print(out)
5998 | ||
5999 | ################################## | |
6000 | ||
f6b5b4d7 | 6001 | |
@infer_fsid
def command_unit(ctx):
    # type: (CephadmContext) -> int
    """Pass a systemctl verb (start/stop/restart/...) through to the
    daemon's systemd unit; return systemctl's exit code."""
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    _, _, rc = call(
        ctx,
        ['systemctl', ctx.command, unit_name],
        verbosity=CallVerbosity.VERBOSE,
        desc='',
    )
    return rc
9f95a23c TL |
6017 | |
6018 | ################################## | |
6019 | ||
f6b5b4d7 | 6020 | |
@infer_fsid
def command_logs(ctx):
    # type: (CephadmContext) -> None
    """Show journalctl output for a daemon's systemd unit, forwarding any
    extra arguments to journalctl."""
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    cmd = [find_program('journalctl'), '-u', unit_name]
    if ctx.command:
        cmd.extend(ctx.command)

    # call this directly, without our wrapper, so that we get an unmolested
    # stdout with logger prefixing.
    logger.debug('Running command: %s' % ' '.join(cmd))
    subprocess.call(cmd, env=os.environ.copy())  # type: ignore
9f95a23c TL |
6038 | |
6039 | ################################## | |
6040 | ||
f6b5b4d7 | 6041 | |
def list_networks(ctx):
    # type: (CephadmContext) -> Dict[str, Dict[str, Set[str]]]
    """Return {network -> {interface -> set(addresses)}} for IPv4 and IPv6.

    sadly, 18.04's iproute2 4.15.0-2ubun doesn't support the -j flag,
    so we'll need to use a regex to parse 'ip' command output.
    """
    nets = _list_ipv4_networks(ctx)
    nets.update(_list_ipv6_networks(ctx))
    return nets
6054 | ||
6055 | ||
def _list_ipv4_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]:
    """Run `ip route ls` and parse its output into the network map."""
    ip_cmd: Optional[str] = find_executable('ip')
    if not ip_cmd:
        raise FileNotFoundError("unable to find 'ip' command")
    out, _, _ = call_throws(ctx, [ip_cmd, 'route', 'ls'])
    return _parse_ipv4_route(out)
6062 | ||
9f95a23c | 6063 | |
522d829b TL |
6064 | def _parse_ipv4_route(out: str) -> Dict[str, Dict[str, Set[str]]]: |
6065 | r = {} # type: Dict[str, Dict[str, Set[str]]] | |
33c7a0ef | 6066 | p = re.compile(r'^(\S+) (?:via \S+)? ?dev (\S+) (.*)scope link (.*)src (\S+)') |
9f95a23c TL |
6067 | for line in out.splitlines(): |
6068 | m = p.findall(line) | |
6069 | if not m: | |
6070 | continue | |
6071 | net = m[0][0] | |
33c7a0ef TL |
6072 | if '/' not in net: # aggregate /32 mask for single host sub-networks |
6073 | net += '/32' | |
f67539c2 TL |
6074 | iface = m[0][1] |
6075 | ip = m[0][4] | |
9f95a23c | 6076 | if net not in r: |
f67539c2 TL |
6077 | r[net] = {} |
6078 | if iface not in r[net]: | |
522d829b TL |
6079 | r[net][iface] = set() |
6080 | r[net][iface].add(ip) | |
9f95a23c TL |
6081 | return r |
6082 | ||
f6b5b4d7 | 6083 | |
def _list_ipv6_networks(ctx: CephadmContext) -> Dict[str, Dict[str, Set[str]]]:
    """Run `ip -6 route ls` and `ip -6 addr ls`, then combine them into
    the network map."""
    ip_cmd: Optional[str] = find_executable('ip')
    if not ip_cmd:
        raise FileNotFoundError("unable to find 'ip' command")
    routes, _, _ = call_throws(ctx, [ip_cmd, '-6', 'route', 'ls'])
    ips, _, _ = call_throws(ctx, [ip_cmd, '-6', 'addr', 'ls'])
    return _parse_ipv6_route(routes, ips)
6091 | ||
6092 | ||
522d829b TL |
6093 | def _parse_ipv6_route(routes: str, ips: str) -> Dict[str, Dict[str, Set[str]]]: |
6094 | r = {} # type: Dict[str, Dict[str, Set[str]]] | |
f6b5b4d7 TL |
6095 | route_p = re.compile(r'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$') |
6096 | ip_p = re.compile(r'^\s+inet6 (\S+)/(.*)scope (.*)$') | |
f67539c2 | 6097 | iface_p = re.compile(r'^(\d+): (\S+): (.*)$') |
f6b5b4d7 TL |
6098 | for line in routes.splitlines(): |
6099 | m = route_p.findall(line) | |
6100 | if not m or m[0][0].lower() == 'default': | |
6101 | continue | |
6102 | net = m[0][0] | |
33c7a0ef TL |
6103 | if '/' not in net: # aggregate /128 mask for single host sub-networks |
6104 | net += '/128' | |
f67539c2 | 6105 | iface = m[0][1] |
33c7a0ef TL |
6106 | if iface == 'lo': # skip loopback devices |
6107 | continue | |
f6b5b4d7 | 6108 | if net not in r: |
f67539c2 TL |
6109 | r[net] = {} |
6110 | if iface not in r[net]: | |
522d829b | 6111 | r[net][iface] = set() |
f6b5b4d7 | 6112 | |
f67539c2 | 6113 | iface = None |
f6b5b4d7 TL |
6114 | for line in ips.splitlines(): |
6115 | m = ip_p.findall(line) | |
6116 | if not m: | |
f67539c2 TL |
6117 | m = iface_p.findall(line) |
6118 | if m: | |
6119 | # drop @... suffix, if present | |
6120 | iface = m[0][1].split('@')[0] | |
f6b5b4d7 TL |
6121 | continue |
6122 | ip = m[0][0] | |
6123 | # find the network it belongs to | |
6124 | net = [n for n in r.keys() | |
f67539c2 | 6125 | if ipaddress.ip_address(ip) in ipaddress.ip_network(n)] |
20effc67 | 6126 | if net and iface in r[net[0]]: |
f67539c2 | 6127 | assert(iface) |
522d829b | 6128 | r[net[0]][iface].add(ip) |
f6b5b4d7 TL |
6129 | |
6130 | return r | |
6131 | ||
6132 | ||
f67539c2 TL |
def command_list_networks(ctx):
    # type: (CephadmContext) -> None
    """Print this host's network/interface/address map as JSON."""
    nets = list_networks(ctx)

    def serialize_sets(obj: Any) -> Any:
        # JSON has no set type; render address sets as lists
        return list(obj) if isinstance(obj, set) else obj

    print(json.dumps(nets, indent=4, default=serialize_sets))
9f95a23c TL |
6141 | |
6142 | ################################## | |
6143 | ||
f6b5b4d7 | 6144 | |
f67539c2 TL |
def command_ls(ctx):
    # type: (CephadmContext) -> None
    """Print a JSON listing of all daemons discovered on this host."""
    daemons = list_daemons(ctx, detail=not ctx.no_detail,
                           legacy_dir=ctx.legacy_dir)
    print(json.dumps(daemons, indent=4))
6150 | ||
f6b5b4d7 | 6151 | |
f67539c2 TL |
def with_units_to_int(v: str) -> int:
    """Convert a size string such as '123', '4KiB', '1.5M' or '2G' to an
    integer byte count (binary multipliers)."""
    # strip a trailing 'iB' or 'B' unit marker, if any
    if v.endswith('iB'):
        v = v[:-2]
    elif v.endswith('B'):
        v = v[:-1]
    multipliers = {
        'K': 1024,
        'M': 1024 ** 2,
        'G': 1024 ** 3,
        'T': 1024 ** 4,
    }
    mult = multipliers.get(v[-1].upper(), 1)
    if mult != 1:
        v = v[:-1]
    return int(float(v) * mult)
6171 | ||
6172 | ||
def list_daemons(ctx, detail=True, legacy_dir=None):
    # type: (CephadmContext, bool, Optional[str]) -> List[Dict[str, str]]
    """Return a list of dicts describing every daemon found on this host.

    Two layouts are scanned under ctx.data_dir:
      * 'legacy' daemons under <data_dir>/{mon,osd,mds,mgr}/<cluster>-<id>
      * 'cephadm:v1' daemons under <data_dir>/<fsid>/<type>.<id>
    With detail=True, also gathers systemd unit state, container/image
    info, software versions, and memory/cpu usage.  legacy_dir, when set,
    is prepended to host paths (used during adoption/testing).
    """
    host_version: Optional[str] = None
    ls = []
    container_path = ctx.container_engine.path

    data_dir = ctx.data_dir
    if legacy_dir is not None:
        data_dir = os.path.abspath(legacy_dir + data_dir)

    # keep track of ceph versions we see
    seen_versions = {}  # type: Dict[str, Optional[str]]

    # keep track of image digests
    seen_digests = {}  # type: Dict[str, List[str]]

    # keep track of memory and cpu usage we've seen
    seen_memusage = {}  # type: Dict[str, int]
    seen_cpuperc = {}  # type: Dict[str, str]
    out, err, code = call(
        ctx,
        [container_path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
        verbosity=CallVerbosity.DEBUG
    )
    seen_memusage_cid_len, seen_memusage = _parse_mem_usage(code, out)

    out, err, code = call(
        ctx,
        [container_path, 'stats', '--format', '{{.ID}},{{.CPUPerc}}', '--no-stream'],
        verbosity=CallVerbosity.DEBUG
    )
    seen_cpuperc_cid_len, seen_cpuperc = _parse_cpu_perc(code, out)

    # /var/lib/ceph
    if os.path.exists(data_dir):
        for i in os.listdir(data_dir):
            if i in ['mon', 'osd', 'mds', 'mgr']:
                # legacy layout: <data_dir>/<type>/<cluster>-<id>
                daemon_type = i
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '-' not in j:
                        continue
                    (cluster, daemon_id) = j.split('-', 1)
                    fsid = get_legacy_daemon_fsid(ctx,
                                                  cluster, daemon_type, daemon_id,
                                                  legacy_dir=legacy_dir)
                    legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
                    val: Dict[str, Any] = {
                        'style': 'legacy',
                        'name': '%s.%s' % (daemon_type, daemon_id),
                        'fsid': fsid if fsid is not None else 'unknown',
                        'systemd_unit': legacy_unit_name,
                    }
                    if detail:
                        (val['enabled'], val['state'], _) = check_unit(ctx, legacy_unit_name)
                        # legacy daemons run the host's ceph binary; probe it once
                        if not host_version:
                            try:
                                out, err, code = call(ctx,
                                                      ['ceph', '-v'],
                                                      verbosity=CallVerbosity.DEBUG)
                                if not code and out.startswith('ceph version '):
                                    host_version = out.split(' ')[2]
                            except Exception:
                                pass
                        val['host_version'] = host_version
                    ls.append(val)
            elif is_fsid(i):
                # cephadm layout: <data_dir>/<fsid>/<type>.<id>
                fsid = str(i)  # convince mypy that fsid is a str here
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)):
                        name = j
                        (daemon_type, daemon_id) = j.split('.', 1)
                        unit_name = get_unit_name(fsid,
                                                  daemon_type,
                                                  daemon_id)
                    else:
                        continue
                    val = {
                        'style': 'cephadm:v1',
                        'name': name,
                        'fsid': fsid,
                        'systemd_unit': unit_name,
                    }
                    if detail:
                        # get container id
                        (val['enabled'], val['state'], _) = check_unit(ctx, unit_name)
                        container_id = None
                        image_name = None
                        image_id = None
                        image_digests = None
                        version = None
                        start_stamp = None

                        out, err, code = get_container_stats(ctx, container_path, fsid, daemon_type, daemon_id)
                        if not code:
                            (container_id, image_name, image_id, start,
                             version) = out.strip().split(',')
                            image_id = normalize_container_id(image_id)
                            daemon_type = name.split('.', 1)[0]
                            start_stamp = try_convert_datetime(start)

                            # collect digests for this image id
                            image_digests = seen_digests.get(image_id)
                            if not image_digests:
                                out, err, code = call(
                                    ctx,
                                    [
                                        container_path, 'image', 'inspect', image_id,
                                        '--format', '{{.RepoDigests}}',
                                    ],
                                    verbosity=CallVerbosity.DEBUG)
                                if not code:
                                    image_digests = list(set(map(
                                        normalize_image_digest,
                                        out.strip()[1:-1].split(' '))))
                                    seen_digests[image_id] = image_digests

                            # identify software version inside the container (if we can)
                            if not version or '.' not in version:
                                version = seen_versions.get(image_id, None)
                                if daemon_type == NFSGanesha.daemon_type:
                                    version = NFSGanesha.get_version(ctx, container_id)
                                if daemon_type == CephIscsi.daemon_type:
                                    version = CephIscsi.get_version(ctx, container_id)
                                elif not version:
                                    # fall back to exec'ing a version command in
                                    # the container, keyed by daemon type
                                    if daemon_type in Ceph.daemons:
                                        out, err, code = call(ctx,
                                                              [container_path, 'exec', container_id,
                                                               'ceph', '-v'],
                                                              verbosity=CallVerbosity.DEBUG)
                                        if not code and \
                                           out.startswith('ceph version '):
                                            version = out.split(' ')[2]
                                            seen_versions[image_id] = version
                                    elif daemon_type == 'grafana':
                                        out, err, code = call(ctx,
                                                              [container_path, 'exec', container_id,
                                                               'grafana-server', '-v'],
                                                              verbosity=CallVerbosity.DEBUG)
                                        if not code and \
                                           out.startswith('Version '):
                                            version = out.split(' ')[1]
                                            seen_versions[image_id] = version
                                    elif daemon_type in ['prometheus',
                                                         'alertmanager',
                                                         'node-exporter',
                                                         'loki',
                                                         'promtail']:
                                        version = Monitoring.get_version(ctx, container_id, daemon_type)
                                        seen_versions[image_id] = version
                                    elif daemon_type == 'haproxy':
                                        out, err, code = call(ctx,
                                                              [container_path, 'exec', container_id,
                                                               'haproxy', '-v'],
                                                              verbosity=CallVerbosity.DEBUG)
                                        if not code and \
                                           out.startswith('HA-Proxy version '):
                                            version = out.split(' ')[2]
                                            seen_versions[image_id] = version
                                    elif daemon_type == 'keepalived':
                                        # keepalived prints its version on stderr
                                        out, err, code = call(ctx,
                                                              [container_path, 'exec', container_id,
                                                               'keepalived', '--version'],
                                                              verbosity=CallVerbosity.DEBUG)
                                        if not code and \
                                           err.startswith('Keepalived '):
                                            version = err.split(' ')[1]
                                            if version[0] == 'v':
                                                version = version[1:]
                                            seen_versions[image_id] = version
                                    elif daemon_type == CustomContainer.daemon_type:
                                        # Because a custom container can contain
                                        # everything, we do not know which command
                                        # to execute to get the version.
                                        pass
                                    elif daemon_type == SNMPGateway.daemon_type:
                                        version = SNMPGateway.get_version(ctx, fsid, daemon_id)
                                        seen_versions[image_id] = version
                                    else:
                                        logger.warning('version for unknown daemon type %s' % daemon_type)
                        else:
                            # container not inspectable (e.g. not running); fall
                            # back to the image name recorded at deploy time
                            vfile = os.path.join(data_dir, fsid, j, 'unit.image')  # type: ignore
                            try:
                                with open(vfile, 'r') as f:
                                    image_name = f.read().strip() or None
                            except IOError:
                                pass

                        # unit.meta?
                        mfile = os.path.join(data_dir, fsid, j, 'unit.meta')  # type: ignore
                        try:
                            with open(mfile, 'r') as f:
                                meta = json.loads(f.read())
                            val.update(meta)
                        except IOError:
                            pass

                        val['container_id'] = container_id
                        val['container_image_name'] = image_name
                        val['container_image_id'] = image_id
                        val['container_image_digests'] = image_digests
                        if container_id:
                            val['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len])
                            val['cpu_percentage'] = seen_cpuperc.get(container_id[0:seen_cpuperc_cid_len])
                        val['version'] = version
                        val['started'] = start_stamp
                        val['created'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.created')
                        )
                        val['deployed'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.image'))
                        val['configured'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.configured'))
                    ls.append(val)

    return ls
6388 | ||
6389 | ||
522d829b TL |
6390 | def _parse_mem_usage(code: int, out: str) -> Tuple[int, Dict[str, int]]: |
6391 | # keep track of memory usage we've seen | |
6392 | seen_memusage = {} # type: Dict[str, int] | |
6393 | seen_memusage_cid_len = 0 | |
6394 | if not code: | |
6395 | for line in out.splitlines(): | |
6396 | (cid, usage) = line.split(',') | |
6397 | (used, limit) = usage.split(' / ') | |
6398 | try: | |
6399 | seen_memusage[cid] = with_units_to_int(used) | |
6400 | if not seen_memusage_cid_len: | |
6401 | seen_memusage_cid_len = len(cid) | |
6402 | except ValueError: | |
6403 | logger.info('unable to parse memory usage line\n>{}'.format(line)) | |
6404 | pass | |
6405 | return seen_memusage_cid_len, seen_memusage | |
6406 | ||
6407 | ||
33c7a0ef TL |
6408 | def _parse_cpu_perc(code: int, out: str) -> Tuple[int, Dict[str, str]]: |
6409 | seen_cpuperc = {} | |
6410 | seen_cpuperc_cid_len = 0 | |
6411 | if not code: | |
6412 | for line in out.splitlines(): | |
6413 | (cid, cpuperc) = line.split(',') | |
6414 | try: | |
6415 | seen_cpuperc[cid] = cpuperc | |
6416 | if not seen_cpuperc_cid_len: | |
6417 | seen_cpuperc_cid_len = len(cid) | |
6418 | except ValueError: | |
6419 | logger.info('unable to parse cpu percentage line\n>{}'.format(line)) | |
6420 | pass | |
6421 | return seen_cpuperc_cid_len, seen_cpuperc | |
6422 | ||
6423 | ||
f67539c2 TL |
def get_daemon_description(ctx, fsid, name, detail=False, legacy_dir=None):
    # type: (CephadmContext, str, str, bool, Optional[str]) -> Dict[str, str]
    """Return the `cephadm ls` entry matching both fsid and daemon name;
    raise Error if no such daemon exists on this host."""
    for entry in list_daemons(ctx, detail=detail, legacy_dir=legacy_dir):
        if entry['fsid'] == fsid and entry['name'] == name:
            return entry
    raise Error('Daemon not found: {}. See `cephadm ls`'.format(name))
6434 | ||
522d829b TL |
6435 | |
def get_container_stats(ctx: CephadmContext, container_path: str, fsid: str, daemon_type: str, daemon_id: str) -> Tuple[str, str, int]:
    """Inspect the daemon's container, trying the current container name
    first and then the legacy one; return (out, err, code) of the first
    successful inspect (or of the last attempt if all fail)."""
    c = CephContainer.for_daemon(ctx, fsid, daemon_type, daemon_id, 'bash')
    out, err, code = '', '', -1
    for cname in (c.cname, c.old_cname):
        inspect_cmd = [
            container_path, 'inspect',
            '--format', '{{.Id}},{{.Config.Image}},{{.Image}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}',
            cname,
        ]
        out, err, code = call(ctx, inspect_cmd, verbosity=CallVerbosity.DEBUG)
        if not code:
            break
    return out, err, code
6449 | ||
9f95a23c TL |
6450 | ################################## |
6451 | ||
f67539c2 | 6452 | |
@default_image
def command_adopt(ctx):
    # type: (CephadmContext) -> None
    """Adopt a legacy (non-cephadm) daemon into cephadm management.

    Pulls the target image (unless --skip-pull), locks the cluster, and
    dispatches to the type-specific adoption routine.
    """
    if not ctx.skip_pull:
        try:
            _pull_image(ctx, ctx.image)
        except UnauthorizedRegistryError:
            err_str = 'Failed to pull container image. Host may not be logged into container registry. Try `cephadm registry-login --registry-url <url> --registry-username <username> --registry-password <password>` or supply login info via a json file with `cephadm registry-login --registry-json <file>`'
            logger.debug(f'Pulling image for `command_adopt` failed: {err_str}')
            raise Error(err_str)

    (daemon_type, daemon_id) = ctx.name.split('.', 1)

    # legacy check
    if ctx.style != 'legacy':
        raise Error('adoption of style %s not implemented' % ctx.style)

    # lock
    fsid = get_legacy_daemon_fsid(ctx,
                                  ctx.cluster,
                                  daemon_type,
                                  daemon_id,
                                  legacy_dir=ctx.legacy_dir)
    if not fsid:
        raise Error('could not detect legacy fsid; set fsid in ceph.conf')
    lock = FileLock(ctx, fsid)
    lock.acquire()

    # dispatch to the adoption routine for this daemon type
    if daemon_type in Ceph.daemons:
        command_adopt_ceph(ctx, daemon_type, daemon_id, fsid)
    elif daemon_type == 'prometheus':
        command_adopt_prometheus(ctx, daemon_id, fsid)
    elif daemon_type == 'grafana':
        command_adopt_grafana(ctx, daemon_id, fsid)
    elif daemon_type == 'alertmanager':
        command_adopt_alertmanager(ctx, daemon_id, fsid)
    elif daemon_type == 'node-exporter':
        raise Error('adoption of node-exporter not implemented')
    else:
        raise Error('daemon type %s not recognized' % daemon_type)
6495 | ||
6496 | ||
class AdoptOsd(object):
    """Locate the fsid and objectstore type of a legacy OSD so it can be
    adopted, whether the OSD is currently running or offline."""

    def __init__(self, ctx, osd_data_dir, osd_id):
        # type: (CephadmContext, str, str) -> None
        self.ctx = ctx
        self.osd_data_dir = osd_data_dir
        self.osd_id = osd_id

    def check_online_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Read 'fsid' and 'type' files from a mounted OSD data dir.
        Either value is None if its file is unreadable/missing."""
        osd_fsid, osd_type = None, None

        path = os.path.join(self.osd_data_dir, 'fsid')
        try:
            with open(path, 'r') as f:
                osd_fsid = f.read().strip()
            logger.info('Found online OSD at %s' % path)
        except IOError:
            logger.info('Unable to read OSD fsid from %s' % path)
        if os.path.exists(os.path.join(self.osd_data_dir, 'type')):
            with open(os.path.join(self.osd_data_dir, 'type')) as f:
                osd_type = f.read().strip()
        else:
            logger.info('"type" file missing for OSD data dir')

        return osd_fsid, osd_type

    def check_offline_lvm_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Ask `ceph-volume lvm list --format=json` (run in a container)
        about an offline LVM-backed OSD; infer bluestore/filestore from
        the device tags."""
        osd_fsid, osd_type = None, None

        c = get_ceph_volume_container(
            self.ctx,
            args=['lvm', 'list', '--format=json'],
        )
        out, err, code = call_throws(self.ctx, c.run_cmd())
        if not code:
            try:
                js = json.loads(out)
                if self.osd_id in js:
                    logger.info('Found offline LVM OSD {}'.format(self.osd_id))
                    osd_fsid = js[self.osd_id][0]['tags']['ceph.osd_fsid']
                    for device in js[self.osd_id]:
                        # 'block' device => bluestore; 'data' device => filestore
                        if device['tags']['ceph.type'] == 'block':
                            osd_type = 'bluestore'
                            break
                        if device['tags']['ceph.type'] == 'data':
                            osd_type = 'filestore'
                            break
            except ValueError as e:
                logger.info('Invalid JSON in ceph-volume lvm list: {}'.format(e))

        return osd_fsid, osd_type

    def check_offline_simple_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Look for a ceph-volume 'simple' scan JSON in /etc/ceph/osd
        describing this OSD; for non-filestore OSDs, mount the data
        device so adoption can move files out of it."""
        osd_fsid, osd_type = None, None

        osd_file = glob('/etc/ceph/osd/{}-[a-f0-9-]*.json'.format(self.osd_id))
        if len(osd_file) == 1:
            with open(osd_file[0], 'r') as f:
                try:
                    js = json.loads(f.read())
                    logger.info('Found offline simple OSD {}'.format(self.osd_id))
                    osd_fsid = js['fsid']
                    osd_type = js['type']
                    if osd_type != 'filestore':
                        # need this to be mounted for the adopt to work, as it
                        # needs to move files from this directory
                        call_throws(self.ctx, ['mount', js['data']['path'], self.osd_data_dir])
                except ValueError as e:
                    logger.info('Invalid JSON in {}: {}'.format(osd_file, e))

        return osd_fsid, osd_type
6571 | ||
9f95a23c | 6572 | |
f67539c2 TL |
def command_adopt_ceph(ctx, daemon_type, daemon_id, fsid):
    # type: (CephadmContext, str, str, str) -> None
    """Adopt a legacy ceph daemon: stop/disable its old systemd unit,
    move its data, config and logs into the cephadm layout, then deploy
    a containerized replacement under a new unit.

    For OSDs, the fsid/objectstore type is discovered via AdoptOsd
    (online, offline-lvm, then offline-simple); filestore is rejected.
    """
    (uid, gid) = extract_uid_gid(ctx)

    data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
                    (daemon_type, ctx.cluster, daemon_id))
    data_dir_src = os.path.abspath(ctx.legacy_dir + data_dir_src)

    if not os.path.exists(data_dir_src):
        raise Error("{}.{} data directory '{}' does not exist. "
                    'Incorrect ID specified, or daemon already adopted?'.format(
                        daemon_type, daemon_id, data_dir_src))

    osd_fsid = None
    if daemon_type == 'osd':
        # try the discovery strategies in order until one yields an fsid
        adopt_osd = AdoptOsd(ctx, data_dir_src, daemon_id)
        osd_fsid, osd_type = adopt_osd.check_online_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
        if not osd_fsid:
            raise Error('Unable to find OSD {}'.format(daemon_id))
        logger.info('objectstore_type is %s' % osd_type)
        assert osd_type
        if osd_type == 'filestore':
            raise Error('FileStore is not supported by cephadm')

    # NOTE: implicit assumption here that the units correspond to the
    # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
    # CLUSTER field.
    unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
    (enabled, state, _) = check_unit(ctx, unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'disable', unit_name])

    # data
    logger.info('Moving data...')
    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)
    move_files(ctx, glob(os.path.join(data_dir_src, '*')),
               data_dir_dst,
               uid=uid, gid=gid)
    logger.debug('Remove dir `%s`' % (data_dir_src))
    if os.path.ismount(data_dir_src):
        call_throws(ctx, ['umount', data_dir_src])
    os.rmdir(data_dir_src)

    logger.info('Chowning content...')
    call_throws(ctx, ['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])

    if daemon_type == 'mon':
        # rename *.ldb -> *.sst, in case they are coming from ubuntu
        store = os.path.join(data_dir_dst, 'store.db')
        num_renamed = 0
        if os.path.exists(store):
            for oldf in os.listdir(store):
                if oldf.endswith('.ldb'):
                    newf = oldf.replace('.ldb', '.sst')
                    oldp = os.path.join(store, oldf)
                    newp = os.path.join(store, newf)
                    logger.debug('Renaming %s -> %s' % (oldp, newp))
                    os.rename(oldp, newp)
        # NOTE(review): num_renamed is never incremented above, so this
        # message can never fire — confirm intent upstream.
        if num_renamed:
            logger.info('Renamed %d leveldb *.ldb files to *.sst',
                        num_renamed)
    if daemon_type == 'osd':
        for n in ['block', 'block.db', 'block.wal']:
            p = os.path.join(data_dir_dst, n)
            if os.path.exists(p):
                logger.info('Chowning %s...' % p)
                os.chown(p, uid, gid)
        # disable the ceph-volume 'simple' mode files on the host
        simple_fn = os.path.join('/etc/ceph/osd',
                                 '%s-%s.json' % (daemon_id, osd_fsid))
        if os.path.exists(simple_fn):
            new_fn = simple_fn + '.adopted-by-cephadm'
            logger.info('Renaming %s -> %s', simple_fn, new_fn)
            os.rename(simple_fn, new_fn)
            logger.info('Disabling host unit ceph-volume@ simple unit...')
            call(ctx, ['systemctl', 'disable',
                       'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
        else:
            # assume this is an 'lvm' c-v for now, but don't error
            # out if it's not.
            logger.info('Disabling host unit ceph-volume@ lvm unit...')
            call(ctx, ['systemctl', 'disable',
                       'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])

    # config
    config_src = '/etc/ceph/%s.conf' % (ctx.cluster)
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'config')
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # logs
    logger.info('Moving logs...')
    log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
                   (ctx.cluster, daemon_type, daemon_id))
    log_dir_src = os.path.abspath(ctx.legacy_dir + log_dir_src)
    log_dir_dst = make_log_dir(ctx, fsid, uid=uid, gid=gid)
    move_files(ctx, glob(log_dir_src),
               log_dir_dst,
               uid=uid, gid=gid)

    logger.info('Creating new units...')
    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True,  # unconditionally enable the new unit
                        start=(state == 'running' or ctx.force_start),
                        osd_fsid=osd_fsid)
    update_firewalld(ctx, daemon_type)
9f95a23c TL |
6691 | |
6692 | ||
f67539c2 TL |
def command_adopt_prometheus(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy (host-installed) prometheus into cephadm management.

    Stops and disables the host 'prometheus' systemd unit, copies its config
    and metrics data into the cephadm data dir for (fsid, daemon_id), then
    deploys a containerized prometheus daemon in its place.
    """
    daemon_type = 'prometheus'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)

    # stop/disable the legacy host unit before taking over its data
    _stop_and_disable(ctx, 'prometheus')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config -- legacy paths are resolved relative to ctx.legacy_dir
    # (normally '/', but overridable for testing)
    config_src = '/etc/prometheus/prometheus.yml'
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/prometheus')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # data (time-series metrics store)
    data_src = '/var/lib/prometheus/metrics/'
    data_src = os.path.abspath(ctx.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    # deploy the containerized replacement and open its firewall ports
    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)
9f95a23c | 6720 | |
f6b5b4d7 | 6721 | |
f67539c2 TL |
def command_adopt_grafana(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy (host-installed) grafana into cephadm management.

    Stops and disables the host 'grafana-server' unit, copies its config,
    provisioning tree, optional TLS cert/key and data dir into the cephadm
    data dir, then deploys a containerized grafana daemon in its place.
    """
    daemon_type = 'grafana'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)

    _stop_and_disable(ctx, 'grafana-server')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config -- legacy paths are resolved relative to ctx.legacy_dir
    config_src = '/etc/grafana/grafana.ini'
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/grafana')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # provisioning tree lands next to grafana.ini
    prov_src = '/etc/grafana/provisioning/'
    prov_src = os.path.abspath(ctx.legacy_dir + prov_src)
    prov_dst = os.path.join(data_dir_dst, 'etc/grafana')
    copy_tree(ctx, [prov_src], prov_dst, uid=uid, gid=gid)

    # cert: only migrated when BOTH cert and key exist on the host
    cert = '/etc/grafana/grafana.crt'
    key = '/etc/grafana/grafana.key'
    if os.path.exists(cert) and os.path.exists(key):
        cert_src = '/etc/grafana/grafana.crt'
        cert_src = os.path.abspath(ctx.legacy_dir + cert_src)
        makedirs(os.path.join(data_dir_dst, 'etc/grafana/certs'), uid, gid, 0o755)
        cert_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_file')
        copy_files(ctx, [cert_src], cert_dst, uid=uid, gid=gid)

        key_src = '/etc/grafana/grafana.key'
        key_src = os.path.abspath(ctx.legacy_dir + key_src)
        key_dst = os.path.join(data_dir_dst, 'etc/grafana/certs/cert_key')
        copy_files(ctx, [key_src], key_dst, uid=uid, gid=gid)

        # rewrite cert_file/cert_key paths in the copied grafana.ini so
        # they point at the in-container cert locations
        _adjust_grafana_ini(os.path.join(config_dst, 'grafana.ini'))
    else:
        logger.debug('Skipping ssl, missing cert {} or key {}'.format(cert, key))

    # data - possible custom dashboards/plugins
    data_src = '/var/lib/grafana/'
    data_src = os.path.abspath(ctx.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    # deploy the containerized replacement and open its firewall ports
    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)
9f95a23c | 6774 | |
f6b5b4d7 | 6775 | |
f67539c2 TL |
def command_adopt_alertmanager(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy (host-installed) alertmanager into cephadm management.

    Stops and disables the host 'prometheus-alertmanager' unit, copies its
    config and data into the cephadm data dir for (fsid, daemon_id), then
    deploys a containerized alertmanager daemon in its place.
    """
    daemon_type = 'alertmanager'
    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)

    _stop_and_disable(ctx, 'prometheus-alertmanager')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config -- legacy paths are resolved relative to ctx.legacy_dir
    config_src = '/etc/prometheus/alertmanager.yml'
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'etc/alertmanager')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # data (silences/notification state)
    data_src = '/var/lib/prometheus/alertmanager/'
    data_src = os.path.abspath(ctx.legacy_dir + data_src)
    data_dst = os.path.join(data_dir_dst, 'etc/alertmanager/data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    # deploy the containerized replacement and open its firewall ports
    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)
801d1391 | 6804 | |
f6b5b4d7 | 6805 | |
9f95a23c TL |
def _adjust_grafana_ini(filename):
    # type: (str) -> None
    """Point cert_file/cert_key in the [server] section at the cephadm paths.

    The file is rewritten line-by-line because ConfigParser would drop
    comments; output goes to a '<filename>.new' sibling which is then
    renamed over the original.
    """
    tmp_path = '{}.new'.format(filename)
    try:
        with open(filename, 'r') as src:
            original_lines = src.readlines()
        with open(tmp_path, 'w') as dst:
            in_server_section = False
            for text in original_lines:
                # a section header either enters or leaves [server]
                if text.startswith('['):
                    in_server_section = text.startswith('[server]')
                if in_server_section:
                    text = re.sub(r'^cert_file.*',
                                  'cert_file = /etc/grafana/certs/cert_file', text)
                    text = re.sub(r'^cert_key.*',
                                  'cert_key = /etc/grafana/certs/cert_key', text)
                dst.write(text)
        os.rename(tmp_path, filename)
    except OSError as err:
        raise Error('Cannot update {}: {}'.format(filename, err))
9f95a23c TL |
6830 | |
6831 | ||
f67539c2 TL |
def _stop_and_disable(ctx, unit_name):
    # type: (CephadmContext, str) -> None
    """Stop unit_name if it is running and disable it if it is enabled."""
    enabled, state, _ = check_unit(ctx, unit_name)
    pending = []
    if state == 'running':
        pending.append(('Stopping', 'stop'))
    if enabled:
        pending.append(('Disabling', 'disable'))
    for verb, subcommand in pending:
        logger.info('%s old systemd unit %s...' % (verb, unit_name))
        call_throws(ctx, ['systemctl', subcommand, unit_name])
9f95a23c TL |
6842 | |
6843 | ################################## | |
6844 | ||
9f95a23c | 6845 | |
f67539c2 TL |
def command_rm_daemon(ctx):
    # type: (CephadmContext) -> None
    """Remove one daemon (ctx.name == '<type>.<id>') from this host.

    Stops/disables its systemd unit, removes (or backs up) its data dir,
    and closes any firewall ports recorded for it. Raises Error for
    mon/osd daemons unless --force was given.
    """
    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    # mons and osds hold cluster data; require explicit confirmation
    if daemon_type in ['mon', 'osd'] and not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    # best-effort unit teardown: call() (not call_throws) so a missing or
    # already-stopped unit does not abort the removal
    call(ctx, ['systemctl', 'stop', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'reset-failed', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'disable', unit_name],
         verbosity=CallVerbosity.DEBUG)
    data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_id)
    if daemon_type in ['mon', 'osd', 'prometheus'] and \
       not ctx.force_delete_data:
        # rename it out of the way -- do not delete
        backup_dir = os.path.join(ctx.data_dir, ctx.fsid, 'removed')
        if not os.path.exists(backup_dir):
            makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
        dirname = '%s.%s_%s' % (daemon_type, daemon_id,
                                datetime.datetime.utcnow().strftime(DATEFMT))
        os.rename(data_dir,
                  os.path.join(backup_dir, dirname))
    else:
        call_throws(ctx, ['rm', '-rf', data_dir])

    # NOTE(review): 'tcp_ports' in ctx presumably relies on CephadmContext
    # implementing __contains__ -- confirm against the context class
    if 'tcp_ports' in ctx and ctx.tcp_ports is not None:
        ports: List[int] = [int(p) for p in ctx.tcp_ports.split()]
        try:
            fw = Firewalld(ctx)
            fw.close_ports(ports)
            fw.apply_rules()
        except RuntimeError as e:
            # in case we cannot close the ports we will remove
            # the daemon but keep them open.
            logger.warning(f' Error when trying to close ports: {e}')
6888 | ||
6889 | ||
9f95a23c TL |
6890 | ################################## |
6891 | ||
f6b5b4d7 | 6892 | |
def _zap(ctx: CephadmContext, what: str) -> None:
    """Run `ceph-volume lvm zap --destroy` against *what* in a container."""
    volume_mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
    zap_container = get_ceph_volume_container(
        ctx,
        args=['lvm', 'zap', '--destroy', what],
        volume_mounts=volume_mounts,
        envs=ctx.env,
    )
    logger.info(f'Zapping {what}...')
    call_throws(ctx, zap_container.run_cmd())
6901 | ||
6902 | ||
@infer_image
def _zap_osds(ctx: CephadmContext) -> None:
    """Zap every device whose LVs all belong to ctx.fsid.

    Runs `ceph-volume inventory --format json` in a container, then zaps
    each device whose LVs are exclusively owned by this cluster. Devices
    with a mix of this cluster's LVs and others are only warned about.
    """
    # assume fsid lock already held

    # list devices via ceph-volume inventory (JSON output)
    mounts = get_container_mounts(ctx, ctx.fsid, 'clusterless-ceph-volume', None)
    c = get_ceph_volume_container(ctx,
                                  args=['inventory', '--format', 'json'],
                                  volume_mounts=mounts,
                                  envs=ctx.env)
    out, err, code = call_throws(ctx, c.run_cmd())
    if code:
        raise Error('failed to list osd inventory')
    try:
        ls = json.loads(out)
    except ValueError as e:
        raise Error(f'Invalid JSON in ceph-volume inventory: {e}')

    for i in ls:
        # one boolean per LV on the device: does it belong to our cluster?
        matches = [lv.get('cluster_fsid') == ctx.fsid for lv in i.get('lvs', [])]
        if any(matches) and all(matches):
            # every LV is ours -> safe to zap the whole device
            # (a device with no LVs is skipped, since any([]) is False)
            _zap(ctx, i.get('path'))
        elif any(matches):
            lv_names = [lv['name'] for lv in i.get('lvs', [])]
            # TODO: we need to map the lv_names back to device paths (the vg
            # id isn't part of the output here!)
            logger.warning(f'Not zapping LVs (not implemented): {lv_names}')
6930 | ||
6931 | ||
def command_zap_osds(ctx: CephadmContext) -> None:
    """Destroy the data on all OSD devices owned by this cluster.

    Requires --force, then takes the per-fsid lock and delegates to
    _zap_osds().
    """
    if not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    cluster_lock = FileLock(ctx, ctx.fsid)
    cluster_lock.acquire()

    _zap_osds(ctx)
6941 | ||
6942 | ################################## | |
6943 | ||
6944 | ||
33c7a0ef TL |
def get_ceph_cluster_count(ctx: CephadmContext) -> int:
    """Count cluster data dirs (fsid-named entries) under ctx.data_dir."""
    return sum(1 for entry in os.listdir(ctx.data_dir) if is_fsid(entry))
6947 | ||
6948 | ||
f67539c2 TL |
def command_rm_cluster(ctx):
    # type: (CephadmContext) -> None
    """Remove all traces of the cluster ctx.fsid from this host.

    Stops/disables every cephadm-managed daemon unit for the fsid, removes
    unit files, data, logs (unless --keep-logs), logrotate and sysctl
    config, and -- when this was the last cluster on the host -- the shared
    ceph.target plumbing and /etc/ceph config/keyring files that mention
    the fsid. Requires --force.
    """
    if not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    def disable_systemd_service(unit_name: str) -> None:
        # best-effort stop/reset/disable; failures are ignored (call())
        call(ctx, ['systemctl', 'stop', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'reset-failed', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'disable', unit_name],
             verbosity=CallVerbosity.DEBUG)

    # stop + disable individual daemon units
    for d in list_daemons(ctx, detail=False):
        if d['fsid'] != ctx.fsid:
            continue
        if d['style'] != 'cephadm:v1':
            continue
        disable_systemd_service(get_unit_name(ctx.fsid, d['name']))

    # cluster units
    for unit_name in ['ceph-%s.target' % ctx.fsid]:
        disable_systemd_service(unit_name)

    # the systemd slice name escapes '-' as \x2d
    slice_name = 'system-ceph\\x2d{}.slice'.format(ctx.fsid.replace('-', '\\x2d'))
    call(ctx, ['systemctl', 'stop', slice_name],
         verbosity=CallVerbosity.DEBUG)

    # osds?
    if ctx.zap_osds:
        _zap_osds(ctx)

    # rm units
    call_throws(ctx, ['rm', '-f', ctx.unit_dir
                      + '/ceph-%s@.service' % ctx.fsid])
    call_throws(ctx, ['rm', '-f', ctx.unit_dir
                      + '/ceph-%s.target' % ctx.fsid])
    call_throws(ctx, ['rm', '-rf',
                      ctx.unit_dir + '/ceph-%s.target.wants' % ctx.fsid])
    # rm data
    call_throws(ctx, ['rm', '-rf', ctx.data_dir + '/' + ctx.fsid])

    if not ctx.keep_logs:
        # rm logs
        call_throws(ctx, ['rm', '-rf', ctx.log_dir + '/' + ctx.fsid])
        call_throws(ctx, ['rm', '-rf', ctx.log_dir
                          + '/*.wants/ceph-%s@*' % ctx.fsid])

    # rm logrotate config
    call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/ceph-%s' % ctx.fsid])

    # if last cluster on host remove shared files
    if get_ceph_cluster_count(ctx) == 0:
        disable_systemd_service('ceph.target')

        # rm shared ceph target files
        call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/multi-user.target.wants/ceph.target'])
        call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/ceph.target'])

        # rm cephadm logrotate config
        call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/cephadm'])

        if not ctx.keep_logs:
            # remove all cephadm logs
            for fname in glob(f'{ctx.log_dir}/cephadm.log*'):
                os.remove(fname)

    # rm sysctl settings
    sysctl_dirs: List[Path] = [Path(ctx.sysctl_dir), Path('/usr/lib/sysctl.d')]

    for sysctl_dir in sysctl_dirs:
        for p in sysctl_dir.glob(f'90-ceph-{ctx.fsid}-*.conf'):
            p.unlink()

    # cleanup remaining ceph directories
    ceph_dirs = [f'/run/ceph/{ctx.fsid}', f'/tmp/var/lib/ceph/{ctx.fsid}', f'/var/run/ceph/{ctx.fsid}']
    for dd in ceph_dirs:
        shutil.rmtree(dd, ignore_errors=True)

    # clean up config, keyring, and pub key files, but only when the
    # config file actually references this fsid
    files = [CEPH_DEFAULT_CONF, CEPH_DEFAULT_PUBKEY, CEPH_DEFAULT_KEYRING]
    if os.path.exists(files[0]):
        valid_fsid = False
        with open(files[0]) as f:
            if ctx.fsid in f.read():
                valid_fsid = True
        if valid_fsid:
            # rm configuration files on /etc/ceph
            # (direct iteration instead of index loop over range(len(...)))
            for path in files:
                if os.path.exists(path):
                    os.remove(path)
7045 | ||
9f95a23c TL |
7046 | ################################## |
7047 | ||
f67539c2 TL |
7048 | |
def check_time_sync(ctx, enabler=None):
    # type: (CephadmContext, Optional[Packager]) -> bool
    """Return True if any known NTP/time-sync systemd unit is active.

    When *enabler* is given, check_units may use it to try enabling one
    of the candidate services.
    """
    units = [
        'chrony.service',  # 18.04 (at least)
        'chronyd.service',  # el / opensuse
        'systemd-timesyncd.service',
        'ntpd.service',  # el7 (at least)
        'ntp.service',  # 18.04 (at least)
        'ntpsec.service',  # 20.04 (at least) / buster
        'openntpd.service',  # ubuntu / debian
    ]
    if check_units(ctx, units, enabler):
        return True
    logger.warning('No time sync service is running; checked for %s' % units)
    return False
7064 | ||
f6b5b4d7 | 7065 | |
def command_check_host(ctx: CephadmContext) -> None:
    """Check that this host can run cephadm-managed daemons.

    Verifies a container engine, required binaries (systemctl, lvcreate),
    an active time-sync service, and (optionally) the expected hostname.
    All problems are collected and raised together as one Error.
    """
    errors = []
    commands = ['systemctl', 'lvcreate']

    try:
        engine = check_container_engine(ctx)
        logger.info(f'{engine} is present')
    except Error as e:
        errors.append(str(e))

    for command in commands:
        try:
            find_program(command)
            logger.info('%s is present' % command)
        except ValueError:
            errors.append('%s binary does not appear to be installed' % command)

    # check for configured+running chronyd or ntp
    if not check_time_sync(ctx):
        errors.append('No time synchronization is active')

    # hostname check only applies when --expect-hostname was passed
    if 'expect_hostname' in ctx and ctx.expect_hostname:
        if get_hostname().lower() != ctx.expect_hostname.lower():
            errors.append('hostname "%s" does not match expected hostname "%s"' % (
                get_hostname(), ctx.expect_hostname))
        else:
            logger.info('Hostname "%s" matches what is expected.',
                        ctx.expect_hostname)

    if errors:
        raise Error('\nERROR: '.join(errors))

    logger.info('Host looks OK')
7099 | ||
7100 | ################################## | |
7101 | ||
f6b5b4d7 | 7102 | |
33c7a0ef TL |
def get_ssh_vars(ssh_user: str) -> Tuple[int, int, str]:
    """Look up *ssh_user* and return (uid, gid, path-to-~/.ssh).

    Raises Error when the user does not exist on this host.
    """
    try:
        pw_entry = pwd.getpwnam(ssh_user)
    except KeyError:
        raise Error('Cannot find uid/gid for ssh-user: %s' % (ssh_user))

    return pw_entry.pw_uid, pw_entry.pw_gid, os.path.join(pw_entry.pw_dir, '.ssh')
7113 | ||
7114 | ||
def authorize_ssh_key(ssh_pub_key: str, ssh_user: str) -> bool:
    """Authorize the public key for the provided ssh user.

    Appends the key to ~<ssh_user>/.ssh/authorized_keys (creating the
    directory/file with safe modes if needed). Returns True when the key
    was added, False when it was already present. Raises Error for an
    empty key or (via get_ssh_vars) an unknown user.
    """

    def key_in_file(path: str, key: str) -> bool:
        # line-exact membership test (whitespace-stripped comparison)
        if not os.path.exists(path):
            return False
        with open(path) as f:
            lines = f.readlines()
        for line in lines:
            if line.strip() == key.strip():
                return True
        return False

    logger.info(f'Adding key to {ssh_user}@localhost authorized_keys...')
    if ssh_pub_key is None or ssh_pub_key.isspace():
        raise Error('Trying to authorize an empty ssh key')

    ssh_pub_key = ssh_pub_key.strip()
    ssh_uid, ssh_gid, ssh_dir = get_ssh_vars(ssh_user)
    if not os.path.exists(ssh_dir):
        makedirs(ssh_dir, ssh_uid, ssh_gid, 0o700)

    auth_keys_file = '%s/authorized_keys' % ssh_dir
    if key_in_file(auth_keys_file, ssh_pub_key):
        logger.info(f'key already in {ssh_user}@localhost authorized_keys...')
        return False

    # if the existing file does not end in a newline, we must add one
    # before appending the new key, or we'd corrupt the last entry
    add_newline = False
    if os.path.exists(auth_keys_file):
        with open(auth_keys_file, 'r') as f:
            f.seek(0, os.SEEK_END)
            if f.tell() > 0:
                f.seek(f.tell() - 1, os.SEEK_SET)  # go to last char
                if f.read() != '\n':
                    add_newline = True

    with open(auth_keys_file, 'a') as f:
        os.fchown(f.fileno(), ssh_uid, ssh_gid)  # just in case we created it
        os.fchmod(f.fileno(), 0o600)  # just in case we created it
        if add_newline:
            f.write('\n')
        f.write(ssh_pub_key + '\n')

    return True
7159 | ||
7160 | ||
def revoke_ssh_key(key: str, ssh_user: str) -> None:
    """Remove *key* from the ssh user's authorized_keys file, if present.

    Rewrites the file through a temporary copy (ownership/mode set before
    any content is written) and atomically moves it into place. Logs a
    warning when the key is not found.
    """
    ssh_uid, ssh_gid, ssh_dir = get_ssh_vars(ssh_user)
    auth_keys_file = '%s/authorized_keys' % ssh_dir
    deleted = False
    if os.path.exists(auth_keys_file):
        with open(auth_keys_file, 'r') as f:
            lines = f.readlines()
        # mkstemp returns an OS-level open fd; close it immediately and
        # reopen by name (the old `_, filename = mkstemp()` leaked the fd)
        fd, filename = tempfile.mkstemp()
        os.close(fd)
        with open(filename, 'w') as f:
            os.fchown(f.fileno(), ssh_uid, ssh_gid)
            os.fchmod(f.fileno(), 0o600)  # secure access to the keys file
            for line in lines:
                if line.strip() == key.strip():
                    deleted = True
                else:
                    f.write(line)

        if deleted:
            shutil.move(filename, auth_keys_file)
        else:
            os.unlink(filename)  # don't leave the stray temp copy behind
            logger.warning('Cannot find the ssh key to be deleted')
7183 | ||
7184 | ||
def check_ssh_connectivity(ctx: CephadmContext) -> None:
    """Verify that ctx.ssh_user can ssh to this host with passwordless sudo.

    Uses the user-supplied key pair when both --ssh-private-key and
    --ssh-public-key were given; otherwise generates a throwaway pair in
    /tmp. The public key is temporarily authorized (and revoked again only
    if this function added it). Raises Error when the ssh probe fails;
    silently skips the check when ssh/ssh-keygen are unavailable.
    """

    def cmd_is_available(cmd: str) -> bool:
        if shutil.which(cmd) is None:
            logger.warning(f'Command not found: {cmd}')
            return False
        return True

    if not cmd_is_available('ssh') or not cmd_is_available('ssh-keygen'):
        logger.warning('Cannot check ssh connectivity. Skipping...')
        return

    logger.info('Verifying ssh connectivity ...')
    if ctx.ssh_private_key and ctx.ssh_public_key:
        # let's use the keys provided by the user
        ssh_priv_key_path = pathify(ctx.ssh_private_key.name)
        ssh_pub_key_path = pathify(ctx.ssh_public_key.name)
    else:
        # no custom keys, let's generate some random keys just for this check
        # NOTE(review): these generated key files are never deleted
        # afterwards -- consider cleaning them up
        ssh_priv_key_path = f'/tmp/ssh_key_{uuid.uuid1()}'
        ssh_pub_key_path = f'{ssh_priv_key_path}.pub'
        ssh_key_gen_cmd = ['ssh-keygen', '-q', '-t', 'rsa', '-N', '', '-C', '', '-f', ssh_priv_key_path]
        _, _, code = call(ctx, ssh_key_gen_cmd)
        if code != 0:
            logger.warning('Cannot generate keys to check ssh connectivity.')
            return

    with open(ssh_pub_key_path, 'r') as f:
        key = f.read().strip()
        new_key = authorize_ssh_key(key, ctx.ssh_user)
        ssh_cfg_file_arg = ['-F', pathify(ctx.ssh_config.name)] if ctx.ssh_config else []
        # probe: ssh to ourselves and run a sudo no-op
        _, _, code = call(ctx, ['ssh', '-o StrictHostKeyChecking=no',
                                *ssh_cfg_file_arg, '-i', ssh_priv_key_path,
                                '-o PasswordAuthentication=no',
                                f'{ctx.ssh_user}@{get_hostname()}',
                                'sudo echo'])

        # we only remove the key if it's a new one. In case the user has provided
        # some already existing key then we don't alter authorized_keys file
        if new_key:
            revoke_ssh_key(key, ctx.ssh_user)

    pub_key_msg = '- The public key file configured by --ssh-public-key is valid\n' if ctx.ssh_public_key else ''
    prv_key_msg = '- The private key file configured by --ssh-private-key is valid\n' if ctx.ssh_private_key else ''
    ssh_cfg_msg = '- The ssh configuration file configured by --ssh-config is valid\n' if ctx.ssh_config else ''
    err_msg = f"""
** Please verify your user's ssh configuration and make sure:
- User {ctx.ssh_user} must have passwordless sudo access
{pub_key_msg}{prv_key_msg}{ssh_cfg_msg}
"""
    if code != 0:
        raise Error(err_msg)
7237 | ||
7238 | ||
def command_prepare_host(ctx: CephadmContext) -> None:
    """Prepare this host for cephadm: install missing prerequisites.

    Installs podman/docker, lvm2 and chrony via the distro packager when
    absent, optionally adjusts the hostname to --expect-hostname, then
    re-runs command_check_host() to confirm the host is ready.
    """
    logger.info('Verifying podman|docker is present...')
    # packager is created lazily, only if something needs installing
    pkg = None
    try:
        check_container_engine(ctx)
    except Error as e:
        logger.warning(str(e))
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install_podman()

    logger.info('Verifying lvm2 is present...')
    if not find_executable('lvcreate'):
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install(['lvm2'])

    logger.info('Verifying time synchronization is in place...')
    if not check_time_sync(ctx):
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install(['chrony'])
        # check again, and this time try to enable
        # the service
        check_time_sync(ctx, enabler=pkg)

    if 'expect_hostname' in ctx and ctx.expect_hostname and ctx.expect_hostname != get_hostname():
        logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), ctx.expect_hostname))
        call_throws(ctx, ['hostname', ctx.expect_hostname])
        # persist the new hostname across reboots
        with open('/etc/hostname', 'w') as f:
            f.write(ctx.expect_hostname + '\n')

    logger.info('Repeating the final host check...')
    command_check_host(ctx)
9f95a23c TL |
7273 | |
7274 | ################################## | |
7275 | ||
f6b5b4d7 | 7276 | |
9f95a23c TL |
class CustomValidation(argparse.Action):
    """argparse action that validates daemon-name option values.

    For the 'name' destination, the value must look like
    '<daemon_type>.<daemon_id>' with a supported daemon type.
    """

    def _check_name(self, values: str) -> None:
        daemon_type, sep, _daemon_id = values.partition('.')
        if not sep:
            raise argparse.ArgumentError(self,
                                         'must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com')

        daemons = get_supported_daemons()
        if daemon_type not in daemons:
            raise argparse.ArgumentError(self,
                                         'name must declare the type of daemon e.g. '
                                         '{}'.format(', '.join(daemons)))

    def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace, values: Union[str, Sequence[Any], None],
                 option_string: Optional[str] = None) -> None:
        assert isinstance(values, str)
        if self.dest == 'name':
            self._check_name(values)
        setattr(namespace, self.dest, values)
7298 | ||
7299 | ################################## | |
7300 | ||
f6b5b4d7 | 7301 | |
def get_distro():
    # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
    """Parse /etc/os-release.

    Returns (ID, VERSION_ID, VERSION_CODENAME), each lowercased, or None
    for fields that are absent.
    """
    distro = None
    distro_version = None
    distro_codename = None
    with open('/etc/os-release', 'r') as f:
        for line in f.readlines():
            line = line.strip()
            if '=' not in line or line.startswith('#'):
                continue
            (var, val) = line.split('=', 1)
            # strip surrounding double quotes; the length guard avoids an
            # IndexError on bare `VAR=` lines (empty value)
            if len(val) >= 2 and val[0] == '"' and val[-1] == '"':
                val = val[1:-1]
            if var == 'ID':
                distro = val.lower()
            elif var == 'VERSION_ID':
                distro_version = val.lower()
            elif var == 'VERSION_CODENAME':
                distro_codename = val.lower()
    return distro, distro_version, distro_codename
7322 | ||
f6b5b4d7 | 7323 | |
9f95a23c | 7324 | class Packager(object): |
f67539c2 | 7325 | def __init__(self, ctx: CephadmContext, |
522d829b TL |
7326 | stable: Optional[str] = None, version: Optional[str] = None, |
7327 | branch: Optional[str] = None, commit: Optional[str] = None): | |
9f95a23c TL |
7328 | assert \ |
7329 | (stable and not version and not branch and not commit) or \ | |
7330 | (not stable and version and not branch and not commit) or \ | |
7331 | (not stable and not version and branch) or \ | |
7332 | (not stable and not version and not branch and not commit) | |
f67539c2 | 7333 | self.ctx = ctx |
9f95a23c TL |
7334 | self.stable = stable |
7335 | self.version = version | |
7336 | self.branch = branch | |
7337 | self.commit = commit | |
7338 | ||
20effc67 TL |
7339 | def validate(self) -> None: |
7340 | """Validate parameters before writing any state to disk.""" | |
7341 | pass | |
7342 | ||
522d829b TL |
7343 | def add_repo(self) -> None: |
7344 | raise NotImplementedError | |
7345 | ||
7346 | def rm_repo(self) -> None: | |
7347 | raise NotImplementedError | |
7348 | ||
7349 | def install(self, ls: List[str]) -> None: | |
9f95a23c TL |
7350 | raise NotImplementedError |
7351 | ||
522d829b | 7352 | def install_podman(self) -> None: |
9f95a23c TL |
7353 | raise NotImplementedError |
7354 | ||
522d829b | 7355 | def query_shaman(self, distro: str, distro_version: Any, branch: Optional[str], commit: Optional[str]) -> str: |
9f95a23c | 7356 | # query shaman |
f91f0fd5 | 7357 | logger.info('Fetching repo metadata from shaman and chacra...') |
9f95a23c TL |
7358 | shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format( |
7359 | distro=distro, | |
7360 | distro_version=distro_version, | |
7361 | branch=branch, | |
7362 | sha1=commit or 'latest', | |
7363 | arch=get_arch() | |
7364 | ) | |
7365 | try: | |
7366 | shaman_response = urlopen(shaman_url) | |
7367 | except HTTPError as err: | |
f91f0fd5 | 7368 | logger.error('repository not found in shaman (might not be available yet)') |
9f95a23c | 7369 | raise Error('%s, failed to fetch %s' % (err, shaman_url)) |
f67539c2 | 7370 | chacra_url = '' |
9f95a23c TL |
7371 | try: |
7372 | chacra_url = shaman_response.geturl() | |
7373 | chacra_response = urlopen(chacra_url) | |
7374 | except HTTPError as err: | |
f91f0fd5 | 7375 | logger.error('repository not found in chacra (might not be available yet)') |
9f95a23c TL |
7376 | raise Error('%s, failed to fetch %s' % (err, chacra_url)) |
7377 | return chacra_response.read().decode('utf-8') | |
7378 | ||
522d829b | 7379 | def repo_gpgkey(self) -> Tuple[str, str]: |
f67539c2 TL |
7380 | if self.ctx.gpg_url: |
7381 | return self.ctx.gpg_url | |
9f95a23c | 7382 | if self.stable or self.version: |
b3b6e05e | 7383 | return 'https://download.ceph.com/keys/release.gpg', 'release' |
9f95a23c | 7384 | else: |
b3b6e05e | 7385 | return 'https://download.ceph.com/keys/autobuild.gpg', 'autobuild' |
9f95a23c | 7386 | |
    def enable_service(self, service: str) -> None:
        """
        Start and enable the service (typically using systemd).
        """
        # 'enable --now' both enables the unit at boot and starts it immediately
        call_throws(self.ctx, ['systemctl', 'enable', '--now', service])
9f95a23c TL |
7392 | |
7393 | ||
class Apt(Packager):
    """Packager implementation for Debian-family distros driven by apt."""

    # distro id (from get_distro) -> name used in repo URLs / shaman queries
    DISTRO_NAMES = {
        'ubuntu': 'ubuntu',
        'debian': 'debian',
    }

    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
                 distro: Optional[str], distro_version: Optional[str], distro_codename: Optional[str]) -> None:
        super(Apt, self).__init__(ctx, stable=stable, version=version,
                                  branch=branch, commit=commit)
        assert distro
        self.ctx = ctx
        self.distro = self.DISTRO_NAMES[distro]
        self.distro_codename = distro_codename
        self.distro_version = distro_version

    def repo_path(self) -> str:
        """Path of the apt sources file this packager manages."""
        return '/etc/apt/sources.list.d/ceph.list'

    def add_repo(self) -> None:
        """Install the repo GPG key, write the ceph.list entry, and apt-get update."""
        url, name = self.repo_gpgkey()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read()
        with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'wb') as f:
            f.write(key)

        # stable/version builds come from download.ceph.com; anything else is
        # a dev build resolved via shaman/chacra
        if self.version:
            content = 'deb %s/debian-%s/ %s main\n' % (
                self.ctx.repo_url, self.version, self.distro_codename)
        elif self.stable:
            content = 'deb %s/debian-%s/ %s main\n' % (
                self.ctx.repo_url, self.stable, self.distro_codename)
        else:
            content = self.query_shaman(self.distro, self.distro_codename, self.branch,
                                        self.commit)

        logger.info('Installing repo file at %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        self.update()

    def rm_repo(self) -> None:
        """Remove the ceph repo file and its GPG keys.

        NOTE(review): only the 'autobuild' and 'release' key names are
        removed here; a key written under any other name would be left
        behind.
        """
        for name in ['autobuild', 'release']:
            p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
            if os.path.exists(p):
                logger.info('Removing repo GPG key %s...' % p)
                os.unlink(p)
        if os.path.exists(self.repo_path()):
            logger.info('Removing repo at %s...' % self.repo_path())
            os.unlink(self.repo_path())

        # the kubic (podman) repo is only ever added on ubuntu
        if self.distro == 'ubuntu':
            self.rm_kubic_repo()

    def install(self, ls: List[str]) -> None:
        """Install the named packages non-interactively via apt-get."""
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, ['apt-get', 'install', '-y'] + ls)

    def update(self) -> None:
        """Refresh the apt package index."""
        logger.info('Updating package list...')
        call_throws(self.ctx, ['apt-get', 'update'])

    def install_podman(self) -> None:
        """Install podman (via the kubic repo on ubuntu), falling back to docker.io."""
        if self.distro == 'ubuntu':
            logger.info('Setting up repo for podman...')
            self.add_kubic_repo()
            self.update()

        logger.info('Attempting podman install...')
        try:
            self.install(['podman'])
        except Error:
            logger.info('Podman did not work. Falling back to docker...')
            self.install(['docker.io'])

    def kubic_repo_url(self) -> str:
        """URL of the openSUSE kubic repo that provides podman for Ubuntu."""
        return 'https://download.opensuse.org/repositories/devel:/kubic:/' \
               'libcontainers:/stable/xUbuntu_%s/' % self.distro_version

    def kubic_repo_path(self) -> str:
        """Path of the apt sources file for the kubic repo."""
        return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list'

    # NOTE(review): 'kubric' in the two method names below is a typo for
    # 'kubic', kept as-is because renaming would change the class interface.
    def kubric_repo_gpgkey_url(self) -> str:
        """URL of the kubic repo's GPG release key."""
        return '%s/Release.key' % self.kubic_repo_url()

    def kubric_repo_gpgkey_path(self) -> str:
        """On-disk keyring path for the kubic repo key."""
        return '/etc/apt/trusted.gpg.d/kubic.release.gpg'

    def add_kubic_repo(self) -> None:
        """Fetch and register the kubic repo key, then write its sources entry."""
        url = self.kubric_repo_gpgkey_url()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        tmp_key = write_tmp(key, 0, 0)
        keyring = self.kubric_repo_gpgkey_path()
        call_throws(self.ctx, ['apt-key', '--keyring', keyring, 'add', tmp_key.name])

        logger.info('Installing repo file at %s...' % self.kubic_repo_path())
        content = 'deb %s /\n' % self.kubic_repo_url()
        with open(self.kubic_repo_path(), 'w') as f:
            f.write(content)

    def rm_kubic_repo(self) -> None:
        """Remove the kubic repo keyring and sources entry if present."""
        keyring = self.kubric_repo_gpgkey_path()
        if os.path.exists(keyring):
            logger.info('Removing repo GPG key %s...' % keyring)
            os.unlink(keyring)

        p = self.kubic_repo_path()
        if os.path.exists(p):
            logger.info('Removing repo at %s...' % p)
            os.unlink(p)
7520 | ||
f6b5b4d7 | 7521 | |
9f95a23c TL |
class YumDnf(Packager):
    """Packager implementation for RPM-based distros (yum/dnf/tdnf)."""

    # distro id -> (name used for shaman queries, repo path code prefix)
    DISTRO_NAMES = {
        'centos': ('centos', 'el'),
        'rhel': ('centos', 'el'),
        'scientific': ('centos', 'el'),
        'rocky': ('centos', 'el'),
        'almalinux': ('centos', 'el'),
        'fedora': ('fedora', 'fc'),
        'mariner': ('mariner', 'cm'),
    }

    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
                 distro: Optional[str], distro_version: Optional[str]) -> None:
        super(YumDnf, self).__init__(ctx, stable=stable, version=version,
                                     branch=branch, commit=commit)
        assert distro
        assert distro_version
        self.ctx = ctx
        self.major = int(distro_version.split('.')[0])
        self.distro_normalized = self.DISTRO_NAMES[distro][0]
        distro_prefix = self.DISTRO_NAMES[distro][1]
        self.distro_code = distro_prefix + str(self.major)
        # Pick the package tool: dnf on fedora >= 30 and el >= 8, tdnf on
        # mariner, plain yum otherwise.
        # Bugfix: compare against the bare prefix -- distro_code has the major
        # version appended (e.g. 'el8'), so the old comparison against 'fc',
        # 'el' or 'cm' could never match and the tool was always 'yum'.
        if (distro_prefix == 'fc' and self.major >= 30) or \
           (distro_prefix == 'el' and self.major >= 8):
            self.tool = 'dnf'
        elif distro_prefix == 'cm':
            self.tool = 'tdnf'
        else:
            self.tool = 'yum'

    def custom_repo(self, **kw: Any) -> str:
        """
        Repo files need special care in that a whole line should not be present
        if there is no value for it. Because we were using `format()` we could
        not conditionally add a line for a repo file. So the end result would
        contain a key with a missing value (say if we were passing `None`).

        For example, it could look like::

            [ceph repo]
            name= ceph repo
            proxy=
            gpgcheck=

        Which breaks. This function allows us to conditionally add lines,
        preserving an order and be more careful.

        Previously, and for historical purposes, this is how the template used
        to look::

            custom_repo =
            [{repo_name}]
            name={name}
            baseurl={baseurl}
            enabled={enabled}
            gpgcheck={gpgcheck}
            type={_type}
            gpgkey={gpgkey}
            proxy={proxy}

        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self) -> str:
        """Path of the yum/dnf repo file this packager manages."""
        return '/etc/yum.repos.d/ceph.repo'

    def repo_baseurl(self) -> str:
        """Base URL of the ceph RPM repo for the requested release/version."""
        assert self.stable or self.version
        if self.version:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.version,
                                     self.distro_code)
        else:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.stable,
                                     self.distro_code)

    def validate(self) -> None:
        """Reject distro/release combinations that have no published repo.

        Raises Error for Fedora (no builds published), for pacific+/16+ on
        el7, and when the computed repo URL has no repodata.
        """
        if self.distro_code.startswith('fc'):
            raise Error('Ceph team does not build Fedora specific packages and therefore cannot add repos for this distro')
        if self.distro_code == 'el7':
            # release names happen to sort alphabetically in chronological
            # order (octopus < pacific < quincy), so >= works here
            if self.stable and self.stable >= 'pacific':
                raise Error('Ceph does not support pacific or later for this version of this linux distro and therefore cannot add a repo for it')
            if self.version:
                # Bugfix: compare the major release numerically; the old
                # lexicographic comparison made e.g. '9' >= '16' true.
                try:
                    major = int(self.version.split('.')[0])
                except ValueError:
                    raise Error('unable to parse version: %s' % self.version)
                if major >= 16:
                    raise Error('Ceph does not support 16.y.z or later for this version of this linux distro and therefore cannot add a repo for it')

        if self.stable or self.version:
            # we know that yum & dnf require there to be a
            # $base_url/$arch/repodata/repomd.xml so we can test if this URL
            # is gettable in order to validate the inputs
            test_url = self.repo_baseurl() + '/noarch/repodata/repomd.xml'
            try:
                urlopen(test_url)
            except HTTPError as err:
                logger.error('unable to fetch repo metadata: %r', err)
                raise Error('failed to fetch repository metadata. please check'
                            ' the provided parameters are correct and try again')

    def add_repo(self) -> None:
        """Write the ceph repo file (and enable EPEL on el distros)."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            # dev build: shaman/chacra serve a ready-made repo file
            content = self.query_shaman(self.distro_normalized, self.major,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        if self.distro_code.startswith('el'):
            logger.info('Enabling EPEL...')
            call_throws(self.ctx, [self.tool, 'install', '-y', 'epel-release'])

    def rm_repo(self) -> None:
        """Remove the ceph repo file if present."""
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls: List[str]) -> None:
        """Install the named packages non-interactively."""
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, [self.tool, 'install', '-y'] + ls)

    def install_podman(self) -> None:
        """Install podman from the distro repos."""
        self.install(['podman'])
7680 | ||
7681 | ||
class Zypper(Packager):
    """Packager implementation for SUSE-family distros driven by zypper."""

    DISTRO_NAMES = [
        'sles',
        'opensuse-tumbleweed',
        'opensuse-leap'
    ]

    def __init__(self, ctx: CephadmContext,
                 stable: Optional[str], version: Optional[str], branch: Optional[str], commit: Optional[str],
                 distro: Optional[str], distro_version: Optional[str]) -> None:
        super(Zypper, self).__init__(ctx, stable=stable, version=version,
                                     branch=branch, commit=commit)
        assert distro is not None
        self.ctx = ctx
        self.tool = 'zypper'
        self.distro = 'opensuse'
        # default to 15.1; a concrete version is only kept for non-tumbleweed
        self.distro_version = '15.1'
        if 'tumbleweed' not in distro and distro_version is not None:
            self.distro_version = distro_version

    def custom_repo(self, **kw: Any) -> str:
        """
        See YumDnf for format explanation.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self) -> str:
        """Path of the zypper repo file this packager manages."""
        return '/etc/zypp/repos.d/ceph.repo'

    def repo_baseurl(self) -> str:
        """Base URL of the ceph RPM repo for the requested release/version."""
        assert self.stable or self.version
        if self.version:
            # Bugfix: this branch previously interpolated self.stable, so a
            # --version request silently built the --release URL instead.
            return '%s/rpm-%s/%s' % (self.ctx.repo_url,
                                     self.version, self.distro)
        else:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url,
                                     self.stable, self.distro)

    def add_repo(self) -> None:
        """Write the ceph repo file for the requested release or dev build."""
        if self.stable or self.version:
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            # dev build: shaman/chacra serve a ready-made repo file
            content = self.query_shaman(self.distro, self.distro_version,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self) -> None:
        """Remove the ceph repo file if present."""
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls: List[str]) -> None:
        """Install the named packages non-interactively."""
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, [self.tool, 'in', '-y'] + ls)

    def install_podman(self) -> None:
        """Install podman from the distro repos."""
        self.install(['podman'])
7778 | ||
7779 | ||
def create_packager(ctx: CephadmContext,
                    stable: Optional[str] = None, version: Optional[str] = None,
                    branch: Optional[str] = None, commit: Optional[str] = None) -> Packager:
    """Instantiate the Packager subclass matching the detected host distro.

    Raises Error when the distro is not one we know how to manage.
    """
    distro, distro_version, distro_codename = get_distro()
    build_args = dict(stable=stable, version=version, branch=branch, commit=commit)
    if distro in YumDnf.DISTRO_NAMES:
        return YumDnf(ctx, distro=distro, distro_version=distro_version,
                      **build_args)
    elif distro in Apt.DISTRO_NAMES:
        return Apt(ctx, distro=distro, distro_version=distro_version,
                   distro_codename=distro_codename, **build_args)
    elif distro in Zypper.DISTRO_NAMES:
        return Zypper(ctx, distro=distro, distro_version=distro_version,
                      **build_args)
    raise Error('Distro %s version %s not supported' % (distro, distro_version))
7798 | ||
7799 | ||
def command_add_repo(ctx: CephadmContext) -> None:
    """Handle 'cephadm add-repo': validate args and install the ceph repo."""
    if ctx.version and ctx.release:
        raise Error('you can specify either --release or --version but not both')
    if not any([ctx.version, ctx.release, ctx.dev, ctx.dev_commit]):
        raise Error('please supply a --release, --version, --dev or --dev-commit argument')
    if ctx.version:
        # a version must unpack into exactly three dotted components
        try:
            (_major, _minor, _patch) = ctx.version.split('.')
        except Exception:
            raise Error('version must be in the form x.y.z (e.g., 15.2.0)')
    if ctx.release:
        # normalize to lowercase so e.g. 'Pacific' and 'pacific' are treated alike
        ctx.release = ctx.release.lower()

    pkg = create_packager(ctx, stable=ctx.release,
                          version=ctx.version,
                          branch=ctx.dev,
                          commit=ctx.dev_commit)
    pkg.validate()
    pkg.add_repo()
    logger.info('Completed adding repo.')
9f95a23c | 7821 | |
f6b5b4d7 | 7822 | |
def command_rm_repo(ctx: CephadmContext) -> None:
    """Handle 'cephadm rm-repo': remove the ceph repo configuration."""
    create_packager(ctx).rm_repo()
7826 | ||
f6b5b4d7 | 7827 | |
def command_install(ctx: CephadmContext) -> None:
    """Handle 'cephadm install': install the requested packages."""
    create_packager(ctx).install(ctx.packages)
9f95a23c TL |
7831 | |
7832 | ################################## | |
7833 | ||
f67539c2 | 7834 | |
f91f0fd5 TL |
def get_ipv4_address(ifname):
    # type: (str) -> str
    """Return the interface's IPv4 address in CIDR form, or '' if it has none.

    :param ifname: network interface name (e.g. 'eth0')
    :returns: 'a.b.c.d/prefixlen', or '' when the interface has no IPv4 address
    """
    def _extract(sock: socket.socket, offset: int) -> str:
        # SIOCGIFADDR/SIOCGIFNETMASK return a sockaddr; bytes 20:24 hold the
        # packed IPv4 address
        return socket.inet_ntop(
            socket.AF_INET,
            fcntl.ioctl(
                sock.fileno(),
                offset,
                struct.pack('256s', bytes(ifname[:15], 'utf-8'))
            )[20:24])

    # bugfix: close the probe socket (it previously leaked the fd)
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        try:
            addr = _extract(s, 35093)  # '0x8915' = SIOCGIFADDR
            dq_mask = _extract(s, 35099)  # 0x891b = SIOCGIFNETMASK
        except OSError:
            # interface does not have an ipv4 address
            return ''

    # count the set bits of the dotted-quad netmask to get the prefix length
    dec_mask = sum([bin(int(i)).count('1')
                    for i in dq_mask.split('.')])
    return '{}/{}'.format(addr, dec_mask)
7857 | ||
7858 | ||
def get_ipv6_address(ifname):
    # type: (str) -> str
    """Return the first IPv6 address/scope of ``ifname`` from procfs, or ''."""
    if not os.path.exists('/proc/net/if_inet6'):
        return ''

    # based on docs @ https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/ch11s04.html
    # field 0 is ipv6, field 2 is scope, last field is the interface name
    for entry in read_file(['/proc/net/if_inet6']).splitlines():
        fields = entry.split()
        if fields[-1] != ifname:
            continue
        raw_addr = fields[0]
        # re-insert the ':' separators, then normalize via the ipaddress module
        grouped = ':'.join(raw_addr[pos:pos + 4] for pos in range(0, len(raw_addr), 4))
        normalized = ipaddress.ip_address(grouped)
        return '{}/{}'.format(str(normalized), int('0x{}'.format(fields[2]), 16))
    return ''
7877 | ||
7878 | ||
def bytes_to_human(num, mode='decimal'):
    # type: (float, str) -> str
    """Convert a byte count into its human-readable form.

    :param num: number, in bytes, to convert
    :param mode: 'decimal' (powers of 1000, default) or 'binary' (powers of 1024)
    :returns: string representing the bytes value in a more readable format
    """
    if mode == 'binary':
        units = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
        step = 1024.0
        top_unit = 'YiB'
    else:
        units = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
        step = 1000.0
        top_unit = 'YB'

    for suffix in units:
        if abs(num) < step:
            return '%3.1f%s' % (num, suffix)
        num /= step
    # fell through every unit: report in the largest one
    return '%.1f%s' % (num, top_unit)
f91f0fd5 TL |
7901 | |
7902 | ||
def read_file(path_list, file_name=''):
    # type: (List[str], str) -> str
    """Return the stripped content of the first file found within `path_list`.

    :param path_list: list of file paths to search (or directories, when
        file_name is given)
    :param file_name: optional file_name to be applied to a file path
    :returns: content of the file or 'Unknown'
    """
    for base in path_list:
        target = os.path.join(base, file_name) if file_name else base
        if not os.path.exists(target):
            continue
        with open(target, 'r') as fh:
            try:
                return fh.read().strip()
            except OSError:
                # sysfs may populate the file, but for devices like
                # virtio reads can fail
                return 'Unknown'
    return 'Unknown'
f91f0fd5 TL |
7927 | |
7928 | ################################## | |
f67539c2 TL |
7929 | |
7930 | ||
f91f0fd5 TL |
7931 | class HostFacts(): |
7932 | _dmi_path_list = ['/sys/class/dmi/id'] | |
7933 | _nic_path_list = ['/sys/class/net'] | |
f91f0fd5 TL |
7934 | _apparmor_path_list = ['/etc/apparmor'] |
7935 | _disk_vendor_workarounds = { | |
f67539c2 | 7936 | '0x1af4': 'Virtio Block Device' |
f91f0fd5 | 7937 | } |
a4b75251 | 7938 | _excluded_block_devices = ('sr', 'zram', 'dm-') |
f91f0fd5 | 7939 | |
f67539c2 TL |
7940 | def __init__(self, ctx: CephadmContext): |
7941 | self.ctx: CephadmContext = ctx | |
7942 | self.cpu_model: str = 'Unknown' | |
7943 | self.cpu_count: int = 0 | |
7944 | self.cpu_cores: int = 0 | |
7945 | self.cpu_threads: int = 0 | |
7946 | self.interfaces: Dict[str, Any] = {} | |
f91f0fd5 | 7947 | |
f67539c2 | 7948 | self._meminfo: List[str] = read_file(['/proc/meminfo']).splitlines() |
f91f0fd5 TL |
7949 | self._get_cpuinfo() |
7950 | self._process_nics() | |
f67539c2 TL |
7951 | self.arch: str = platform.processor() |
7952 | self.kernel: str = platform.release() | |
f91f0fd5 TL |
7953 | |
7954 | def _get_cpuinfo(self): | |
7955 | # type: () -> None | |
7956 | """Determine cpu information via /proc/cpuinfo""" | |
7957 | raw = read_file(['/proc/cpuinfo']) | |
7958 | output = raw.splitlines() | |
7959 | cpu_set = set() | |
7960 | ||
7961 | for line in output: | |
f67539c2 TL |
7962 | field = [f.strip() for f in line.split(':')] |
7963 | if 'model name' in line: | |
f91f0fd5 | 7964 | self.cpu_model = field[1] |
f67539c2 | 7965 | if 'physical id' in line: |
f91f0fd5 | 7966 | cpu_set.add(field[1]) |
f67539c2 | 7967 | if 'siblings' in line: |
f91f0fd5 | 7968 | self.cpu_threads = int(field[1].strip()) |
f67539c2 | 7969 | if 'cpu cores' in line: |
f91f0fd5 TL |
7970 | self.cpu_cores = int(field[1].strip()) |
7971 | pass | |
7972 | self.cpu_count = len(cpu_set) | |
7973 | ||
7974 | def _get_block_devs(self): | |
7975 | # type: () -> List[str] | |
7976 | """Determine the list of block devices by looking at /sys/block""" | |
7977 | return [dev for dev in os.listdir('/sys/block') | |
a4b75251 | 7978 | if not dev.startswith(HostFacts._excluded_block_devices)] |
f91f0fd5 TL |
7979 | |
7980 | def _get_devs_by_type(self, rota='0'): | |
7981 | # type: (str) -> List[str] | |
7982 | """Filter block devices by a given rotational attribute (0=flash, 1=spinner)""" | |
7983 | devs = list() | |
7984 | for blk_dev in self._get_block_devs(): | |
7985 | rot_path = '/sys/block/{}/queue/rotational'.format(blk_dev) | |
7986 | rot_value = read_file([rot_path]) | |
7987 | if rot_value == rota: | |
7988 | devs.append(blk_dev) | |
7989 | return devs | |
7990 | ||
7991 | @property | |
7992 | def operating_system(self): | |
7993 | # type: () -> str | |
7994 | """Determine OS version""" | |
7995 | raw_info = read_file(['/etc/os-release']) | |
7996 | os_release = raw_info.splitlines() | |
7997 | rel_str = 'Unknown' | |
7998 | rel_dict = dict() | |
7999 | ||
8000 | for line in os_release: | |
f67539c2 | 8001 | if '=' in line: |
f91f0fd5 TL |
8002 | var_name, var_value = line.split('=') |
8003 | rel_dict[var_name] = var_value.strip('"') | |
8004 | ||
8005 | # Would normally use PRETTY_NAME, but NAME and VERSION are more | |
8006 | # consistent | |
f67539c2 TL |
8007 | if all(_v in rel_dict for _v in ['NAME', 'VERSION']): |
8008 | rel_str = '{} {}'.format(rel_dict['NAME'], rel_dict['VERSION']) | |
f91f0fd5 TL |
8009 | return rel_str |
8010 | ||
8011 | @property | |
8012 | def hostname(self): | |
8013 | # type: () -> str | |
8014 | """Return the hostname""" | |
8015 | return platform.node() | |
8016 | ||
8017 | @property | |
8018 | def subscribed(self): | |
8019 | # type: () -> str | |
8020 | """Highlevel check to see if the host is subscribed to receive updates/support""" | |
8021 | def _red_hat(): | |
8022 | # type: () -> str | |
8023 | # RHEL 7 and RHEL 8 | |
8024 | entitlements_dir = '/etc/pki/entitlement' | |
8025 | if os.path.exists(entitlements_dir): | |
8026 | pems = glob('{}/*.pem'.format(entitlements_dir)) | |
8027 | if len(pems) >= 2: | |
f67539c2 | 8028 | return 'Yes' |
f91f0fd5 | 8029 | |
f67539c2 | 8030 | return 'No' |
f91f0fd5 TL |
8031 | |
8032 | os_name = self.operating_system | |
f67539c2 | 8033 | if os_name.upper().startswith('RED HAT'): |
f91f0fd5 TL |
8034 | return _red_hat() |
8035 | ||
f67539c2 | 8036 | return 'Unknown' |
f91f0fd5 TL |
8037 | |
8038 | @property | |
8039 | def hdd_count(self): | |
8040 | # type: () -> int | |
8041 | """Return a count of HDDs (spinners)""" | |
8042 | return len(self._get_devs_by_type(rota='1')) | |
8043 | ||
8044 | def _get_capacity(self, dev): | |
8045 | # type: (str) -> int | |
8046 | """Determine the size of a given device""" | |
8047 | size_path = os.path.join('/sys/block', dev, 'size') | |
8048 | size_blocks = int(read_file([size_path])) | |
8049 | blk_path = os.path.join('/sys/block', dev, 'queue', 'logical_block_size') | |
8050 | blk_count = int(read_file([blk_path])) | |
8051 | return size_blocks * blk_count | |
8052 | ||
8053 | def _get_capacity_by_type(self, rota='0'): | |
8054 | # type: (str) -> int | |
8055 | """Return the total capacity of a category of device (flash or hdd)""" | |
8056 | devs = self._get_devs_by_type(rota=rota) | |
8057 | capacity = 0 | |
8058 | for dev in devs: | |
8059 | capacity += self._get_capacity(dev) | |
8060 | return capacity | |
8061 | ||
8062 | def _dev_list(self, dev_list): | |
8063 | # type: (List[str]) -> List[Dict[str, object]] | |
8064 | """Return a 'pretty' name list for each device in the `dev_list`""" | |
8065 | disk_list = list() | |
8066 | ||
8067 | for dev in dev_list: | |
8068 | disk_model = read_file(['/sys/block/{}/device/model'.format(dev)]).strip() | |
8069 | disk_rev = read_file(['/sys/block/{}/device/rev'.format(dev)]).strip() | |
8070 | disk_wwid = read_file(['/sys/block/{}/device/wwid'.format(dev)]).strip() | |
8071 | vendor = read_file(['/sys/block/{}/device/vendor'.format(dev)]).strip() | |
8072 | disk_vendor = HostFacts._disk_vendor_workarounds.get(vendor, vendor) | |
8073 | disk_size_bytes = self._get_capacity(dev) | |
8074 | disk_list.append({ | |
f67539c2 TL |
8075 | 'description': '{} {} ({})'.format(disk_vendor, disk_model, bytes_to_human(disk_size_bytes)), |
8076 | 'vendor': disk_vendor, | |
8077 | 'model': disk_model, | |
8078 | 'rev': disk_rev, | |
8079 | 'wwid': disk_wwid, | |
8080 | 'dev_name': dev, | |
8081 | 'disk_size_bytes': disk_size_bytes, | |
8082 | }) | |
f91f0fd5 TL |
8083 | return disk_list |
8084 | ||
8085 | @property | |
8086 | def hdd_list(self): | |
8087 | # type: () -> List[Dict[str, object]] | |
8088 | """Return a list of devices that are HDDs (spinners)""" | |
8089 | devs = self._get_devs_by_type(rota='1') | |
8090 | return self._dev_list(devs) | |
8091 | ||
8092 | @property | |
8093 | def flash_list(self): | |
8094 | # type: () -> List[Dict[str, object]] | |
8095 | """Return a list of devices that are flash based (SSD, NVMe)""" | |
8096 | devs = self._get_devs_by_type(rota='0') | |
8097 | return self._dev_list(devs) | |
8098 | ||
8099 | @property | |
8100 | def hdd_capacity_bytes(self): | |
8101 | # type: () -> int | |
8102 | """Return the total capacity for all HDD devices (bytes)""" | |
8103 | return self._get_capacity_by_type(rota='1') | |
8104 | ||
8105 | @property | |
8106 | def hdd_capacity(self): | |
8107 | # type: () -> str | |
8108 | """Return the total capacity for all HDD devices (human readable format)""" | |
8109 | return bytes_to_human(self.hdd_capacity_bytes) | |
8110 | ||
8111 | @property | |
8112 | def cpu_load(self): | |
8113 | # type: () -> Dict[str, float] | |
8114 | """Return the cpu load average data for the host""" | |
8115 | raw = read_file(['/proc/loadavg']).strip() | |
8116 | data = raw.split() | |
8117 | return { | |
f67539c2 TL |
8118 | '1min': float(data[0]), |
8119 | '5min': float(data[1]), | |
8120 | '15min': float(data[2]), | |
f91f0fd5 TL |
8121 | } |
8122 | ||
8123 | @property | |
8124 | def flash_count(self): | |
8125 | # type: () -> int | |
8126 | """Return the number of flash devices in the system (SSD, NVMe)""" | |
8127 | return len(self._get_devs_by_type(rota='0')) | |
8128 | ||
8129 | @property | |
8130 | def flash_capacity_bytes(self): | |
8131 | # type: () -> int | |
8132 | """Return the total capacity for all flash devices (bytes)""" | |
8133 | return self._get_capacity_by_type(rota='0') | |
8134 | ||
8135 | @property | |
8136 | def flash_capacity(self): | |
8137 | # type: () -> str | |
8138 | """Return the total capacity for all Flash devices (human readable format)""" | |
8139 | return bytes_to_human(self.flash_capacity_bytes) | |
8140 | ||
    def _process_nics(self):
        # type: () -> None
        """Look at the NIC devices and extract network related metadata.

        Walks every interface under each path in HostFacts._nic_path_list,
        classifies it (bridge / bonding / hardware type), and populates
        ``self.interfaces`` with per-interface metadata (mtu, state, speed,
        driver, addresses, and lower/upper device relationships).
        Loopback interfaces are skipped.
        """
        # ARPHRD_* numeric type -> friendly name, from
        # https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
        hw_lookup = {
            '1': 'ethernet',
            '32': 'infiniband',
            '772': 'loopback',
        }

        for nic_path in HostFacts._nic_path_list:
            if not os.path.exists(nic_path):
                continue
            for iface in os.listdir(nic_path):

                # bridge/bonding are detected by their marker sub-directories;
                # otherwise fall back to the numeric hardware type file
                if os.path.exists(os.path.join(nic_path, iface, 'bridge')):
                    nic_type = 'bridge'
                elif os.path.exists(os.path.join(nic_path, iface, 'bonding')):
                    nic_type = 'bonding'
                else:
                    nic_type = hw_lookup.get(read_file([os.path.join(nic_path, iface, 'type')]), 'Unknown')

                if nic_type == 'loopback':  # skip loopback devices
                    continue

                # related devices, e.g. a bond's slaves (lower_*) or the bond a
                # slave belongs to (upper_*)
                lower_devs_list = [os.path.basename(link.replace('lower_', '')) for link in glob(os.path.join(nic_path, iface, 'lower_*'))]
                upper_devs_list = [os.path.basename(link.replace('upper_', '')) for link in glob(os.path.join(nic_path, iface, 'upper_*'))]

                try:
                    mtu = int(read_file([os.path.join(nic_path, iface, 'mtu')]))
                except ValueError:
                    # read failed / non-numeric content -> default to 0
                    mtu = 0

                operstate = read_file([os.path.join(nic_path, iface, 'operstate')])
                try:
                    speed = int(read_file([os.path.join(nic_path, iface, 'speed')]))
                except (OSError, ValueError):
                    # OSError : device doesn't support the ethtool get_link_ksettings
                    # ValueError : raised when the read fails, and returns Unknown
                    #
                    # Either way, we show a -1 when speed isn't available
                    speed = -1

                # a 'device' symlink marks a physical NIC backed by a driver
                dev_link = os.path.join(nic_path, iface, 'device')
                if os.path.exists(dev_link):
                    iftype = 'physical'
                    driver_path = os.path.join(dev_link, 'driver')
                    if os.path.exists(driver_path):
                        # driver name is the basename of the resolved symlink
                        driver = os.path.basename(os.path.realpath(driver_path))
                    else:
                        driver = 'Unknown'

                else:
                    iftype = 'logical'
                    driver = ''

                self.interfaces[iface] = {
                    'mtu': mtu,
                    'upper_devs_list': upper_devs_list,
                    'lower_devs_list': lower_devs_list,
                    'operstate': operstate,
                    'iftype': iftype,
                    'nic_type': nic_type,
                    'driver': driver,
                    'speed': speed,
                    'ipv4_address': get_ipv4_address(iface),
                    'ipv6_address': get_ipv6_address(iface),
                }
8209 | ||
8210 | @property | |
8211 | def nic_count(self): | |
8212 | # type: () -> int | |
8213 | """Return a total count of all physical NICs detected in the host""" | |
8214 | phys_devs = [] | |
8215 | for iface in self.interfaces: | |
f67539c2 | 8216 | if self.interfaces[iface]['iftype'] == 'physical': |
f91f0fd5 TL |
8217 | phys_devs.append(iface) |
8218 | return len(phys_devs) | |
8219 | ||
f91f0fd5 TL |
8220 | def _get_mem_data(self, field_name): |
8221 | # type: (str) -> int | |
8222 | for line in self._meminfo: | |
8223 | if line.startswith(field_name): | |
8224 | _d = line.split() | |
8225 | return int(_d[1]) | |
8226 | return 0 | |
8227 | ||
8228 | @property | |
8229 | def memory_total_kb(self): | |
8230 | # type: () -> int | |
8231 | """Determine the memory installed (kb)""" | |
8232 | return self._get_mem_data('MemTotal') | |
8233 | ||
8234 | @property | |
8235 | def memory_free_kb(self): | |
8236 | # type: () -> int | |
8237 | """Determine the memory free (not cache, immediately usable)""" | |
8238 | return self._get_mem_data('MemFree') | |
8239 | ||
8240 | @property | |
8241 | def memory_available_kb(self): | |
8242 | # type: () -> int | |
8243 | """Determine the memory available to new applications without swapping""" | |
8244 | return self._get_mem_data('MemAvailable') | |
8245 | ||
8246 | @property | |
8247 | def vendor(self): | |
8248 | # type: () -> str | |
8249 | """Determine server vendor from DMI data in sysfs""" | |
f67539c2 | 8250 | return read_file(HostFacts._dmi_path_list, 'sys_vendor') |
f91f0fd5 TL |
8251 | |
8252 | @property | |
8253 | def model(self): | |
8254 | # type: () -> str | |
8255 | """Determine server model information from DMI data in sysfs""" | |
f67539c2 TL |
8256 | family = read_file(HostFacts._dmi_path_list, 'product_family') |
8257 | product = read_file(HostFacts._dmi_path_list, 'product_name') | |
f91f0fd5 | 8258 | if family == 'Unknown' and product: |
f67539c2 | 8259 | return '{}'.format(product) |
f91f0fd5 | 8260 | |
f67539c2 | 8261 | return '{} ({})'.format(family, product) |
f91f0fd5 TL |
8262 | |
8263 | @property | |
8264 | def bios_version(self): | |
8265 | # type: () -> str | |
8266 | """Determine server BIOS version from DMI data in sysfs""" | |
f67539c2 | 8267 | return read_file(HostFacts._dmi_path_list, 'bios_version') |
f91f0fd5 TL |
8268 | |
8269 | @property | |
8270 | def bios_date(self): | |
8271 | # type: () -> str | |
8272 | """Determine server BIOS date from DMI data in sysfs""" | |
f67539c2 | 8273 | return read_file(HostFacts._dmi_path_list, 'bios_date') |
f91f0fd5 TL |
8274 | |
8275 | @property | |
8276 | def timestamp(self): | |
8277 | # type: () -> float | |
8278 | """Return the current time as Epoch seconds""" | |
8279 | return time.time() | |
8280 | ||
8281 | @property | |
8282 | def system_uptime(self): | |
8283 | # type: () -> float | |
8284 | """Return the system uptime (in secs)""" | |
8285 | raw_time = read_file(['/proc/uptime']) | |
8286 | up_secs, _ = raw_time.split() | |
8287 | return float(up_secs) | |
8288 | ||
f67539c2 | 8289 | @property |
f91f0fd5 TL |
8290 | def kernel_security(self): |
8291 | # type: () -> Dict[str, str] | |
8292 | """Determine the security features enabled in the kernel - SELinux, AppArmor""" | |
f67539c2 | 8293 | def _fetch_selinux() -> Dict[str, str]: |
522d829b | 8294 | """Get the selinux status""" |
f91f0fd5 | 8295 | security = {} |
522d829b TL |
8296 | try: |
8297 | out, err, code = call(self.ctx, ['sestatus'], | |
8298 | verbosity=CallVerbosity.DEBUG) | |
8299 | security['type'] = 'SELinux' | |
8300 | status, mode, policy = '', '', '' | |
8301 | for line in out.split('\n'): | |
8302 | if line.startswith('SELinux status:'): | |
8303 | k, v = line.split(':') | |
8304 | status = v.strip() | |
8305 | elif line.startswith('Current mode:'): | |
8306 | k, v = line.split(':') | |
8307 | mode = v.strip() | |
8308 | elif line.startswith('Loaded policy name:'): | |
8309 | k, v = line.split(':') | |
8310 | policy = v.strip() | |
8311 | if status == 'disabled': | |
8312 | security['description'] = 'SELinux: Disabled' | |
8313 | else: | |
8314 | security['description'] = 'SELinux: Enabled({}, {})'.format(mode, policy) | |
8315 | except Exception as e: | |
8316 | logger.info('unable to get selinux status: %s' % e) | |
8317 | return security | |
f91f0fd5 | 8318 | |
f67539c2 | 8319 | def _fetch_apparmor() -> Dict[str, str]: |
f91f0fd5 TL |
8320 | """Read the apparmor profiles directly, returning an overview of AppArmor status""" |
8321 | security = {} | |
8322 | for apparmor_path in HostFacts._apparmor_path_list: | |
8323 | if os.path.exists(apparmor_path): | |
f67539c2 TL |
8324 | security['type'] = 'AppArmor' |
8325 | security['description'] = 'AppArmor: Enabled' | |
f91f0fd5 TL |
8326 | try: |
8327 | profiles = read_file(['/sys/kernel/security/apparmor/profiles']) | |
b3b6e05e TL |
8328 | if len(profiles) == 0: |
8329 | return {} | |
f91f0fd5 TL |
8330 | except OSError: |
8331 | pass | |
8332 | else: | |
8333 | summary = {} # type: Dict[str, int] | |
8334 | for line in profiles.split('\n'): | |
8335 | item, mode = line.split(' ') | |
f67539c2 | 8336 | mode = mode.strip('()') |
f91f0fd5 TL |
8337 | if mode in summary: |
8338 | summary[mode] += 1 | |
8339 | else: | |
8340 | summary[mode] = 0 | |
f67539c2 TL |
8341 | summary_str = ','.join(['{} {}'.format(v, k) for k, v in summary.items()]) |
8342 | security = {**security, **summary} # type: ignore | |
8343 | security['description'] += '({})'.format(summary_str) | |
f91f0fd5 TL |
8344 | |
8345 | return security | |
f67539c2 | 8346 | return {} |
f91f0fd5 | 8347 | |
f67539c2 | 8348 | ret = {} |
f91f0fd5 TL |
8349 | if os.path.exists('/sys/kernel/security/lsm'): |
8350 | lsm = read_file(['/sys/kernel/security/lsm']).strip() | |
8351 | if 'selinux' in lsm: | |
f67539c2 | 8352 | ret = _fetch_selinux() |
f91f0fd5 | 8353 | elif 'apparmor' in lsm: |
f67539c2 | 8354 | ret = _fetch_apparmor() |
f91f0fd5 TL |
8355 | else: |
8356 | return { | |
f67539c2 TL |
8357 | 'type': 'Unknown', |
8358 | 'description': 'Linux Security Module framework is active, but is not using SELinux or AppArmor' | |
f91f0fd5 TL |
8359 | } |
8360 | ||
f67539c2 TL |
8361 | if ret: |
8362 | return ret | |
8363 | ||
f91f0fd5 | 8364 | return { |
f67539c2 TL |
8365 | 'type': 'None', |
8366 | 'description': 'Linux Security Module framework is not available' | |
f91f0fd5 TL |
8367 | } |
8368 | ||
f67539c2 | 8369 | @property |
522d829b | 8370 | def selinux_enabled(self) -> bool: |
f67539c2 TL |
8371 | return (self.kernel_security['type'] == 'SELinux') and \ |
8372 | (self.kernel_security['description'] != 'SELinux: Disabled') | |
8373 | ||
adb31ebb TL |
8374 | @property |
8375 | def kernel_parameters(self): | |
8376 | # type: () -> Dict[str, str] | |
8377 | """Get kernel parameters required/used in Ceph clusters""" | |
8378 | ||
8379 | k_param = {} | |
f67539c2 | 8380 | out, _, _ = call_throws(self.ctx, ['sysctl', '-a'], verbosity=CallVerbosity.SILENT) |
adb31ebb TL |
8381 | if out: |
8382 | param_list = out.split('\n') | |
f67539c2 | 8383 | param_dict = {param.split(' = ')[0]: param.split(' = ')[-1] for param in param_list} |
adb31ebb TL |
8384 | |
8385 | # return only desired parameters | |
8386 | if 'net.ipv4.ip_nonlocal_bind' in param_dict: | |
8387 | k_param['net.ipv4.ip_nonlocal_bind'] = param_dict['net.ipv4.ip_nonlocal_bind'] | |
8388 | ||
8389 | return k_param | |
8390 | ||
522d829b TL |
8391 | @staticmethod |
8392 | def _process_net_data(tcp_file: str, protocol: str = 'tcp') -> List[int]: | |
8393 | listening_ports = [] | |
8394 | # Connections state documentation | |
8395 | # tcp - https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/net/tcp_states.h | |
8396 | # udp - uses 07 (TCP_CLOSE or UNCONN, since udp is stateless. test with netcat -ul <port>) | |
8397 | listening_state = { | |
8398 | 'tcp': '0A', | |
8399 | 'udp': '07' | |
8400 | } | |
8401 | ||
8402 | if protocol not in listening_state.keys(): | |
8403 | return [] | |
8404 | ||
8405 | if os.path.exists(tcp_file): | |
8406 | with open(tcp_file) as f: | |
8407 | tcp_data = f.readlines()[1:] | |
8408 | ||
8409 | for con in tcp_data: | |
8410 | con_info = con.strip().split() | |
8411 | if con_info[3] == listening_state[protocol]: | |
8412 | local_port = int(con_info[1].split(':')[1], 16) | |
8413 | listening_ports.append(local_port) | |
8414 | ||
8415 | return listening_ports | |
8416 | ||
8417 | @property | |
8418 | def tcp_ports_used(self) -> List[int]: | |
8419 | return HostFacts._process_net_data('/proc/net/tcp') | |
8420 | ||
8421 | @property | |
8422 | def tcp6_ports_used(self) -> List[int]: | |
8423 | return HostFacts._process_net_data('/proc/net/tcp6') | |
8424 | ||
8425 | @property | |
8426 | def udp_ports_used(self) -> List[int]: | |
8427 | return HostFacts._process_net_data('/proc/net/udp', 'udp') | |
8428 | ||
8429 | @property | |
8430 | def udp6_ports_used(self) -> List[int]: | |
8431 | return HostFacts._process_net_data('/proc/net/udp6', 'udp') | |
8432 | ||
f91f0fd5 TL |
8433 | def dump(self): |
8434 | # type: () -> str | |
8435 | """Return the attributes of this HostFacts object as json""" | |
f67539c2 TL |
8436 | data = { |
8437 | k: getattr(self, k) for k in dir(self) | |
8438 | if not k.startswith('_') | |
8439 | and isinstance(getattr(self, k), (float, int, str, list, dict, tuple)) | |
f91f0fd5 TL |
8440 | } |
8441 | return json.dumps(data, indent=2, sort_keys=True) | |
8442 | ||
8443 | ################################## | |
8444 | ||
f67539c2 | 8445 | |
def command_gather_facts(ctx: CephadmContext) -> None:
    """Print host-related metadata (as JSON) for the caller."""
    facts = HostFacts(ctx)
    print(facts.dump())
8450 | ||
f67539c2 TL |
8451 | |
8452 | ################################## | |
8453 | ||
8454 | ||
def systemd_target_state(ctx: CephadmContext, target_name: str, subsystem: str = 'ceph') -> bool:
    """Return True when *target_name* is wanted by <subsystem>.target.

    Checks for the symlink systemd creates under
    <unit_dir>/<subsystem>.target.wants/ when a unit is enabled.
    """
    # TODO: UNITTEST
    wants_dir = os.path.join(ctx.unit_dir, f'{subsystem}.target.wants')
    return os.path.exists(os.path.join(wants_dir, target_name))
8464 | ||
8465 | ||
a4b75251 TL |
def target_exists(ctx: CephadmContext) -> bool:
    """Return True when the global ceph.target unit file exists in unit_dir."""
    target_path = os.path.join(ctx.unit_dir, 'ceph.target')
    return os.path.exists(target_path)
8468 | ||
8469 | ||
@infer_fsid
def command_maintenance(ctx: CephadmContext) -> str:
    """Enter or exit host maintenance mode for a cluster.

    'enter' disables and stops the cluster's ceph-<fsid>.target so no daemons
    run; anything else exits maintenance by enabling and starting it again.
    Returns a human-readable status string prefixed with
    'success'/'failed'/'skipped' (consumed by the orchestrator).
    """
    if not ctx.fsid:
        raise Error('failed - must pass --fsid to specify cluster')

    target = f'ceph-{ctx.fsid}.target'

    if ctx.maintenance_action.lower() == 'enter':
        logger.info('Requested to place host into maintenance')
        if systemd_target_state(ctx, target):
            _out, _err, code = call(ctx,
                                    ['systemctl', 'disable', target],
                                    verbosity=CallVerbosity.DEBUG)
            if code:
                logger.error(f'Failed to disable the {target} target')
                return 'failed - to disable the target'
            else:
                # stopping a target waits by default
                _out, _err, code = call(ctx,
                                        ['systemctl', 'stop', target],
                                        verbosity=CallVerbosity.DEBUG)
                if code:
                    logger.error(f'Failed to stop the {target} target')
                    # NOTE(review): this is the *stop* failure path but the
                    # returned message says 'disable' — confirm before changing,
                    # callers may match on the exact string
                    return 'failed - to disable the target'
                else:
                    return f'success - systemd target {target} disabled'

        else:
            # target not in the .wants dir -> already disabled, nothing to do
            return 'skipped - target already disabled'

    else:
        logger.info('Requested to exit maintenance state')
        # if we've never deployed a daemon on this host there will be no systemd
        # target to disable so attempting a disable will fail. We still need to
        # return success here or host will be permanently stuck in maintenance mode
        # as no daemons can be deployed so no systemd target will ever exist to disable.
        if not target_exists(ctx):
            return 'skipped - systemd target not present on this host. Host removed from maintenance mode.'
        # exit maintenance request
        if not systemd_target_state(ctx, target):
            _out, _err, code = call(ctx,
                                    ['systemctl', 'enable', target],
                                    verbosity=CallVerbosity.DEBUG)
            if code:
                logger.error(f'Failed to enable the {target} target')
                return 'failed - unable to enable the target'
            else:
                # starting a target waits by default
                _out, _err, code = call(ctx,
                                        ['systemctl', 'start', target],
                                        verbosity=CallVerbosity.DEBUG)
                if code:
                    logger.error(f'Failed to start the {target} target')
                    return 'failed - unable to start the target'
                else:
                    return f'success - systemd target {target} enabled and started'
    # reached when exiting maintenance and the target was already enabled
    return f'success - systemd target {target} enabled and started'
f91f0fd5 TL |
8527 | |
8528 | ################################## | |
8529 | ||
f6b5b4d7 | 8530 | |
9f95a23c TL |
8531 | def _get_parser(): |
8532 | # type: () -> argparse.ArgumentParser | |
8533 | parser = argparse.ArgumentParser( | |
8534 | description='Bootstrap Ceph daemons with systemd and containers.', | |
8535 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) | |
8536 | parser.add_argument( | |
8537 | '--image', | |
8538 | help='container image. Can also be set via the "CEPHADM_IMAGE" ' | |
8539 | 'env var') | |
8540 | parser.add_argument( | |
8541 | '--docker', | |
8542 | action='store_true', | |
8543 | help='use docker instead of podman') | |
8544 | parser.add_argument( | |
8545 | '--data-dir', | |
8546 | default=DATA_DIR, | |
8547 | help='base directory for daemon data') | |
8548 | parser.add_argument( | |
8549 | '--log-dir', | |
8550 | default=LOG_DIR, | |
8551 | help='base directory for daemon logs') | |
8552 | parser.add_argument( | |
8553 | '--logrotate-dir', | |
8554 | default=LOGROTATE_DIR, | |
8555 | help='location of logrotate configuration files') | |
b3b6e05e TL |
8556 | parser.add_argument( |
8557 | '--sysctl-dir', | |
8558 | default=SYSCTL_DIR, | |
8559 | help='location of sysctl configuration files') | |
9f95a23c TL |
8560 | parser.add_argument( |
8561 | '--unit-dir', | |
8562 | default=UNIT_DIR, | |
8563 | help='base directory for systemd units') | |
8564 | parser.add_argument( | |
8565 | '--verbose', '-v', | |
8566 | action='store_true', | |
8567 | help='Show debug-level log messages') | |
8568 | parser.add_argument( | |
8569 | '--timeout', | |
8570 | type=int, | |
8571 | default=DEFAULT_TIMEOUT, | |
8572 | help='timeout in seconds') | |
8573 | parser.add_argument( | |
8574 | '--retry', | |
8575 | type=int, | |
8576 | default=DEFAULT_RETRY, | |
8577 | help='max number of retries') | |
e306af50 TL |
8578 | parser.add_argument( |
8579 | '--env', '-e', | |
8580 | action='append', | |
8581 | default=[], | |
8582 | help='set environment variable') | |
f67539c2 TL |
8583 | parser.add_argument( |
8584 | '--no-container-init', | |
8585 | action='store_true', | |
8586 | default=not CONTAINER_INIT, | |
8587 | help='Do not run podman/docker with `--init`') | |
9f95a23c TL |
8588 | |
8589 | subparsers = parser.add_subparsers(help='sub-command') | |
8590 | ||
8591 | parser_version = subparsers.add_parser( | |
8592 | 'version', help='get ceph version from container') | |
8593 | parser_version.set_defaults(func=command_version) | |
8594 | ||
8595 | parser_pull = subparsers.add_parser( | |
33c7a0ef | 8596 | 'pull', help='pull the default container image') |
9f95a23c | 8597 | parser_pull.set_defaults(func=command_pull) |
a4b75251 TL |
8598 | parser_pull.add_argument( |
8599 | '--insecure', | |
8600 | action='store_true', | |
8601 | help=argparse.SUPPRESS, | |
8602 | ) | |
9f95a23c TL |
8603 | |
8604 | parser_inspect_image = subparsers.add_parser( | |
8605 | 'inspect-image', help='inspect local container image') | |
8606 | parser_inspect_image.set_defaults(func=command_inspect_image) | |
8607 | ||
8608 | parser_ls = subparsers.add_parser( | |
8609 | 'ls', help='list daemon instances on this host') | |
8610 | parser_ls.set_defaults(func=command_ls) | |
8611 | parser_ls.add_argument( | |
8612 | '--no-detail', | |
8613 | action='store_true', | |
8614 | help='Do not include daemon status') | |
8615 | parser_ls.add_argument( | |
8616 | '--legacy-dir', | |
8617 | default='/', | |
8618 | help='base directory for legacy daemon data') | |
8619 | ||
8620 | parser_list_networks = subparsers.add_parser( | |
8621 | 'list-networks', help='list IP networks') | |
8622 | parser_list_networks.set_defaults(func=command_list_networks) | |
8623 | ||
8624 | parser_adopt = subparsers.add_parser( | |
8625 | 'adopt', help='adopt daemon deployed with a different tool') | |
8626 | parser_adopt.set_defaults(func=command_adopt) | |
8627 | parser_adopt.add_argument( | |
8628 | '--name', '-n', | |
8629 | required=True, | |
8630 | help='daemon name (type.id)') | |
8631 | parser_adopt.add_argument( | |
8632 | '--style', | |
8633 | required=True, | |
8634 | help='deployment style (legacy, ...)') | |
8635 | parser_adopt.add_argument( | |
8636 | '--cluster', | |
8637 | default='ceph', | |
8638 | help='cluster name') | |
8639 | parser_adopt.add_argument( | |
8640 | '--legacy-dir', | |
8641 | default='/', | |
8642 | help='base directory for legacy daemon data') | |
8643 | parser_adopt.add_argument( | |
8644 | '--config-json', | |
8645 | help='Additional configuration information in JSON format') | |
8646 | parser_adopt.add_argument( | |
8647 | '--skip-firewalld', | |
8648 | action='store_true', | |
8649 | help='Do not configure firewalld') | |
8650 | parser_adopt.add_argument( | |
8651 | '--skip-pull', | |
8652 | action='store_true', | |
33c7a0ef | 8653 | help='do not pull the default image before adopting') |
1911f103 TL |
8654 | parser_adopt.add_argument( |
8655 | '--force-start', | |
8656 | action='store_true', | |
f67539c2 | 8657 | help='start newly adoped daemon, even if it was not running previously') |
f91f0fd5 TL |
8658 | parser_adopt.add_argument( |
8659 | '--container-init', | |
8660 | action='store_true', | |
f67539c2 TL |
8661 | default=CONTAINER_INIT, |
8662 | help=argparse.SUPPRESS) | |
9f95a23c TL |
8663 | |
8664 | parser_rm_daemon = subparsers.add_parser( | |
8665 | 'rm-daemon', help='remove daemon instance') | |
8666 | parser_rm_daemon.set_defaults(func=command_rm_daemon) | |
8667 | parser_rm_daemon.add_argument( | |
8668 | '--name', '-n', | |
8669 | required=True, | |
8670 | action=CustomValidation, | |
8671 | help='daemon name (type.id)') | |
33c7a0ef TL |
8672 | parser_rm_daemon.add_argument( |
8673 | '--tcp-ports', | |
8674 | help='List of tcp ports to close in the host firewall') | |
9f95a23c TL |
8675 | parser_rm_daemon.add_argument( |
8676 | '--fsid', | |
8677 | required=True, | |
8678 | help='cluster FSID') | |
8679 | parser_rm_daemon.add_argument( | |
8680 | '--force', | |
8681 | action='store_true', | |
8682 | help='proceed, even though this may destroy valuable data') | |
8683 | parser_rm_daemon.add_argument( | |
8684 | '--force-delete-data', | |
8685 | action='store_true', | |
8686 | help='delete valuable daemon data instead of making a backup') | |
8687 | ||
8688 | parser_rm_cluster = subparsers.add_parser( | |
8689 | 'rm-cluster', help='remove all daemons for a cluster') | |
8690 | parser_rm_cluster.set_defaults(func=command_rm_cluster) | |
8691 | parser_rm_cluster.add_argument( | |
8692 | '--fsid', | |
8693 | required=True, | |
8694 | help='cluster FSID') | |
8695 | parser_rm_cluster.add_argument( | |
8696 | '--force', | |
8697 | action='store_true', | |
8698 | help='proceed, even though this may destroy valuable data') | |
f67539c2 TL |
8699 | parser_rm_cluster.add_argument( |
8700 | '--keep-logs', | |
8701 | action='store_true', | |
8702 | help='do not remove log files') | |
b3b6e05e TL |
8703 | parser_rm_cluster.add_argument( |
8704 | '--zap-osds', | |
8705 | action='store_true', | |
8706 | help='zap OSD devices for this cluster') | |
9f95a23c TL |
8707 | |
8708 | parser_run = subparsers.add_parser( | |
8709 | 'run', help='run a ceph daemon, in a container, in the foreground') | |
8710 | parser_run.set_defaults(func=command_run) | |
8711 | parser_run.add_argument( | |
8712 | '--name', '-n', | |
8713 | required=True, | |
8714 | help='daemon name (type.id)') | |
8715 | parser_run.add_argument( | |
8716 | '--fsid', | |
8717 | required=True, | |
8718 | help='cluster FSID') | |
8719 | ||
8720 | parser_shell = subparsers.add_parser( | |
8721 | 'shell', help='run an interactive shell inside a daemon container') | |
8722 | parser_shell.set_defaults(func=command_shell) | |
20effc67 TL |
8723 | parser_shell.add_argument( |
8724 | '--shared_ceph_folder', | |
8725 | metavar='CEPH_SOURCE_FOLDER', | |
8726 | help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder') | |
9f95a23c TL |
8727 | parser_shell.add_argument( |
8728 | '--fsid', | |
8729 | help='cluster FSID') | |
8730 | parser_shell.add_argument( | |
8731 | '--name', '-n', | |
8732 | help='daemon name (type.id)') | |
8733 | parser_shell.add_argument( | |
8734 | '--config', '-c', | |
8735 | help='ceph.conf to pass through to the container') | |
8736 | parser_shell.add_argument( | |
8737 | '--keyring', '-k', | |
8738 | help='ceph.keyring to pass through to the container') | |
e306af50 TL |
8739 | parser_shell.add_argument( |
8740 | '--mount', '-m', | |
f67539c2 TL |
8741 | help=('mount a file or directory in the container. ' |
8742 | 'Support multiple mounts. ' | |
8743 | 'ie: `--mount /foo /bar:/bar`. ' | |
8744 | 'When no destination is passed, default is /mnt'), | |
8745 | nargs='+') | |
9f95a23c TL |
8746 | parser_shell.add_argument( |
8747 | '--env', '-e', | |
8748 | action='append', | |
8749 | default=[], | |
8750 | help='set environment variable') | |
b3b6e05e TL |
8751 | parser_shell.add_argument( |
8752 | '--volume', '-v', | |
8753 | action='append', | |
8754 | default=[], | |
8755 | help='set environment variable') | |
9f95a23c | 8756 | parser_shell.add_argument( |
e306af50 | 8757 | 'command', nargs=argparse.REMAINDER, |
9f95a23c | 8758 | help='command (optional)') |
b3b6e05e TL |
8759 | parser_shell.add_argument( |
8760 | '--no-hosts', | |
8761 | action='store_true', | |
8762 | help='dont pass /etc/hosts through to the container') | |
9f95a23c TL |
8763 | |
8764 | parser_enter = subparsers.add_parser( | |
8765 | 'enter', help='run an interactive shell inside a running daemon container') | |
8766 | parser_enter.set_defaults(func=command_enter) | |
8767 | parser_enter.add_argument( | |
8768 | '--fsid', | |
8769 | help='cluster FSID') | |
8770 | parser_enter.add_argument( | |
8771 | '--name', '-n', | |
8772 | required=True, | |
8773 | help='daemon name (type.id)') | |
8774 | parser_enter.add_argument( | |
e306af50 | 8775 | 'command', nargs=argparse.REMAINDER, |
9f95a23c TL |
8776 | help='command') |
8777 | ||
8778 | parser_ceph_volume = subparsers.add_parser( | |
8779 | 'ceph-volume', help='run ceph-volume inside a container') | |
8780 | parser_ceph_volume.set_defaults(func=command_ceph_volume) | |
20effc67 TL |
8781 | parser_ceph_volume.add_argument( |
8782 | '--shared_ceph_folder', | |
8783 | metavar='CEPH_SOURCE_FOLDER', | |
8784 | help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder') | |
9f95a23c TL |
8785 | parser_ceph_volume.add_argument( |
8786 | '--fsid', | |
8787 | help='cluster FSID') | |
8788 | parser_ceph_volume.add_argument( | |
8789 | '--config-json', | |
20effc67 | 8790 | help='JSON file with config and (client.bootstrap-osd) key') |
801d1391 TL |
8791 | parser_ceph_volume.add_argument( |
8792 | '--config', '-c', | |
8793 | help='ceph conf file') | |
8794 | parser_ceph_volume.add_argument( | |
8795 | '--keyring', '-k', | |
8796 | help='ceph.keyring to pass through to the container') | |
9f95a23c | 8797 | parser_ceph_volume.add_argument( |
e306af50 | 8798 | 'command', nargs=argparse.REMAINDER, |
9f95a23c TL |
8799 | help='command') |
8800 | ||
b3b6e05e TL |
8801 | parser_zap_osds = subparsers.add_parser( |
8802 | 'zap-osds', help='zap all OSDs associated with a particular fsid') | |
8803 | parser_zap_osds.set_defaults(func=command_zap_osds) | |
8804 | parser_zap_osds.add_argument( | |
8805 | '--fsid', | |
8806 | required=True, | |
8807 | help='cluster FSID') | |
8808 | parser_zap_osds.add_argument( | |
8809 | '--force', | |
8810 | action='store_true', | |
8811 | help='proceed, even though this may destroy valuable data') | |
8812 | ||
9f95a23c | 8813 | parser_unit = subparsers.add_parser( |
f67539c2 | 8814 | 'unit', help="operate on the daemon's systemd unit") |
9f95a23c TL |
8815 | parser_unit.set_defaults(func=command_unit) |
8816 | parser_unit.add_argument( | |
8817 | 'command', | |
8818 | help='systemd command (start, stop, restart, enable, disable, ...)') | |
8819 | parser_unit.add_argument( | |
8820 | '--fsid', | |
8821 | help='cluster FSID') | |
8822 | parser_unit.add_argument( | |
8823 | '--name', '-n', | |
8824 | required=True, | |
8825 | help='daemon name (type.id)') | |
8826 | ||
8827 | parser_logs = subparsers.add_parser( | |
8828 | 'logs', help='print journald logs for a daemon container') | |
8829 | parser_logs.set_defaults(func=command_logs) | |
8830 | parser_logs.add_argument( | |
8831 | '--fsid', | |
8832 | help='cluster FSID') | |
8833 | parser_logs.add_argument( | |
8834 | '--name', '-n', | |
8835 | required=True, | |
8836 | help='daemon name (type.id)') | |
8837 | parser_logs.add_argument( | |
8838 | 'command', nargs='*', | |
8839 | help='additional journalctl args') | |
8840 | ||
8841 | parser_bootstrap = subparsers.add_parser( | |
8842 | 'bootstrap', help='bootstrap a cluster (mon + mgr daemons)') | |
8843 | parser_bootstrap.set_defaults(func=command_bootstrap) | |
8844 | parser_bootstrap.add_argument( | |
8845 | '--config', '-c', | |
8846 | help='ceph conf file to incorporate') | |
8847 | parser_bootstrap.add_argument( | |
8848 | '--mon-id', | |
8849 | required=False, | |
8850 | help='mon id (default: local hostname)') | |
33c7a0ef TL |
8851 | group = parser_bootstrap.add_mutually_exclusive_group() |
8852 | group.add_argument( | |
9f95a23c TL |
8853 | '--mon-addrv', |
8854 | help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])') | |
33c7a0ef | 8855 | group.add_argument( |
9f95a23c TL |
8856 | '--mon-ip', |
8857 | help='mon IP') | |
8858 | parser_bootstrap.add_argument( | |
8859 | '--mgr-id', | |
8860 | required=False, | |
8861 | help='mgr id (default: randomly generated)') | |
8862 | parser_bootstrap.add_argument( | |
8863 | '--fsid', | |
8864 | help='cluster FSID') | |
8865 | parser_bootstrap.add_argument( | |
8866 | '--output-dir', | |
8867 | default='/etc/ceph', | |
8868 | help='directory to write config, keyring, and pub key files') | |
8869 | parser_bootstrap.add_argument( | |
8870 | '--output-keyring', | |
8871 | help='location to write keyring file with new cluster admin and mon keys') | |
8872 | parser_bootstrap.add_argument( | |
8873 | '--output-config', | |
8874 | help='location to write conf file to connect to new cluster') | |
8875 | parser_bootstrap.add_argument( | |
8876 | '--output-pub-ssh-key', | |
f67539c2 | 8877 | help="location to write the cluster's public SSH key") |
b3b6e05e TL |
8878 | parser_bootstrap.add_argument( |
8879 | '--skip-admin-label', | |
8880 | action='store_true', | |
8881 | help='do not create admin label for ceph.conf and client.admin keyring distribution') | |
9f95a23c TL |
8882 | parser_bootstrap.add_argument( |
8883 | '--skip-ssh', | |
8884 | action='store_true', | |
8885 | help='skip setup of ssh key on local host') | |
8886 | parser_bootstrap.add_argument( | |
8887 | '--initial-dashboard-user', | |
8888 | default='admin', | |
8889 | help='Initial user for the dashboard') | |
8890 | parser_bootstrap.add_argument( | |
8891 | '--initial-dashboard-password', | |
8892 | help='Initial password for the initial dashboard user') | |
f6b5b4d7 TL |
8893 | parser_bootstrap.add_argument( |
8894 | '--ssl-dashboard-port', | |
8895 | type=int, | |
f67539c2 | 8896 | default=8443, |
f6b5b4d7 | 8897 | help='Port number used to connect with dashboard using SSL') |
9f95a23c TL |
8898 | parser_bootstrap.add_argument( |
8899 | '--dashboard-key', | |
e306af50 | 8900 | type=argparse.FileType('r'), |
9f95a23c TL |
8901 | help='Dashboard key') |
8902 | parser_bootstrap.add_argument( | |
8903 | '--dashboard-crt', | |
e306af50 | 8904 | type=argparse.FileType('r'), |
9f95a23c TL |
8905 | help='Dashboard certificate') |
8906 | ||
e306af50 TL |
8907 | parser_bootstrap.add_argument( |
8908 | '--ssh-config', | |
8909 | type=argparse.FileType('r'), | |
8910 | help='SSH config') | |
8911 | parser_bootstrap.add_argument( | |
8912 | '--ssh-private-key', | |
8913 | type=argparse.FileType('r'), | |
8914 | help='SSH private key') | |
8915 | parser_bootstrap.add_argument( | |
8916 | '--ssh-public-key', | |
8917 | type=argparse.FileType('r'), | |
8918 | help='SSH public key') | |
f6b5b4d7 TL |
8919 | parser_bootstrap.add_argument( |
8920 | '--ssh-user', | |
8921 | default='root', | |
8922 | help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users') | |
9f95a23c TL |
8923 | parser_bootstrap.add_argument( |
8924 | '--skip-mon-network', | |
8925 | action='store_true', | |
8926 | help='set mon public_network based on bootstrap mon ip') | |
8927 | parser_bootstrap.add_argument( | |
8928 | '--skip-dashboard', | |
8929 | action='store_true', | |
8930 | help='do not enable the Ceph Dashboard') | |
8931 | parser_bootstrap.add_argument( | |
8932 | '--dashboard-password-noupdate', | |
8933 | action='store_true', | |
8934 | help='stop forced dashboard password change') | |
8935 | parser_bootstrap.add_argument( | |
8936 | '--no-minimize-config', | |
8937 | action='store_true', | |
8938 | help='do not assimilate and minimize the config file') | |
8939 | parser_bootstrap.add_argument( | |
8940 | '--skip-ping-check', | |
8941 | action='store_true', | |
8942 | help='do not verify that mon IP is pingable') | |
8943 | parser_bootstrap.add_argument( | |
8944 | '--skip-pull', | |
8945 | action='store_true', | |
33c7a0ef | 8946 | help='do not pull the default image before bootstrapping') |
9f95a23c TL |
8947 | parser_bootstrap.add_argument( |
8948 | '--skip-firewalld', | |
8949 | action='store_true', | |
8950 | help='Do not configure firewalld') | |
8951 | parser_bootstrap.add_argument( | |
8952 | '--allow-overwrite', | |
8953 | action='store_true', | |
8954 | help='allow overwrite of existing --output-* config/keyring/ssh files') | |
8955 | parser_bootstrap.add_argument( | |
8956 | '--allow-fqdn-hostname', | |
8957 | action='store_true', | |
8958 | help='allow hostname that is fully-qualified (contains ".")') | |
f67539c2 TL |
8959 | parser_bootstrap.add_argument( |
8960 | '--allow-mismatched-release', | |
8961 | action='store_true', | |
8962 | help="allow bootstrap of ceph that doesn't match this version of cephadm") | |
9f95a23c TL |
8963 | parser_bootstrap.add_argument( |
8964 | '--skip-prepare-host', | |
8965 | action='store_true', | |
8966 | help='Do not prepare host') | |
8967 | parser_bootstrap.add_argument( | |
8968 | '--orphan-initial-daemons', | |
8969 | action='store_true', | |
f67539c2 | 8970 | help='Set mon and mgr service to `unmanaged`, Do not create the crash service') |
9f95a23c TL |
8971 | parser_bootstrap.add_argument( |
8972 | '--skip-monitoring-stack', | |
8973 | action='store_true', | |
8974 | help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)') | |
e306af50 TL |
8975 | parser_bootstrap.add_argument( |
8976 | '--apply-spec', | |
8977 | help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)') | |
e306af50 TL |
8978 | parser_bootstrap.add_argument( |
8979 | '--shared_ceph_folder', | |
8980 | metavar='CEPH_SOURCE_FOLDER', | |
8981 | help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder') | |
9f95a23c | 8982 | |
f6b5b4d7 TL |
8983 | parser_bootstrap.add_argument( |
8984 | '--registry-url', | |
8985 | help='url for custom registry') | |
8986 | parser_bootstrap.add_argument( | |
8987 | '--registry-username', | |
8988 | help='username for custom registry') | |
8989 | parser_bootstrap.add_argument( | |
8990 | '--registry-password', | |
8991 | help='password for custom registry') | |
8992 | parser_bootstrap.add_argument( | |
8993 | '--registry-json', | |
8994 | help='json file with custom registry login info (URL, Username, Password)') | |
f91f0fd5 TL |
8995 | parser_bootstrap.add_argument( |
8996 | '--container-init', | |
8997 | action='store_true', | |
f67539c2 TL |
8998 | default=CONTAINER_INIT, |
8999 | help=argparse.SUPPRESS) | |
f67539c2 TL |
9000 | parser_bootstrap.add_argument( |
9001 | '--cluster-network', | |
9002 | help='subnet to use for cluster replication, recovery and heartbeats (in CIDR notation network/mask)') | |
b3b6e05e TL |
9003 | parser_bootstrap.add_argument( |
9004 | '--single-host-defaults', | |
9005 | action='store_true', | |
9006 | help='adjust configuration defaults to suit a single-host cluster') | |
522d829b TL |
9007 | parser_bootstrap.add_argument( |
9008 | '--log-to-file', | |
9009 | action='store_true', | |
9010 | help='configure cluster to log to traditional log files in /var/log/ceph/$fsid') | |
f6b5b4d7 | 9011 | |
9f95a23c TL |
9012 | parser_deploy = subparsers.add_parser( |
9013 | 'deploy', help='deploy a daemon') | |
9014 | parser_deploy.set_defaults(func=command_deploy) | |
9015 | parser_deploy.add_argument( | |
9016 | '--name', | |
9017 | required=True, | |
9018 | action=CustomValidation, | |
9019 | help='daemon name (type.id)') | |
9020 | parser_deploy.add_argument( | |
9021 | '--fsid', | |
9022 | required=True, | |
9023 | help='cluster FSID') | |
9024 | parser_deploy.add_argument( | |
9025 | '--config', '-c', | |
9026 | help='config file for new daemon') | |
9027 | parser_deploy.add_argument( | |
9028 | '--config-json', | |
9029 | help='Additional configuration information in JSON format') | |
9030 | parser_deploy.add_argument( | |
9031 | '--keyring', | |
9032 | help='keyring for new daemon') | |
9033 | parser_deploy.add_argument( | |
9034 | '--key', | |
9035 | help='key for new daemon') | |
9036 | parser_deploy.add_argument( | |
9037 | '--osd-fsid', | |
9038 | help='OSD uuid, if creating an OSD container') | |
9039 | parser_deploy.add_argument( | |
9040 | '--skip-firewalld', | |
9041 | action='store_true', | |
9042 | help='Do not configure firewalld') | |
f6b5b4d7 TL |
9043 | parser_deploy.add_argument( |
9044 | '--tcp-ports', | |
9045 | help='List of tcp ports to open in the host firewall') | |
9f95a23c TL |
9046 | parser_deploy.add_argument( |
9047 | '--reconfig', | |
9048 | action='store_true', | |
9049 | help='Reconfigure a previously deployed daemon') | |
9050 | parser_deploy.add_argument( | |
9051 | '--allow-ptrace', | |
9052 | action='store_true', | |
9053 | help='Allow SYS_PTRACE on daemon container') | |
f91f0fd5 TL |
9054 | parser_deploy.add_argument( |
9055 | '--container-init', | |
9056 | action='store_true', | |
f67539c2 TL |
9057 | default=CONTAINER_INIT, |
9058 | help=argparse.SUPPRESS) | |
9059 | parser_deploy.add_argument( | |
9060 | '--memory-request', | |
9061 | help='Container memory request/target' | |
9062 | ) | |
9063 | parser_deploy.add_argument( | |
9064 | '--memory-limit', | |
9065 | help='Container memory hard limit' | |
9066 | ) | |
9067 | parser_deploy.add_argument( | |
9068 | '--meta-json', | |
9069 | help='JSON dict of additional metadata' | |
9070 | ) | |
20effc67 TL |
9071 | parser_deploy.add_argument( |
9072 | '--extra-container-args', | |
9073 | action='append', | |
9074 | default=[], | |
9075 | help='Additional container arguments to apply to deamon' | |
9076 | ) | |
9f95a23c TL |
9077 | |
9078 | parser_check_host = subparsers.add_parser( | |
9079 | 'check-host', help='check host configuration') | |
9080 | parser_check_host.set_defaults(func=command_check_host) | |
9081 | parser_check_host.add_argument( | |
9082 | '--expect-hostname', | |
9083 | help='Check that hostname matches an expected value') | |
9084 | ||
9085 | parser_prepare_host = subparsers.add_parser( | |
9086 | 'prepare-host', help='prepare a host for cephadm use') | |
9087 | parser_prepare_host.set_defaults(func=command_prepare_host) | |
9088 | parser_prepare_host.add_argument( | |
9089 | '--expect-hostname', | |
9090 | help='Set hostname') | |
9091 | ||
9092 | parser_add_repo = subparsers.add_parser( | |
9093 | 'add-repo', help='configure package repository') | |
9094 | parser_add_repo.set_defaults(func=command_add_repo) | |
9095 | parser_add_repo.add_argument( | |
9096 | '--release', | |
1911f103 | 9097 | help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE)) |
9f95a23c TL |
9098 | parser_add_repo.add_argument( |
9099 | '--version', | |
9100 | help='use specific upstream version (x.y.z)') | |
9101 | parser_add_repo.add_argument( | |
9102 | '--dev', | |
9103 | help='use specified bleeding edge build from git branch or tag') | |
9104 | parser_add_repo.add_argument( | |
9105 | '--dev-commit', | |
9106 | help='use specified bleeding edge build from git commit') | |
9107 | parser_add_repo.add_argument( | |
9108 | '--gpg-url', | |
9109 | help='specify alternative GPG key location') | |
9110 | parser_add_repo.add_argument( | |
9111 | '--repo-url', | |
9112 | default='https://download.ceph.com', | |
9113 | help='specify alternative repo location') | |
9114 | # TODO: proxy? | |
9115 | ||
9116 | parser_rm_repo = subparsers.add_parser( | |
9117 | 'rm-repo', help='remove package repository configuration') | |
9118 | parser_rm_repo.set_defaults(func=command_rm_repo) | |
9119 | ||
9120 | parser_install = subparsers.add_parser( | |
9121 | 'install', help='install ceph package(s)') | |
9122 | parser_install.set_defaults(func=command_install) | |
9123 | parser_install.add_argument( | |
9124 | 'packages', nargs='*', | |
9125 | default=['cephadm'], | |
9126 | help='packages') | |
9127 | ||
f6b5b4d7 TL |
9128 | parser_registry_login = subparsers.add_parser( |
9129 | 'registry-login', help='log host into authenticated registry') | |
9130 | parser_registry_login.set_defaults(func=command_registry_login) | |
9131 | parser_registry_login.add_argument( | |
9132 | '--registry-url', | |
9133 | help='url for custom registry') | |
9134 | parser_registry_login.add_argument( | |
9135 | '--registry-username', | |
9136 | help='username for custom registry') | |
9137 | parser_registry_login.add_argument( | |
9138 | '--registry-password', | |
9139 | help='password for custom registry') | |
9140 | parser_registry_login.add_argument( | |
9141 | '--registry-json', | |
9142 | help='json file with custom registry login info (URL, Username, Password)') | |
9143 | parser_registry_login.add_argument( | |
9144 | '--fsid', | |
9145 | help='cluster FSID') | |
9146 | ||
f91f0fd5 TL |
9147 | parser_gather_facts = subparsers.add_parser( |
9148 | 'gather-facts', help='gather and return host related information (JSON format)') | |
9149 | parser_gather_facts.set_defaults(func=command_gather_facts) | |
9150 | ||
f67539c2 TL |
9151 | parser_maintenance = subparsers.add_parser( |
9152 | 'host-maintenance', help='Manage the maintenance state of a host') | |
9153 | parser_maintenance.add_argument( | |
9154 | '--fsid', | |
9155 | help='cluster FSID') | |
9156 | parser_maintenance.add_argument( | |
9157 | 'maintenance_action', | |
9158 | type=str, | |
9159 | choices=['enter', 'exit'], | |
9160 | help='Maintenance action - enter maintenance, or exit maintenance') | |
9161 | parser_maintenance.set_defaults(func=command_maintenance) | |
9162 | ||
20effc67 TL |
9163 | parser_agent = subparsers.add_parser( |
9164 | 'agent', help='start cephadm agent') | |
9165 | parser_agent.set_defaults(func=command_agent) | |
9166 | parser_agent.add_argument( | |
9167 | '--fsid', | |
9168 | required=True, | |
9169 | help='cluster FSID') | |
9170 | parser_agent.add_argument( | |
9171 | '--daemon-id', | |
9172 | help='daemon id for agent') | |
9173 | ||
9f95a23c TL |
9174 | return parser |
9175 | ||
f6b5b4d7 | 9176 | |
def _parse_args(av: List[str]) -> argparse.Namespace:
    """Parse *av* with the cephadm argument parser and normalize the result.

    Strips a leading ``--`` separator from a pass-through ``command`` list
    and reconciles the deprecated ``--container-init`` flag with its
    ``--no-container-init`` replacement so the two namespace attributes are
    always logical opposites.
    """
    parser = _get_parser()
    args = parser.parse_args(av)

    # drop a leading '--' used to separate cephadm options from a
    # pass-through command line
    if 'command' in args and args.command and args.command[0] == '--':
        args.command.pop(0)

    # workaround argparse to deprecate the subparser `--container-init` flag:
    # container_init and no_container_init must stay mutually exclusive
    deprecated, replacement = '--container-init', '--no-container-init'
    if deprecated in av and replacement in av:
        parser.error('argument %s: not allowed with argument %s'
                     % (deprecated, replacement))
    elif deprecated in av:
        args.no_container_init = not args.container_init
    else:
        args.container_init = not args.no_container_init
    assert args.container_init is not args.no_container_init

    return args
9f95a23c | 9196 | |
f6b5b4d7 | 9197 | |
def cephadm_init_ctx(args: List[str]) -> CephadmContext:
    """Build a :class:`CephadmContext` carrying the parsed command line."""
    parsed = _parse_args(args)
    ctx = CephadmContext()
    ctx.set_args(parsed)
    return ctx
9202 | ||
9203 | ||
20effc67 TL |
def cephadm_init_logging(ctx: CephadmContext, args: List[str]) -> None:
    """Configure the logging for cephadm as well as updating the system
    to have the expected log dir and logrotate configuration.

    :param ctx: context providing ``logrotate_dir`` and ``verbose``
    :param args: raw argument list, inspected to choose the interactive
        logging config for user-facing operations
    """
    global logger
    # exist_ok avoids the TOCTOU race between an existence check and creation
    os.makedirs(LOG_DIR, exist_ok=True)
    operations = ['bootstrap', 'rm-cluster']
    if any(op in args for op in operations):
        # interactive operations get the console-oriented logging config
        dictConfig(interactive_logging_config)
    else:
        dictConfig(logging_config)

    logger = logging.getLogger()

    # install a logrotate policy for cephadm.log on first run
    if not os.path.exists(ctx.logrotate_dir + '/cephadm'):
        with open(ctx.logrotate_dir + '/cephadm', 'w') as f:
            f.write("""# created by cephadm
/var/log/ceph/cephadm.log {
    rotate 7
    daily
    compress
    missingok
    notifempty
}
""")

    if ctx.verbose:
        # raise only the console handler to DEBUG; other handlers keep
        # their configured levels
        for handler in logger.handlers:
            if handler.name == 'console':
                handler.setLevel(logging.DEBUG)
    logger.debug('%s\ncephadm %s' % ('-' * 80, args))
f67539c2 TL |
9236 | |
9237 | ||
20effc67 TL |
def cephadm_require_root() -> None:
    """Exit if the process is not running as root."""
    if os.geteuid() == 0:
        return
    sys.stderr.write('ERROR: cephadm should be run as root\n')
    sys.exit(1)
9243 | ||
20effc67 TL |
9244 | |
def main() -> None:
    """cephadm entry point: parse arguments, validate, dispatch the subcommand.

    Exits with status 1 on usage or runtime errors; otherwise exits with the
    handler's return code (0 when the handler returns a falsy value).
    """
    # dead `av = []` initialization removed: it was unconditionally
    # overwritten on the next line
    av: List[str] = sys.argv[1:]

    ctx = cephadm_init_ctx(av)
    if not ctx.has_function():
        sys.stderr.write('No command specified; pass -h or --help for usage\n')
        sys.exit(1)

    cephadm_require_root()
    cephadm_init_logging(ctx, av)
    try:
        # podman or docker?
        ctx.container_engine = find_container_engine(ctx)
        # these subcommands must work even with no container engine installed
        if ctx.func not in [
                command_check_host,
                command_prepare_host,
                command_add_repo,
                command_rm_repo,
                command_install]:
            check_container_engine(ctx)
        # command handler
        r = ctx.func(ctx)
    except Error as e:
        # in verbose mode surface the full traceback for debugging
        if ctx.verbose:
            raise
        logger.error('ERROR: %s' % e)
        sys.exit(1)
    if not r:
        r = 0
    sys.exit(r)
f67539c2 TL |
9278 | |
9279 | ||
# Script entry point: delegate to main() when executed directly.
if __name__ == '__main__':
    main()