]>
Commit | Line | Data |
---|---|---|
9f95a23c TL |
1 | #!/usr/bin/python3 |
2 | ||
f67539c2 TL |
3 | import asyncio |
4 | import asyncio.subprocess | |
5 | import argparse | |
6 | import datetime | |
7 | import fcntl | |
8 | import ipaddress | |
9 | import json | |
10 | import logging | |
11 | from logging.config import dictConfig | |
12 | import os | |
13 | import platform | |
14 | import pwd | |
15 | import random | |
16 | import shlex | |
17 | import shutil | |
18 | import socket | |
19 | import string | |
20 | import subprocess | |
21 | import sys | |
22 | import tempfile | |
23 | import time | |
24 | import errno | |
25 | import struct | |
26 | from socketserver import ThreadingMixIn | |
27 | from http.server import BaseHTTPRequestHandler, HTTPServer | |
28 | import signal | |
29 | import io | |
30 | from contextlib import redirect_stdout | |
31 | import ssl | |
32 | from enum import Enum | |
33 | ||
34 | from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, Callable, IO | |
35 | ||
36 | import re | |
37 | import uuid | |
38 | ||
39 | from configparser import ConfigParser | |
40 | from functools import wraps | |
41 | from glob import glob | |
42 | from io import StringIO | |
43 | from threading import Thread, RLock | |
44 | from urllib.error import HTTPError | |
45 | from urllib.request import urlopen | |
46 | from pathlib import Path | |
47 | ||
# Default container images -----------------------------------------------------
DEFAULT_IMAGE = 'docker.io/ceph/ceph:v16'
DEFAULT_IMAGE_IS_MASTER = False
DEFAULT_IMAGE_RELEASE = 'pacific'
DEFAULT_PROMETHEUS_IMAGE = 'docker.io/prom/prometheus:v2.18.1'
DEFAULT_NODE_EXPORTER_IMAGE = 'docker.io/prom/node-exporter:v0.18.1'
DEFAULT_GRAFANA_IMAGE = 'docker.io/ceph/ceph-grafana:6.7.4'
DEFAULT_ALERT_MANAGER_IMAGE = 'docker.io/prom/alertmanager:v0.20.0'
DEFAULT_REGISTRY = 'docker.io'   # normalize unqualified digests to this
# ------------------------------------------------------------------------------

LATEST_STABLE_RELEASE = 'pacific'
# Well-known host paths used for daemon data, logs and systemd units.
DATA_DIR = '/var/lib/ceph'
LOG_DIR = '/var/log/ceph'
LOCK_DIR = '/run/cephadm'
LOGROTATE_DIR = '/etc/logrotate.d'
UNIT_DIR = '/etc/systemd/system'
LOG_DIR_MODE = 0o770
DATA_DIR_MODE = 0o700
CONTAINER_INIT = True
# Minimum podman versions: baseline support, and when --cgroups=split works.
MIN_PODMAN_VERSION = (2, 0, 2)
CGROUPS_SPLIT_PODMAN_VERSION = (2, 1, 0)
CUSTOM_PS1 = r'[ceph: \u@\h \W]\$ '
DEFAULT_TIMEOUT = None  # in seconds
DEFAULT_RETRY = 15
SHELL_DEFAULT_CONF = '/etc/ceph/ceph.conf'
SHELL_DEFAULT_KEYRING = '/etc/ceph/ceph.client.admin.keyring'
DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ'

# Module-level logger; installed later once logging is configured.
logger: logging.Logger = None  # type: ignore

"""
You can invoke cephadm in two ways:

1. The normal way, at the command line.

2. By piping the script to the python3 binary. In this latter case, you should
   prepend one or more lines to the beginning of the script.

   For arguments,

     injected_argv = [...]

   e.g.,

     injected_argv = ['ls']

   For reading stdin from the '--config-json -' argument,

     injected_stdin = '...'
"""
# Cache for stdin read via '--config-json -' so it can be consumed once
# and reused.
cached_stdin = None
f91f0fd5 | 100 | |
f67539c2 | 101 | ################################## |
9f95a23c | 102 | |
9f95a23c | 103 | |
class BaseConfig:
    """Mutable, process-wide configuration for a cephadm run.

    Every attribute starts from the module-level default and may later be
    overridden from the parsed command line via :meth:`set_from_args`.
    """

    def __init__(self):
        self.image: str = ''
        self.docker: bool = False
        self.data_dir: str = DATA_DIR
        self.log_dir: str = LOG_DIR
        self.logrotate_dir: str = LOGROTATE_DIR
        self.unit_dir: str = UNIT_DIR
        self.verbose: bool = False
        self.timeout: Optional[int] = DEFAULT_TIMEOUT
        self.retry: int = DEFAULT_RETRY
        self.env: List[str] = []
        self.memory_request: Optional[int] = None
        self.memory_limit: Optional[int] = None

        self.container_init: bool = CONTAINER_INIT
        self.container_engine: Optional[ContainerEngine] = None

    def set_from_args(self, args: argparse.Namespace):
        """Copy over every *args* attribute that this config also defines."""
        for key, value in vars(args).items():
            if hasattr(self, key):
                setattr(self, key, value)
128 | ||
129 | ||
class CephadmContext:
    """Runtime state for one cephadm invocation.

    Attribute access is proxied: reads and writes fall through first to the
    BaseConfig instance (``_conf``) and then to the parsed argparse
    namespace (``_args``), so ``ctx.foo`` resolves regardless of which
    backing store holds ``foo``.
    """

    def __init__(self):
        # Write directly to __dict__ to bypass our custom __setattr__
        # while the two backing stores are being bootstrapped.
        self.__dict__['_args'] = None
        self.__dict__['_conf'] = BaseConfig()

    def set_args(self, args: argparse.Namespace) -> None:
        # Fold recognized options into _conf first, then keep the raw
        # namespace around for everything else.
        self._conf.set_from_args(args)
        self._args = args

    def has_function(self) -> bool:
        # 'func' is the sub-command callable attached by argparse wiring.
        return 'func' in self._args

    def __contains__(self, name: str) -> bool:
        return hasattr(self, name)

    def __getattr__(self, name: str) -> Any:
        # Only invoked when normal lookup fails: try _conf, then _args,
        # then fall back to the default lookup (which raises AttributeError).
        if '_conf' in self.__dict__ and hasattr(self._conf, name):
            return getattr(self._conf, name)
        elif '_args' in self.__dict__ and hasattr(self._args, name):
            return getattr(self._args, name)
        else:
            return super().__getattribute__(name)

    def __setattr__(self, name: str, value: Any) -> None:
        # Route writes to whichever backing store already defines the
        # attribute; otherwise set it on the context object itself.
        if hasattr(self._conf, name):
            setattr(self._conf, name, value)
        elif hasattr(self._args, name):
            setattr(self._args, name, value)
        else:
            super().__setattr__(name, value)
161 | ||
162 | ||
class ContainerEngine:
    """Base wrapper around a container runtime executable."""

    def __init__(self):
        # Locate the runtime binary (find_program is defined elsewhere in
        # this file).
        self.path = find_program(self.EXE)

    @property
    def EXE(self) -> str:
        # Subclasses shadow this property with a plain class attribute
        # naming their executable (e.g. 'podman', 'docker').
        raise NotImplementedError()
170 | ||
171 | ||
class Podman(ContainerEngine):
    """Podman container engine wrapper with lazy version detection."""

    EXE = 'podman'

    def __init__(self):
        super().__init__()
        self._version = None  # populated lazily by get_version()

    @property
    def version(self):
        # Fail loudly rather than returning None if the caller forgot to
        # query the version first.
        if self._version is None:
            raise RuntimeError('Please call `get_version` first')
        return self._version

    def get_version(self, ctx: CephadmContext):
        # Query the podman client version string and cache the parsed result.
        out, _, _ = call_throws(ctx, [self.path, 'version', '--format', '{{.Client.Version}}'])
        self._version = _parse_podman_version(out)
188 | ||
189 | ||
class Docker(ContainerEngine):
    """Docker container engine wrapper."""

    EXE = 'docker'


CONTAINER_PREFERENCE = (Podman, Docker)  # prefer podman to docker
9f95a23c | 195 | |
9f95a23c | 196 | |
f91f0fd5 TL |
# Log and console output config
logging_config = {
    'version': 1,
    'disable_existing_loggers': True,
    'formatters': {
        'cephadm': {
            'format': '%(asctime)s %(levelname)s %(message)s'
        },
    },
    'handlers': {
        # INFO and above to the console, unformatted (no formatter set).
        'console': {
            'level': 'INFO',
            'class': 'logging.StreamHandler',
        },
        # Full DEBUG log in LOG_DIR, rotated at ~1 MB with one backup kept.
        'log_file': {
            'level': 'DEBUG',
            'class': 'logging.handlers.RotatingFileHandler',
            'formatter': 'cephadm',
            'filename': '%s/cephadm.log' % LOG_DIR,
            'maxBytes': 1024000,
            'backupCount': 1,
        }
    },
    'loggers': {
        # Root logger fans out to both handlers.
        '': {
            'level': 'DEBUG',
            'handlers': ['console', 'log_file'],
        }
    }
}
e306af50 | 227 | |
f67539c2 | 228 | |
e306af50 TL |
class termcolor:
    """ANSI escape sequences used to colorize console output."""

    yellow = '\033[93m'
    red = '\033[31m'
    end = '\033[0m'  # reset attributes back to the terminal default
233 | ||
f6b5b4d7 | 234 | |
9f95a23c TL |
class Error(Exception):
    """Base class for cephadm-specific failures."""
    pass


class TimeoutExpired(Error):
    """Raised when a timed operation exceeds its deadline."""
    pass
241 | ||
242 | ################################## | |
243 | ||
f6b5b4d7 | 244 | |
9f95a23c TL |
class Ceph(object):
    """Names the core Ceph daemon types handled directly by cephadm."""

    daemons = ('mon', 'mgr', 'mds', 'osd', 'rgw', 'rbd-mirror',
               'crash', 'cephfs-mirror')
9f95a23c TL |
248 | |
249 | ################################## | |
250 | ||
f6b5b4d7 | 251 | |
9f95a23c TL |
class Monitoring(object):
    """Define the configs for the monitoring containers"""

    # Host ports each monitoring daemon listens on.
    port_map = {
        'prometheus': [9095],  # Avoid default 9090, due to conflict with cockpit UI
        'node-exporter': [9100],
        'grafana': [3000],
        'alertmanager': [9093, 9094],
    }

    # Per-component container image, resource hints, daemon arguments and
    # the files/args expected in the supplied config-json.
    components = {
        'prometheus': {
            'image': DEFAULT_PROMETHEUS_IMAGE,
            'cpus': '2',
            'memory': '4GB',
            'args': [
                '--config.file=/etc/prometheus/prometheus.yml',
                '--storage.tsdb.path=/prometheus',
                '--web.listen-address=:{}'.format(port_map['prometheus'][0]),
            ],
            'config-json-files': [
                'prometheus.yml',
            ],
        },
        'node-exporter': {
            'image': DEFAULT_NODE_EXPORTER_IMAGE,
            'cpus': '1',
            'memory': '1GB',
            'args': [
                '--no-collector.timex',
            ],
        },
        'grafana': {
            'image': DEFAULT_GRAFANA_IMAGE,
            'cpus': '2',
            'memory': '4GB',
            'args': [],
            'config-json-files': [
                'grafana.ini',
                'provisioning/datasources/ceph-dashboard.yml',
                'certs/cert_file',
                'certs/cert_key',
            ],
        },
        'alertmanager': {
            'image': DEFAULT_ALERT_MANAGER_IMAGE,
            'cpus': '2',
            'memory': '2GB',
            'args': [
                '--web.listen-address=:{}'.format(port_map['alertmanager'][0]),
                '--cluster.listen-address=:{}'.format(port_map['alertmanager'][1]),
            ],
            'config-json-files': [
                'alertmanager.yml',
            ],
            'config-json-args': [
                'peers',
            ],
        },
    }  # type: ignore

    @staticmethod
    def get_version(ctx, container_id, daemon_type):
        # type: (CephadmContext, str, str) -> str
        """Return the daemon's version string, or '' if it can't be read.

        :param: daemon_type Either "prometheus", "alertmanager" or "node-exporter"
        """
        assert daemon_type in ('prometheus', 'alertmanager', 'node-exporter')
        cmd = daemon_type.replace('-', '_')
        code = -1
        err = ''
        version = ''
        if daemon_type == 'alertmanager':
            # The binary name differs between packagings; try both.
            for cmd in ['alertmanager', 'prometheus-alertmanager']:
                _, err, code = call(ctx, [
                    ctx.container_engine.path, 'exec', container_id, cmd,
                    '--version'
                ], verbosity=CallVerbosity.DEBUG)
                if code == 0:
                    break
            cmd = 'alertmanager'  # reset cmd for version extraction
        else:
            _, err, code = call(ctx, [
                ctx.container_engine.path, 'exec', container_id, cmd, '--version'
            ], verbosity=CallVerbosity.DEBUG)
        # The tools print "<cmd>, version X.Y.Z ..." on stderr.
        if code == 0 and \
                err.startswith('%s, version ' % cmd):
            version = err.split(' ')[2]
        return version
341 | ||
9f95a23c TL |
342 | ################################## |
343 | ||
f6b5b4d7 | 344 | |
f67539c2 TL |
def populate_files(config_dir, config_files, uid, gid):
    # type: (str, Dict, int, int) -> None
    """create config files for different services"""
    for fname in config_files:
        target = os.path.join(config_dir, fname)
        content = dict_get_join(config_files, fname)
        logger.info('Write file: %s' % (target))
        with open(target, 'w') as fh:
            # Restrict the file to its owner before writing any content.
            os.fchown(fh.fileno(), uid, gid)
            os.fchmod(fh.fileno(), 0o600)
            fh.write(content)
356 | ||
357 | ||
9f95a23c TL |
class NFSGanesha(object):
    """Defines a NFS-Ganesha container"""

    daemon_type = 'nfs'
    entrypoint = '/usr/bin/ganesha.nfsd'
    # '-F' keeps ganesha in the foreground; '-L STDERR' directs its log there.
    daemon_args = ['-F', '-L', 'STDERR']

    required_files = ['ganesha.conf']

    port_map = {
        'nfs': 2049,
    }

    def __init__(self,
                 ctx,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.pool = dict_get(config_json, 'pool', require=True)
        self.namespace = dict_get(config_json, 'namespace')
        self.userid = dict_get(config_json, 'userid')
        self.extra_args = dict_get(config_json, 'extra_args', [])
        self.files = dict_get(config_json, 'files', {})
        self.rgw = dict_get(config_json, 'rgw', {})

        # validate the supplied args
        self.validate()

    @classmethod
    def init(cls, ctx, fsid, daemon_id):
        # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha
        """Alternate constructor that reads config-json from the context."""
        return cls(ctx, fsid, daemon_id, get_parm(ctx.config_json), ctx.image)

    def get_container_mounts(self, data_dir):
        # type: (str) -> Dict[str, str]
        """Map host paths under data_dir to their in-container mount points."""
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z'
        if self.rgw:
            cluster = self.rgw.get('cluster', 'ceph')
            rgw_user = self.rgw.get('user', 'admin')
            mounts[os.path.join(data_dir, 'keyring.rgw')] = \
                '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user)
        return mounts

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        """Environment variables passed into the ganesha container."""
        envs = [
            'CEPH_CONF=%s' % ('/etc/ceph/ceph.conf')
        ]
        return envs

    @staticmethod
    def get_version(ctx, container_id):
        # type: (CephadmContext, str) -> Optional[str]
        """Parse the ganesha release out of `ganesha.nfsd -v`, or None."""
        version = None
        out, err, code = call(ctx,
                              [ctx.container_engine.path, 'exec', container_id,
                               NFSGanesha.entrypoint, '-v'],
                              verbosity=CallVerbosity.DEBUG)
        if code == 0:
            match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
            if match:
                version = match.group(1)
        return version

    def validate(self):
        # type: () -> None
        """Raise Error if any supplied option is missing or malformed."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

        # check for an RGW config
        if self.rgw:
            if not self.rgw.get('keyring'):
                raise Error('RGW keyring is missing')
            if not self.rgw.get('user'):
                raise Error('RGW user is missing')

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def get_daemon_args(self):
        # type: () -> List[str]
        return self.daemon_args + self.extra_args

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ganesha config...')

        # create the ganesha conf dir
        config_dir = os.path.join(data_dir, 'etc/ganesha')
        makedirs(config_dir, uid, gid, 0o755)

        # populate files from the config-json
        populate_files(config_dir, self.files, uid, gid)

        # write the RGW keyring
        if self.rgw:
            keyring_path = os.path.join(data_dir, 'keyring.rgw')
            with open(keyring_path, 'w') as f:
                os.fchmod(f.fileno(), 0o600)
                os.fchown(f.fileno(), uid, gid)
                f.write(self.rgw.get('keyring', ''))

    def get_rados_grace_container(self, action):
        # type: (str) -> CephContainer
        """Container for a ganesha action on the grace db"""
        entrypoint = '/usr/bin/ganesha-rados-grace'

        assert self.pool
        args = ['--pool', self.pool]
        if self.namespace:
            args += ['--ns', self.namespace]
        if self.userid:
            args += ['--userid', self.userid]
        args += [action, self.get_daemon_name()]

        data_dir = get_data_dir(self.fsid, self.ctx.data_dir,
                                self.daemon_type, self.daemon_id)
        volume_mounts = self.get_container_mounts(data_dir)
        envs = self.get_container_envs()

        logger.info('Creating RADOS grace for action: %s' % action)
        c = CephContainer(
            self.ctx,
            image=self.image,
            entrypoint=entrypoint,
            args=args,
            volume_mounts=volume_mounts,
            cname=self.get_container_name(desc='grace-%s' % action),
            envs=envs
        )
        return c
523 | ||
524 | ################################## | |
525 | ||
f6b5b4d7 | 526 | |
1911f103 TL |
class CephIscsi(object):
    """Defines a Ceph-Iscsi container"""

    daemon_type = 'iscsi'
    entrypoint = '/usr/bin/rbd-target-api'

    required_files = ['iscsi-gateway.cfg']

    def __init__(self,
                 ctx,
                 fsid,
                 daemon_id,
                 config_json,
                 image=DEFAULT_IMAGE):
        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        # validate the supplied args
        self.validate()

    @classmethod
    def init(cls, ctx, fsid, daemon_id):
        # type: (CephadmContext, str, Union[int, str]) -> CephIscsi
        """Alternate constructor that reads config-json from the context."""
        return cls(ctx, fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    @staticmethod
    def get_container_mounts(data_dir, log_dir):
        # type: (str, str) -> Dict[str, str]
        """Map host paths to their in-container mount points."""
        mounts = dict()
        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
        mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
        mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
        mounts[log_dir] = '/var/log/rbd-target-api:z'
        mounts['/dev'] = '/dev'
        return mounts

    @staticmethod
    def get_container_binds():
        # type: () -> List[List[str]]
        # Expose the host's kernel modules read-only inside the container.
        binds = []
        lib_modules = ['type=bind',
                       'source=/lib/modules',
                       'destination=/lib/modules',
                       'ro=true']
        binds.append(lib_modules)
        return binds

    @staticmethod
    def get_version(ctx, container_id):
        # type: (CephadmContext, str) -> Optional[str]
        """Read the ceph_iscsi package version from inside the container."""
        version = None
        out, err, code = call(ctx,
                              [ctx.container_engine.path, 'exec', container_id,
                               '/usr/bin/python3', '-c', "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"],
                              verbosity=CallVerbosity.DEBUG)
        if code == 0:
            version = out.strip()
        return version

    def validate(self):
        # type: () -> None
        """Raise Error if any supplied option is missing or malformed."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def create_daemon_dirs(self, data_dir, uid, gid):
        # type: (str, int, int) -> None
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        logger.info('Creating ceph-iscsi config...')
        configfs_dir = os.path.join(data_dir, 'configfs')
        makedirs(configfs_dir, uid, gid, 0o755)

        # populate files from the config-json
        populate_files(data_dir, self.files, uid, gid)

    @staticmethod
    def configfs_mount_umount(data_dir, mount=True):
        # type: (str, bool) -> List[str]
        # Build a small idempotent shell snippet that mounts (or unmounts)
        # configfs only if it is not (or is) already in /proc/mounts, then
        # return it token-split.
        mount_path = os.path.join(data_dir, 'configfs')
        if mount:
            cmd = 'if ! grep -qs {0} /proc/mounts; then ' \
                  'mount -t configfs none {0}; fi'.format(mount_path)
        else:
            cmd = 'if grep -qs {0} /proc/mounts; then ' \
                  'umount {0}; fi'.format(mount_path)
        return cmd.split()

    def get_tcmu_runner_container(self):
        # type: () -> CephContainer
        """Sidecar container running tcmu-runner alongside the gateway."""
        tcmu_container = get_container(self.ctx, self.fsid, self.daemon_type, self.daemon_id)
        tcmu_container.entrypoint = '/usr/bin/tcmu-runner'
        tcmu_container.cname = self.get_container_name(desc='tcmu')
        # remove extra container args for tcmu container.
        # extra args could cause issue with forking service type
        tcmu_container.container_args = []
        return tcmu_container
654 | ||
1911f103 TL |
655 | ################################## |
656 | ||
f6b5b4d7 | 657 | |
f67539c2 TL |
class HAproxy(object):
    """Defines an HAproxy container"""
    daemon_type = 'haproxy'
    required_files = ['haproxy.cfg']
    default_image = 'haproxy'

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        self.validate()

    @classmethod
    def init(cls, ctx: CephadmContext,
             fsid: str, daemon_id: Union[int, str]) -> 'HAproxy':
        """Alternate constructor that reads config-json from the context."""
        return cls(ctx, fsid, daemon_id, get_parm(ctx.config_json),
                   ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        # create additional directories in data dir for HAproxy to use
        if not os.path.isdir(os.path.join(data_dir, 'haproxy')):
            makedirs(os.path.join(data_dir, 'haproxy'), uid, gid, DATA_DIR_MODE)

        # config files land in the 'haproxy' subdirectory
        data_dir = os.path.join(data_dir, 'haproxy')
        populate_files(data_dir, self.files, uid, gid)

    def get_daemon_args(self) -> List[str]:
        """Command-line arguments used to launch haproxy in the container."""
        return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg']

    def validate(self):
        # type: () -> None
        """Raise Error if any supplied option is missing or malformed."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    def extract_uid_gid_haproxy(self):
        # better directory for this?
        return extract_uid_gid(self.ctx, file_path='/var/lib')

    @staticmethod
    def get_container_mounts(data_dir: str) -> Dict[str, str]:
        """Map the haproxy data subdirectory to its in-container path."""
        mounts = dict()
        mounts[os.path.join(data_dir, 'haproxy')] = '/var/lib/haproxy'
        return mounts
734 | ||
735 | ################################## | |
736 | ||
737 | ||
class Keepalived(object):
    """Defines an Keepalived container"""
    daemon_type = 'keepalived'
    required_files = ['keepalived.conf']
    default_image = 'arcts/keepalived'

    def __init__(self,
                 ctx: CephadmContext,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.ctx = ctx
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.files = dict_get(config_json, 'files', {})

        self.validate()

    @classmethod
    def init(cls, ctx: CephadmContext, fsid: str,
             daemon_id: Union[int, str]) -> 'Keepalived':
        """Alternate constructor that reads config-json from the context."""
        return cls(ctx, fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """Create files under the container data dir"""
        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % (data_dir))

        # create additional directories in data dir for keepalived to use
        if not os.path.isdir(os.path.join(data_dir, 'keepalived')):
            makedirs(os.path.join(data_dir, 'keepalived'), uid, gid, DATA_DIR_MODE)

        # populate files from the config-json
        # NOTE(review): files go into data_dir itself, not the 'keepalived'
        # subdir created above — matches get_container_mounts() below.
        populate_files(data_dir, self.files, uid, gid)

    def validate(self):
        # type: () -> None
        """Raise Error if any supplied option is missing or malformed."""
        if not is_fsid(self.fsid):
            raise Error('not an fsid: %s' % self.fsid)
        if not self.daemon_id:
            raise Error('invalid daemon_id: %s' % self.daemon_id)
        if not self.image:
            raise Error('invalid image: %s' % self.image)

        # check for the required files
        if self.required_files:
            for fname in self.required_files:
                if fname not in self.files:
                    raise Error('required file missing from config-json: %s' % fname)

    def get_daemon_name(self):
        # type: () -> str
        return '%s.%s' % (self.daemon_type, self.daemon_id)

    def get_container_name(self, desc=None):
        # type: (Optional[str]) -> str
        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
        if desc:
            cname = '%s-%s' % (cname, desc)
        return cname

    @staticmethod
    def get_container_envs():
        # type: () -> List[str]
        """Environment variables consumed by the keepalived image."""
        envs = [
            'KEEPALIVED_AUTOCONF=false',
            'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf',
            'KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf',
            'KEEPALIVED_DEBUG=false'
        ]
        return envs

    @staticmethod
    def get_prestart():
        """Shell run before the daemon starts to enable required sysctls."""
        return (
            '# keepalived needs IP forwarding and non-local bind\n'
            'sysctl net.ipv4.ip_forward=1\n'
            'sysctl net.ipv4.ip_nonlocal_bind=1\n'
        )

    def extract_uid_gid_keepalived(self):
        # better directory for this?
        return extract_uid_gid(self.ctx, file_path='/var/lib')

    @staticmethod
    def get_container_mounts(data_dir: str) -> Dict[str, str]:
        """Map the generated keepalived.conf to its in-container path."""
        mounts = dict()
        mounts[os.path.join(data_dir, 'keepalived.conf')] = '/etc/keepalived/keepalived.conf'
        return mounts
830 | ||
831 | ################################## | |
832 | ||
833 | ||
f91f0fd5 TL |
class CustomContainer(object):
    """A generic, user-defined container deployed by cephadm.

    All runtime knobs (entrypoint, uid/gid, mounts, env vars, ports,
    extra dirs/files) come from the supplied config-json dictionary.
    """
    daemon_type = 'container'

    def __init__(self,
                 fsid: str, daemon_id: Union[int, str],
                 config_json: Dict, image: str) -> None:
        self.fsid = fsid
        self.daemon_id = daemon_id
        self.image = image

        # config-json options
        self.entrypoint = dict_get(config_json, 'entrypoint')
        self.uid = dict_get(config_json, 'uid', 65534)  # nobody
        self.gid = dict_get(config_json, 'gid', 65534)  # nobody
        self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
        self.args = dict_get(config_json, 'args', [])
        self.envs = dict_get(config_json, 'envs', [])
        self.privileged = dict_get(config_json, 'privileged', False)
        self.bind_mounts = dict_get(config_json, 'bind_mounts', [])
        self.ports = dict_get(config_json, 'ports', [])
        self.dirs = dict_get(config_json, 'dirs', [])
        self.files = dict_get(config_json, 'files', {})

    @classmethod
    def init(cls, ctx: CephadmContext,
             fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer':
        """Alternate constructor pulling config-json and image from the context."""
        return cls(fsid, daemon_id,
                   get_parm(ctx.config_json), ctx.image)

    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
        """
        Create dirs/files below the container data directory.
        """
        logger.info('Creating custom container configuration '
                    'dirs/files in {} ...'.format(data_dir))

        if not os.path.isdir(data_dir):
            raise OSError('data_dir is not a directory: %s' % data_dir)

        for rel_dir in self.dirs:
            logger.info('Creating directory: {}'.format(rel_dir))
            makedirs(os.path.join(data_dir, rel_dir.strip('/')),
                     uid, gid, 0o755)

        for rel_file in self.files:
            logger.info('Creating file: {}'.format(rel_file))
            content = dict_get_join(self.files, rel_file)
            with open(os.path.join(data_dir, rel_file.strip('/')),
                      'w', encoding='utf-8') as f:
                # restrict config files to the daemon's uid/gid
                os.fchown(f.fileno(), uid, gid)
                os.fchmod(f.fileno(), 0o600)
                f.write(content)

    def get_daemon_args(self) -> List[str]:
        """No extra daemon arguments for custom containers."""
        return []

    def get_container_args(self) -> List[str]:
        """Container-engine arguments straight from config-json."""
        return self.args

    def get_container_envs(self) -> List[str]:
        """Environment variables straight from config-json."""
        return self.envs

    def get_container_mounts(self, data_dir: str) -> Dict[str, str]:
        """
        Get the volume mounts. Relative source paths are anchored below
        `/var/lib/ceph/<cluster-fsid>/<daemon-name>` (absolute ones are
        left untouched by os.path.join).
        """
        return {
            os.path.join(data_dir, src): dst
            for src, dst in self.volume_mounts.items()
        }

    def get_container_binds(self, data_dir: str) -> List[List[str]]:
        """
        Get the bind mounts. A relative `source=...` option is rewritten
        to live below `/var/lib/ceph/<cluster-fsid>/<daemon-name>`;
        absolute sources are unchanged.
        """
        binds = self.bind_mounts.copy()
        source_re = re.compile(r'^source=(.+)$')
        for options in binds:
            for i, option in enumerate(options):
                m = source_re.match(option)
                if m:
                    options[i] = 'source={}'.format(
                        os.path.join(data_dir, m.group(1)))
        return binds
946 | ||
947 | ################################## | |
948 | ||
949 | ||
f67539c2 TL |
def touch(file_path: str, uid: Optional[int] = None, gid: Optional[int] = None) -> None:
    """Create *file_path* (or update its mtime), optionally chowning it.

    :param file_path: path of the file to create/touch.
    :param uid: owner uid to apply, or None to leave ownership alone.
    :param gid: owner gid to apply, or None to leave ownership alone.

    Bugfix: the original tested ``if uid and gid``, which silently skipped
    the chown whenever either id was 0 (root); compare against None so
    uid/gid 0 are honored.
    """
    Path(file_path).touch()
    if uid is not None and gid is not None:
        os.chown(file_path, uid, gid)
954 | ||
955 | ||
956 | ################################## | |
957 | ||
958 | ||
f91f0fd5 TL |
def dict_get(d: Dict, key: str, default: Any = None, require: bool = False) -> Any:
    """
    Helper function to get a key from a dictionary.
    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :param default: The default value in case the key does not
        exist. Default is `None`.
    :param require: Set to `True` if the key is required. An
        exception will be raised if the key does not exist in
        the given dictionary.
    :return: Returns the value of the given key.
    :raises: :exc:`Error` if the given key does not exist
        and `require` is set to `True`.
    """
    # membership test on the dict itself -- no need to materialize .keys()
    if require and key not in d:
        raise Error('{} missing from dict'.format(key))
    return d.get(key, default)  # type: ignore
f91f0fd5 TL |
976 | |
977 | ################################## | |
978 | ||
979 | ||
def dict_get_join(d: Dict, key: str) -> Any:
    """
    Fetch *key* from *d*, flattening list values.

    :param d: The dictionary to process.
    :param key: The name of the key to get.
    :return: the stored value; a `list` is converted to a single string
        by joining its items with line breaks; a missing key yields None.
    """
    raw = d.get(key)
    if not isinstance(raw, list):
        return raw
    return '\n'.join(str(item) for item in raw)
994 | ||
995 | ################################## | |
996 | ||
997 | ||
def get_supported_daemons():
    # type: () -> List[str]
    """Return every daemon type this cephadm build knows how to deploy."""
    supported_daemons = list(Ceph.daemons)
    supported_daemons.extend(Monitoring.components)
    supported_daemons.extend([
        NFSGanesha.daemon_type,
        CephIscsi.daemon_type,
        CustomContainer.daemon_type,
        CephadmDaemon.daemon_type,
        HAproxy.daemon_type,
        Keepalived.daemon_type,
    ])
    # daemon types must be unique
    assert len(supported_daemons) == len(set(supported_daemons))
    return supported_daemons
1010 | ||
1011 | ################################## | |
1012 | ||
f6b5b4d7 | 1013 | |
f67539c2 TL |
class PortOccupiedError(Error):
    """Raised by attempt_bind() when the requested address/port is already bound."""
    pass
1016 | ||
1017 | ||
def attempt_bind(ctx, s, address, port):
    # type: (CephadmContext, socket.socket, str, int) -> None
    """Probe-bind socket *s* to (address, port).

    Raises PortOccupiedError when the port is taken; any other bind error
    propagates unchanged. The socket is always closed afterwards -- this
    is strictly a probe, never a listener.
    """
    try:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((address, port))
    except (socket.error, OSError) as e:  # py2 and py3
        if e.errno != errno.EADDRINUSE:
            raise e
        msg = 'Cannot bind to IP %s port %d: %s' % (address, port, e)
        logger.warning(msg)
        raise PortOccupiedError(msg)
    finally:
        s.close()
1032 | ||
f6b5b4d7 | 1033 | |
f67539c2 TL |
def port_in_use(ctx, port_num):
    # type: (CephadmContext, int) -> bool
    """Detect whether a port is in use on the local machine - IPv4 and IPv6"""
    logger.info('Verifying port %d ...' % port_num)

    def _probe(af: socket.AddressFamily, bind_addr: str) -> bool:
        try:
            sock = socket.socket(af, socket.SOCK_STREAM)
            attempt_bind(ctx, sock, bind_addr, port_num)
        except PortOccupiedError:
            return True
        except OSError as e:
            # EAFNOSUPPORT / EADDRNOTAVAIL just mean this address family
            # is disabled on the host; that does not count as "in use".
            if e.errno not in (errno.EAFNOSUPPORT, errno.EADDRNOTAVAIL):
                raise e
            return False
        return False

    probes = ((socket.AF_INET, '0.0.0.0'),
              (socket.AF_INET6, '::'))
    return any(_probe(af, addr) for af, addr in probes)
9f95a23c | 1058 | |
f6b5b4d7 | 1059 | |
f67539c2 TL |
def check_ip_port(ctx, ip, port):
    # type: (CephadmContext, str, int) -> None
    """Verify that *ip*:*port* is bindable, honouring --skip-ping-check."""
    if ctx.skip_ping_check:
        return
    logger.info('Verifying IP %s port %d ...' % (ip, port))
    if is_ipv6(ip):
        family = socket.AF_INET6
        ip = unwrap_ipv6(ip)
    else:
        family = socket.AF_INET
    attempt_bind(ctx, socket.socket(family, socket.SOCK_STREAM), ip, port)
9f95a23c TL |
1070 | |
1071 | ################################## | |
1072 | ||
f67539c2 | 1073 | |
9f95a23c TL |
1074 | # this is an abbreviated version of |
1075 | # https://github.com/benediktschmitt/py-filelock/blob/master/filelock.py | |
1076 | # that drops all of the compatibility (this is Unix/Linux only). | |
1077 | ||
9f95a23c TL |
class Timeout(TimeoutError):
    """
    Raised when the lock could not be acquired in *timeout*
    seconds.
    """

    def __init__(self, lock_file):
        """Remember which lock file could not be acquired."""
        # The path of the file lock.
        self.lock_file = lock_file

    def __str__(self):
        return "The file lock '{}' could not be acquired.".format(
            self.lock_file)
1095 | ||
1096 | ||
1097 | class _Acquire_ReturnProxy(object): | |
1098 | def __init__(self, lock): | |
1099 | self.lock = lock | |
1100 | return None | |
1101 | ||
1102 | def __enter__(self): | |
1103 | return self.lock | |
1104 | ||
1105 | def __exit__(self, exc_type, exc_value, traceback): | |
1106 | self.lock.release() | |
1107 | return None | |
1108 | ||
1109 | ||
class FileLock(object):
    """An advisory inter-process lock built on flock(2).

    The lock is re-entrant within a single object: every acquire()
    increments a counter and the underlying flock is only dropped when
    the counter returns to 0 (or release(force=True) is used).
    The lock file itself is deliberately never deleted (see _release()).
    """

    def __init__(self, ctx: CephadmContext, name, timeout=-1):
        # Lock files live under LOCK_DIR, created on first use with
        # owner-only permissions.
        if not os.path.exists(LOCK_DIR):
            os.mkdir(LOCK_DIR, 0o700)
        self._lock_file = os.path.join(LOCK_DIR, name + '.lock')
        self.ctx = ctx

        # The file descriptor for the *_lock_file* as it is returned by the
        # os.open() function.
        # This file lock is only NOT None, if the object currently holds the
        # lock.
        self._lock_file_fd: Optional[int] = None
        # Default timeout (seconds) for acquire(); negative = wait forever.
        self.timeout = timeout
        # The lock counter is used for implementing the nested locking
        # mechanism. Whenever the lock is acquired, the counter is increased and
        # the lock is only released, when this value is 0 again.
        self._lock_counter = 0
        return None

    @property
    def is_locked(self):
        # True while this object holds the flock (i.e. the fd is open).
        return self._lock_file_fd is not None

    def acquire(self, timeout=None, poll_intervall=0.05):
        """
        Acquires the file lock or fails with a :exc:`Timeout` error.
        .. code-block:: python
            # You can use this method in the context manager (recommended)
            with lock.acquire():
                pass
            # Or use an equivalent try-finally construct:
            lock.acquire()
            try:
                pass
            finally:
                lock.release()
        :arg float timeout:
            The maximum time waited for the file lock.
            If ``timeout < 0``, there is no timeout and this method will
            block until the lock could be acquired.
            If ``timeout`` is None, the default :attr:`~timeout` is used.
        :arg float poll_intervall:
            We check once in *poll_intervall* seconds if we can acquire the
            file lock.
        :raises Timeout:
            if the lock could not be acquired in *timeout* seconds.
        .. versionchanged:: 2.0.0
            This method returns now a *proxy* object instead of *self*,
            so that it can be used in a with statement without side effects.
        """

        # Use the default timeout, if no timeout is provided.
        if timeout is None:
            timeout = self.timeout

        # Increment the number right at the beginning.
        # We can still undo it, if something fails.
        self._lock_counter += 1

        lock_id = id(self)
        lock_filename = self._lock_file
        start_time = time.time()
        try:
            while True:
                if not self.is_locked:
                    logger.debug('Acquiring lock %s on %s', lock_id,
                                 lock_filename)
                    self._acquire()

                if self.is_locked:
                    logger.debug('Lock %s acquired on %s', lock_id,
                                 lock_filename)
                    break
                elif timeout >= 0 and time.time() - start_time > timeout:
                    logger.warning('Timeout acquiring lock %s on %s', lock_id,
                                   lock_filename)
                    raise Timeout(self._lock_file)
                else:
                    # Lock held elsewhere: poll again after a short sleep.
                    logger.debug(
                        'Lock %s not acquired on %s, waiting %s seconds ...',
                        lock_id, lock_filename, poll_intervall
                    )
                    time.sleep(poll_intervall)
        except:  # noqa
            # Something did go wrong, so decrement the counter.
            self._lock_counter = max(0, self._lock_counter - 1)

            raise
        return _Acquire_ReturnProxy(lock=self)

    def release(self, force=False):
        """
        Releases the file lock.
        Please note, that the lock is only completly released, if the lock
        counter is 0.
        Also note, that the lock file itself is not automatically deleted.
        :arg bool force:
            If true, the lock counter is ignored and the lock is released in
            every case.
        """
        if self.is_locked:
            self._lock_counter -= 1

            if self._lock_counter == 0 or force:
                lock_id = id(self)
                lock_filename = self._lock_file

                logger.debug('Releasing lock %s on %s', lock_id, lock_filename)
                self._release()
                self._lock_counter = 0
                logger.debug('Lock %s released on %s', lock_id, lock_filename)

        return None

    def __enter__(self):
        # Context-manager entry: acquire with the default timeout.
        self.acquire()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.release()
        return None

    def __del__(self):
        # Safety net: drop the flock unconditionally when the object dies.
        self.release(force=True)
        return None

    def _acquire(self):
        # Open (creating/truncating) the lock file and try a non-blocking
        # exclusive flock; on contention the fd is closed and is_locked
        # stays False so acquire() keeps polling.
        open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
        fd = os.open(self._lock_file, open_mode)

        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except (IOError, OSError):
            os.close(fd)
        else:
            self._lock_file_fd = fd
        return None

    def _release(self):
        # Do not remove the lockfile:
        #
        # https://github.com/benediktschmitt/py-filelock/issues/31
        # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
        fd = self._lock_file_fd
        self._lock_file_fd = None
        fcntl.flock(fd, fcntl.LOCK_UN)  # type: ignore
        os.close(fd)  # type: ignore
        return None
1258 | ||
1259 | ||
1260 | ################################## | |
1261 | # Popen wrappers, lifted from ceph-volume | |
1262 | ||
adb31ebb TL |
class CallVerbosity(Enum):
    """How much of a subprocess's output call() should log."""
    # log nothing
    SILENT = 0
    # log stdout/stderr to logger.debug
    DEBUG = 1
    # On a non-zero exit status, it will forcefully set
    # logging ON for the terminal
    VERBOSE_ON_FAILURE = 2
    # log at info (instead of debug) level.
    VERBOSE = 3
1272 | ||
1273 | ||
f67539c2 TL |
# Backport: Python < 3.8 lacks asyncio's ThreadedChildWatcher, which is
# needed to run create_subprocess_exec() from non-main threads.
if sys.version_info < (3, 8):
    import itertools
    import threading
    import warnings
    from asyncio import events

    class ThreadedChildWatcher(asyncio.AbstractChildWatcher):
        """Threaded child watcher implementation.
        The watcher uses a thread per process
        for waiting for the process finish.
        It doesn't require subscription on POSIX signal
        but a thread creation is not free.
        The watcher has O(1) complexity, its performance doesn't depend
        on amount of spawn processes.
        """

        def __init__(self):
            # one waiter thread per watched pid, keyed by pid
            self._pid_counter = itertools.count(0)
            self._threads = {}

        def is_active(self):
            return True

        def close(self):
            self._join_threads()

        def _join_threads(self):
            """Internal: Join all non-daemon threads"""
            threads = [thread for thread in list(self._threads.values())
                       if thread.is_alive() and not thread.daemon]
            for thread in threads:
                thread.join()

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            pass

        def __del__(self, _warn=warnings.warn):
            threads = [thread for thread in list(self._threads.values())
                       if thread.is_alive()]
            if threads:
                _warn(f'{self.__class__} has registered but not finished child processes',
                      ResourceWarning,
                      source=self)

        def add_child_handler(self, pid, callback, *args):
            # spawn a daemon thread that blocks in waitpid() for this pid
            loop = events.get_event_loop()
            thread = threading.Thread(target=self._do_waitpid,
                                      name=f'waitpid-{next(self._pid_counter)}',
                                      args=(loop, pid, callback, args),
                                      daemon=True)
            self._threads[pid] = thread
            thread.start()

        def remove_child_handler(self, pid):
            # asyncio never calls remove_child_handler() !!!
            # The method is no-op but is implemented because
            # abstract base class requires it
            return True

        def attach_loop(self, loop):
            pass

        def _do_waitpid(self, loop, expected_pid, callback, args):
            assert expected_pid > 0

            try:
                pid, status = os.waitpid(expected_pid, 0)
            except ChildProcessError:
                # The child process is already reaped
                # (may happen if waitpid() is called elsewhere).
                pid = expected_pid
                returncode = 255
                logger.warning(
                    'Unknown child process pid %d, will report returncode 255',
                    pid)
            else:
                if os.WIFEXITED(status):
                    returncode = os.WEXITSTATUS(status)
                elif os.WIFSIGNALED(status):
                    # negative returncode mirrors subprocess convention
                    returncode = -os.WTERMSIG(status)
                else:
                    raise ValueError(f'unknown wait status {status}')
                if loop.get_debug():
                    logger.debug('process %s exited with returncode %s',
                                 expected_pid, returncode)

            if loop.is_closed():
                logger.warning('Loop %r that handles pid %r is closed', loop, pid)
            else:
                # hand the result back to the event loop's thread
                loop.call_soon_threadsafe(callback, pid, returncode, *args)

            self._threads.pop(expected_pid)

    # unlike SafeChildWatcher which handles SIGCHLD in the main thread,
    # ThreadedChildWatcher runs in a separated thread, hence allows us to
    # run create_subprocess_exec() in non-main thread, see
    # https://bugs.python.org/issue35621
    asyncio.set_child_watcher(ThreadedChildWatcher())
1375 | ||
1376 | ||
# asyncio.run() exists from Python 3.7; emulate it with a private event
# loop on older interpreters so call() works everywhere.
try:
    from asyncio import run as async_run  # type: ignore[attr-defined]
except ImportError:
    def async_run(coro):  # type: ignore
        """Run *coro* to completion on a fresh event loop and return its result."""
        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)
            return loop.run_until_complete(coro)
        finally:
            try:
                # flush pending async generators before tearing the loop down
                loop.run_until_complete(loop.shutdown_asyncgens())
            finally:
                asyncio.set_event_loop(None)
                loop.close()
1391 | ||
1392 | ||
def call(ctx: CephadmContext,
         command: List[str],
         desc: Optional[str] = None,
         verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
         timeout: Optional[int] = DEFAULT_TIMEOUT,
         **kwargs) -> Tuple[str, str, int]:
    """
    Wrap subprocess.Popen to

    - log stdout/stderr to a logger,
    - decode utf-8
    - cleanly return out, err, returncode

    :param command: argv list to execute (no shell).
    :param desc: log-line prefix; defaults to the command name.
    :param verbosity: how to route the child's output to the logger.
    :param timeout: timeout in seconds
    :return: (stdout, stderr, returncode); returncode is 124 on timeout.
    """

    prefix = command[0] if desc is None else desc
    if prefix:
        prefix += ': '
    timeout = timeout or ctx.timeout

    logger.debug('Running command: %s' % ' '.join(command))

    async def tee(reader: asyncio.StreamReader) -> str:
        # Stream the child's output line by line: log each line at the
        # requested verbosity while accumulating the full text to return.
        collected = StringIO()
        async for line in reader:
            message = line.decode('utf-8')
            collected.write(message)
            if verbosity == CallVerbosity.VERBOSE:
                logger.info(prefix + message.rstrip())
            elif verbosity != CallVerbosity.SILENT:
                logger.debug(prefix + message.rstrip())
        return collected.getvalue()

    async def run_with_timeout() -> Tuple[str, str, int]:
        process = await asyncio.create_subprocess_exec(
            *command,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE)
        assert process.stdout
        assert process.stderr
        try:
            # drain both pipes concurrently to avoid deadlocking the child
            stdout, stderr = await asyncio.gather(tee(process.stdout),
                                                  tee(process.stderr))
            returncode = await asyncio.wait_for(process.wait(), timeout)
        except asyncio.TimeoutError:
            # 124 matches the exit status used by coreutils' timeout(1)
            logger.info(prefix + f'timeout after {timeout} seconds')
            return '', '', 124
        else:
            return stdout, stderr, returncode

    stdout, stderr, returncode = async_run(run_with_timeout())
    if returncode != 0 and verbosity == CallVerbosity.VERBOSE_ON_FAILURE:
        # failure: replay the captured output at info level for visibility
        logger.info('Non-zero exit code %d from %s',
                    returncode, ' '.join(command))
        for line in stdout.splitlines():
            logger.info(prefix + 'stdout ' + line)
        for line in stderr.splitlines():
            logger.info(prefix + 'stderr ' + line)
    return stdout, stderr, returncode
1453 | ||
1454 | ||
def call_throws(
        ctx: CephadmContext,
        command: List[str],
        desc: Optional[str] = None,
        verbosity: CallVerbosity = CallVerbosity.VERBOSE_ON_FAILURE,
        timeout: Optional[int] = DEFAULT_TIMEOUT,
        **kwargs) -> Tuple[str, str, int]:
    """Like call(), but raise RuntimeError on a non-zero exit status."""
    out, err, ret = call(ctx, command, desc, verbosity, timeout, **kwargs)
    if ret != 0:
        raise RuntimeError('Failed command: %s' % ' '.join(command))
    return out, err, ret
1466 | ||
1467 | ||
f67539c2 TL |
def call_timeout(ctx, command, timeout):
    # type: (CephadmContext, List[str], int) -> int
    """Run *command*, raising TimeoutExpired if it outlives *timeout* seconds."""
    logger.debug('Running command (timeout=%s): %s'
                 % (timeout, ' '.join(command)))

    try:
        return subprocess.call(command, timeout=timeout)
    except subprocess.TimeoutExpired:
        msg = 'Command `%s` timed out after %s seconds' % (command, timeout)
        logger.debug(msg)
        raise TimeoutExpired(msg)
9f95a23c TL |
1483 | |
1484 | ################################## | |
1485 | ||
f6b5b4d7 | 1486 | |
f67539c2 TL |
def is_available(ctx, what, func):
    # type: (CephadmContext, str, Callable[[], bool]) -> None
    """
    Wait for a service to become available

    :param what: the name of the service
    :param func: the callable object that determines availability
    """
    retry = ctx.retry
    logger.info('Waiting for %s...' % what)
    attempt = 1
    while not func():
        if attempt > retry:
            raise Error('%s not available after %s tries'
                        % (what, retry))
        logger.info('%s not available, waiting (%s/%s)...'
                    % (what, attempt, retry))
        attempt += 1
        time.sleep(2)
    logger.info('%s is available' % what)
9f95a23c TL |
1512 | |
1513 | ||
def read_config(fn):
    # type: (Optional[str]) -> ConfigParser
    """Load an INI-style configuration file.

    :param fn: path to the file, or None/empty for an empty parser.
    """
    parser = ConfigParser()
    if fn:
        parser.read(fn)
    return parser
1520 | ||
f6b5b4d7 | 1521 | |
9f95a23c TL |
def pathify(p):
    # type: (str) -> str
    """Expand a leading '~' and return the absolute form of path *p*."""
    return os.path.abspath(os.path.expanduser(p))
9f95a23c | 1526 | |
f6b5b4d7 | 1527 | |
9f95a23c | 1528 | def get_file_timestamp(fn): |
e306af50 | 1529 | # type: (str) -> Optional[str] |
9f95a23c TL |
1530 | try: |
1531 | mt = os.path.getmtime(fn) | |
1532 | return datetime.datetime.fromtimestamp( | |
1533 | mt, tz=datetime.timezone.utc | |
1534 | ).strftime(DATEFMT) | |
adb31ebb | 1535 | except Exception: |
9f95a23c TL |
1536 | return None |
1537 | ||
f6b5b4d7 | 1538 | |
9f95a23c | 1539 | def try_convert_datetime(s): |
e306af50 | 1540 | # type: (str) -> Optional[str] |
9f95a23c TL |
1541 | # This is super irritating because |
1542 | # 1) podman and docker use different formats | |
1543 | # 2) python's strptime can't parse either one | |
1544 | # | |
1545 | # I've seen: | |
1546 | # docker 18.09.7: 2020-03-03T09:21:43.636153304Z | |
1547 | # podman 1.7.0: 2020-03-03T15:52:30.136257504-06:00 | |
1548 | # 2020-03-03 15:52:30.136257504 -0600 CST | |
1549 | # (In the podman case, there is a different string format for | |
1550 | # 'inspect' and 'inspect --format {{.Created}}'!!) | |
1551 | ||
1552 | # In *all* cases, the 9 digit second precision is too much for | |
1553 | # python's strptime. Shorten it to 6 digits. | |
1554 | p = re.compile(r'(\.[\d]{6})[\d]*') | |
1555 | s = p.sub(r'\1', s) | |
1556 | ||
adb31ebb | 1557 | # replace trailing Z with -0000, since (on python 3.6.8) it won't parse |
9f95a23c TL |
1558 | if s and s[-1] == 'Z': |
1559 | s = s[:-1] + '-0000' | |
1560 | ||
adb31ebb | 1561 | # cut off the redundant 'CST' part that strptime can't parse, if |
9f95a23c TL |
1562 | # present. |
1563 | v = s.split(' ') | |
1564 | s = ' '.join(v[0:3]) | |
1565 | ||
1566 | # try parsing with several format strings | |
1567 | fmts = [ | |
1568 | '%Y-%m-%dT%H:%M:%S.%f%z', | |
1569 | '%Y-%m-%d %H:%M:%S.%f %z', | |
1570 | ] | |
1571 | for f in fmts: | |
1572 | try: | |
1573 | # return timestamp normalized to UTC, rendered as DATEFMT. | |
1574 | return datetime.datetime.strptime(s, f).astimezone(tz=datetime.timezone.utc).strftime(DATEFMT) | |
1575 | except ValueError: | |
1576 | pass | |
1577 | return None | |
1578 | ||
f6b5b4d7 | 1579 | |
f67539c2 | 1580 | def _parse_podman_version(version_str): |
9f95a23c | 1581 | # type: (str) -> Tuple[int, ...] |
9f95a23c TL |
1582 | def to_int(val, org_e=None): |
1583 | if not val and org_e: | |
1584 | raise org_e | |
1585 | try: | |
1586 | return int(val) | |
1587 | except ValueError as e: | |
1588 | return to_int(val[0:-1], org_e or e) | |
1589 | ||
1590 | return tuple(map(to_int, version_str.split('.'))) | |
1591 | ||
1592 | ||
def get_hostname():
    # type: () -> str
    """Short hostname of this machine."""
    return socket.gethostname()
1596 | ||
f6b5b4d7 | 1597 | |
9f95a23c TL |
def get_fqdn():
    # type: () -> str
    """Fully-qualified hostname, falling back to the short hostname."""
    return socket.getfqdn() or socket.gethostname()
1601 | ||
f6b5b4d7 | 1602 | |
9f95a23c TL |
def get_arch():
    # type: () -> str
    """Machine architecture string (e.g. x86_64)."""
    return platform.uname().machine
1606 | ||
f6b5b4d7 | 1607 | |
9f95a23c TL |
def generate_service_id():
    # type: () -> str
    """Hostname plus a random six-letter suffix, dot-separated."""
    suffix = ''.join(random.choice(string.ascii_lowercase) for _ in range(6))
    return get_hostname() + '.' + suffix
1612 | ||
f6b5b4d7 | 1613 | |
9f95a23c TL |
def generate_password():
    # type: () -> str
    """Random ten-character password of lowercase letters and digits."""
    alphabet = string.ascii_lowercase + string.digits
    return ''.join(random.choice(alphabet) for _ in range(10))
1618 | ||
f6b5b4d7 | 1619 | |
9f95a23c TL |
def normalize_container_id(i):
    # type: (str) -> str
    """Strip docker's 'sha256:' prefix from a container/image id.

    docker (18.09.7 in bionic at least) and podman both only ever use
    sha256, so dropping the prefix loses nothing and keeps ids
    comparable across engines.
    """
    prefix = 'sha256:'
    return i[len(prefix):] if i.startswith(prefix) else i
1630 | ||
f6b5b4d7 | 1631 | |
9f95a23c TL |
def make_fsid():
    # type: () -> str
    """Generate a new cluster fsid.

    Use uuid4 (purely random) rather than uuid1: uuid1 embeds the host's
    MAC address and a timestamp, leaking host information into an
    identifier that gets shared and logged widely.
    """
    return str(uuid.uuid4())
1635 | ||
f6b5b4d7 | 1636 | |
9f95a23c TL |
def is_fsid(s):
    # type: (str) -> bool
    """True if *s* parses as a UUID."""
    try:
        uuid.UUID(s)
        return True
    except ValueError:
        return False
1644 | ||
f6b5b4d7 | 1645 | |
9f95a23c TL |
def infer_fsid(func):
    """
    If we only find a single fsid in /var/lib/ceph/*, use that
    """
    @wraps(func)
    def _infer_fsid(ctx: CephadmContext):
        if ctx.fsid:
            logger.debug('Using specified fsid: %s' % ctx.fsid)
            return func(ctx)

        # collect candidate fsids from deployed daemons, optionally
        # restricted to the daemon named on the command line
        name_filter = ctx.name if ('name' in ctx and ctx.name) else None
        fsids = sorted({
            daemon['fsid']
            for daemon in list_daemons(ctx, detail=False)
            if is_fsid(daemon['fsid'])  # skip 'unknown' fsids
            and (name_filter is None or daemon['name'] == name_filter)
        })

        if len(fsids) == 1:
            logger.info('Inferring fsid %s' % fsids[0])
            ctx.fsid = fsids[0]
        elif len(fsids) > 1:
            raise Error('Cannot infer an fsid, one must be specified: %s' % fsids)
        # no fsids found: some commands do not always require an fsid
        return func(ctx)

    return _infer_fsid
1681 | ||
f6b5b4d7 | 1682 | |
e306af50 TL |
def infer_config(func):
    """
    If we find a MON daemon, use the config from that container
    """
    @wraps(func)
    def _infer_config(ctx: CephadmContext):
        if ctx.config:
            logger.debug('Using specified config: %s' % ctx.config)
            return func(ctx)

        config = None
        if ctx.fsid:
            name = ctx.name
            if not name:
                # no daemon named: look for any mon in this cluster
                for daemon in list_daemons(ctx, detail=False):
                    if daemon['name'].startswith('mon.'):
                        name = daemon['name']
                        break
            if name:
                config = '/var/lib/ceph/{}/{}/config'.format(ctx.fsid,
                                                             name)

        if config:
            logger.info('Inferring config %s' % config)
            ctx.config = config
        elif os.path.exists(SHELL_DEFAULT_CONF):
            logger.debug('Using default config: %s' % SHELL_DEFAULT_CONF)
            ctx.config = SHELL_DEFAULT_CONF
        return func(ctx)

    return _infer_config
1713 | ||
f6b5b4d7 | 1714 | |
def _get_default_image(ctx: CephadmContext):
    """Return the default ceph container image.

    When this build is flagged as a development (master) build, emit a
    yellow warning pointing users at the latest stable release docs.
    """
    if DEFAULT_IMAGE_IS_MASTER:
        warn = """This is a development version of cephadm.
For information regarding the latest stable release:
    https://docs.ceph.com/docs/{}/cephadm/install
""".format(LATEST_STABLE_RELEASE)
        for line in warn.splitlines():
            logger.warning('{}{}{}'.format(termcolor.yellow, line, termcolor.end))
    return DEFAULT_IMAGE
1724 | ||
f6b5b4d7 | 1725 | |
9f95a23c TL |
def infer_image(func):
    """
    Use the most recent ceph image
    """
    @wraps(func)
    def _infer_image(ctx: CephadmContext):
        # try each source in priority order until one yields an image
        if not ctx.image:
            sources = (
                lambda: os.environ.get('CEPHADM_IMAGE'),
                lambda: get_last_local_ceph_image(ctx, ctx.container_engine.path),
                lambda: _get_default_image(ctx),
            )
            for source in sources:
                ctx.image = source()
                if ctx.image:
                    break
        return func(ctx)

    return _infer_image
1741 | ||
f6b5b4d7 | 1742 | |
9f95a23c TL |
def default_image(func):
    """Decorator: fill in ctx.image from the daemon name, env, or defaults."""
    @wraps(func)
    def _default_image(ctx: CephadmContext):
        if not ctx.image:
            # infer the image from the daemon type embedded in ctx.name
            # (haproxy/keepalived checks deliberately run after the
            # Monitoring lookup and can override it)
            if 'name' in ctx and ctx.name:
                type_ = ctx.name.split('.', 1)[0]
                if type_ in Monitoring.components:
                    ctx.image = Monitoring.components[type_]['image']
                if type_ == 'haproxy':
                    ctx.image = HAproxy.default_image
                if type_ == 'keepalived':
                    ctx.image = Keepalived.default_image
            # fall back to the environment, then the built-in default
            if not ctx.image:
                ctx.image = os.environ.get('CEPHADM_IMAGE')
            if not ctx.image:
                ctx.image = _get_default_image(ctx)

        return func(ctx)

    return _default_image
1763 | ||
f6b5b4d7 | 1764 | |
def get_last_local_ceph_image(ctx: CephadmContext, container_path: str):
    """
    :return: The most recent local ceph image (already pulled)
    """
    cmd = [container_path, 'images',
           '--filter', 'label=ceph=True',
           '--filter', 'dangling=false',
           '--format', '{{.Repository}}@{{.Digest}}']
    out, _, _ = call_throws(ctx, cmd)
    return _filter_last_local_ceph_image(out)
1775 | ||
1776 | ||
def _filter_last_local_ceph_image(out):
    # type: (str) -> Optional[str]
    """Pick the first usable image line from `images` output, or None."""
    for candidate in out.splitlines():
        # skip blank lines and entries with no digest ('repo@')
        if not candidate or candidate.endswith('@'):
            continue
        logger.info('Using recent ceph image %s' % candidate)
        return candidate
    return None
1784 | ||
f6b5b4d7 | 1785 | |
def write_tmp(s, uid, gid):
    # type: (str, int, int) -> IO[str]
    """Write *s* to a named temporary file owned by uid:gid.

    The open file object is returned; the file is deleted when it is closed.
    """
    handle = tempfile.NamedTemporaryFile(mode='w', prefix='ceph-tmp')
    os.fchown(handle.fileno(), uid, gid)
    handle.write(s)
    handle.flush()
    return handle
1795 | ||
f6b5b4d7 | 1796 | |
9f95a23c TL |
def makedirs(dir, uid, gid, mode):
    # type: (str, int, int, int) -> None
    """Ensure *dir* exists with the given ownership and permission bits."""
    if os.path.exists(dir):
        os.chmod(dir, mode)
    else:
        os.makedirs(dir, mode=mode)
    os.chown(dir, uid, gid)
    os.chmod(dir, mode)  # the above is masked by umask...
1805 | ||
f6b5b4d7 | 1806 | |
f67539c2 TL |
def get_data_dir(fsid, data_dir, t, n):
    # type: (str, str, str, Union[int, str]) -> str
    """Return the per-daemon data directory: <data_dir>/<fsid>/<t>.<n>."""
    return os.path.join(data_dir, fsid, '{}.{}'.format(t, n))
9f95a23c | 1810 | |
f6b5b4d7 | 1811 | |
f67539c2 TL |
def get_log_dir(fsid, log_dir):
    # type: (str, str) -> str
    """Return the per-cluster log directory: <log_dir>/<fsid>."""
    return os.path.join(log_dir, fsid)
9f95a23c | 1815 | |
f6b5b4d7 | 1816 | |
f67539c2 TL |
def make_data_dir_base(fsid, data_dir, uid, gid):
    # type: (str, str, int, int) -> str
    """Ensure <data_dir>/<fsid> and its crash/posted subdirs exist; return the base."""
    base = os.path.join(data_dir, fsid)
    for parts in ((), ('crash',), ('crash', 'posted')):
        makedirs(os.path.join(base, *parts), uid, gid, DATA_DIR_MODE)
    return base
1825 | ||
f6b5b4d7 | 1826 | |
f67539c2 TL |
def make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=None, gid=None):
    # type: (CephadmContext, str, str, Union[int, str], Optional[int], Optional[int]) -> str
    """Create (if needed) and return the data dir for one daemon instance."""
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)
    make_data_dir_base(fsid, ctx.data_dir, uid, gid)
    path = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    makedirs(path, uid, gid, DATA_DIR_MODE)
    return path
1835 | ||
f6b5b4d7 | 1836 | |
f67539c2 TL |
def make_log_dir(ctx, fsid, uid=None, gid=None):
    # type: (CephadmContext, str, Optional[int], Optional[int]) -> str
    """Create (if needed) and return the log dir for the cluster."""
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)
    path = get_log_dir(fsid, ctx.log_dir)
    makedirs(path, uid, gid, LOG_DIR_MODE)
    return path
1844 | ||
f6b5b4d7 | 1845 | |
f67539c2 TL |
def make_var_run(ctx, fsid, uid, gid):
    # type: (CephadmContext, str, int, int) -> None
    """Create /var/run/ceph/<fsid> (mode 0770, owned by uid:gid) via install(1)."""
    cmd = ['install', '-d', '-m0770',
           '-o', str(uid), '-g', str(gid),
           '/var/run/ceph/%s' % fsid]
    call_throws(ctx, cmd)
9f95a23c | 1850 | |
f6b5b4d7 | 1851 | |
f67539c2 TL |
def copy_tree(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """
    Copy a directory tree from src to dst

    Each source directory is copied (replacing any existing copy) and the
    whole resulting tree is chowned to uid:gid.
    """
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_dir in src:
        dst_dir = dst
        if os.path.isdir(dst):
            # copying *into* an existing directory: keep the source basename
            dst_dir = os.path.join(dst, os.path.basename(src_dir))

        logger.debug('copy directory `%s` -> `%s`' % (src_dir, dst_dir))
        shutil.rmtree(dst_dir, ignore_errors=True)
        shutil.copytree(src_dir, dst_dir)  # dirs_exist_ok needs python 3.8

        for dirpath, dirnames, filenames in os.walk(dst_dir):
            logger.debug('chown %s:%s `%s`' % (uid, gid, dirpath))
            os.chown(dirpath, uid, gid)
            for filename in filenames:
                # fix: log the full path actually being chowned, not just the
                # bare filename (previous log line was misleading)
                filepath = os.path.join(dirpath, filename)
                logger.debug('chown %s:%s `%s`' % (uid, gid, filepath))
                os.chown(filepath, uid, gid)
1875 | ||
1876 | ||
f67539c2 TL |
def copy_files(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """Copy each file in *src* to *dst*, chowning the copies to uid:gid."""
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_file in src:
        # when dst is an existing directory, copy into it under the same name
        dst_file = os.path.join(dst, os.path.basename(src_file)) if os.path.isdir(dst) else dst

        logger.debug('copy file `%s` -> `%s`' % (src_file, dst_file))
        shutil.copyfile(src_file, dst_file)

        logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
        os.chown(dst_file, uid, gid)
1895 | ||
f6b5b4d7 | 1896 | |
f67539c2 TL |
def move_files(ctx, src, dst, uid=None, gid=None):
    # type: (CephadmContext, List[str], str, Optional[int], Optional[int]) -> None
    """Move each file in *src* to *dst*; symlinks are recreated rather than moved."""
    if uid is None or gid is None:
        (uid, gid) = extract_uid_gid(ctx)

    for src_file in src:
        # when dst is an existing directory, move into it under the same name
        dst_file = os.path.join(dst, os.path.basename(src_file)) if os.path.isdir(dst) else dst

        if os.path.islink(src_file):
            # shutil.move() in py2 does not handle symlinks correctly
            src_rl = os.readlink(src_file)
            logger.debug("symlink '%s' -> '%s'" % (dst_file, src_rl))
            os.symlink(src_rl, dst_file)
            os.unlink(src_file)
        else:
            logger.debug("move file '%s' -> '%s'" % (src_file, dst_file))
            shutil.move(src_file, dst_file)
            logger.debug('chown %s:%s `%s`' % (uid, gid, dst_file))
            os.chown(dst_file, uid, gid)
1921 | ||
f6b5b4d7 | 1922 | |
f67539c2 | 1923 | # copied from distutils |
9f95a23c TL |
# copied from distutils
def find_executable(executable, path=None):
    """Tries to find 'executable' in the directories listed in 'path'.
    A string listing directories separated by 'os.pathsep'; defaults to
    os.environ['PATH']. Returns the complete filename or None if not found.
    """
    _, ext = os.path.splitext(executable)
    if (sys.platform == 'win32') and (ext != '.exe'):
        executable = executable + '.exe'

    # a direct (relative or absolute) path short-circuits the search
    if os.path.isfile(executable):
        return executable

    if path is None:
        path = os.environ.get('PATH', None)
        if path is None:
            try:
                path = os.confstr('CS_PATH')
            except (AttributeError, ValueError):
                # os.confstr() or CS_PATH is not available
                path = os.defpath
        # bpo-35755: Don't use os.defpath if the PATH environment variable is
        # set to an empty string

    # PATH='' doesn't match, whereas PATH=':' looks in the current directory
    if not path:
        return None

    candidates = (os.path.join(p, executable) for p in path.split(os.pathsep))
    return next((c for c in candidates if os.path.isfile(c)), None)
1958 | ||
f6b5b4d7 | 1959 | |
9f95a23c TL |
def find_program(filename):
    # type: (str) -> str
    """Like find_executable(), but raise ValueError when the program is missing."""
    located = find_executable(filename)
    if located is None:
        raise ValueError('%s not found' % filename)
    return located
1966 | ||
f6b5b4d7 | 1967 | |
f67539c2 TL |
def find_container_engine(ctx: CephadmContext):
    """Pick a container engine: docker when forced, else first working preference."""
    if ctx.docker:
        return Docker()
    for engine_cls in CONTAINER_PREFERENCE:
        try:
            return engine_cls()
        except Exception as e:
            logger.debug('Could not locate %s: %s' % (engine_cls.EXE, e))
    return None
1978 | ||
1979 | ||
def check_container_engine(ctx):
    # type: (CephadmContext) -> None
    """Validate ctx.container_engine; raise Error when unusable or too old."""
    engine = ctx.container_engine
    if not isinstance(engine, CONTAINER_PREFERENCE):
        raise Error('Unable to locate any of %s' % [i.EXE for i in CONTAINER_PREFERENCE])
    if isinstance(engine, Podman):
        # podman has a hard minimum version requirement
        engine.get_version(ctx)
        if engine.version < MIN_PODMAN_VERSION:
            raise Error('podman version %d.%d.%d or later is required' % MIN_PODMAN_VERSION)
1989 | ||
1990 | ||
9f95a23c TL |
def get_unit_name(fsid, daemon_type, daemon_id=None):
    # type: (str, str, Optional[Union[int, str]]) -> str
    """Map fsid/type[/id] to the systemd unit name cephadm uses."""
    # accept either name or type + id
    if daemon_id is None:
        return 'ceph-%s@%s' % (fsid, daemon_type)
    if daemon_type == CephadmDaemon.daemon_type:
        # the cephadm daemon itself uses a non-templated unit name
        return 'ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id)
    return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id)
2000 | ||
f6b5b4d7 | 2001 | |
f67539c2 TL |
def get_unit_name_by_daemon_name(ctx: CephadmContext, fsid, name):
    """Return the systemd unit recorded for daemon *name* in cluster *fsid*."""
    daemon = get_daemon_description(ctx, fsid, name)
    if 'systemd_unit' not in daemon:
        raise Error('Failed to get unit name for {}'.format(daemon))
    return daemon['systemd_unit']
2008 | ||
f6b5b4d7 | 2009 | |
f67539c2 TL |
def check_unit(ctx, unit_name):
    # type: (CephadmContext, str) -> Tuple[bool, str, bool]
    """Query systemd about one unit.

    Returns a tuple (enabled, state, installed) where state is one of
    'running', 'stopped', 'error' or 'unknown'.
    """
    # NOTE: we ignore the exit code here because systemctl outputs
    # various exit codes based on the state of the service, but the
    # string result is more explicit (and sufficient).
    enabled = False
    installed = False
    try:
        out, err, code = call(ctx, ['systemctl', 'is-enabled', unit_name],
                              verbosity=CallVerbosity.DEBUG)
        if code == 0:
            enabled = True
            installed = True
        elif 'disabled' in out:
            # the unit file exists but is not enabled
            installed = True
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        enabled = False
        installed = False

    state = 'unknown'
    try:
        out, err, code = call(ctx, ['systemctl', 'is-active', unit_name],
                              verbosity=CallVerbosity.DEBUG)
        out = out.strip()
        if out in ['active']:
            state = 'running'
        elif out in ['inactive']:
            state = 'stopped'
        elif out in ['failed', 'auto-restart']:
            state = 'error'
        else:
            state = 'unknown'
    except Exception as e:
        logger.warning('unable to run systemctl: %s' % e)
        state = 'unknown'
    return (enabled, state, installed)
2047 | ||
f6b5b4d7 | 2048 | |
f67539c2 TL |
def check_units(ctx, units, enabler=None):
    # type: (CephadmContext, List[str], Optional[Packager]) -> bool
    """Return True if any unit is enabled and running.

    Otherwise, when *enabler* is given, enable any unit that is installed.
    """
    for unit in units:
        enabled, state, installed = check_unit(ctx, unit)
        if enabled and state == 'running':
            logger.info('Unit %s is enabled and running' % unit)
            return True
        if enabler is not None and installed:
            logger.info('Enabling unit %s' % unit)
            enabler.enable_service(unit)
    return False
2061 | ||
f6b5b4d7 | 2062 | |
f67539c2 TL |
def is_container_running(ctx: CephadmContext, name: str) -> bool:
    """Report whether *name* appears among the engine's running container names."""
    cmd = [ctx.container_engine.path, 'ps', '--format', '{{.Names}}']
    out, err, ret = call_throws(ctx, cmd)
    return name in out
2068 | ||
2069 | ||
9f95a23c | 2070 | def get_legacy_config_fsid(cluster, legacy_dir=None): |
f6b5b4d7 | 2071 | # type: (str, Optional[str]) -> Optional[str] |
9f95a23c TL |
2072 | config_file = '/etc/ceph/%s.conf' % cluster |
2073 | if legacy_dir is not None: | |
2074 | config_file = os.path.abspath(legacy_dir + config_file) | |
2075 | ||
2076 | if os.path.exists(config_file): | |
2077 | config = read_config(config_file) | |
2078 | if config.has_section('global') and config.has_option('global', 'fsid'): | |
2079 | return config.get('global', 'fsid') | |
2080 | return None | |
2081 | ||
f6b5b4d7 | 2082 | |
f67539c2 TL |
def get_legacy_daemon_fsid(ctx, cluster,
                           daemon_type, daemon_id, legacy_dir=None):
    # type: (CephadmContext, str, str, Union[int, str], Optional[str]) -> Optional[str]
    """Determine the fsid of a legacy (pre-cephadm) daemon.

    OSDs record their fsid in a ceph_fsid file; everything else falls back
    to the cluster config file.
    """
    fsid = None
    if daemon_type == 'osd':
        fsid_file = os.path.join(ctx.data_dir,
                                 daemon_type,
                                 'ceph-%s' % daemon_id,
                                 'ceph_fsid')
        if legacy_dir is not None:
            fsid_file = os.path.abspath(legacy_dir + fsid_file)
        try:
            with open(fsid_file, 'r') as f:
                fsid = f.read().strip()
        except IOError:
            pass
    return fsid or get_legacy_config_fsid(cluster, legacy_dir=legacy_dir)
2102 | ||
f6b5b4d7 | 2103 | |
f67539c2 TL |
def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
    # type: (CephadmContext, str, str, Union[int, str]) -> List[str]
    """Build the daemon-specific command-line arguments for one daemon."""
    r = list()  # type: List[str]

    if daemon_type in Ceph.daemons and daemon_type != 'crash':
        # ceph daemons log to stderr so the container runtime captures output
        r += [
            '--setuser', 'ceph',
            '--setgroup', 'ceph',
            '--default-log-to-file=false',
            '--default-log-to-stderr=true',
            '--default-log-stderr-prefix=debug ',
        ]
        if daemon_type == 'mon':
            r += [
                '--default-mon-cluster-log-to-file=false',
                '--default-mon-cluster-log-to-stderr=true',
            ]
    elif daemon_type in Monitoring.components:
        metadata = Monitoring.components[daemon_type]
        r += metadata.get('args', list())
        if daemon_type == 'alertmanager':
            config = get_parm(ctx.config_json)
            peers = config.get('peers', list())  # type: ignore
            for peer in peers:
                r += ['--cluster.peer={}'.format(peer)]
            # some alertmanager, by default, look elsewhere for a config
            r += ['--config.file=/etc/alertmanager/alertmanager.yml']
    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
        r += nfs_ganesha.get_daemon_args()
    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, fsid, daemon_id)
        r += haproxy.get_daemon_args()
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        r.extend(cc.get_daemon_args())

    return r
2142 | ||
f6b5b4d7 | 2143 | |
def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid,
                       config=None, keyring=None):
    # type: (CephadmContext, str, str, Union[int, str], int, int, Optional[str], Optional[str]) -> None
    """Create data/log directories for a daemon and write its config/keyring.

    Daemon-type-specific setup (monitoring stack dirs, nfs, iscsi, haproxy,
    keepalived, custom containers) is delegated after the common files are
    written.
    """
    data_dir = make_data_dir(ctx, fsid, daemon_type, daemon_id, uid=uid, gid=gid)
    make_log_dir(ctx, fsid, uid=uid, gid=gid)

    if config:
        # daemon's ceph.conf, mode 0600 owned by the daemon user
        config_path = os.path.join(data_dir, 'config')
        with open(config_path, 'w') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config)

    if keyring:
        keyring_path = os.path.join(data_dir, 'keyring')
        with open(keyring_path, 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write(keyring)

    if daemon_type in Monitoring.components.keys():
        config_json: Dict[str, Any] = get_parm(ctx.config_json)
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())

        # Set up directories specific to the monitoring component
        config_dir = ''
        data_dir_root = ''
        if daemon_type == 'prometheus':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/prometheus'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'alerting'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
        elif daemon_type == 'grafana':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/grafana'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
            # grafana.db must pre-exist so the bind mount is a file, not a dir
            touch(os.path.join(data_dir_root, 'data', 'grafana.db'), uid, gid)
        elif daemon_type == 'alertmanager':
            data_dir_root = get_data_dir(fsid, ctx.data_dir,
                                         daemon_type, daemon_id)
            config_dir = 'etc/alertmanager'
            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
            makedirs(os.path.join(data_dir_root, config_dir, 'data'), uid, gid, 0o755)

        # populate the config directory for the component from the config-json
        for fname in required_files:
            if 'files' in config_json:  # type: ignore
                content = dict_get_join(config_json['files'], fname)
                with open(os.path.join(data_dir_root, config_dir, fname), 'w') as f:
                    os.fchown(f.fileno(), uid, gid)
                    os.fchmod(f.fileno(), 0o600)
                    f.write(content)

    elif daemon_type == NFSGanesha.daemon_type:
        nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
        nfs_ganesha.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CephIscsi.daemon_type:
        ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
        ceph_iscsi.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, fsid, daemon_id)
        haproxy.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == Keepalived.daemon_type:
        keepalived = Keepalived.init(ctx, fsid, daemon_id)
        keepalived.create_daemon_dirs(data_dir, uid, gid)

    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        cc.create_daemon_dirs(data_dir, uid, gid)
2222 | ||
f6b5b4d7 | 2223 | |
9f95a23c TL |
def get_parm(option):
    # type: (str) -> Dict[str, str]
    """Parse a JSON config option.

    *option* may be '-' (read stdin), an inline '{...}' JSON string, or a
    path to a JSON file.  Returns {} for an empty option.  NOTE: despite the
    declared type, the parsed JSON may contain nested values.
    """
    if not option:
        return dict()

    global cached_stdin
    if option == '-':
        # stdin can only be consumed once; cache it for repeated calls
        if cached_stdin is not None:
            j = cached_stdin
        else:
            j = sys.stdin.read()
            cached_stdin = j
    else:
        # inline json string
        if option[0] == '{' and option[-1] == '}':
            j = option
        # json file
        elif os.path.exists(option):
            with open(option, 'r') as f:
                j = f.read()
        else:
            raise Error('Config file {} not found'.format(option))

    try:
        js = json.loads(j)
    except ValueError as e:
        raise Error('Invalid JSON in {}: {}'.format(option, e))
    else:
        return js
2254 | ||
f6b5b4d7 | 2255 | |
f67539c2 TL |
def get_config_and_keyring(ctx):
    # type: (CephadmContext) -> Tuple[Optional[str], Optional[str]]
    """Collect ceph.conf and keyring contents from the context.

    config_json provides defaults; an explicit --config file overrides the
    config, and --key / --keyring override the keyring (in that priority).
    """
    config = None
    keyring = None

    if 'config_json' in ctx and ctx.config_json:
        d = get_parm(ctx.config_json)
        config = d.get('config')
        keyring = d.get('keyring')

    if 'config' in ctx and ctx.config:
        try:
            with open(ctx.config, 'r') as f:
                config = f.read()
        except FileNotFoundError:
            raise Error('config file: %s does not exist' % ctx.config)

    if 'key' in ctx and ctx.key:
        # synthesize a minimal keyring from a bare key
        keyring = '[%s]\n\tkey = %s\n' % (ctx.name, ctx.key)
    elif 'keyring' in ctx and ctx.keyring:
        try:
            with open(ctx.keyring, 'r') as f:
                keyring = f.read()
        except FileNotFoundError:
            raise Error('keyring file: %s does not exist' % ctx.keyring)

    return config, keyring
2283 | ||
2284 | ||
f67539c2 TL |
def get_container_binds(ctx, fsid, daemon_type, daemon_id):
    # type: (CephadmContext, str, str, Union[int, str, None]) -> List[List[str]]
    """Return extra container bind specifications for the given daemon type."""
    binds = []  # type: List[List[str]]

    if daemon_type == CephIscsi.daemon_type:
        binds.extend(CephIscsi.get_container_binds())
    elif daemon_type == CustomContainer.daemon_type:
        assert daemon_id
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
        binds.extend(cc.get_container_binds(data_dir))

    return binds
2298 | ||
9f95a23c | 2299 | |
f67539c2 | 2300 | def get_container_mounts(ctx, fsid, daemon_type, daemon_id, |
9f95a23c | 2301 | no_config=False): |
f67539c2 | 2302 | # type: (CephadmContext, str, str, Union[int, str, None], Optional[bool]) -> Dict[str, str] |
9f95a23c TL |
2303 | mounts = dict() |
2304 | ||
2305 | if daemon_type in Ceph.daemons: | |
2306 | if fsid: | |
f67539c2 | 2307 | run_path = os.path.join('/var/run/ceph', fsid) |
9f95a23c TL |
2308 | if os.path.exists(run_path): |
2309 | mounts[run_path] = '/var/run/ceph:z' | |
f67539c2 | 2310 | log_dir = get_log_dir(fsid, ctx.log_dir) |
9f95a23c TL |
2311 | mounts[log_dir] = '/var/log/ceph:z' |
2312 | crash_dir = '/var/lib/ceph/%s/crash' % fsid | |
2313 | if os.path.exists(crash_dir): | |
2314 | mounts[crash_dir] = '/var/lib/ceph/crash:z' | |
2315 | ||
2316 | if daemon_type in Ceph.daemons and daemon_id: | |
f67539c2 | 2317 | data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) |
9f95a23c TL |
2318 | if daemon_type == 'rgw': |
2319 | cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (daemon_id) | |
2320 | else: | |
2321 | cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, daemon_id) | |
2322 | if daemon_type != 'crash': | |
2323 | mounts[data_dir] = cdata_dir + ':z' | |
2324 | if not no_config: | |
2325 | mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z' | |
f67539c2 | 2326 | if daemon_type in ['rbd-mirror', 'cephfs-mirror', 'crash']: |
9f95a23c TL |
2327 | # these do not search for their keyrings in a data directory |
2328 | mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, daemon_id) | |
2329 | ||
2330 | if daemon_type in ['mon', 'osd']: | |
2331 | mounts['/dev'] = '/dev' # FIXME: narrow this down? | |
2332 | mounts['/run/udev'] = '/run/udev' | |
2333 | if daemon_type == 'osd': | |
2334 | mounts['/sys'] = '/sys' # for numa.cc, pick_address, cgroups, ... | |
f67539c2 TL |
2335 | # selinux-policy in the container may not match the host. |
2336 | if HostFacts(ctx).selinux_enabled: | |
2337 | selinux_folder = '/var/lib/ceph/%s/selinux' % fsid | |
2338 | if not os.path.exists(selinux_folder): | |
2339 | os.makedirs(selinux_folder, mode=0o755) | |
2340 | mounts[selinux_folder] = '/sys/fs/selinux:ro' | |
9f95a23c TL |
2341 | mounts['/run/lvm'] = '/run/lvm' |
2342 | mounts['/run/lock/lvm'] = '/run/lock/lvm' | |
2343 | ||
e306af50 | 2344 | try: |
f67539c2 TL |
2345 | if ctx.shared_ceph_folder: # make easy manager modules/ceph-volume development |
2346 | ceph_folder = pathify(ctx.shared_ceph_folder) | |
e306af50 TL |
2347 | if os.path.exists(ceph_folder): |
2348 | mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume' | |
2349 | mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr' | |
2350 | mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph' | |
2351 | mounts[ceph_folder + '/monitoring/grafana/dashboards'] = '/etc/grafana/dashboards/ceph-dashboard' | |
2352 | mounts[ceph_folder + '/monitoring/prometheus/alerts'] = '/etc/prometheus/ceph' | |
2353 | else: | |
2354 | logger.error('{}{}{}'.format(termcolor.red, | |
f67539c2 TL |
2355 | 'Ceph shared source folder does not exist.', |
2356 | termcolor.end)) | |
e306af50 TL |
2357 | except AttributeError: |
2358 | pass | |
2359 | ||
9f95a23c | 2360 | if daemon_type in Monitoring.components and daemon_id: |
f67539c2 | 2361 | data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) |
9f95a23c TL |
2362 | if daemon_type == 'prometheus': |
2363 | mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z' | |
2364 | mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z' | |
2365 | elif daemon_type == 'node-exporter': | |
2366 | mounts['/proc'] = '/host/proc:ro' | |
2367 | mounts['/sys'] = '/host/sys:ro' | |
2368 | mounts['/'] = '/rootfs:ro' | |
f67539c2 | 2369 | elif daemon_type == 'grafana': |
9f95a23c TL |
2370 | mounts[os.path.join(data_dir, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z' |
2371 | mounts[os.path.join(data_dir, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z' | |
2372 | mounts[os.path.join(data_dir, 'etc/grafana/certs')] = '/etc/grafana/certs:Z' | |
f67539c2 | 2373 | mounts[os.path.join(data_dir, 'data/grafana.db')] = '/var/lib/grafana/grafana.db:Z' |
9f95a23c | 2374 | elif daemon_type == 'alertmanager': |
f6b5b4d7 | 2375 | mounts[os.path.join(data_dir, 'etc/alertmanager')] = '/etc/alertmanager:Z' |
9f95a23c TL |
2376 | |
2377 | if daemon_type == NFSGanesha.daemon_type: | |
2378 | assert daemon_id | |
f67539c2 TL |
2379 | data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) |
2380 | nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id) | |
f91f0fd5 | 2381 | mounts.update(nfs_ganesha.get_container_mounts(data_dir)) |
9f95a23c | 2382 | |
f67539c2 TL |
2383 | if daemon_type == HAproxy.daemon_type: |
2384 | assert daemon_id | |
2385 | data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) | |
2386 | mounts.update(HAproxy.get_container_mounts(data_dir)) | |
2387 | ||
1911f103 TL |
2388 | if daemon_type == CephIscsi.daemon_type: |
2389 | assert daemon_id | |
f67539c2 TL |
2390 | data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) |
2391 | log_dir = get_log_dir(fsid, ctx.log_dir) | |
1911f103 TL |
2392 | mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir)) |
2393 | ||
f67539c2 TL |
2394 | if daemon_type == Keepalived.daemon_type: |
2395 | assert daemon_id | |
2396 | data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) | |
2397 | mounts.update(Keepalived.get_container_mounts(data_dir)) | |
2398 | ||
f91f0fd5 TL |
2399 | if daemon_type == CustomContainer.daemon_type: |
2400 | assert daemon_id | |
f67539c2 TL |
2401 | cc = CustomContainer.init(ctx, fsid, daemon_id) |
2402 | data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id) | |
f91f0fd5 TL |
2403 | mounts.update(cc.get_container_mounts(data_dir)) |
2404 | ||
9f95a23c TL |
2405 | return mounts |
2406 | ||
f6b5b4d7 | 2407 | |
f67539c2 TL |
def get_container(ctx: CephadmContext,
                  fsid: str, daemon_type: str, daemon_id: Union[int, str],
                  privileged: bool = False,
                  ptrace: bool = False,
                  container_args: Optional[List[str]] = None) -> 'CephContainer':
    """Build (but do not start) the CephContainer for one daemon.

    Selects the entrypoint, client name, environment, privilege level and
    extra container-engine arguments based on ``daemon_type``, then wires
    in the standard mounts/binds for that daemon.

    :param ctx: cephadm execution context (image, container engine, dirs)
    :param fsid: cluster fsid the daemon belongs to
    :param daemon_type: e.g. 'mon', 'osd', 'rgw', NFSGanesha.daemon_type, ...
    :param daemon_id: id of the daemon within its type
    :param privileged: force a privileged container (some types force this on)
    :param ptrace: add the SYS_PTRACE capability (debugging)
    :param container_args: extra args appended to the engine run command
    :return: a configured CephContainer instance
    """
    entrypoint: str = ''
    name: str = ''
    ceph_args: List[str] = []
    envs: List[str] = []
    host_network: bool = True

    if container_args is None:
        container_args = []
    if daemon_type in ['mon', 'osd']:
        # mon and osd need privileged in order for libudev to query devices
        privileged = True
    if daemon_type == 'rgw':
        entrypoint = '/usr/bin/radosgw'
        name = 'client.rgw.%s' % daemon_id
    elif daemon_type == 'rbd-mirror':
        entrypoint = '/usr/bin/rbd-mirror'
        name = 'client.rbd-mirror.%s' % daemon_id
    elif daemon_type == 'cephfs-mirror':
        entrypoint = '/usr/bin/cephfs-mirror'
        name = 'client.cephfs-mirror.%s' % daemon_id
    elif daemon_type == 'crash':
        entrypoint = '/usr/bin/ceph-crash'
        name = 'client.crash.%s' % daemon_id
    elif daemon_type in ['mon', 'mgr', 'mds', 'osd']:
        entrypoint = '/usr/bin/ceph-' + daemon_type
        name = '%s.%s' % (daemon_type, daemon_id)
    elif daemon_type in Monitoring.components:
        # monitoring images ship their own default entrypoint
        entrypoint = ''
    elif daemon_type == NFSGanesha.daemon_type:
        entrypoint = NFSGanesha.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        envs.extend(NFSGanesha.get_container_envs())
    elif daemon_type == HAproxy.daemon_type:
        name = '%s.%s' % (daemon_type, daemon_id)
    elif daemon_type == Keepalived.daemon_type:
        name = '%s.%s' % (daemon_type, daemon_id)
        envs.extend(Keepalived.get_container_envs())
        # keepalived manages VIPs and sends gratuitous ARPs, so it needs
        # raw-socket / net-admin capabilities
        container_args.extend(['--cap-add=NET_ADMIN', '--cap-add=NET_RAW'])
    elif daemon_type == CephIscsi.daemon_type:
        entrypoint = CephIscsi.entrypoint
        name = '%s.%s' % (daemon_type, daemon_id)
        # So the container can modprobe iscsi_target_mod and have write perms
        # to configfs we need to make this a privileged container.
        privileged = True
    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, fsid, daemon_id)
        entrypoint = cc.entrypoint
        host_network = False
        envs.extend(cc.get_container_envs())
        container_args.extend(cc.get_container_args())

    if daemon_type in Monitoring.components:
        # run monitoring daemons as the uid baked into their image
        uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
        monitoring_args = [
            '--user',
            str(uid),
            # FIXME: disable cpu/memory limits for the time being (not supported
            # by ubuntu 18.04 kernel!)
        ]
        container_args.extend(monitoring_args)
    elif daemon_type == 'crash':
        ceph_args = ['-n', name]
    elif daemon_type in Ceph.daemons:
        # -f keeps the ceph daemon in the foreground for the container
        ceph_args = ['-n', name, '-f']

    # if using podman, set -d, --conmon-pidfile & --cidfile flags
    # so service can have Type=Forking
    if isinstance(ctx.container_engine, Podman):
        runtime_dir = '/run'
        container_args.extend([
            '-d', '--log-driver', 'journald',
            '--conmon-pidfile',
            runtime_dir + '/ceph-%s@%s.%s.service-pid' % (fsid, daemon_type, daemon_id),
            '--cidfile',
            runtime_dir + '/ceph-%s@%s.%s.service-cid' % (fsid, daemon_type, daemon_id),
        ])
        if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION:
            container_args.append('--cgroups=split')

    return CephContainer(
        ctx,
        image=ctx.image,
        entrypoint=entrypoint,
        args=ceph_args + get_daemon_args(ctx, fsid, daemon_type, daemon_id),
        container_args=container_args,
        volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
        bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
        cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id),
        envs=envs,
        privileged=privileged,
        ptrace=ptrace,
        host_network=host_network,
    )
2506 | ||
f6b5b4d7 | 2507 | |
f67539c2 TL |
def extract_uid_gid(ctx, img='', file_path='/var/lib/ceph'):
    # type: (CephadmContext, str, Union[str, List[str]]) -> Tuple[int, int]
    """Discover the numeric uid/gid that own a path inside a container image.

    Runs ``stat -c '%u %g'`` inside the image against each candidate path
    and returns the first successful result.

    :param ctx: cephadm execution context
    :param img: image to inspect; falls back to ``ctx.image`` when empty
    :param file_path: a single path or a list of candidate paths to stat
    :raises RuntimeError: if none of the candidate paths could be stat'ed
    """
    image = img or ctx.image

    candidates = [file_path] if isinstance(file_path, str) else file_path

    for candidate in candidates:
        probe = CephContainer(
            ctx,
            image=image,
            entrypoint='stat',
            args=['-c', '%u %g', candidate]
        )
        try:
            owner = probe.run()
        except RuntimeError:
            # path missing in this image; fall through to the next candidate
            continue
        uid_str, gid_str = owner.split(' ')
        return int(uid_str), int(gid_str)
    raise RuntimeError('uid/gid not found')
2532 | ||
9f95a23c | 2533 | |
def deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid,
                  config=None, keyring=None,
                  osd_fsid=None,
                  reconfig=False,
                  ports=None):
    # type: (CephadmContext, str, str, Union[int, str], Optional[CephContainer], int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None
    """Deploy (or reconfigure) one daemon on this host.

    Creates the daemon's data dirs, config and keyring, runs mon --mkfs for
    a brand-new monitor, writes the systemd unit files, and opens any
    firewall ports the daemon requires.

    :param c: container to run the daemon in (may be None only for the
        cephadm exporter daemon type)
    :param uid/gid: ownership applied to files created under the data dir
    :param osd_fsid: per-OSD fsid, required for osd pre/post steps
    :param reconfig: only rewrite config; skip (re)deploying unit files
    :param ports: TCP ports the daemon needs; checked for availability first
    :raises Error: if a required port is in use, or reconfig is requested
        for a daemon whose data dir does not exist
    """
    ports = ports or []
    if any([port_in_use(ctx, port) for port in ports]):
        raise Error("TCP Port(s) '{}' required for {} already in use".format(','.join(map(str, ports)), daemon_type))

    data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    if reconfig and not os.path.exists(data_dir):
        raise Error('cannot reconfig, data path %s does not exist' % data_dir)
    if daemon_type == 'mon' and not os.path.exists(data_dir):
        # brand new monitor: needs an explicit --mkfs pass before first start
        assert config
        assert keyring
        # tmp keyring file
        tmp_keyring = write_tmp(keyring, uid, gid)

        # tmp config file
        tmp_config = write_tmp(config, uid, gid)

        # --mkfs
        create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid)
        mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', daemon_id)
        log_dir = get_log_dir(fsid, ctx.log_dir)
        CephContainer(
            ctx,
            image=ctx.image,
            entrypoint='/usr/bin/ceph-mon',
            args=[
                '--mkfs',
                '-i', str(daemon_id),
                '--fsid', fsid,
                '-c', '/tmp/config',
                '--keyring', '/tmp/keyring',
            ] + get_daemon_args(ctx, fsid, 'mon', daemon_id),
            volume_mounts={
                log_dir: '/var/log/ceph:z',
                mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (daemon_id),
                tmp_keyring.name: '/tmp/keyring:z',
                tmp_config.name: '/tmp/config:z',
            },
        ).run()

        # write conf
        with open(mon_dir + '/config', 'w') as f:
            os.fchown(f.fileno(), uid, gid)
            os.fchmod(f.fileno(), 0o600)
            f.write(config)
    else:
        # dirs, conf, keyring
        create_daemon_dirs(
            ctx,
            fsid, daemon_type, daemon_id,
            uid, gid,
            config, keyring)

    if not reconfig:
        if daemon_type == CephadmDaemon.daemon_type:
            # the cephadm exporter daemon is deployed without a container
            port = next(iter(ports), None)  # get first tcp port provided or None

            if ctx.config_json == '-':
                config_js = get_parm('-')
            else:
                config_js = get_parm(ctx.config_json)
            assert isinstance(config_js, dict)

            cephadm_exporter = CephadmDaemon(ctx, fsid, daemon_id, port)
            cephadm_exporter.deploy_daemon_unit(config_js)
        else:
            if c:
                deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id,
                                    c, osd_fsid=osd_fsid, ports=ports)
            else:
                raise RuntimeError('attempting to deploy a daemon without a container image')

    # unit.created records when this deployment first happened (write once)
    if not os.path.exists(data_dir + '/unit.created'):
        with open(data_dir + '/unit.created', 'w') as f:
            os.fchmod(f.fileno(), 0o600)
            os.fchown(f.fileno(), uid, gid)
            f.write('mtime is time the daemon deployment was created\n')

    # unit.configured is refreshed on every (re)configure
    with open(data_dir + '/unit.configured', 'w') as f:
        f.write('mtime is time we were last configured\n')
        os.fchmod(f.fileno(), 0o600)
        os.fchown(f.fileno(), uid, gid)

    update_firewalld(ctx, daemon_type)

    # Open ports explicitly required for the daemon
    if ports:
        fw = Firewalld(ctx)
        fw.open_ports(ports)
        fw.apply_rules()

    if reconfig and daemon_type not in Ceph.daemons:
        # ceph daemons do not need a restart; others (presumably) do to pick
        # up the new config
        call_throws(ctx, ['systemctl', 'reset-failed',
                          get_unit_name(fsid, daemon_type, daemon_id)])
        call_throws(ctx, ['systemctl', 'restart',
                          get_unit_name(fsid, daemon_type, daemon_id)])
2638 | ||
9f95a23c | 2639 | |
f67539c2 TL |
def _write_container_cmd_to_bash(ctx, file_obj, container, comment=None, background=False):
    # type: (CephadmContext, IO[str], CephContainer, Optional[str], Optional[bool]) -> None
    """Append shell commands that remove any stale container and then run it.

    Writes up to three lines into the unit.run/unit.poststop script:
    a best-effort ``rm`` of any leftover container, a podman-only
    ``rm --storage`` fallback, and finally the run command itself.

    :param file_obj: open text file the script lines are appended to
    :param container: the CephContainer providing rm_cmd()/run_cmd()
    :param comment: optional '# ...' comment line written first
    :param background: append ' &' so the container runs in the background
    """
    if comment:
        # Sometimes adding a comment, especially if there are multiple containers in one
        # unit file, makes it easier to read and grok.
        file_obj.write('# ' + comment + '\n')
    # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
    # NOTE: quote every argument, consistent with the other command lines
    # below, so container names/paths with shell metacharacters stay intact.
    file_obj.write(
        '! '
        + ' '.join([shlex.quote(a) for a in container.rm_cmd()])
        + ' 2> /dev/null\n')
    # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
    if isinstance(ctx.container_engine, Podman):
        file_obj.write(
            '! '
            + ' '.join([shlex.quote(a) for a in container.rm_cmd(storage=True)])
            + ' 2> /dev/null\n')

    # container run command
    file_obj.write(
        ' '.join([shlex.quote(a) for a in container.run_cmd()])
        + (' &' if background else '') + '\n')
2659 | ||
2660 | ||
def deploy_daemon_units(
    ctx: CephadmContext,
    fsid: str,
    uid: int,
    gid: int,
    daemon_type: str,
    daemon_id: Union[int, str],
    c: 'CephContainer',
    enable: bool = True,
    start: bool = True,
    osd_fsid: Optional[str] = None,
    ports: Optional[List[int]] = None,
) -> None:
    """Write the per-daemon unit.run/unit.meta/unit.poststop scripts and
    systemd unit files, then enable/start the daemon's systemd unit.

    Files are written as ``*.new`` and renamed into place so readers never
    see a partially written script.

    :param c: the container that runs the daemon
    :param enable: 'systemctl enable' the unit after installing it
    :param start: 'systemctl start' the unit after installing it
    :param osd_fsid: required for OSDs (pre/post ceph-volume steps)
    :param ports: recorded in unit.meta for later introspection
    """
    # cmd
    data_dir = get_data_dir(fsid, ctx.data_dir, daemon_type, daemon_id)
    with open(data_dir + '/unit.run.new', 'w') as f, \
            open(data_dir + '/unit.meta.new', 'w') as metaf:
        f.write('set -e\n')

        if daemon_type in Ceph.daemons:
            # ceph daemons expect /var/run/ceph/<fsid> to exist with the
            # right ownership before they start
            install_path = find_program('install')
            f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))

        # pre-start cmd(s)
        if daemon_type == 'osd':
            # osds have a pre-start step
            assert osd_fsid
            simple_fn = os.path.join('/etc/ceph/osd',
                                     '%s-%s.json.adopted-by-cephadm' % (daemon_id, osd_fsid))
            if os.path.exists(simple_fn):
                f.write('# Simple OSDs need chown on startup:\n')
                for n in ['block', 'block.db', 'block.wal']:
                    p = os.path.join(data_dir, n)
                    f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
            else:
                # LVM-based OSD: activate the volume before starting the osd
                prestart = CephContainer(
                    ctx,
                    image=ctx.image,
                    entrypoint='/usr/sbin/ceph-volume',
                    args=[
                        'lvm', 'activate',
                        str(daemon_id), osd_fsid,
                        '--no-systemd'
                    ],
                    privileged=True,
                    volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                    bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                    cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
                    memory_request=ctx.memory_request,
                    memory_limit=ctx.memory_limit,
                )
                _write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate')
        elif daemon_type == NFSGanesha.daemon_type:
            # add nfs to the rados grace db
            nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
            prestart = nfs_ganesha.get_rados_grace_container('add')
            _write_container_cmd_to_bash(ctx, f, prestart, 'add daemon to rados grace')
        elif daemon_type == CephIscsi.daemon_type:
            # iscsi needs configfs mounted and a tcmu-runner side container
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
            ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            _write_container_cmd_to_bash(ctx, f, tcmu_container, 'iscsi tcmu-runnter container', background=True)
        elif daemon_type == Keepalived.daemon_type:
            f.write(Keepalived.get_prestart())

        _write_container_cmd_to_bash(ctx, f, c, '%s.%s' % (daemon_type, str(daemon_id)))

        # some metadata about the deploy
        meta: Dict[str, Any] = {}
        if 'meta_json' in ctx and ctx.meta_json:
            meta = json.loads(ctx.meta_json) or {}
        meta.update({
            'memory_request': int(ctx.memory_request) if ctx.memory_request else None,
            'memory_limit': int(ctx.memory_limit) if ctx.memory_limit else None,
        })
        if not meta.get('ports'):
            meta['ports'] = ports
        metaf.write(json.dumps(meta, indent=4) + '\n')

        os.fchmod(f.fileno(), 0o600)
        os.fchmod(metaf.fileno(), 0o600)
        # atomically replace the live scripts
        os.rename(data_dir + '/unit.run.new',
                  data_dir + '/unit.run')
        os.rename(data_dir + '/unit.meta.new',
                  data_dir + '/unit.meta')

    # post-stop command(s)
    with open(data_dir + '/unit.poststop.new', 'w') as f:
        if daemon_type == 'osd':
            assert osd_fsid
            poststop = CephContainer(
                ctx,
                image=ctx.image,
                entrypoint='/usr/sbin/ceph-volume',
                args=[
                    'lvm', 'deactivate',
                    str(daemon_id), osd_fsid,
                ],
                privileged=True,
                volume_mounts=get_container_mounts(ctx, fsid, daemon_type, daemon_id),
                bind_mounts=get_container_binds(ctx, fsid, daemon_type, daemon_id),
                cname='ceph-%s-%s.%s-deactivate' % (fsid, daemon_type,
                                                    daemon_id),
            )
            _write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd')
        elif daemon_type == NFSGanesha.daemon_type:
            # remove nfs from the rados grace db
            nfs_ganesha = NFSGanesha.init(ctx, fsid, daemon_id)
            poststop = nfs_ganesha.get_rados_grace_container('remove')
            _write_container_cmd_to_bash(ctx, f, poststop, 'remove daemon from rados grace')
        elif daemon_type == CephIscsi.daemon_type:
            # make sure we also stop the tcmu container
            ceph_iscsi = CephIscsi.init(ctx, fsid, daemon_id)
            tcmu_container = ceph_iscsi.get_tcmu_runner_container()
            f.write('! ' + ' '.join(tcmu_container.stop_cmd()) + '\n')
            f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
        os.fchmod(f.fileno(), 0o600)
        os.rename(data_dir + '/unit.poststop.new',
                  data_dir + '/unit.poststop')

    if c:
        # record which image this daemon is running
        with open(data_dir + '/unit.image.new', 'w') as f:
            f.write(c.image + '\n')
            os.fchmod(f.fileno(), 0o600)
            os.rename(data_dir + '/unit.image.new',
                      data_dir + '/unit.image')

    # systemd
    install_base_units(ctx, fsid)
    unit = get_unit_file(ctx, fsid)
    unit_file = 'ceph-%s@.service' % (fsid)
    with open(ctx.unit_dir + '/' + unit_file + '.new', 'w') as f:
        f.write(unit)
        os.rename(ctx.unit_dir + '/' + unit_file + '.new',
                  ctx.unit_dir + '/' + unit_file)
    call_throws(ctx, ['systemctl', 'daemon-reload'])

    unit_name = get_unit_name(fsid, daemon_type, daemon_id)
    # stop/reset any previous instance quietly before (re)enabling
    call(ctx, ['systemctl', 'stop', unit_name],
         verbosity=CallVerbosity.DEBUG)
    call(ctx, ['systemctl', 'reset-failed', unit_name],
         verbosity=CallVerbosity.DEBUG)
    if enable:
        call_throws(ctx, ['systemctl', 'enable', unit_name])
    if start:
        call_throws(ctx, ['systemctl', 'start', unit_name])
9f95a23c | 2807 | |
f6b5b4d7 TL |
2808 | |
class Firewalld(object):
    """Thin wrapper around the ``firewall-cmd`` CLI.

    All operations are no-ops (logged at debug level) when firewalld is
    not installed, not enabled, or not running on this host.
    """

    def __init__(self, ctx):
        # type: (CephadmContext) -> None
        self.ctx = ctx
        # True only if firewall-cmd exists and firewalld.service is
        # enabled and running (see check()).
        self.available = self.check()

    def check(self):
        # type: () -> bool
        """Return True if firewalld can be managed on this host."""
        self.cmd = find_executable('firewall-cmd')
        if not self.cmd:
            logger.debug('firewalld does not appear to be present')
            return False
        (enabled, state, _) = check_unit(self.ctx, 'firewalld.service')
        if not enabled:
            logger.debug('firewalld.service is not enabled')
            return False
        if state != 'running':
            logger.debug('firewalld.service is not running')
            return False

        logger.info('firewalld ready')
        return True

    def enable_service_for(self, daemon_type):
        # type: (str) -> None
        """Permanently enable the firewalld service matching daemon_type.

        :raises RuntimeError: if firewall-cmd fails to add the service
        """
        if not self.available:
            logger.debug('Not possible to enable service <%s>. firewalld.service is not available' % daemon_type)
            return

        # map daemon type to the firewalld service definition name
        if daemon_type == 'mon':
            svc = 'ceph-mon'
        elif daemon_type in ['mgr', 'mds', 'osd']:
            svc = 'ceph'
        elif daemon_type == NFSGanesha.daemon_type:
            svc = 'nfs'
        else:
            # no firewalld service definition for this daemon type
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        # only add the service if it is not already enabled
        out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG)
        if ret:
            logger.info('Enabling firewalld service %s in current zone...' % svc)
            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-service', svc])
            if ret:
                raise RuntimeError(
                    'unable to add service %s to current zone: %s' % (svc, err))
        else:
            logger.debug('firewalld service %s is enabled in current zone' % svc)

    def open_ports(self, fw_ports):
        # type: (List[int]) -> None
        """Permanently open the given TCP ports in the current zone.

        :raises RuntimeError: if firewall-cmd fails to add a port
        """
        if not self.available:
            logger.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports)
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        for port in fw_ports:
            tcp_port = str(port) + '/tcp'
            # only add the port if it is not already open
            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
            if ret:
                logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
                out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-port', tcp_port])
                if ret:
                    raise RuntimeError('unable to add port %s to current zone: %s' %
                                       (tcp_port, err))
            else:
                logger.debug('firewalld port %s is enabled in current zone' % tcp_port)

    def close_ports(self, fw_ports):
        # type: (List[int]) -> None
        """Permanently close the given TCP ports in the current zone.

        :raises RuntimeError: if firewall-cmd fails to remove a port
        """
        if not self.available:
            logger.debug('Not possible to close ports <%s>. firewalld.service is not available' % fw_ports)
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        for port in fw_ports:
            tcp_port = str(port) + '/tcp'
            # only remove the port if it is currently open
            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
            if not ret:
                logger.info('Disabling port %s in current zone...' % tcp_port)
                out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--remove-port', tcp_port])
                if ret:
                    raise RuntimeError('unable to remove port %s from current zone: %s' %
                                       (tcp_port, err))
                else:
                    logger.info(f'Port {tcp_port} disabled')
            else:
                logger.info(f'firewalld port {tcp_port} already closed')

    def apply_rules(self):
        # type: () -> None
        """Reload firewalld so permanent changes take effect immediately."""
        if not self.available:
            return

        if not self.cmd:
            raise RuntimeError('command not defined')

        call_throws(self.ctx, [self.cmd, '--reload'])
f6b5b4d7 | 2913 | |
f67539c2 TL |
2914 | |
def update_firewalld(ctx, daemon_type):
    # type: (CephadmContext, str) -> None
    """Enable the firewalld service and any well-known ports for daemon_type.

    A no-op when firewalld is not available on this host (Firewalld
    handles that internally).
    """
    fw = Firewalld(ctx)
    fw.enable_service_for(daemon_type)

    # prometheus etc: monitoring daemons have centrally-known listen ports
    fw_ports = list(Monitoring.port_map.get(daemon_type, []))

    fw.open_ports(fw_ports)
    fw.apply_rules()
9f95a23c | 2928 | |
f67539c2 TL |
2929 | |
def install_base_units(ctx, fsid):
    # type: (CephadmContext, str) -> None
    """
    Set up ceph.target and ceph-$fsid.target units.
    """
    # global unit
    existed = os.path.exists(ctx.unit_dir + '/ceph.target')
    with open(ctx.unit_dir + '/ceph.target.new', 'w') as f:
        f.write('[Unit]\n'
                'Description=All Ceph clusters and services\n'
                '\n'
                '[Install]\n'
                'WantedBy=multi-user.target\n')
        # write-then-rename so the unit file is replaced atomically
        os.rename(ctx.unit_dir + '/ceph.target.new',
                  ctx.unit_dir + '/ceph.target')
    if not existed:
        # we disable before enable in case a different ceph.target
        # (from the traditional package) is present; while newer
        # systemd is smart enough to disable the old
        # (/lib/systemd/...) and enable the new (/etc/systemd/...),
        # some older versions of systemd error out with EEXIST.
        call_throws(ctx, ['systemctl', 'disable', 'ceph.target'])
        call_throws(ctx, ['systemctl', 'enable', 'ceph.target'])
        call_throws(ctx, ['systemctl', 'start', 'ceph.target'])

    # cluster unit
    existed = os.path.exists(ctx.unit_dir + '/ceph-%s.target' % fsid)
    with open(ctx.unit_dir + '/ceph-%s.target.new' % fsid, 'w') as f:
        f.write(
            '[Unit]\n'
            'Description=Ceph cluster {fsid}\n'
            'PartOf=ceph.target\n'
            'Before=ceph.target\n'
            '\n'
            '[Install]\n'
            'WantedBy=multi-user.target ceph.target\n'.format(
                fsid=fsid)
        )
        os.rename(ctx.unit_dir + '/ceph-%s.target.new' % fsid,
                  ctx.unit_dir + '/ceph-%s.target' % fsid)
    if not existed:
        call_throws(ctx, ['systemctl', 'enable', 'ceph-%s.target' % fsid])
        call_throws(ctx, ['systemctl', 'start', 'ceph-%s.target' % fsid])

    # logrotate for the cluster
    with open(ctx.logrotate_dir + '/ceph-%s' % fsid, 'w') as f:
        """
        This is a bit sloppy in that the killall/pkill will touch all ceph daemons
        in all containers, but I don't see an elegant way to send SIGHUP *just* to
        the daemons for this cluster.  (1) systemd kill -s will get the signal to
        podman, but podman will exit.  (2) podman kill will get the signal to the
        first child (bash), but that isn't the ceph daemon.  This is simpler and
        should be harmless.
        """
        f.write("""# created by cephadm
/var/log/ceph/%s/*.log {
    rotate 7
    daily
    compress
    sharedscripts
    postrotate
        killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror cephfs-mirror || pkill -1 -x 'ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror|cephfs-mirror' || true
    endscript
    missingok
    notifempty
    su root root
}
""" % fsid)
2998 | ||
f6b5b4d7 | 2999 | |
f67539c2 TL |
def get_unit_file(ctx, fsid):
    # type: (CephadmContext, str) -> str
    """Render the body of the templated systemd unit ceph-<fsid>@.service."""
    engine = ctx.container_engine

    # Extra [Service] directives that only apply when running under podman:
    # the container is started detached (Type=forking), so systemd tracks
    # it via the conmon pid file, and stale pid/cid files are cleaned up
    # around each start/stop.
    podman_directives = []
    if isinstance(engine, Podman):
        podman_directives = [
            'ExecStartPre=-/bin/rm -f %t/%n-pid %t/%n-cid\n',
            'ExecStopPost=-/bin/rm -f %t/%n-pid %t/%n-cid\n',
            'Type=forking\n',
            'PIDFile=%t/%n-pid\n',
        ]
        if engine.version >= CGROUPS_SPLIT_PODMAN_VERSION:
            podman_directives.append('Delegate=yes\n')
    extra_args = ''.join(podman_directives)

    uses_docker = isinstance(engine, Docker)
    return """# generated by cephadm
[Unit]
Description=Ceph %i for {fsid}

# According to:
#   http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
# these can be removed once ceph-mon will dynamically change network
# configuration.
After=network-online.target local-fs.target time-sync.target{docker_after}
Wants=network-online.target local-fs.target time-sync.target
{docker_requires}
PartOf=ceph-{fsid}.target
Before=ceph-{fsid}.target

[Service]
LimitNOFILE=1048576
LimitNPROC=1048576
EnvironmentFile=-/etc/environment
ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
ExecStop=-{container_path} stop ceph-{fsid}-%i
ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
KillMode=none
Restart=on-failure
RestartSec=10s
TimeoutStartSec=120
TimeoutStopSec=120
StartLimitInterval=30min
StartLimitBurst=5
{extra_args}
[Install]
WantedBy=ceph-{fsid}.target
""".format(container_path=engine.path,
           fsid=fsid,
           data_dir=ctx.data_dir,
           extra_args=extra_args,
           # if docker, we depend on docker.service
           docker_after=' docker.service' if uses_docker else '',
           docker_requires='Requires=docker.service\n' if uses_docker else '')
3053 | ||
3054 | ################################## | |
3055 | ||
f6b5b4d7 | 3056 | |
9f95a23c TL |
3057 | class CephContainer: |
3058 | def __init__(self, | |
f67539c2 | 3059 | ctx: CephadmContext, |
f91f0fd5 TL |
3060 | image: str, |
3061 | entrypoint: str, | |
3062 | args: List[str] = [], | |
3063 | volume_mounts: Dict[str, str] = {}, | |
3064 | cname: str = '', | |
3065 | container_args: List[str] = [], | |
3066 | envs: Optional[List[str]] = None, | |
3067 | privileged: bool = False, | |
3068 | ptrace: bool = False, | |
3069 | bind_mounts: Optional[List[List[str]]] = None, | |
f67539c2 | 3070 | init: Optional[bool] = None, |
f91f0fd5 | 3071 | host_network: bool = True, |
f67539c2 TL |
3072 | memory_request: Optional[str] = None, |
3073 | memory_limit: Optional[str] = None, | |
f91f0fd5 | 3074 | ) -> None: |
f67539c2 | 3075 | self.ctx = ctx |
9f95a23c TL |
3076 | self.image = image |
3077 | self.entrypoint = entrypoint | |
3078 | self.args = args | |
3079 | self.volume_mounts = volume_mounts | |
3080 | self.cname = cname | |
3081 | self.container_args = container_args | |
3082 | self.envs = envs | |
3083 | self.privileged = privileged | |
3084 | self.ptrace = ptrace | |
f6b5b4d7 | 3085 | self.bind_mounts = bind_mounts if bind_mounts else [] |
f67539c2 | 3086 | self.init = init if init else ctx.container_init |
f91f0fd5 | 3087 | self.host_network = host_network |
f67539c2 TL |
3088 | self.memory_request = memory_request |
3089 | self.memory_limit = memory_limit | |
9f95a23c | 3090 | |
f91f0fd5 TL |
3091 | def run_cmd(self) -> List[str]: |
3092 | cmd_args: List[str] = [ | |
f67539c2 | 3093 | str(self.ctx.container_engine.path), |
f91f0fd5 TL |
3094 | 'run', |
3095 | '--rm', | |
3096 | '--ipc=host', | |
3097 | ] | |
f67539c2 TL |
3098 | |
3099 | if isinstance(self.ctx.container_engine, Podman): | |
3100 | # podman adds the container *name* to /etc/hosts (for 127.0.1.1) | |
3101 | # by default, which makes python's socket.getfqdn() return that | |
3102 | # instead of a valid hostname. | |
3103 | cmd_args.append('--no-hosts') | |
3104 | if os.path.exists('/etc/ceph/podman-auth.json'): | |
3105 | cmd_args.append('--authfile=/etc/ceph/podman-auth.json') | |
3106 | ||
f91f0fd5 TL |
3107 | envs: List[str] = [ |
3108 | '-e', 'CONTAINER_IMAGE=%s' % self.image, | |
3109 | '-e', 'NODE_NAME=%s' % get_hostname(), | |
3110 | ] | |
3111 | vols: List[str] = [] | |
3112 | binds: List[str] = [] | |
9f95a23c | 3113 | |
f67539c2 TL |
3114 | if self.memory_request: |
3115 | cmd_args.extend(['-e', 'POD_MEMORY_REQUEST', str(self.memory_request)]) | |
3116 | if self.memory_limit: | |
3117 | cmd_args.extend(['-e', 'POD_MEMORY_LIMIT', str(self.memory_limit)]) | |
3118 | cmd_args.extend(['--memory', str(self.memory_limit)]) | |
3119 | ||
f91f0fd5 TL |
3120 | if self.host_network: |
3121 | cmd_args.append('--net=host') | |
3122 | if self.entrypoint: | |
3123 | cmd_args.extend(['--entrypoint', self.entrypoint]) | |
9f95a23c | 3124 | if self.privileged: |
f91f0fd5 TL |
3125 | cmd_args.extend([ |
3126 | '--privileged', | |
3127 | # let OSD etc read block devs that haven't been chowned | |
3128 | '--group-add=disk']) | |
3129 | if self.ptrace and not self.privileged: | |
3130 | # if privileged, the SYS_PTRACE cap is already added | |
3131 | # in addition, --cap-add and --privileged are mutually | |
3132 | # exclusive since podman >= 2.0 | |
3133 | cmd_args.append('--cap-add=SYS_PTRACE') | |
3134 | if self.init: | |
3135 | cmd_args.append('--init') | |
f67539c2 | 3136 | envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1'] |
f91f0fd5 TL |
3137 | if self.cname: |
3138 | cmd_args.extend(['--name', self.cname]) | |
3139 | if self.envs: | |
3140 | for env in self.envs: | |
3141 | envs.extend(['-e', env]) | |
3142 | ||
9f95a23c TL |
3143 | vols = sum( |
3144 | [['-v', '%s:%s' % (host_dir, container_dir)] | |
3145 | for host_dir, container_dir in self.volume_mounts.items()], []) | |
f6b5b4d7 | 3146 | binds = sum([['--mount', '{}'.format(','.join(bind))] |
f91f0fd5 TL |
3147 | for bind in self.bind_mounts], []) |
3148 | ||
f67539c2 TL |
3149 | return \ |
3150 | cmd_args + self.container_args + \ | |
3151 | envs + vols + binds + \ | |
3152 | [self.image] + self.args # type: ignore | |
f91f0fd5 TL |
3153 | |
    def shell_cmd(self, cmd: List[str]) -> List[str]:
        """Assemble a `<engine> run ...` argv that executes *cmd* in the image.

        Unlike run_cmd(), the entrypoint is forced to cmd[0] and the rest of
        *cmd* is appended after the image, so an arbitrary program (e.g. a
        shell) runs with the container's mounts and environment.

        :param cmd: program and its arguments; cmd[0] becomes --entrypoint.
        """
        cmd_args: List[str] = [
            str(self.ctx.container_engine.path),
            'run',
            '--rm',
            '--ipc=host',
        ]
        # baseline environment every cephadm container gets
        envs: List[str] = [
            '-e', 'CONTAINER_IMAGE=%s' % self.image,
            '-e', 'NODE_NAME=%s' % get_hostname(),
        ]
        vols: List[str] = []
        binds: List[str] = []

        if self.host_network:
            cmd_args.append('--net=host')
        if self.privileged:
            cmd_args.extend([
                '--privileged',
                # let OSD etc read block devs that haven't been chowned
                '--group-add=disk',
            ])
        if self.init:
            cmd_args.append('--init')
            envs += ['-e', 'CEPH_USE_RANDOM_NONCE=1']
        if self.envs:
            for env in self.envs:
                envs.extend(['-e', env])

        # flatten {host: container} mounts into repeated -v arguments
        vols = sum(
            [['-v', '%s:%s' % (host_dir, container_dir)]
             for host_dir, container_dir in self.volume_mounts.items()], [])
        binds = sum([['--mount', '{}'.format(','.join(bind))]
                     for bind in self.bind_mounts], [])

        return cmd_args + self.container_args + envs + vols + binds + [
            '--entrypoint', cmd[0],
            self.image,
        ] + cmd[1:]
3193 | ||
3194 | def exec_cmd(self, cmd): | |
3195 | # type: (List[str]) -> List[str] | |
3196 | return [ | |
f67539c2 | 3197 | str(self.ctx.container_engine.path), |
9f95a23c TL |
3198 | 'exec', |
3199 | ] + self.container_args + [ | |
3200 | self.cname, | |
3201 | ] + cmd | |
3202 | ||
f6b5b4d7 TL |
3203 | def rm_cmd(self, storage=False): |
3204 | # type: (bool) -> List[str] | |
3205 | ret = [ | |
f67539c2 | 3206 | str(self.ctx.container_engine.path), |
f6b5b4d7 TL |
3207 | 'rm', '-f', |
3208 | ] | |
3209 | if storage: | |
3210 | ret.append('--storage') | |
3211 | ret.append(self.cname) | |
3212 | return ret | |
3213 | ||
3214 | def stop_cmd(self): | |
3215 | # type () -> List[str] | |
3216 | ret = [ | |
f67539c2 | 3217 | str(self.ctx.container_engine.path), |
f6b5b4d7 TL |
3218 | 'stop', self.cname, |
3219 | ] | |
3220 | return ret | |
3221 | ||
9f95a23c TL |
    def run(self, timeout=DEFAULT_TIMEOUT):
        # type: (Optional[int]) -> str
        """Run the container (via run_cmd()) and return its stdout.

        :param timeout: seconds to wait before giving up (None = no limit).
        :raises: whatever call_throws raises on a non-zero exit.
        """
        out, _, _ = call_throws(self.ctx, self.run_cmd(),
                                desc=self.entrypoint, timeout=timeout)
        return out
3227 | ||
3228 | ################################## | |
3229 | ||
f6b5b4d7 | 3230 | |
@infer_image
def command_version(ctx):
    # type: (CephadmContext) -> int
    """Print the Ceph version reported by the configured container image.

    :returns: the container's exit status (0 on success).
    """
    container = CephContainer(ctx, ctx.image, 'ceph', ['--version'])
    out, err, ret = call(ctx, container.run_cmd(), desc=container.entrypoint)
    if ret == 0:
        print(out.strip())
    return ret
9f95a23c TL |
3239 | |
3240 | ################################## | |
3241 | ||
f6b5b4d7 | 3242 | |
@infer_image
def command_pull(ctx):
    # type: (CephadmContext) -> int
    """Pull ctx.image (with transient-error retries), then print its info.

    :returns: command_inspect_image's exit code for the pulled image.
    """
    _pull_image(ctx, ctx.image)
    return command_inspect_image(ctx)
9f95a23c | 3249 | |
f6b5b4d7 | 3250 | |
f67539c2 TL |
def _pull_image(ctx, image):
    # type: (CephadmContext, str) -> None
    """Pull a container image, retrying known-transient failures.

    :param image: fully qualified image name/digest to pull.
    :raises RuntimeError: on a non-transient failure, or after all retries
        are exhausted.
    """
    logger.info('Pulling container image %s...' % image)

    # stderr substrings that indicate a transient failure worth retrying
    ignorelist = [
        'error creating read-write layer with ID',
        'net/http: TLS handshake timeout',
        'Digest did not match, expected',
    ]

    # str() for consistency with every other container_engine.path usage
    cmd = [str(ctx.container_engine.path), 'pull', image]
    if isinstance(ctx.container_engine, Podman) and os.path.exists('/etc/ceph/podman-auth.json'):
        cmd.append('--authfile=/etc/ceph/podman-auth.json')
    cmd_str = ' '.join(cmd)

    # backoff schedule: up to three retries with growing sleeps
    for sleep_secs in [1, 4, 25]:
        out, err, ret = call(ctx, cmd)
        if not ret:
            return

        if not any(pattern in err for pattern in ignorelist):
            raise RuntimeError('Failed command: %s' % cmd_str)

        logger.info('`%s` failed transiently. Retrying. waiting %s seconds...' % (cmd_str, sleep_secs))
        time.sleep(sleep_secs)

    raise RuntimeError('Failed command: %s: maximum retries reached' % cmd_str)
f67539c2 | 3278 | |
9f95a23c TL |
3279 | ################################## |
3280 | ||
f6b5b4d7 | 3281 | |
@infer_image
def command_inspect_image(ctx):
    # type: (CephadmContext) -> int
    """Print JSON describing ctx.image: image id, repo digests, ceph version.

    :returns: 0 on success, errno.ENOENT if inspect failed.
    """
    out, err, ret = call_throws(ctx, [
        ctx.container_engine.path, 'inspect',
        '--format', '{{.ID}},{{.RepoDigests}}',
        ctx.image])
    # NOTE(review): call_throws appears to raise on failure, which would make
    # this branch unreachable -- confirm against call_throws' contract.
    if ret:
        return errno.ENOENT
    info_from = get_image_info_from_inspect(out.strip(), ctx.image)

    # ask the image itself which ceph version it carries
    ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
    info_from['ceph_version'] = ver

    print(json.dumps(info_from, indent=4, sort_keys=True))
    return 0
3298 | ||
3299 | ||
f67539c2 TL |
def normalize_image_digest(digest):
    # type: (str) -> str
    """Qualify an unqualified image digest with the default registry.

    Normal case:
      ceph/ceph -> docker.io/ceph/ceph
    Edge cases that shouldn't ever come up:
      ubuntu -> docker.io/ubuntu (ubuntu alias for library/ubuntu)
    No change:
      quay.ceph.io/ceph/ceph -> no change
      docker.io/ubuntu -> no change
      localhost:5000/ceph/ceph -> no change
    """
    bits = digest.split('/')
    # The first component names a registry only when it looks like a host
    # (contains '.', a ':port', or is 'localhost') AND a path follows it.
    # The previous test ('.' not in bits[0] or len(bits) < 3) wrongly
    # re-qualified already-qualified two-component digests such as
    # 'docker.io/ubuntu' -> 'docker.io/docker.io/ubuntu'.
    first_is_registry = (
        len(bits) > 1
        and ('.' in bits[0] or ':' in bits[0] or bits[0] == 'localhost')
    )
    if not first_is_registry:
        digest = DEFAULT_REGISTRY + '/' + digest
    return digest
3312 | ||
3313 | ||
def get_image_info_from_inspect(out, image):
    # type: (str, str) -> Dict[str, Union[str, List[str]]]
    """Parse `<engine> inspect --format '{{.ID}},{{.RepoDigests}}'` output.

    :param out: raw inspect output ('<id>,[digest digest ...]').
    :param image: image name, used only in the error message.
    :returns: dict with 'image_id' and, when present, 'repo_digests'.
    :raises Error: if *out* is empty.
    """
    # Check for emptiness *before* unpacking: ''.split(',', 1) yields a
    # single-element list, so the original order raised a bare ValueError
    # instead of the intended Error.
    if not out:
        raise Error('inspect {}: empty result'.format(image))
    image_id, digests = out.split(',', 1)
    r = {
        'image_id': normalize_container_id(image_id)
    }  # type: Dict[str, Union[str, List[str]]]
    if digests:
        # digests looks like '[repo@sha256:... repo@sha256:...]'; strip the
        # brackets and normalize each entry
        r['repo_digests'] = list(map(normalize_image_digest, digests[1:-1].split(' ')))
    return r
3325 | ||
9f95a23c TL |
3326 | ################################## |
3327 | ||
f91f0fd5 | 3328 | |
f67539c2 TL |
def check_subnet(subnets: str) -> Tuple[int, List[int], str]:
    """Validate one or more comma-separated CIDR subnet definitions.

    :param subnets: a single CIDR string or a comma-separated list of them
    :returns: (return code, list of IP versions seen, comma-joined error text)
    """
    rc = 0
    versions = set()
    errors = []
    for candidate in subnets.split(','):
        # require an explicit /prefix before handing off to ip_network()
        if not re.search(r'\/\d+$', candidate):
            rc = 1
            errors.append(f'{candidate} is not in CIDR format (address/netmask)')
            continue
        try:
            versions.add(ipaddress.ip_network(candidate).version)
        except ValueError as e:
            rc = 1
            errors.append(f'{candidate} invalid: {str(e)}')
    return rc, list(versions), ', '.join(errors)
3354 | ||
3355 | ||
f6b5b4d7 TL |
def unwrap_ipv6(address):
    # type: (str) -> str
    """Strip surrounding square brackets from an IPv6 literal, if present."""
    bracketed = address.startswith('[') and address.endswith(']')
    return address[1:-1] if bracketed else address
3361 | ||
3362 | ||
f91f0fd5 TL |
def wrap_ipv6(address):
    # type: (str) -> str
    """Wrap *address* in square brackets when it is a bare IPv6 literal.

    Anything ip_address() rejects (hostnames, already-wrapped literals) and
    IPv4 addresses are returned unchanged.
    """
    try:
        version = ipaddress.ip_address(address).version
    except ValueError:
        # not a bare IP literal -- leave untouched
        return address
    return '[%s]' % address if version == 6 else address
3376 | ||
3377 | ||
f6b5b4d7 TL |
def is_ipv6(address):
    # type: (str) -> bool
    """Return True if *address* (optionally bracket-wrapped) is IPv6."""
    candidate = unwrap_ipv6(address)
    try:
        return ipaddress.ip_address(candidate).version == 6
    except ValueError:
        logger.warning('Address: {} is not a valid IP address'.format(candidate))
        return False
3386 | ||
3387 | ||
f67539c2 TL |
def prepare_mon_addresses(
    ctx: CephadmContext
) -> Tuple[str, bool, Optional[str]]:
    """Resolve the bootstrap mon address(es) from --mon-ip / --mon-addrv.

    Validates that the chosen ports are free, and (unless --skip-mon-network)
    infers the CIDR network the mon IP belongs to from the local interfaces.

    :returns: (mon addrv string, True if IPv6, inferred CIDR network or None)
    :raises Error: on missing/invalid arguments or when no local network
        contains the mon IP.
    """
    r = re.compile(r':(\d+)$')
    base_ip = ''
    ipv6 = False

    if ctx.mon_ip:
        ipv6 = is_ipv6(ctx.mon_ip)
        if ipv6:
            ctx.mon_ip = wrap_ipv6(ctx.mon_ip)
        hasport = r.findall(ctx.mon_ip)
        if hasport:
            port = int(hasport[0])
            # map well-known ports to msgr protocol versions
            if port == 6789:
                addr_arg = '[v1:%s]' % ctx.mon_ip
            elif port == 3300:
                addr_arg = '[v2:%s]' % ctx.mon_ip
            else:
                logger.warning('Using msgr2 protocol for unrecognized port %d' %
                               port)
                addr_arg = '[v2:%s]' % ctx.mon_ip
            # strip ':<port>' to recover the bare IP
            base_ip = ctx.mon_ip[0:-(len(str(port))) - 1]
            check_ip_port(ctx, base_ip, port)
        else:
            # no explicit port: advertise both msgr2 and msgr1 defaults
            base_ip = ctx.mon_ip
            addr_arg = '[v2:%s:3300,v1:%s:6789]' % (ctx.mon_ip, ctx.mon_ip)
            check_ip_port(ctx, ctx.mon_ip, 3300)
            check_ip_port(ctx, ctx.mon_ip, 6789)
    elif ctx.mon_addrv:
        addr_arg = ctx.mon_addrv
        if addr_arg[0] != '[' or addr_arg[-1] != ']':
            # BUGFIX: error message typo 'backets' -> 'brackets'
            raise Error('--mon-addrv value %s must use square brackets' %
                        addr_arg)
        ipv6 = addr_arg.count('[') > 1
        for addr in addr_arg[1:-1].split(','):
            hasport = r.findall(addr)
            if not hasport:
                raise Error('--mon-addrv value %s must include port number' %
                            addr_arg)
            port = int(hasport[0])
            # strip off v1: or v2: prefix
            addr = re.sub(r'^\w+:', '', addr)
            base_ip = addr[0:-(len(str(port))) - 1]
            check_ip_port(ctx, base_ip, port)
    else:
        raise Error('must specify --mon-ip or --mon-addrv')
    logger.debug('Base mon IP is %s, final addrv is %s' % (base_ip, addr_arg))

    mon_network = None
    if not ctx.skip_mon_network:
        # make sure IP is configured locally, and then figure out the
        # CIDR network
        for net, ifaces in list_networks(ctx).items():
            ips: List[str] = []
            for iface, ls in ifaces.items():
                ips.extend(ls)
            if ipaddress.ip_address(unwrap_ipv6(base_ip)) in \
               [ipaddress.ip_address(ip) for ip in ips]:
                mon_network = net
                logger.info('Mon IP %s is in CIDR network %s' % (base_ip,
                                                                 mon_network))
                break
        if not mon_network:
            raise Error('Failed to infer CIDR network for mon ip %s; pass '
                        '--skip-mon-network to configure it later' % base_ip)

    return (addr_arg, ipv6, mon_network)
9f95a23c | 3456 | |
f6b5b4d7 | 3457 | |
f67539c2 TL |
def prepare_cluster_network(ctx: CephadmContext) -> Tuple[str, bool]:
    """Validate --cluster-network and report whether it includes IPv6.

    The cluster network may not exist on this node, so all we can do is
    validate that the given value is a syntactically valid IPv4/IPv6 subnet
    (or comma-separated list).

    :returns: (cluster network string or '', True if any subnet is IPv6)
    :raises Error: if the --cluster-network value fails validation.
    """
    cluster_network = ''
    ipv6_cluster_network = False
    if ctx.cluster_network:
        rc, versions, err_msg = check_subnet(ctx.cluster_network)
        if rc:
            raise Error(f'Invalid --cluster-network parameter: {err_msg}')
        cluster_network = ctx.cluster_network
        # membership test is already a bool ('True if ... else False' was redundant)
        ipv6_cluster_network = 6 in versions
    else:
        logger.info('- internal network (--cluster-network) has not '
                    'been provided, OSD replication will default to '
                    'the public_network')

    return cluster_network, ipv6_cluster_network
3475 | ||
3476 | ||
def create_initial_keys(
    ctx: CephadmContext,
    uid: int, gid: int,
    mgr_id: str
) -> Tuple[str, str, str, Any, Any]:  # type: ignore
    """Generate the mon., client.admin and mgr.<id> bootstrap keys.

    Keys are produced by running ceph-authtool --gen-print-key inside the
    container image; two temp keyring files (owned by uid:gid) are written.

    :returns: (mon_key, mgr_key, admin_key, bootstrap keyring tempfile,
        admin keyring tempfile)
    """

    _image = ctx.image

    # create some initial keys
    logger.info('Creating initial keys...')
    mon_key = CephContainer(
        ctx,
        image=_image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()
    admin_key = CephContainer(
        ctx,
        image=_image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()
    mgr_key = CephContainer(
        ctx,
        image=_image,
        entrypoint='/usr/bin/ceph-authtool',
        args=['--gen-print-key'],
    ).run().strip()

    # combined bootstrap keyring: mon., admin and mgr identities with caps
    keyring = ('[mon.]\n'
               '\tkey = %s\n'
               '\tcaps mon = allow *\n'
               '[client.admin]\n'
               '\tkey = %s\n'
               '\tcaps mon = allow *\n'
               '\tcaps mds = allow *\n'
               '\tcaps mgr = allow *\n'
               '\tcaps osd = allow *\n'
               '[mgr.%s]\n'
               '\tkey = %s\n'
               '\tcaps mon = profile mgr\n'
               '\tcaps mds = allow *\n'
               '\tcaps osd = allow *\n'
               % (mon_key, admin_key, mgr_id, mgr_key))

    admin_keyring = write_tmp('[client.admin]\n'
                              '\tkey = ' + admin_key + '\n',
                              uid, gid)

    # tmp keyring file
    bootstrap_keyring = write_tmp(keyring, uid, gid)
    return (mon_key, mgr_key, admin_key,
            bootstrap_keyring, admin_keyring)
3530 | ||
9f95a23c | 3531 | |
f67539c2 TL |
def create_initial_monmap(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str,
    mon_id: str, mon_addr: str
) -> Any:
    """Build the initial monmap in a tempfile using monmaptool.

    :param mon_addr: addrv string for the bootstrap mon.
    :returns: the monmap tempfile, chowned to uid:gid so ceph-mon --mkfs
        can read it.
    """
    logger.info('Creating initial monmap...')
    # empty placeholder; monmaptool --create --clobber fills it in
    monmap = write_tmp('', 0, 0)
    out = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/monmaptool',
        args=[
            '--create',
            '--clobber',
            '--fsid', fsid,
            '--addv', mon_id, mon_addr,
            '/tmp/monmap'
        ],
        volume_mounts={
            monmap.name: '/tmp/monmap:z',
        },
    ).run()
    logger.debug(f'monmaptool for {mon_id} {mon_addr} on {out}')

    # pass monmap file to ceph user for use by ceph-mon --mkfs below
    os.fchown(monmap.fileno(), uid, gid)
    return monmap
9f95a23c | 3560 | |
f67539c2 TL |
3561 | |
def prepare_create_mon(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mon_id: str,
    bootstrap_keyring_path: str,
    monmap_path: str
):
    """Initialize the bootstrap mon's data dir via `ceph-mon --mkfs`.

    :returns: (mon data dir, cluster log dir) on the host.
    """
    logger.info('Creating mon...')
    create_daemon_dirs(ctx, fsid, 'mon', mon_id, uid, gid)
    mon_dir = get_data_dir(fsid, ctx.data_dir, 'mon', mon_id)
    log_dir = get_log_dir(fsid, ctx.log_dir)
    out = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/ceph-mon',
        args=[
            '--mkfs',
            '-i', mon_id,
            '--fsid', fsid,
            # no config yet; monmap/keyring are bind-mounted below
            '-c', '/dev/null',
            '--monmap', '/tmp/monmap',
            '--keyring', '/tmp/keyring',
        ] + get_daemon_args(ctx, fsid, 'mon', mon_id),
        volume_mounts={
            log_dir: '/var/log/ceph:z',
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
            bootstrap_keyring_path: '/tmp/keyring:z',
            monmap_path: '/tmp/monmap:z',
        },
    ).run()
    logger.debug(f'create mon.{mon_id} on {out}')
    return (mon_dir, log_dir)
3594 | ||
3595 | ||
def create_mon(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mon_id: str
) -> None:
    """Deploy the bootstrap mon daemon (data dir/keyring already prepared)."""
    mon_c = get_container(ctx, fsid, 'mon', mon_id)
    ctx.meta_json = json.dumps({'service_name': 'mon'})
    deploy_daemon(ctx, fsid, 'mon', mon_id, mon_c, uid, gid,
                  config=None, keyring=None)
3605 | ||
9f95a23c | 3606 | |
f67539c2 TL |
def wait_for_mon(
    ctx: CephadmContext,
    mon_id: str, mon_dir: str,
    admin_keyring_path: str, config_path: str
):
    """Block until the freshly created mon answers `ceph status`."""
    logger.info('Waiting for mon to start...')
    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/bin/ceph',
        args=[
            'status'],
        volume_mounts={
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
            admin_keyring_path: '/etc/ceph/ceph.client.admin.keyring:z',
            config_path: '/etc/ceph/ceph.conf:z',
        },
    )

    # wait for the service to become available
    def is_mon_available():
        # type: () -> bool
        # one probe: `ceph status` succeeding means the mon is up
        timeout = ctx.timeout if ctx.timeout else 60  # seconds
        out, err, ret = call(ctx, c.run_cmd(),
                             desc=c.entrypoint,
                             timeout=timeout)
        return ret == 0

    is_available(ctx, 'mon', is_mon_available)
3636 | ||
3637 | ||
def create_mgr(
    ctx: CephadmContext,
    uid: int, gid: int,
    fsid: str, mgr_id: str, mgr_key: str,
    config: str, clifunc: Callable
) -> None:
    """Deploy the bootstrap mgr daemon and wait until the mgrmap reports it.

    :param clifunc: callable used to run `ceph` CLI commands for the probe.
    """
    logger.info('Creating mgr...')
    mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key)
    mgr_c = get_container(ctx, fsid, 'mgr', mgr_id)
    # Note:the default port used by the Prometheus node exporter is opened in fw
    ctx.meta_json = json.dumps({'service_name': 'mgr'})
    deploy_daemon(ctx, fsid, 'mgr', mgr_id, mgr_c, uid, gid,
                  config=config, keyring=mgr_keyring, ports=[9283])

    # wait for the service to become available
    logger.info('Waiting for mgr to start...')

    def is_mgr_available():
        # type: () -> bool
        timeout = ctx.timeout if ctx.timeout else 60  # seconds
        try:
            # available flag in the mgrmap flips once the active mgr is up
            out = clifunc(['status', '-f', 'json-pretty'], timeout=timeout)
            j = json.loads(out)
            return j.get('mgrmap', {}).get('available', False)
        except Exception as e:
            logger.debug('status failed: %s' % e)
            return False
    is_available(ctx, 'mgr', is_mgr_available)
3666 | ||
3667 | ||
def prepare_ssh(
    ctx: CephadmContext,
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:
    """Configure cephadm's SSH access and deploy the initial services.

    Sets the cephadm ssh user/config/keys (generating a key if none were
    provided), installs the public key into the ssh user's authorized_keys,
    registers this host with the orchestrator, and applies the default
    mon/mgr/crash and monitoring-stack service specs.

    :raises Error: if the ssh user does not exist or the host cannot be added.
    """

    cli(['cephadm', 'set-user', ctx.ssh_user])

    if ctx.ssh_config:
        logger.info('Using provided ssh config...')
        mounts = {
            pathify(ctx.ssh_config.name): '/tmp/cephadm-ssh-config:z',
        }
        cli(['cephadm', 'set-ssh-config', '-i', '/tmp/cephadm-ssh-config'], extra_mounts=mounts)

    if ctx.ssh_private_key and ctx.ssh_public_key:
        logger.info('Using provided ssh keys...')
        mounts = {
            pathify(ctx.ssh_private_key.name): '/tmp/cephadm-ssh-key:z',
            pathify(ctx.ssh_public_key.name): '/tmp/cephadm-ssh-key.pub:z'
        }
        cli(['cephadm', 'set-priv-key', '-i', '/tmp/cephadm-ssh-key'], extra_mounts=mounts)
        cli(['cephadm', 'set-pub-key', '-i', '/tmp/cephadm-ssh-key.pub'], extra_mounts=mounts)
    else:
        logger.info('Generating ssh key...')
        cli(['cephadm', 'generate-key'])
    ssh_pub = cli(['cephadm', 'get-pub-key'])

    with open(ctx.output_pub_ssh_key, 'w') as f:
        f.write(ssh_pub)
    logger.info('Wrote public SSH key to %s' % ctx.output_pub_ssh_key)

    logger.info('Adding key to %s@localhost authorized_keys...' % ctx.ssh_user)
    try:
        s_pwd = pwd.getpwnam(ctx.ssh_user)
    except KeyError:
        raise Error('Cannot find uid/gid for ssh-user: %s' % (ctx.ssh_user))
    ssh_uid = s_pwd.pw_uid
    ssh_gid = s_pwd.pw_gid
    ssh_dir = os.path.join(s_pwd.pw_dir, '.ssh')

    if not os.path.exists(ssh_dir):
        makedirs(ssh_dir, ssh_uid, ssh_gid, 0o700)

    auth_keys_file = '%s/authorized_keys' % ssh_dir
    add_newline = False

    # only append a separating newline if the file's last byte isn't one
    if os.path.exists(auth_keys_file):
        with open(auth_keys_file, 'r') as f:
            f.seek(0, os.SEEK_END)
            if f.tell() > 0:
                f.seek(f.tell() - 1, os.SEEK_SET)  # go to last char
                if f.read() != '\n':
                    add_newline = True

    with open(auth_keys_file, 'a') as f:
        os.fchown(f.fileno(), ssh_uid, ssh_gid)  # just in case we created it
        os.fchmod(f.fileno(), 0o600)  # just in case we created it
        if add_newline:
            f.write('\n')
        f.write(ssh_pub.strip() + '\n')

    host = get_hostname()
    logger.info('Adding host %s...' % host)
    try:
        args = ['orch', 'host', 'add', host]
        if ctx.mon_ip:
            args.append(ctx.mon_ip)
        cli(args)
    except RuntimeError as e:
        raise Error('Failed to add host <%s>: %s' % (host, e))

    for t in ['mon', 'mgr']:
        if not ctx.orphan_initial_daemons:
            logger.info('Deploying %s service with default placement...' % t)
            cli(['orch', 'apply', t])
        else:
            logger.info('Deploying unmanaged %s service...' % t)
            cli(['orch', 'apply', t, '--unmanaged'])

    if not ctx.orphan_initial_daemons:
        logger.info('Deploying crash service with default placement...')
        cli(['orch', 'apply', 'crash'])

    if not ctx.skip_monitoring_stack:
        logger.info('Enabling mgr prometheus module...')
        cli(['mgr', 'module', 'enable', 'prometheus'])
        for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']:
            logger.info('Deploying %s service with default placement...' % t)
            cli(['orch', 'apply', t])
3758 | ||
def enable_cephadm_mgr_module(
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:
    """Enable the cephadm mgr module and select it as orchestrator backend.

    Enabling the module restarts the mgr, so we wait for it to come back
    before setting the backend.
    """

    logger.info('Enabling cephadm module...')
    cli(['mgr', 'module', 'enable', 'cephadm'])
    wait_for_mgr_restart()
    logger.info('Setting orchestrator backend to cephadm...')
    cli(['orch', 'set', 'backend', 'cephadm'])
3769 | ||
def prepare_dashboard(
    ctx: CephadmContext,
    uid: int, gid: int,
    cli: Callable, wait_for_mgr_restart: Callable
) -> None:
    """Enable the dashboard module, install certs, create the admin user,
    open the firewall port, and print the access URL/credentials."""

    # Configure SSL port (cephadm only allows to configure dashboard SSL port)
    # if the user does not want to use SSL he can change this setting once the cluster is up
    cli(['config', 'set', 'mgr', 'mgr/dashboard/ssl_server_port', str(ctx.ssl_dashboard_port)])

    # configuring dashboard parameters
    logger.info('Enabling the dashboard module...')
    cli(['mgr', 'module', 'enable', 'dashboard'])
    wait_for_mgr_restart()

    # dashboard crt and key
    if ctx.dashboard_key and ctx.dashboard_crt:
        logger.info('Using provided dashboard certificate...')
        mounts = {
            pathify(ctx.dashboard_crt.name): '/tmp/dashboard.crt:z',
            pathify(ctx.dashboard_key.name): '/tmp/dashboard.key:z'
        }
        cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts)
        cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts)
    else:
        logger.info('Generating a dashboard self-signed certificate...')
        cli(['dashboard', 'create-self-signed-cert'])

    logger.info('Creating initial admin user...')
    password = ctx.initial_dashboard_password or generate_password()
    # password is passed via a mounted temp file, never on the command line
    tmp_password_file = write_tmp(password, uid, gid)
    cmd = ['dashboard', 'ac-user-create', ctx.initial_dashboard_user, '-i', '/tmp/dashboard.pw', 'administrator', '--force-password']
    if not ctx.dashboard_password_noupdate:
        cmd.append('--pwd-update-required')
    cli(cmd, extra_mounts={pathify(tmp_password_file.name): '/tmp/dashboard.pw:z'})
    logger.info('Fetching dashboard port number...')
    out = cli(['config', 'get', 'mgr', 'mgr/dashboard/ssl_server_port'])
    port = int(out)

    # Open dashboard port
    fw = Firewalld(ctx)
    fw.open_ports([port])
    fw.apply_rules()

    logger.info('Ceph Dashboard is now available at:\n\n'
                '\t URL: https://%s:%s/\n'
                '\t User: %s\n'
                '\tPassword: %s\n' % (
                    get_fqdn(), port,
                    ctx.initial_dashboard_user,
                    password))
3822 | ||
def prepare_bootstrap_config(
    ctx: CephadmContext,
    fsid: str, mon_addr: str, image: str

) -> str:
    """Assemble the initial ceph.conf contents for bootstrap.

    Starts from --config (if given), forces fsid/mon_host/container_image
    in [global], and disables insecure global_id reclaim unless the user
    configured it explicitly.  Performs a registry login when registry
    credentials were supplied.

    :returns: the config file contents as a string.
    """

    cp = read_config(ctx.config)
    if not cp.has_section('global'):
        cp.add_section('global')
    cp.set('global', 'fsid', fsid)
    cp.set('global', 'mon_host', mon_addr)
    cp.set('global', 'container_image', image)
    if not cp.has_section('mon'):
        cp.add_section('mon')
    # respect either spelling (underscores or spaces) if already present
    if (
        not cp.has_option('mon', 'auth_allow_insecure_global_id_reclaim')
        and not cp.has_option('mon', 'auth allow insecure global id reclaim')
    ):
        cp.set('mon', 'auth_allow_insecure_global_id_reclaim', 'false')
    cpf = StringIO()
    cp.write(cpf)
    config = cpf.getvalue()

    if ctx.registry_json or ctx.registry_url:
        command_registry_login(ctx)

    return config
3850 | ||
3851 | ||
def finish_bootstrap_config(
    ctx: CephadmContext,
    fsid: str,
    config: str,
    mon_id: str, mon_dir: str,
    mon_network: Optional[str], ipv6: bool,
    cli: Callable,
    cluster_network: Optional[str], ipv6_cluster_network: bool

) -> None:
    """Minimize ceph.conf, apply network settings, and write the final config.

    Unless --no-minimize-config was given, assimilates the bootstrap config
    into the mon store, regenerates a minimal conf, and restarts the mon.
    Then sets public/cluster networks and IPv6 binding as needed, and writes
    the resulting config to ctx.output_config.
    """
    if not ctx.no_minimize_config:
        logger.info('Assimilating anything we can from ceph.conf...')
        cli([
            'config', 'assimilate-conf',
            '-i', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
        ], {
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
        })
        logger.info('Generating new minimal ceph.conf...')
        cli([
            'config', 'generate-minimal-conf',
            '-o', '/var/lib/ceph/mon/ceph-%s/config' % mon_id
        ], {
            mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % mon_id
        })
        # re-read our minimized config
        with open(mon_dir + '/config', 'r') as f:
            config = f.read()
        logger.info('Restarting the monitor...')
        call_throws(ctx, [
            'systemctl',
            'restart',
            get_unit_name(fsid, 'mon', mon_id)
        ])

    if mon_network:
        logger.info(f'Setting mon public_network to {mon_network}')
        cli(['config', 'set', 'mon', 'public_network', mon_network])

    if cluster_network:
        logger.info(f'Setting cluster_network to {cluster_network}')
        cli(['config', 'set', 'global', 'cluster_network', cluster_network])

    if ipv6 or ipv6_cluster_network:
        logger.info('Enabling IPv6 (ms_bind_ipv6) binding')
        cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])

    with open(ctx.output_config, 'w') as f:
        f.write(config)
    logger.info('Wrote config to %s' % ctx.output_config)
    # (removed a dead trailing `pass` statement)
3903 | ||
3904 | ||
@default_image
def command_bootstrap(ctx):
    # type: (CephadmContext) -> int
    """Bootstrap a new Ceph cluster on this host.

    Creates the first mon and mgr, writes the admin keyring/config/ssh
    key output files, enables the cephadm mgr module, and optionally
    configures ssh, registry credentials, the exporter, the dashboard,
    and an initial service spec.  Returns 0 on success; raises Error on
    any precondition failure.
    """

    # default output file locations under --output-dir
    if not ctx.output_config:
        ctx.output_config = os.path.join(ctx.output_dir, 'ceph.conf')
    if not ctx.output_keyring:
        ctx.output_keyring = os.path.join(ctx.output_dir,
                                          'ceph.client.admin.keyring')
    if not ctx.output_pub_ssh_key:
        ctx.output_pub_ssh_key = os.path.join(ctx.output_dir, 'ceph.pub')

    # verify output files
    for f in [ctx.output_config, ctx.output_keyring,
              ctx.output_pub_ssh_key]:
        if not ctx.allow_overwrite:
            if os.path.exists(f):
                raise Error('%s already exists; delete or pass '
                            '--allow-overwrite to overwrite' % f)
        dirname = os.path.dirname(f)
        if dirname and not os.path.exists(dirname):
            fname = os.path.basename(f)
            logger.info(f'Creating directory {dirname} for {fname}')
            try:
                # use makedirs to create intermediate missing dirs
                os.makedirs(dirname, 0o755)
            except PermissionError:
                raise Error(f'Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.')

    if not ctx.skip_prepare_host:
        command_prepare_host(ctx)
    else:
        logger.info('Skip prepare_host')

    # initial vars
    fsid = ctx.fsid or make_fsid()
    hostname = get_hostname()
    if '.' in hostname and not ctx.allow_fqdn_hostname:
        raise Error('hostname is a fully qualified domain name (%s); either fix (e.g., "sudo hostname %s" or similar) or pass --allow-fqdn-hostname' % (hostname, hostname.split('.')[0]))
    mon_id = ctx.mon_id or hostname
    mgr_id = ctx.mgr_id or generate_service_id()
    logger.info('Cluster fsid: %s' % fsid)

    # hold the per-cluster lock for the remainder of bootstrap
    lock = FileLock(ctx, fsid)
    lock.acquire()

    (addr_arg, ipv6, mon_network) = prepare_mon_addresses(ctx)
    cluster_network, ipv6_cluster_network = prepare_cluster_network(ctx)

    config = prepare_bootstrap_config(ctx, fsid, addr_arg, ctx.image)

    if not ctx.skip_pull:
        _pull_image(ctx, ctx.image)

    # sanity-check that the container's ceph release matches this cephadm
    image_ver = CephContainer(ctx, ctx.image, 'ceph', ['--version']).run().strip()
    logger.info(f'Ceph version: {image_ver}')
    image_release = image_ver.split()[4]
    if (
        not ctx.allow_mismatched_release
        and image_release not in [DEFAULT_IMAGE_RELEASE, LATEST_STABLE_RELEASE]
    ):
        raise Error(
            f'Container release {image_release} != cephadm release {DEFAULT_IMAGE_RELEASE}; please use matching version of cephadm (pass --allow-mismatched-release to continue anyway)'
        )

    logger.info('Extracting ceph user uid/gid from container image...')
    (uid, gid) = extract_uid_gid(ctx)

    # create some initial keys
    (mon_key, mgr_key, admin_key, bootstrap_keyring, admin_keyring) = \
        create_initial_keys(ctx, uid, gid, mgr_id)

    monmap = create_initial_monmap(ctx, uid, gid, fsid, mon_id, addr_arg)
    (mon_dir, log_dir) = \
        prepare_create_mon(ctx, uid, gid, fsid, mon_id,
                           bootstrap_keyring.name, monmap.name)

    # seed the mon's config file, owned by the ceph user
    with open(mon_dir + '/config', 'w') as f:
        os.fchown(f.fileno(), uid, gid)
        os.fchmod(f.fileno(), 0o600)
        f.write(config)

    make_var_run(ctx, fsid, uid, gid)
    create_mon(ctx, uid, gid, fsid, mon_id)

    # config to issue various CLI commands
    tmp_config = write_tmp(config, uid, gid)

    # a CLI helper to reduce our typing
    def cli(cmd, extra_mounts={}, timeout=DEFAULT_TIMEOUT):
        # type: (List[str], Dict[str, str], Optional[int]) -> str
        # runs 'ceph <cmd>' in a container with the admin keyring and the
        # temporary bootstrap config mounted in; extra_mounts is read-only
        # here, so the mutable default is harmless
        mounts = {
            log_dir: '/var/log/ceph:z',
            admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
            tmp_config.name: '/etc/ceph/ceph.conf:z',
        }
        for k, v in extra_mounts.items():
            mounts[k] = v
        timeout = timeout or ctx.timeout
        return CephContainer(
            ctx,
            image=ctx.image,
            entrypoint='/usr/bin/ceph',
            args=cmd,
            volume_mounts=mounts,
        ).run(timeout=timeout)

    wait_for_mon(ctx, mon_id, mon_dir, admin_keyring.name, tmp_config.name)

    finish_bootstrap_config(ctx, fsid, config, mon_id, mon_dir,
                            mon_network, ipv6, cli,
                            cluster_network, ipv6_cluster_network)

    # output files
    with open(ctx.output_keyring, 'w') as f:
        os.fchmod(f.fileno(), 0o600)
        f.write('[client.admin]\n'
                '\tkey = ' + admin_key + '\n')
    logger.info('Wrote keyring to %s' % ctx.output_keyring)

    # create mgr
    create_mgr(ctx, uid, gid, fsid, mgr_id, mgr_key, config, cli)

    def json_loads_retry(cli_func):
        # the mon may return truncated/empty output while settling;
        # retry a few times before letting the final attempt raise
        for sleep_secs in [1, 4, 4]:
            try:
                return json.loads(cli_func())
            except json.JSONDecodeError:
                logger.debug('Invalid JSON. Retrying in %s seconds...' % sleep_secs)
                time.sleep(sleep_secs)
        return json.loads(cli_func())

    # wait for mgr to restart (after enabling a module)
    def wait_for_mgr_restart():
        # first get latest mgrmap epoch from the mon. try newer 'mgr
        # stat' command first, then fall back to 'mgr dump' if
        # necessary
        try:
            j = json_loads_retry(lambda: cli(['mgr', 'stat']))
        except Exception:
            j = json_loads_retry(lambda: cli(['mgr', 'dump']))
        epoch = j['epoch']

        # wait for mgr to have it
        logger.info('Waiting for the mgr to restart...')

        def mgr_has_latest_epoch():
            # type: () -> bool
            try:
                out = cli(['tell', 'mgr', 'mgr_status'])
                j = json.loads(out)
                return j['mgrmap_epoch'] >= epoch
            except Exception as e:
                logger.debug('tell mgr mgr_status failed: %s' % e)
                return False
        is_available(ctx, 'mgr epoch %d' % epoch, mgr_has_latest_epoch)

    enable_cephadm_mgr_module(cli, wait_for_mgr_restart)

    # ssh
    if not ctx.skip_ssh:
        prepare_ssh(ctx, cli, wait_for_mgr_restart)

    # persist registry credentials in the mgr config store, if given
    if ctx.registry_url and ctx.registry_username and ctx.registry_password:
        cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_url', ctx.registry_url, '--force'])
        cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_username', ctx.registry_username, '--force'])
        cli(['config', 'set', 'mgr', 'mgr/cephadm/registry_password', ctx.registry_password, '--force'])

    cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(ctx.container_init), '--force'])

    if ctx.with_exporter:
        cli(['config-key', 'set', 'mgr/cephadm/exporter_enabled', 'true'])
        if ctx.exporter_config:
            logger.info('Applying custom cephadm exporter settings')
            # validated within the parser, so we can just apply to the store
            with tempfile.NamedTemporaryFile(buffering=0) as tmp:
                tmp.write(json.dumps(ctx.exporter_config).encode('utf-8'))
                mounts = {
                    tmp.name: '/tmp/exporter-config.json:z'
                }
                cli(['cephadm', 'set-exporter-config', '-i', '/tmp/exporter-config.json'], extra_mounts=mounts)
                logger.info('-> Use ceph orch apply cephadm-exporter to deploy')
        else:
            # generate a default SSL configuration for the exporter(s)
            logger.info('Generating a default cephadm exporter configuration (self-signed)')
            cli(['cephadm', 'generate-exporter-config'])
        #
        # deploy the service (commented out until the cephadm changes are in the ceph container build)
        logger.info('Deploying cephadm exporter service with default placement...')
        cli(['orch', 'apply', 'cephadm-exporter'])

    if not ctx.skip_dashboard:
        prepare_dashboard(ctx, uid, gid, cli, wait_for_mgr_restart)

    if ctx.apply_spec:
        logger.info('Applying %s to cluster' % ctx.apply_spec)

        # distribute the cluster ssh key to every other host named in the
        # spec so 'orch apply' can reach them
        with open(ctx.apply_spec) as f:
            for line in f:
                if 'hostname:' in line:
                    line = line.replace('\n', '')
                    split = line.split(': ')
                    if split[1] != hostname:
                        logger.info('Adding ssh key to %s' % split[1])

                        ssh_key = '/etc/ceph/ceph.pub'
                        if ctx.ssh_public_key:
                            ssh_key = ctx.ssh_public_key.name
                        out, err, code = call_throws(ctx, ['sudo', '-u', ctx.ssh_user, 'ssh-copy-id', '-f', '-i', ssh_key, '-o StrictHostKeyChecking=no', '%s@%s' % (ctx.ssh_user, split[1])])

        mounts = {}
        mounts[pathify(ctx.apply_spec)] = '/tmp/spec.yml:z'

        out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
        logger.info(out)

    logger.info('You can access the Ceph CLI with:\n\n'
                '\tsudo %s shell --fsid %s -c %s -k %s\n' % (
                    sys.argv[0],
                    fsid,
                    ctx.output_config,
                    ctx.output_keyring))
    logger.info('Please consider enabling telemetry to help improve Ceph:\n\n'
                '\tceph telemetry on\n\n'
                'For more information see:\n\n'
                '\thttps://docs.ceph.com/docs/pacific/mgr/telemetry/\n')
    logger.info('Bootstrap complete.')
    return 0
4133 | ||
4134 | ################################## | |
4135 | ||
f67539c2 TL |
4136 | |
def command_registry_login(ctx: CephadmContext):
    """Log in to a custom container registry.

    Credentials come either from --registry-json (a file with url,
    username and password keys) or from the individual --registry-url /
    --registry-username / --registry-password options.  Raises Error
    when neither form is complete.
    """
    if ctx.registry_json:
        logger.info('Pulling custom registry login info from %s.' % ctx.registry_json)
        creds = get_parm(ctx.registry_json)
        if not (creds.get('url') and creds.get('username') and creds.get('password')):
            raise Error('json provided for custom registry login did not include all necessary fields. '
                        'Please setup json file as\n'
                        '{\n'
                        ' "url": "REGISTRY_URL",\n'
                        ' "username": "REGISTRY_USERNAME",\n'
                        ' "password": "REGISTRY_PASSWORD"\n'
                        '}\n')
        # stash the values on ctx so later steps see the same credentials
        ctx.registry_url = creds.get('url')
        ctx.registry_username = creds.get('username')
        ctx.registry_password = creds.get('password')
        registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
        return 0
    if ctx.registry_url and ctx.registry_username and ctx.registry_password:
        registry_login(ctx, ctx.registry_url, ctx.registry_username, ctx.registry_password)
        return 0
    raise Error('Invalid custom registry arguments received. To login to a custom registry include '
                '--registry-url, --registry-username and --registry-password '
                'options or --registry-json option')
4161 | ||
f67539c2 TL |
4162 | |
def registry_login(ctx: CephadmContext, url, username, password):
    """Log the host's container engine into the registry at ``url``.

    For podman, credentials are written to /etc/ceph/podman-auth.json
    and that file is locked down to mode 0600.  Raises Error (chained to
    the underlying exception) on any failure.
    """
    logger.info('Logging into custom registry.')
    try:
        engine = ctx.container_engine
        cmd = [engine.path, 'login',
               '-u', username, '-p', password,
               url]
        if isinstance(engine, Podman):
            cmd.append('--authfile=/etc/ceph/podman-auth.json')
        out, _, _ = call_throws(ctx, cmd)
        if isinstance(engine, Podman):
            os.chmod('/etc/ceph/podman-auth.json', 0o600)
    except Exception as e:
        # report the credentials actually passed to this call (ctx may hold
        # different values) and keep the original exception as the cause
        raise Error('Failed to login to custom registry @ %s as %s with given password' % (url, username)) from e
f6b5b4d7 TL |
4177 | |
4178 | ################################## | |
4179 | ||
4180 | ||
f67539c2 TL |
def extract_uid_gid_monitoring(ctx, daemon_type):
    # type: (CephadmContext, str) -> Tuple[int, int]
    """Return the (uid, gid) a monitoring daemon's container runs as."""
    if daemon_type == 'node-exporter':
        # fixed 'nobody' uid/gid; no need to inspect the image
        return 65534, 65534
    # paths inside each image whose owner identifies the service account
    probe_paths = {
        'prometheus': '/etc/prometheus',
        'grafana': '/var/lib/grafana',
        'alertmanager': ['/etc/alertmanager', '/etc/prometheus'],
    }
    if daemon_type not in probe_paths:
        raise Error('{} not implemented yet'.format(daemon_type))
    uid, gid = extract_uid_gid(ctx, file_path=probe_paths[daemon_type])
    return uid, gid
4195 | ||
4196 | ||
@default_image
def command_deploy(ctx):
    # type: (CephadmContext) -> None
    """Deploy (or reconfigure/redeploy) a single daemon on this host.

    ctx.name is '<daemon_type>.<daemon_id>'.  Dispatches on daemon type
    to gather the right config/keyring/uid/gid/ports and then calls
    deploy_daemon().  Raises Error for unknown or unimplemented types.
    """
    daemon_type, daemon_id = ctx.name.split('.', 1)

    # serialize against other cephadm operations on this cluster
    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    if daemon_type not in get_supported_daemons():
        raise Error('daemon type %s not recognized' % daemon_type)

    # a daemon already running under systemd or as a container counts as
    # a redeploy rather than a fresh deploy
    redeploy = False
    unit_name = get_unit_name(ctx.fsid, daemon_type, daemon_id)
    container_name = 'ceph-%s-%s.%s' % (ctx.fsid, daemon_type, daemon_id)
    (_, state, _) = check_unit(ctx, unit_name)
    if state == 'running' or is_container_running(ctx, container_name):
        redeploy = True

    if ctx.reconfig:
        logger.info('%s daemon %s ...' % ('Reconfig', ctx.name))
    elif redeploy:
        logger.info('%s daemon %s ...' % ('Redeploy', ctx.name))
    else:
        logger.info('%s daemon %s ...' % ('Deploy', ctx.name))

    # Get and check ports explicitly required to be opened
    daemon_ports = []  # type: List[int]

    # only check port in use if not reconfig or redeploy since service
    # we are redeploying/reconfiguring will already be using the port
    if not ctx.reconfig and not redeploy:
        if ctx.tcp_ports:
            daemon_ports = list(map(int, ctx.tcp_ports.split()))

    if daemon_type in Ceph.daemons:
        config, keyring = get_config_and_keyring(ctx)
        uid, gid = extract_uid_gid(ctx)
        make_var_run(ctx, ctx.fsid, uid, gid)

        c = get_container(ctx, ctx.fsid, daemon_type, daemon_id,
                          ptrace=ctx.allow_ptrace)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      osd_fsid=ctx.osd_fsid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type in Monitoring.components:
        # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
        # Default Checks
        if not ctx.reconfig and not redeploy:
            daemon_ports.extend(Monitoring.port_map[daemon_type])

        # make sure provided config-json is sufficient
        config = get_parm(ctx.config_json)  # type: ignore
        required_files = Monitoring.components[daemon_type].get('config-json-files', list())
        required_args = Monitoring.components[daemon_type].get('config-json-args', list())
        if required_files:
            if not config or not all(c in config.get('files', {}).keys() for c in required_files):  # type: ignore
                raise Error('{} deployment requires config-json which must '
                            'contain file content for {}'.format(daemon_type.capitalize(), ', '.join(required_files)))
        if required_args:
            if not config or not all(c in config.keys() for c in required_args):  # type: ignore
                raise Error('{} deployment requires config-json which must '
                            'contain arg for {}'.format(daemon_type.capitalize(), ', '.join(required_args)))

        uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
        c = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == NFSGanesha.daemon_type:
        if not ctx.reconfig and not redeploy:
            daemon_ports.extend(NFSGanesha.port_map.values())

        config, keyring = get_config_and_keyring(ctx)
        # TODO: extract ganesha uid/gid (997, 994) ?
        uid, gid = extract_uid_gid(ctx)
        c = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CephIscsi.daemon_type:
        config, keyring = get_config_and_keyring(ctx)
        uid, gid = extract_uid_gid(ctx)
        c = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      config=config, keyring=keyring,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == HAproxy.daemon_type:
        haproxy = HAproxy.init(ctx, ctx.fsid, daemon_id)
        uid, gid = haproxy.extract_uid_gid_haproxy()
        c = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == Keepalived.daemon_type:
        keepalived = Keepalived.init(ctx, ctx.fsid, daemon_id)
        uid, gid = keepalived.extract_uid_gid_keepalived()
        c = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c, uid, gid,
                      reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CustomContainer.daemon_type:
        cc = CustomContainer.init(ctx, ctx.fsid, daemon_id)
        if not ctx.reconfig and not redeploy:
            daemon_ports.extend(cc.ports)
        c = get_container(ctx, ctx.fsid, daemon_type, daemon_id,
                          privileged=cc.privileged,
                          ptrace=ctx.allow_ptrace)
        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, c,
                      uid=cc.uid, gid=cc.gid, config=None,
                      keyring=None, reconfig=ctx.reconfig,
                      ports=daemon_ports)

    elif daemon_type == CephadmDaemon.daemon_type:
        # get current user gid and uid
        uid = os.getuid()
        gid = os.getgid()
        config_js = get_parm(ctx.config_json)  # type: Dict[str, str]
        if not daemon_ports:
            logger.info('cephadm-exporter will use default port ({})'.format(CephadmDaemon.default_port))
            daemon_ports = [CephadmDaemon.default_port]

        CephadmDaemon.validate_config(config_js)

        deploy_daemon(ctx, ctx.fsid, daemon_type, daemon_id, None,
                      uid, gid, ports=daemon_ports)

    else:
        raise Error('daemon type {} not implemented in command_deploy function'
                    .format(daemon_type))
9f95a23c TL |
4336 | |
4337 | ################################## | |
4338 | ||
f6b5b4d7 | 4339 | |
@infer_image
def command_run(ctx):
    # type: (CephadmContext) -> int
    """Run a daemon's container in the foreground; return its exit code."""
    daemon_type, daemon_id = ctx.name.split('.', 1)
    container = get_container(ctx, ctx.fsid, daemon_type, daemon_id)
    return call_timeout(ctx, container.run_cmd(), ctx.timeout)
9f95a23c TL |
4347 | |
4348 | ################################## | |
4349 | ||
f6b5b4d7 | 4350 | |
f67539c2 TL |
def fsid_conf_mismatch(ctx):
    # type: (CephadmContext) -> bool
    """Return True if the supplied ceph.conf declares an fsid different
    from ctx.fsid.

    Only lines that actually begin with ``fsid = `` are considered; the
    previous substring test also tripped on keys such as ``osd_fsid``
    and on commented-out ``# fsid = ...`` lines, reporting spurious
    mismatches.
    """
    (config, _) = get_config_and_keyring(ctx)
    if config:
        for line in config.split('\n'):
            stripped = line.strip()
            # match only a real 'fsid = <value>' assignment
            if stripped.startswith('fsid = ') and stripped != 'fsid = ' + ctx.fsid:
                return True
    return False
4360 | ||
4361 | ||
@infer_fsid
@infer_config
@infer_image
def command_shell(ctx):
    # type: (CephadmContext) -> int
    """Open an interactive containerized shell (or run ctx.command) with
    cluster config, keyring, logs and daemon data mounted in.

    Returns the exit code of the container invocation.
    """
    if fsid_conf_mismatch(ctx):
        raise Error('fsid does not match ceph conf')

    if ctx.fsid:
        make_log_dir(ctx, ctx.fsid)
    # pick the daemon whose mounts we borrow for the shell environment
    if ctx.name:
        if '.' in ctx.name:
            (daemon_type, daemon_id) = ctx.name.split('.', 1)
        else:
            daemon_type = ctx.name
            daemon_id = None
    else:
        daemon_type = 'osd'  # get the most mounts
        daemon_id = None

    if daemon_id and not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    # use /etc/ceph files by default, if present. we do this instead of
    # making these defaults in the arg parser because we don't want an error
    # if they don't exist.
    if not ctx.keyring and os.path.exists(SHELL_DEFAULT_KEYRING):
        ctx.keyring = SHELL_DEFAULT_KEYRING

    container_args: List[str] = ['-i']
    mounts = get_container_mounts(ctx, ctx.fsid, daemon_type, daemon_id,
                                  no_config=True if ctx.config else False)
    binds = get_container_binds(ctx, ctx.fsid, daemon_type, daemon_id)
    if ctx.config:
        mounts[pathify(ctx.config)] = '/etc/ceph/ceph.conf:z'
    if ctx.keyring:
        mounts[pathify(ctx.keyring)] = '/etc/ceph/ceph.keyring:z'
    if ctx.mount:
        # each --mount is 'src[:dst[:z]]'; default dst is /mnt/<basename>
        for _mount in ctx.mount:
            split_src_dst = _mount.split(':')
            mount = pathify(split_src_dst[0])
            filename = os.path.basename(split_src_dst[0])
            if len(split_src_dst) > 1:
                dst = split_src_dst[1] + ':z' if len(split_src_dst) == 3 else split_src_dst[1]
                mounts[mount] = dst
            else:
                mounts[mount] = '/mnt/{}:z'.format(filename)
    if ctx.command:
        command = ctx.command
    else:
        # interactive bash: allocate a tty and set a recognizable prompt
        command = ['bash']
        container_args += [
            '-t',
            '-e', 'LANG=C',
            '-e', 'PS1=%s' % CUSTOM_PS1,
        ]
    if ctx.fsid:
        # give the shell a persistent home dir, seeded from /etc/skel
        home = os.path.join(ctx.data_dir, ctx.fsid, 'home')
        if not os.path.exists(home):
            logger.debug('Creating root home at %s' % home)
            makedirs(home, 0, 0, 0o660)
            if os.path.exists('/etc/skel'):
                for f in os.listdir('/etc/skel'):
                    if f.startswith('.bash'):
                        shutil.copyfile(os.path.join('/etc/skel', f),
                                        os.path.join(home, f))
        mounts[home] = '/root'

    c = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='doesnotmatter',
        args=[],
        container_args=container_args,
        volume_mounts=mounts,
        bind_mounts=binds,
        envs=ctx.env,
        privileged=True)
    command = c.shell_cmd(command)

    return call_timeout(ctx, command, ctx.timeout)
9f95a23c TL |
4443 | |
4444 | ################################## | |
4445 | ||
f6b5b4d7 | 4446 | |
@infer_fsid
def command_enter(ctx):
    # type: (CephadmContext) -> int
    """Exec a command (default: an interactive sh) inside a running
    daemon's container; return the exit code."""
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')
    daemon_type, daemon_id = ctx.name.split('.', 1)
    container_args = ['-i']  # type: List[str]
    if ctx.command:
        command = ctx.command
    else:
        # no explicit command: drop into an interactive tty shell
        command = ['sh']
        container_args += [
            '-t',
            '-e', 'LANG=C',
            '-e', 'PS1=%s' % CUSTOM_PS1,
        ]
    container = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='doesnotmatter',
        container_args=container_args,
        cname='ceph-%s-%s.%s' % (ctx.fsid, daemon_type, daemon_id),
    )
    return call_timeout(ctx, container.exec_cmd(command), ctx.timeout)
9f95a23c TL |
4472 | |
4473 | ################################## | |
4474 | ||
f6b5b4d7 | 4475 | |
9f95a23c TL |
@infer_fsid
@infer_image
def command_ceph_volume(ctx):
    # type: (CephadmContext) -> None
    """Run ceph-volume in a privileged container and print its output."""
    if ctx.fsid:
        make_log_dir(ctx, ctx.fsid)

    # serialize against other cephadm operations on this cluster
    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    uid, gid = 0, 0  # ceph-volume runs as root
    mounts = get_container_mounts(ctx, ctx.fsid, 'osd', None)

    (config, keyring) = get_config_and_keyring(ctx)

    # keep references to the temp files so they live until we're done
    tmp_config = None
    tmp_keyring = None
    if config:
        tmp_config = write_tmp(config, uid, gid)  # tmp config file
        mounts[tmp_config.name] = '/etc/ceph/ceph.conf:z'
    if keyring:
        tmp_keyring = write_tmp(keyring, uid, gid)  # tmp keyring file
        mounts[tmp_keyring.name] = '/var/lib/ceph/bootstrap-osd/ceph.keyring:z'

    container = CephContainer(
        ctx,
        image=ctx.image,
        entrypoint='/usr/sbin/ceph-volume',
        envs=ctx.env,
        args=ctx.command,
        privileged=True,
        volume_mounts=mounts,
    )
    if ctx.log_output:
        verbosity = CallVerbosity.VERBOSE
    else:
        verbosity = CallVerbosity.VERBOSE_ON_FAILURE
    out, err, code = call_throws(ctx, container.run_cmd(), verbosity=verbosity)
    if not code:
        print(out)
4517 | ||
4518 | ################################## | |
4519 | ||
f6b5b4d7 | 4520 | |
@infer_fsid
def command_unit(ctx):
    # type: (CephadmContext) -> None
    """Pass a systemctl verb (start/stop/restart/...) to a daemon's unit."""
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    call_throws(ctx,
                ['systemctl', ctx.command, unit_name],
                verbosity=CallVerbosity.VERBOSE,
                desc='')
9f95a23c TL |
4536 | |
4537 | ################################## | |
4538 | ||
f6b5b4d7 | 4539 | |
@infer_fsid
def command_logs(ctx):
    # type: (CephadmContext) -> None
    """Show journalctl output for a daemon's systemd unit, passing any
    extra ctx.command arguments straight through to journalctl."""
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    cmd = [find_program('journalctl'), '-u', unit_name]
    if ctx.command:
        cmd += ctx.command

    # call this directly, without our wrapper, so that we get an unmolested
    # stdout with logger prefixing.
    logger.debug('Running command: %s' % ' '.join(cmd))
    subprocess.call(cmd)  # type: ignore
9f95a23c TL |
4557 | |
4558 | ################################## | |
4559 | ||
f6b5b4d7 | 4560 | |
f67539c2 TL |
def list_networks(ctx):
    # type: (CephadmContext) -> Dict[str,Dict[str,List[str]]]
    """Return {network CIDR: {interface: [local IPs]}} for IPv4 and IPv6.

    NOTE: 18.04's iproute2 4.15.0-2ubun doesn't support the -j flag, so
    the helpers parse the plain-text output of 'ip route'/'ip addr' with
    regexes instead of using JSON output.
    """
    networks = _list_ipv4_networks(ctx)
    networks.update(_list_ipv6_networks(ctx))
    return networks
4574 | ||
4575 | ||
f67539c2 TL |
def _list_ipv4_networks(ctx: CephadmContext):
    """Run 'ip route ls' and parse it into {net: {iface: [ips]}}."""
    ip_cmd = find_executable('ip')
    if ip_cmd is None:
        raise FileNotFoundError("unable to find 'ip' command")
    out, _, _ = call_throws(ctx, [ip_cmd, 'route', 'ls'])
    return _parse_ipv4_route(out)
4582 | ||
9f95a23c | 4583 | |
f6b5b4d7 | 4584 | def _parse_ipv4_route(out): |
f67539c2 TL |
4585 | r = {} # type: Dict[str,Dict[str,List[str]]] |
4586 | p = re.compile(r'^(\S+) dev (\S+) (.*)scope link (.*)src (\S+)') | |
9f95a23c TL |
4587 | for line in out.splitlines(): |
4588 | m = p.findall(line) | |
4589 | if not m: | |
4590 | continue | |
4591 | net = m[0][0] | |
f67539c2 TL |
4592 | iface = m[0][1] |
4593 | ip = m[0][4] | |
9f95a23c | 4594 | if net not in r: |
f67539c2 TL |
4595 | r[net] = {} |
4596 | if iface not in r[net]: | |
4597 | r[net][iface] = [] | |
4598 | r[net][iface].append(ip) | |
9f95a23c TL |
4599 | return r |
4600 | ||
f6b5b4d7 | 4601 | |
f67539c2 TL |
def _list_ipv6_networks(ctx: CephadmContext):
    """Run 'ip -6 route ls' and 'ip -6 addr ls' and combine them into
    {net: {iface: [ips]}}."""
    ip_cmd = find_executable('ip')
    if ip_cmd is None:
        raise FileNotFoundError("unable to find 'ip' command")
    routes, _, _ = call_throws(ctx, [ip_cmd, '-6', 'route', 'ls'])
    ips, _, _ = call_throws(ctx, [ip_cmd, '-6', 'addr', 'ls'])
    return _parse_ipv6_route(routes, ips)
4609 | ||
4610 | ||
4611 | def _parse_ipv6_route(routes, ips): | |
f67539c2 | 4612 | r = {} # type: Dict[str,Dict[str,List[str]]] |
f6b5b4d7 TL |
4613 | route_p = re.compile(r'^(\S+) dev (\S+) proto (\S+) metric (\S+) .*pref (\S+)$') |
4614 | ip_p = re.compile(r'^\s+inet6 (\S+)/(.*)scope (.*)$') | |
f67539c2 | 4615 | iface_p = re.compile(r'^(\d+): (\S+): (.*)$') |
f6b5b4d7 TL |
4616 | for line in routes.splitlines(): |
4617 | m = route_p.findall(line) | |
4618 | if not m or m[0][0].lower() == 'default': | |
4619 | continue | |
4620 | net = m[0][0] | |
f67539c2 TL |
4621 | if '/' not in net: # only consider networks with a mask |
4622 | continue | |
4623 | iface = m[0][1] | |
f6b5b4d7 | 4624 | if net not in r: |
f67539c2 TL |
4625 | r[net] = {} |
4626 | if iface not in r[net]: | |
4627 | r[net][iface] = [] | |
f6b5b4d7 | 4628 | |
f67539c2 | 4629 | iface = None |
f6b5b4d7 TL |
4630 | for line in ips.splitlines(): |
4631 | m = ip_p.findall(line) | |
4632 | if not m: | |
f67539c2 TL |
4633 | m = iface_p.findall(line) |
4634 | if m: | |
4635 | # drop @... suffix, if present | |
4636 | iface = m[0][1].split('@')[0] | |
f6b5b4d7 TL |
4637 | continue |
4638 | ip = m[0][0] | |
4639 | # find the network it belongs to | |
4640 | net = [n for n in r.keys() | |
f67539c2 | 4641 | if ipaddress.ip_address(ip) in ipaddress.ip_network(n)] |
f6b5b4d7 | 4642 | if net: |
f67539c2 TL |
4643 | assert(iface) |
4644 | r[net[0]][iface].append(ip) | |
f6b5b4d7 TL |
4645 | |
4646 | return r | |
4647 | ||
4648 | ||
f67539c2 TL |
def command_list_networks(ctx):
    # type: (CephadmContext) -> None
    """CLI entry point: print detected networks as pretty-printed JSON."""
    networks = list_networks(ctx)
    print(json.dumps(networks, indent=4))
4653 | ||
4654 | ################################## | |
4655 | ||
f6b5b4d7 | 4656 | |
f67539c2 TL |
def command_ls(ctx):
    # type: (CephadmContext) -> None
    """CLI entry point: print all daemons on this host as JSON."""
    daemons = list_daemons(ctx, detail=not ctx.no_detail,
                           legacy_dir=ctx.legacy_dir)
    print(json.dumps(daemons, indent=4))
4662 | ||
f6b5b4d7 | 4663 | |
f67539c2 TL |
def with_units_to_int(v: str) -> int:
    """Convert a human-readable size like '123', '10B', '1.5KiB' or '2G'
    to an integer byte count (binary, 1K = 1024)."""
    # strip a trailing byte marker ('iB' or bare 'B'), if present
    if v.endswith('iB'):
        v = v[:-2]
    elif v.endswith('B'):
        v = v[:-1]
    multipliers = {
        'K': 1024,
        'M': 1024 ** 2,
        'G': 1024 ** 3,
        'T': 1024 ** 4,
    }
    mult = multipliers.get(v[-1].upper(), 1)
    if mult != 1:
        v = v[:-1]
    return int(float(v) * mult)
4683 | ||
4684 | ||
def list_daemons(ctx, detail=True, legacy_dir=None):
    # type: (CephadmContext, bool, Optional[str]) -> List[Dict[str, str]]
    """Return a list of dicts describing every daemon on this host.

    Scans the data dir (normally /var/lib/ceph) for both layouts:
      - legacy:  <type>/<cluster>-<id>       -> style 'legacy'
      - cephadm: <fsid>/<type>.<id>          -> style 'cephadm:v1'

    With detail=True, also queries systemd (enabled/state) and the
    container engine (container id, image, version, memory usage).
    legacy_dir, if given, is prefixed to all host paths (used for tests
    and adoption from a chroot-like tree).
    """
    host_version: Optional[str] = None
    ls = []
    container_path = ctx.container_engine.path

    data_dir = ctx.data_dir
    if legacy_dir is not None:
        data_dir = os.path.abspath(legacy_dir + data_dir)

    # keep track of ceph versions we see
    seen_versions = {}  # type: Dict[str, Optional[str]]

    # keep track of image digests
    seen_digests = {}  # type: Dict[str, List[str]]

    # keep track of memory usage we've seen
    seen_memusage = {}  # type: Dict[str, int]
    out, err, code = call(
        ctx,
        [container_path, 'stats', '--format', '{{.ID}},{{.MemUsage}}', '--no-stream'],
        verbosity=CallVerbosity.DEBUG
    )
    # container ids printed by `stats` may be truncated; remember the length
    # so full ids from `inspect` can be matched against them later.
    seen_memusage_cid_len = 0
    if not code:
        for line in out.splitlines():
            (cid, usage) = line.split(',')
            (used, limit) = usage.split(' / ')
            seen_memusage[cid] = with_units_to_int(used)
            if not seen_memusage_cid_len:
                seen_memusage_cid_len = len(cid)

    # /var/lib/ceph
    if os.path.exists(data_dir):
        for i in os.listdir(data_dir):
            if i in ['mon', 'osd', 'mds', 'mgr']:
                # legacy layout: /var/lib/ceph/<type>/<cluster>-<id>
                daemon_type = i
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '-' not in j:
                        continue
                    (cluster, daemon_id) = j.split('-', 1)
                    fsid = get_legacy_daemon_fsid(ctx,
                                                  cluster, daemon_type, daemon_id,
                                                  legacy_dir=legacy_dir)
                    legacy_unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
                    val: Dict[str, Any] = {
                        'style': 'legacy',
                        'name': '%s.%s' % (daemon_type, daemon_id),
                        'fsid': fsid if fsid is not None else 'unknown',
                        'systemd_unit': legacy_unit_name,
                    }
                    if detail:
                        (val['enabled'], val['state'], _) = \
                            check_unit(ctx, legacy_unit_name)
                        # legacy daemons run the host's ceph binary; probe it
                        # once and reuse the answer for all of them
                        if not host_version:
                            try:
                                out, err, code = call(ctx,
                                                      ['ceph', '-v'],
                                                      verbosity=CallVerbosity.DEBUG)
                                if not code and out.startswith('ceph version '):
                                    host_version = out.split(' ')[2]
                            except Exception:
                                pass
                        val['host_version'] = host_version
                    ls.append(val)
            elif is_fsid(i):
                # cephadm layout: /var/lib/ceph/<fsid>/<type>.<id>
                fsid = str(i)  # convince mypy that fsid is a str here
                for j in os.listdir(os.path.join(data_dir, i)):
                    if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)):
                        name = j
                        (daemon_type, daemon_id) = j.split('.', 1)
                        unit_name = get_unit_name(fsid,
                                                  daemon_type,
                                                  daemon_id)
                    else:
                        continue
                    val = {
                        'style': 'cephadm:v1',
                        'name': name,
                        'fsid': fsid,
                        'systemd_unit': unit_name,
                    }
                    if detail:
                        # get container id
                        (val['enabled'], val['state'], _) = \
                            check_unit(ctx, unit_name)
                        container_id = None
                        image_name = None
                        image_id = None
                        image_digests = None
                        version = None
                        start_stamp = None

                        cmd = [
                            container_path, 'inspect',
                            '--format', '{{.Id}},{{.Config.Image}},{{.Image}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}',
                            'ceph-%s-%s' % (fsid, j)
                        ]
                        out, err, code = call(ctx, cmd, verbosity=CallVerbosity.DEBUG)
                        if not code:
                            # container exists (running or stopped)
                            (container_id, image_name, image_id, start,
                             version) = out.strip().split(',')
                            image_id = normalize_container_id(image_id)
                            daemon_type = name.split('.', 1)[0]
                            start_stamp = try_convert_datetime(start)

                            # collect digests for this image id
                            image_digests = seen_digests.get(image_id)
                            if not image_digests:
                                out, err, code = call(
                                    ctx,
                                    [
                                        container_path, 'image', 'inspect', image_id,
                                        '--format', '{{.RepoDigests}}',
                                    ],
                                    verbosity=CallVerbosity.DEBUG)
                                if not code:
                                    image_digests = out.strip()[1:-1].split(' ')
                                    seen_digests[image_id] = image_digests

                            # identify software version inside the container (if we can)
                            if not version or '.' not in version:
                                version = seen_versions.get(image_id, None)
                            if daemon_type == NFSGanesha.daemon_type:
                                version = NFSGanesha.get_version(ctx, container_id)
                            if daemon_type == CephIscsi.daemon_type:
                                version = CephIscsi.get_version(ctx, container_id)
                            elif not version:
                                # exec a daemon-type specific version command
                                # inside the running container
                                if daemon_type in Ceph.daemons:
                                    out, err, code = call(ctx,
                                                          [container_path, 'exec', container_id,
                                                           'ceph', '-v'],
                                                          verbosity=CallVerbosity.DEBUG)
                                    if not code and \
                                            out.startswith('ceph version '):
                                        version = out.split(' ')[2]
                                        seen_versions[image_id] = version
                                elif daemon_type == 'grafana':
                                    out, err, code = call(ctx,
                                                          [container_path, 'exec', container_id,
                                                           'grafana-server', '-v'],
                                                          verbosity=CallVerbosity.DEBUG)
                                    if not code and \
                                            out.startswith('Version '):
                                        version = out.split(' ')[1]
                                        seen_versions[image_id] = version
                                elif daemon_type in ['prometheus',
                                                     'alertmanager',
                                                     'node-exporter']:
                                    version = Monitoring.get_version(ctx, container_id, daemon_type)
                                    seen_versions[image_id] = version
                                elif daemon_type == 'haproxy':
                                    out, err, code = call(ctx,
                                                          [container_path, 'exec', container_id,
                                                           'haproxy', '-v'],
                                                          verbosity=CallVerbosity.DEBUG)
                                    if not code and \
                                            out.startswith('HA-Proxy version '):
                                        version = out.split(' ')[2]
                                        seen_versions[image_id] = version
                                elif daemon_type == 'keepalived':
                                    # keepalived prints its version on stderr
                                    out, err, code = call(ctx,
                                                          [container_path, 'exec', container_id,
                                                           'keepalived', '--version'],
                                                          verbosity=CallVerbosity.DEBUG)
                                    if not code and \
                                            err.startswith('Keepalived '):
                                        version = err.split(' ')[1]
                                        if version[0] == 'v':
                                            version = version[1:]
                                        seen_versions[image_id] = version
                                elif daemon_type == CustomContainer.daemon_type:
                                    # Because a custom container can contain
                                    # everything, we do not know which command
                                    # to execute to get the version.
                                    pass
                                else:
                                    logger.warning('version for unknown daemon type %s' % daemon_type)
                        else:
                            # no container: fall back to the recorded image name
                            vfile = os.path.join(data_dir, fsid, j, 'unit.image')  # type: ignore
                            try:
                                with open(vfile, 'r') as f:
                                    image_name = f.read().strip() or None
                            except IOError:
                                pass

                        # unit.meta?
                        mfile = os.path.join(data_dir, fsid, j, 'unit.meta')  # type: ignore
                        try:
                            with open(mfile, 'r') as f:
                                meta = json.loads(f.read())
                            val.update(meta)
                        except IOError:
                            pass

                        val['container_id'] = container_id
                        val['container_image_name'] = image_name
                        val['container_image_id'] = image_id
                        val['container_image_digests'] = image_digests
                        if container_id:
                            val['memory_usage'] = seen_memusage.get(container_id[0:seen_memusage_cid_len])
                        val['version'] = version
                        val['started'] = start_stamp
                        val['created'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.created')
                        )
                        val['deployed'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.image'))
                        val['configured'] = get_file_timestamp(
                            os.path.join(data_dir, fsid, j, 'unit.configured'))

                    ls.append(val)

    return ls
4899 | ||
4900 | ||
f67539c2 TL |
def get_daemon_description(ctx, fsid, name, detail=False, legacy_dir=None):
    # type: (CephadmContext, str, str, bool, Optional[str]) -> Dict[str, str]
    """Return the list_daemons() entry matching both fsid and name.

    Raises Error if no daemon on this host matches.
    """
    matches = (d for d in list_daemons(ctx, detail=detail, legacy_dir=legacy_dir)
               if d['fsid'] == fsid and d['name'] == name)
    for daemon in matches:
        return daemon
    raise Error('Daemon not found: {}. See `cephadm ls`'.format(name))
4911 | ||
9f95a23c TL |
4912 | ################################## |
4913 | ||
f67539c2 | 4914 | |
@default_image
def command_adopt(ctx):
    # type: (CephadmContext) -> None
    """CLI entry point: adopt a legacy-deployed daemon into cephadm.

    Pulls the target image (unless --skip-pull), takes the per-cluster
    lock, then dispatches to the daemon-type specific adoption routine.
    """

    if not ctx.skip_pull:
        _pull_image(ctx, ctx.image)

    (daemon_type, daemon_id) = ctx.name.split('.', 1)

    # only 'legacy' style adoption is implemented
    if ctx.style != 'legacy':
        raise Error('adoption of style %s not implemented' % ctx.style)

    # determine the fsid so we can take the per-cluster lock
    fsid = get_legacy_daemon_fsid(ctx,
                                  ctx.cluster,
                                  daemon_type,
                                  daemon_id,
                                  legacy_dir=ctx.legacy_dir)
    if not fsid:
        raise Error('could not detect legacy fsid; set fsid in ceph.conf')
    lock = FileLock(ctx, fsid)
    lock.acquire()

    # dispatch to the right adoption routine
    if daemon_type in Ceph.daemons:
        command_adopt_ceph(ctx, daemon_type, daemon_id, fsid)
    elif daemon_type == 'node-exporter':
        raise Error('adoption of node-exporter not implemented')
    else:
        monitoring_adopters = {
            'prometheus': command_adopt_prometheus,
            'grafana': command_adopt_grafana,
            'alertmanager': command_adopt_alertmanager,
        }
        adopter = monitoring_adopters.get(daemon_type)
        if adopter is None:
            raise Error('daemon type %s not recognized' % daemon_type)
        adopter(ctx, daemon_id, fsid)
4952 | ||
4953 | ||
class AdoptOsd(object):
    """Helpers to discover the fsid and objectstore type of a legacy OSD,
    checking (in order of preference) a live mount, ceph-volume lvm
    metadata, and ceph-volume 'simple' scan files."""

    def __init__(self, ctx, osd_data_dir, osd_id):
        # type: (CephadmContext, str, str) -> None
        self.ctx = ctx
        self.osd_data_dir = osd_data_dir
        self.osd_id = osd_id

    def check_online_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Read fsid/type from a mounted (online) OSD data dir."""
        osd_fsid = None  # type: Optional[str]
        osd_type = None  # type: Optional[str]

        fsid_path = os.path.join(self.osd_data_dir, 'fsid')
        try:
            with open(fsid_path, 'r') as f:
                osd_fsid = f.read().strip()
            logger.info('Found online OSD at %s' % fsid_path)
        except IOError:
            logger.info('Unable to read OSD fsid from %s' % fsid_path)
        type_path = os.path.join(self.osd_data_dir, 'type')
        if os.path.exists(type_path):
            with open(type_path) as f:
                osd_type = f.read().strip()
        else:
            logger.info('"type" file missing for OSD data dir')

        return osd_fsid, osd_type

    def check_offline_lvm_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Query ceph-volume (in a container) for LVM-based OSD metadata."""
        osd_fsid = None  # type: Optional[str]
        osd_type = None  # type: Optional[str]

        c = CephContainer(
            self.ctx,
            image=self.ctx.image,
            entrypoint='/usr/sbin/ceph-volume',
            args=['lvm', 'list', '--format=json'],
            privileged=True
        )
        out, err, code = call_throws(self.ctx, c.run_cmd())
        if code:
            return osd_fsid, osd_type
        try:
            lvm_info = json.loads(out)
        except ValueError as e:
            logger.info('Invalid JSON in ceph-volume lvm list: {}'.format(e))
            return osd_fsid, osd_type
        if self.osd_id in lvm_info:
            logger.info('Found offline LVM OSD {}'.format(self.osd_id))
            osd_fsid = lvm_info[self.osd_id][0]['tags']['ceph.osd_fsid']
            for device in lvm_info[self.osd_id]:
                dev_type = device['tags']['ceph.type']
                if dev_type == 'block':
                    osd_type = 'bluestore'
                    break
                if dev_type == 'data':
                    osd_type = 'filestore'
                    break

        return osd_fsid, osd_type

    def check_offline_simple_osd(self):
        # type: () -> Tuple[Optional[str], Optional[str]]
        """Look for a ceph-volume 'simple' scan JSON describing this OSD."""
        osd_fsid = None  # type: Optional[str]
        osd_type = None  # type: Optional[str]

        osd_file = glob('/etc/ceph/osd/{}-[a-f0-9-]*.json'.format(self.osd_id))
        if len(osd_file) != 1:
            return osd_fsid, osd_type
        with open(osd_file[0], 'r') as f:
            try:
                js = json.loads(f.read())
                logger.info('Found offline simple OSD {}'.format(self.osd_id))
                osd_fsid = js['fsid']
                osd_type = js['type']
                if osd_type != 'filestore':
                    # need this to be mounted for the adopt to work, as it
                    # needs to move files from this directory
                    call_throws(self.ctx, ['mount', js['data']['path'], self.osd_data_dir])
            except ValueError as e:
                logger.info('Invalid JSON in {}: {}'.format(osd_file, e))

        return osd_fsid, osd_type
5031 | ||
9f95a23c | 5032 | |
f67539c2 TL |
def command_adopt_ceph(ctx, daemon_type, daemon_id, fsid):
    # type: (CephadmContext, str, str, str) -> None
    """Adopt a legacy ceph daemon (mon/mgr/osd/...).

    Stops and disables the old ceph-<type>@<id> systemd unit, moves its
    data and logs into the cephadm directory layout, fixes ownership, and
    deploys a new containerized unit (started again if the old one was
    running, or if --force-start was given).
    """

    (uid, gid) = extract_uid_gid(ctx)

    data_dir_src = ('/var/lib/ceph/%s/%s-%s' %
                    (daemon_type, ctx.cluster, daemon_id))
    data_dir_src = os.path.abspath(ctx.legacy_dir + data_dir_src)

    if not os.path.exists(data_dir_src):
        raise Error("{}.{} data directory '{}' does not exist. "
                    'Incorrect ID specified, or daemon already adopted?'.format(
                        daemon_type, daemon_id, data_dir_src))

    osd_fsid = None
    if daemon_type == 'osd':
        # try online, then offline-lvm, then offline-simple detection
        adopt_osd = AdoptOsd(ctx, data_dir_src, daemon_id)
        osd_fsid, osd_type = adopt_osd.check_online_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_lvm_osd()
        if not osd_fsid:
            osd_fsid, osd_type = adopt_osd.check_offline_simple_osd()
        if not osd_fsid:
            raise Error('Unable to find OSD {}'.format(daemon_id))
        logger.info('objectstore_type is %s' % osd_type)
        assert osd_type
        if osd_type == 'filestore':
            raise Error('FileStore is not supported by cephadm')

    # NOTE: implicit assumption here that the units correspond to the
    # cluster we are adopting based on the /etc/{defaults,sysconfig}/ceph
    # CLUSTER field.
    unit_name = 'ceph-%s@%s' % (daemon_type, daemon_id)
    (enabled, state, _) = check_unit(ctx, unit_name)
    if state == 'running':
        logger.info('Stopping old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'stop', unit_name])
    if enabled:
        logger.info('Disabling old systemd unit %s...' % unit_name)
        call_throws(ctx, ['systemctl', 'disable', unit_name])

    # data
    logger.info('Moving data...')
    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)
    move_files(ctx, glob(os.path.join(data_dir_src, '*')),
               data_dir_dst,
               uid=uid, gid=gid)
    logger.debug('Remove dir `%s`' % (data_dir_src))
    if os.path.ismount(data_dir_src):
        call_throws(ctx, ['umount', data_dir_src])
    os.rmdir(data_dir_src)

    logger.info('Chowning content...')
    call_throws(ctx, ['chown', '-c', '-R', '%d.%d' % (uid, gid), data_dir_dst])

    if daemon_type == 'mon':
        # rename *.ldb -> *.sst, in case they are coming from ubuntu
        store = os.path.join(data_dir_dst, 'store.db')
        num_renamed = 0
        if os.path.exists(store):
            for oldf in os.listdir(store):
                if oldf.endswith('.ldb'):
                    newf = oldf.replace('.ldb', '.sst')
                    oldp = os.path.join(store, oldf)
                    newp = os.path.join(store, newf)
                    logger.debug('Renaming %s -> %s' % (oldp, newp))
                    os.rename(oldp, newp)
                    # BUGFIX: counter was never incremented, so the summary
                    # log below could never fire
                    num_renamed += 1
        if num_renamed:
            logger.info('Renamed %d leveldb *.ldb files to *.sst',
                        num_renamed)
    if daemon_type == 'osd':
        for n in ['block', 'block.db', 'block.wal']:
            p = os.path.join(data_dir_dst, n)
            if os.path.exists(p):
                logger.info('Chowning %s...' % p)
                os.chown(p, uid, gid)
        # disable the ceph-volume 'simple' mode files on the host
        simple_fn = os.path.join('/etc/ceph/osd',
                                 '%s-%s.json' % (daemon_id, osd_fsid))
        if os.path.exists(simple_fn):
            new_fn = simple_fn + '.adopted-by-cephadm'
            logger.info('Renaming %s -> %s', simple_fn, new_fn)
            os.rename(simple_fn, new_fn)
            logger.info('Disabling host unit ceph-volume@ simple unit...')
            call(ctx, ['systemctl', 'disable',
                       'ceph-volume@simple-%s-%s.service' % (daemon_id, osd_fsid)])
        else:
            # assume this is an 'lvm' c-v for now, but don't error
            # out if it's not.
            logger.info('Disabling host unit ceph-volume@ lvm unit...')
            call(ctx, ['systemctl', 'disable',
                       'ceph-volume@lvm-%s-%s.service' % (daemon_id, osd_fsid)])

    # config
    config_src = '/etc/ceph/%s.conf' % (ctx.cluster)
    config_src = os.path.abspath(ctx.legacy_dir + config_src)
    config_dst = os.path.join(data_dir_dst, 'config')
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # logs
    logger.info('Moving logs...')
    log_dir_src = ('/var/log/ceph/%s-%s.%s.log*' %
                   (ctx.cluster, daemon_type, daemon_id))
    log_dir_src = os.path.abspath(ctx.legacy_dir + log_dir_src)
    log_dir_dst = make_log_dir(ctx, fsid, uid=uid, gid=gid)
    move_files(ctx, glob(log_dir_src),
               log_dir_dst,
               uid=uid, gid=gid)

    logger.info('Creating new units...')
    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon_units(ctx, fsid, uid, gid, daemon_type, daemon_id, c,
                        enable=True,  # unconditionally enable the new unit
                        start=(state == 'running' or ctx.force_start),
                        osd_fsid=osd_fsid)
    update_firewalld(ctx, daemon_type)
9f95a23c TL |
5151 | |
5152 | ||
f67539c2 TL |
def command_adopt_prometheus(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy prometheus: stop the old unit, import its config
    and metric data, then deploy it as a cephadm-managed container."""
    daemon_type = 'prometheus'
    uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)

    _stop_and_disable(ctx, 'prometheus')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = os.path.abspath(ctx.legacy_dir + '/etc/prometheus/prometheus.yml')
    config_dst = os.path.join(data_dir_dst, 'etc/prometheus')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # data
    data_src = os.path.abspath(ctx.legacy_dir + '/var/lib/prometheus/metrics/')
    data_dst = os.path.join(data_dir_dst, 'data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)
9f95a23c | 5180 | |
f6b5b4d7 | 5181 | |
f67539c2 TL |
def command_adopt_grafana(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy grafana: import config, provisioning tree, TLS
    cert/key (if present) and data, then deploy as a cephadm container."""

    daemon_type = 'grafana'
    uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)

    _stop_and_disable(ctx, 'grafana-server')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # main config file
    ini_src = os.path.abspath(ctx.legacy_dir + '/etc/grafana/grafana.ini')
    ini_dst = os.path.join(data_dir_dst, 'etc/grafana')
    makedirs(ini_dst, uid, gid, 0o755)
    copy_files(ctx, [ini_src], ini_dst, uid=uid, gid=gid)

    # provisioning tree
    prov_src = os.path.abspath(ctx.legacy_dir + '/etc/grafana/provisioning/')
    copy_tree(ctx, [prov_src], os.path.join(data_dir_dst, 'etc/grafana'),
              uid=uid, gid=gid)

    # cert: only imported when both cert and key exist on the host
    cert = '/etc/grafana/grafana.crt'
    key = '/etc/grafana/grafana.key'
    if os.path.exists(cert) and os.path.exists(key):
        certs_dir = os.path.join(data_dir_dst, 'etc/grafana/certs')
        makedirs(certs_dir, uid, gid, 0o755)

        cert_src = os.path.abspath(ctx.legacy_dir + cert)
        copy_files(ctx, [cert_src], os.path.join(certs_dir, 'cert_file'),
                   uid=uid, gid=gid)

        key_src = os.path.abspath(ctx.legacy_dir + key)
        copy_files(ctx, [key_src], os.path.join(certs_dir, 'cert_key'),
                   uid=uid, gid=gid)

        # point grafana.ini at the imported cert/key locations
        _adjust_grafana_ini(os.path.join(ini_dst, 'grafana.ini'))
    else:
        logger.debug('Skipping ssl, missing cert {} or key {}'.format(cert, key))

    # data - possible custom dashboards/plugins
    data_src = os.path.abspath(ctx.legacy_dir + '/var/lib/grafana/')
    copy_tree(ctx, [data_src], os.path.join(data_dir_dst, 'data'),
              uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)
9f95a23c | 5234 | |
f6b5b4d7 | 5235 | |
f67539c2 TL |
def command_adopt_alertmanager(ctx, daemon_id, fsid):
    # type: (CephadmContext, str, str) -> None
    """Adopt a legacy alertmanager: import its config and silence/data
    directory, then deploy it as a cephadm-managed container."""

    daemon_type = 'alertmanager'
    uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)

    _stop_and_disable(ctx, 'prometheus-alertmanager')

    data_dir_dst = make_data_dir(ctx, fsid, daemon_type, daemon_id,
                                 uid=uid, gid=gid)

    # config
    config_src = os.path.abspath(ctx.legacy_dir + '/etc/prometheus/alertmanager.yml')
    config_dst = os.path.join(data_dir_dst, 'etc/alertmanager')
    makedirs(config_dst, uid, gid, 0o755)
    copy_files(ctx, [config_src], config_dst, uid=uid, gid=gid)

    # data
    data_src = os.path.abspath(ctx.legacy_dir + '/var/lib/prometheus/alertmanager/')
    data_dst = os.path.join(data_dir_dst, 'etc/alertmanager/data')
    copy_tree(ctx, [data_src], data_dst, uid=uid, gid=gid)

    make_var_run(ctx, fsid, uid, gid)
    c = get_container(ctx, fsid, daemon_type, daemon_id)
    deploy_daemon(ctx, fsid, daemon_type, daemon_id, c, uid, gid)
    update_firewalld(ctx, daemon_type)
801d1391 | 5264 | |
f6b5b4d7 | 5265 | |
9f95a23c TL |
5266 | def _adjust_grafana_ini(filename): |
5267 | # type: (str) -> None | |
5268 | ||
5269 | # Update cert_file, cert_key pathnames in server section | |
5270 | # ConfigParser does not preserve comments | |
5271 | try: | |
f67539c2 | 5272 | with open(filename, 'r') as grafana_ini: |
9f95a23c | 5273 | lines = grafana_ini.readlines() |
f67539c2 TL |
5274 | with open('{}.new'.format(filename), 'w') as grafana_ini: |
5275 | server_section = False | |
9f95a23c TL |
5276 | for line in lines: |
5277 | if line.startswith('['): | |
f67539c2 | 5278 | server_section = False |
9f95a23c | 5279 | if line.startswith('[server]'): |
f67539c2 | 5280 | server_section = True |
9f95a23c TL |
5281 | if server_section: |
5282 | line = re.sub(r'^cert_file.*', | |
f67539c2 | 5283 | 'cert_file = /etc/grafana/certs/cert_file', line) |
9f95a23c | 5284 | line = re.sub(r'^cert_key.*', |
f67539c2 | 5285 | 'cert_key = /etc/grafana/certs/cert_key', line) |
9f95a23c | 5286 | grafana_ini.write(line) |
f67539c2 | 5287 | os.rename('{}.new'.format(filename), filename) |
9f95a23c | 5288 | except OSError as err: |
f67539c2 | 5289 | raise Error('Cannot update {}: {}'.format(filename, err)) |
9f95a23c TL |
5290 | |
5291 | ||
f67539c2 TL |
def _stop_and_disable(ctx, unit_name):
    # type: (CephadmContext, str) -> None
    """Stop unit_name if it is running and disable it if it is enabled."""

    enabled, state, _ = check_unit(ctx, unit_name)
    actions = []
    if state == 'running':
        actions.append(('Stopping', 'stop'))
    if enabled:
        actions.append(('Disabling', 'disable'))
    for verb, subcmd in actions:
        logger.info('%s old systemd unit %s...' % (verb, unit_name))
        call_throws(ctx, ['systemctl', subcmd, unit_name])
9f95a23c TL |
5302 | |
5303 | ################################## | |
5304 | ||
9f95a23c | 5305 | |
f67539c2 TL |
def command_rm_daemon(ctx):
    # type: (CephadmContext) -> None
    """CLI entry point: remove one daemon from this host.

    Stops/disables its systemd unit, then either backs up its data dir
    (mon/osd/prometheus without --force-delete-data) or deletes it.
    """
    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)

    if daemon_type in ['mon', 'osd'] and not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    for action in ['stop', 'reset-failed', 'disable']:
        call(ctx, ['systemctl', action, unit_name],
             verbosity=CallVerbosity.DEBUG)

    data_dir = get_data_dir(ctx.fsid, ctx.data_dir, daemon_type, daemon_id)
    keep_data = (daemon_type in ['mon', 'osd', 'prometheus']
                 and not ctx.force_delete_data)
    if keep_data:
        # rename it out of the way -- do not delete
        backup_dir = os.path.join(ctx.data_dir, ctx.fsid, 'removed')
        if not os.path.exists(backup_dir):
            makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
        dirname = '%s.%s_%s' % (daemon_type, daemon_id,
                                datetime.datetime.utcnow().strftime(DATEFMT))
        os.rename(data_dir, os.path.join(backup_dir, dirname))
    else:
        if daemon_type == CephadmDaemon.daemon_type:
            CephadmDaemon.uninstall(ctx, ctx.fsid, daemon_type, daemon_id)
        call_throws(ctx, ['rm', '-rf', data_dir])
9f95a23c TL |
5339 | |
5340 | ################################## | |
5341 | ||
f6b5b4d7 | 5342 | |
f67539c2 TL |
def command_rm_cluster(ctx):
    # type: (CephadmContext) -> None
    """Tear down all cephadm-managed state for ``ctx.fsid`` on this host.

    Stops/disables every daemon unit of this cluster, removes the cluster
    target/slice and unit files, deletes data (and logs unless
    ``--keep-logs``), removes the logrotate config, and finally removes
    /etc/ceph conf/keyring/pubkey files when they belong to this fsid.
    Requires ``--force``.
    """
    if not ctx.force:
        raise Error('must pass --force to proceed: '
                    'this command may destroy precious data!')

    lock = FileLock(ctx, ctx.fsid)
    lock.acquire()

    # stop + disable individual daemon units
    for d in list_daemons(ctx, detail=False):
        if d['fsid'] != ctx.fsid:
            continue
        if d['style'] != 'cephadm:v1':
            continue
        unit_name = get_unit_name(ctx.fsid, d['name'])
        call(ctx, ['systemctl', 'stop', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'reset-failed', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'disable', unit_name],
             verbosity=CallVerbosity.DEBUG)

    # cluster units
    for unit_name in ['ceph-%s.target' % ctx.fsid]:
        call(ctx, ['systemctl', 'stop', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'reset-failed', unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(ctx, ['systemctl', 'disable', unit_name],
             verbosity=CallVerbosity.DEBUG)

    # systemd escapes '-' as '\x2d' inside slice/unit names
    slice_name = 'system-%s.slice' % (('ceph-%s' % ctx.fsid).replace('-', '\\x2d'))
    call(ctx, ['systemctl', 'stop', slice_name],
         verbosity=CallVerbosity.DEBUG)

    # rm units
    call_throws(ctx, ['rm', '-f', ctx.unit_dir +  # noqa: W504
                      '/ceph-%s@.service' % ctx.fsid])
    call_throws(ctx, ['rm', '-f', ctx.unit_dir +  # noqa: W504
                      '/ceph-%s.target' % ctx.fsid])
    call_throws(ctx, ['rm', '-rf',
                      ctx.unit_dir + '/ceph-%s.target.wants' % ctx.fsid])
    # rm data
    call_throws(ctx, ['rm', '-rf', ctx.data_dir + '/' + ctx.fsid])

    if not ctx.keep_logs:
        # rm logs
        call_throws(ctx, ['rm', '-rf', ctx.log_dir + '/' + ctx.fsid])
        call_throws(ctx, ['rm', '-rf', ctx.log_dir +  # noqa: W504
                          '/*.wants/ceph-%s@*' % ctx.fsid])

    # rm logrotate config
    call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/ceph-%s' % ctx.fsid])

    # clean up config, keyring, and pub key files
    files = ['/etc/ceph/ceph.conf', '/etc/ceph/ceph.pub', '/etc/ceph/ceph.client.admin.keyring']

    if os.path.exists(files[0]):
        # only remove these files if ceph.conf references this cluster's
        # fsid, so we do not clobber another cluster's configuration
        valid_fsid = False
        with open(files[0]) as f:
            if ctx.fsid in f.read():
                valid_fsid = True
        if valid_fsid:
            for n in range(0, len(files)):
                if os.path.exists(files[n]):
                    os.remove(files[n])
5410 | ||
9f95a23c TL |
5411 | ################################## |
5412 | ||
f67539c2 TL |
5413 | |
def check_time_sync(ctx, enabler=None):
    # type: (CephadmContext, Optional[Packager]) -> bool
    """Return True when at least one known time-sync systemd unit is active.

    When *enabler* is given, check_units may use it to try to enable a
    candidate service.
    """
    candidate_units = [
        'chrony.service',  # 18.04 (at least)
        'chronyd.service',  # el / opensuse
        'systemd-timesyncd.service',
        'ntpd.service',  # el7 (at least)
        'ntp.service',  # 18.04 (at least)
        'ntpsec.service',  # 20.04 (at least) / buster
    ]
    synced = check_units(ctx, candidate_units, enabler)
    if not synced:
        logger.warning('No time sync service is running; checked for %s' % candidate_units)
    return synced
5428 | ||
f6b5b4d7 | 5429 | |
f67539c2 TL |
def command_check_host(ctx: CephadmContext) -> None:
    """Verify this host can run cephadm-managed daemons.

    Checks for a container engine, required binaries (systemctl, lvcreate),
    an active time-sync service, and (optionally) the expected hostname.
    Raises Error listing every problem found; logs 'Host looks OK' otherwise.
    """
    container_path = ctx.container_engine.path

    errors = []
    commands = ['systemctl', 'lvcreate']

    try:
        check_container_engine(ctx)
        logger.info('podman|docker (%s) is present' % container_path)
    except Error as e:
        errors.append(str(e))

    for command in commands:
        try:
            find_program(command)
            logger.info('%s is present' % command)
        except ValueError:
            errors.append('%s binary does not appear to be installed' % command)

    # check for configured+running chronyd or ntp
    if not check_time_sync(ctx):
        errors.append('No time synchronization is active')

    if 'expect_hostname' in ctx and ctx.expect_hostname:
        if get_hostname().lower() != ctx.expect_hostname.lower():
            errors.append('hostname "%s" does not match expected hostname "%s"' % (
                get_hostname(), ctx.expect_hostname))
        else:
            # BUGFIX: this used to log unconditionally, claiming a match
            # even immediately after recording a mismatch error above
            logger.info('Hostname "%s" matches what is expected.',
                        ctx.expect_hostname)

    if errors:
        raise Error('\nERROR: '.join(errors))

    logger.info('Host looks OK')
5464 | ||
5465 | ################################## | |
5466 | ||
f6b5b4d7 | 5467 | |
def command_prepare_host(ctx: CephadmContext) -> None:
    """Install anything missing that cephadm needs, then re-check the host.

    Best-effort setup: installs a container engine (podman, falling back to
    docker on failure), lvm2, and chrony as needed; optionally adjusts the
    hostname to ``--expect-hostname``; finishes by running the host check.
    """
    logger.info('Verifying podman|docker is present...')
    # packager is created lazily, only if something needs installing
    pkg = None
    try:
        check_container_engine(ctx)
    except Error as e:
        logger.warning(str(e))
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install_podman()

    logger.info('Verifying lvm2 is present...')
    if not find_executable('lvcreate'):
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install(['lvm2'])

    logger.info('Verifying time synchronization is in place...')
    if not check_time_sync(ctx):
        if not pkg:
            pkg = create_packager(ctx)
        pkg.install(['chrony'])
        # check again, and this time try to enable
        # the service
        check_time_sync(ctx, enabler=pkg)

    if 'expect_hostname' in ctx and ctx.expect_hostname and ctx.expect_hostname != get_hostname():
        logger.warning('Adjusting hostname from %s -> %s...' % (get_hostname(), ctx.expect_hostname))
        call_throws(ctx, ['hostname', ctx.expect_hostname])
        # persist the new hostname across reboots
        with open('/etc/hostname', 'w') as f:
            f.write(ctx.expect_hostname + '\n')

    logger.info('Repeating the final host check...')
    command_check_host(ctx)
9f95a23c TL |
5502 | |
5503 | ################################## | |
5504 | ||
f6b5b4d7 | 5505 | |
9f95a23c TL |
class CustomValidation(argparse.Action):
    """argparse action validating the --name and --exporter-config options."""

    def _check_name(self, values):
        # enforce the '<type>.<id>' shape by attempting a 2-way unpack
        parts = values.split('.', 1)
        try:
            (daemon_type, daemon_id) = parts
        except ValueError:
            raise argparse.ArgumentError(self,
                                         'must be of the format <type>.<id>. For example, osd.1 or prometheus.myhost.com')

        daemons = get_supported_daemons()
        if daemon_type not in daemons:
            raise argparse.ArgumentError(self,
                                         'name must declare the type of daemon e.g. '
                                         '{}'.format(', '.join(daemons)))

    def __call__(self, parser, namespace, values, option_string=None):
        if self.dest == 'name':
            self._check_name(values)
            setattr(namespace, self.dest, values)
        elif self.dest == 'exporter_config':
            cfg = get_parm(values)
            # run the class' validate method, and convert to an argparse error
            # if problems are found
            try:
                CephadmDaemon.validate_config(cfg)
            except Error as e:
                raise argparse.ArgumentError(self, str(e))
            setattr(namespace, self.dest, cfg)
9f95a23c TL |
5535 | |
5536 | ################################## | |
5537 | ||
f6b5b4d7 | 5538 | |
def get_distro():
    # type: () -> Tuple[Optional[str], Optional[str], Optional[str]]
    """Parse /etc/os-release into (ID, VERSION_ID, VERSION_CODENAME).

    Values are lowercased with surrounding double quotes stripped; any
    field missing from the file is returned as None.
    """
    wanted = {'ID': None, 'VERSION_ID': None, 'VERSION_CODENAME': None}  # type: Dict[str, Optional[str]]
    with open('/etc/os-release', 'r') as f:
        for raw in f.readlines():
            raw = raw.strip()
            if '=' not in raw or raw.startswith('#'):
                continue
            (key, value) = raw.split('=', 1)
            if value[0] == '"' and value[-1] == '"':
                value = value[1:-1]
            if key in wanted:
                wanted[key] = value.lower()
    return wanted['ID'], wanted['VERSION_ID'], wanted['VERSION_CODENAME']
5559 | ||
f6b5b4d7 | 5560 | |
class Packager(object):
    """Base class for per-distro package/repo management.

    Exactly one package "source" may be selected: a named stable release,
    an x.y.z version, or a dev branch (optionally pinned to a commit)
    resolved through shaman/chacra.  Subclasses implement add_repo/rm_repo.
    """

    def __init__(self, ctx: 'CephadmContext',
                 stable=None, version=None, branch=None, commit=None):
        # at most one of stable/version/branch may be given; commit only
        # makes sense together with branch
        assert \
            (stable and not version and not branch and not commit) or \
            (not stable and version and not branch and not commit) or \
            (not stable and not version and branch) or \
            (not stable and not version and not branch and not commit)
        self.ctx = ctx
        self.stable = stable
        self.version = version
        self.branch = branch
        self.commit = commit

    def add_repo(self):
        raise NotImplementedError

    def rm_repo(self):
        raise NotImplementedError

    def query_shaman(self, distro, distro_version, branch, commit):
        """Resolve a dev branch/commit to repo-file content via shaman -> chacra."""
        logger.info('Fetching repo metadata from shaman and chacra...')
        shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format(
            distro=distro,
            distro_version=distro_version,
            branch=branch,
            sha1=commit or 'latest',
            arch=get_arch()
        )
        try:
            shaman_response = urlopen(shaman_url)
        except HTTPError as err:
            logger.error('repository not found in shaman (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, shaman_url))
        chacra_url = ''
        try:
            # shaman redirects to the chacra URL that serves the repo file
            chacra_url = shaman_response.geturl()
            chacra_response = urlopen(chacra_url)
        except HTTPError as err:
            logger.error('repository not found in chacra (might not be available yet)')
            raise Error('%s, failed to fetch %s' % (err, chacra_url))
        return chacra_response.read().decode('utf-8')

    def repo_gpgkey(self):
        """Return a (key_url, key_name) pair for the repo signing key.

        BUGFIX: when --gpg-url was supplied this used to return the bare
        URL string, but every caller expects a 2-tuple (Apt.add_repo
        unpacks it, YumDnf/Zypper index [0]), so a custom key URL crashed.
        """
        if self.ctx.gpg_url:
            return self.ctx.gpg_url, 'manual'
        if self.stable or self.version:
            return 'https://download.ceph.com/keys/release.asc', 'release'
        else:
            return 'https://download.ceph.com/keys/autobuild.asc', 'autobuild'

    def enable_service(self, service):
        """
        Start and enable the service (typically using systemd).
        """
        call_throws(self.ctx, ['systemctl', 'enable', '--now', service])
9f95a23c TL |
5618 | |
5619 | ||
class Apt(Packager):
    """Packager backend for apt-based distros (Ubuntu, Debian).

    Manages the ceph.list repo file and its GPG key; on Ubuntu it also
    manages the kubic repo used to obtain podman.
    """
    # map /etc/os-release ID -> shaman distro name
    DISTRO_NAMES = {
        'ubuntu': 'ubuntu',
        'debian': 'debian',
    }

    def __init__(self, ctx: CephadmContext,
                 stable, version, branch, commit,
                 distro, distro_version, distro_codename):
        super(Apt, self).__init__(ctx, stable=stable, version=version,
                                  branch=branch, commit=commit)
        self.ctx = ctx
        self.distro = self.DISTRO_NAMES[distro]
        self.distro_codename = distro_codename
        self.distro_version = distro_version

    def repo_path(self):
        # apt sources file for the ceph repo
        return '/etc/apt/sources.list.d/ceph.list'

    def add_repo(self):
        """Install the repo GPG key and write the ceph apt sources file."""

        url, name = self.repo_gpgkey()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        with open('/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name, 'w') as f:
            f.write(key)

        # stable/version installs point at download.ceph.com (or --repo-url);
        # dev installs use whatever repo line shaman/chacra returns
        if self.version:
            content = 'deb %s/debian-%s/ %s main\n' % (
                self.ctx.repo_url, self.version, self.distro_codename)
        elif self.stable:
            content = 'deb %s/debian-%s/ %s main\n' % (
                self.ctx.repo_url, self.stable, self.distro_codename)
        else:
            content = self.query_shaman(self.distro, self.distro_codename, self.branch,
                                        self.commit)

        logger.info('Installing repo file at %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        """Remove the ceph repo file, its keys, and (Ubuntu) the kubic repo."""
        for name in ['autobuild', 'release']:
            p = '/etc/apt/trusted.gpg.d/ceph.%s.gpg' % name
            if os.path.exists(p):
                logger.info('Removing repo GPG key %s...' % p)
                os.unlink(p)
        if os.path.exists(self.repo_path()):
            logger.info('Removing repo at %s...' % self.repo_path())
            os.unlink(self.repo_path())

        if self.distro == 'ubuntu':
            self.rm_kubic_repo()

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, ['apt-get', 'install', '-y'] + ls)

    def install_podman(self):
        # on Ubuntu, podman comes from the kubic repo; fall back to docker
        # if the podman install fails
        if self.distro == 'ubuntu':
            logger.info('Setting up repo for podman...')
            self.add_kubic_repo()
            call_throws(self.ctx, ['apt-get', 'update'])

        logger.info('Attempting podman install...')
        try:
            self.install(['podman'])
        except Error:
            logger.info('Podman did not work. Falling back to docker...')
            self.install(['docker.io'])

    def kubic_repo_url(self):
        return 'https://download.opensuse.org/repositories/devel:/kubic:/' \
               'libcontainers:/stable/xUbuntu_%s/' % self.distro_version

    def kubic_repo_path(self):
        return '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list'

    # NOTE: 'kubric' below is a historical typo for 'kubic'; kept as-is
    # because renaming would change this class's public method names
    def kubric_repo_gpgkey_url(self):
        return '%s/Release.key' % self.kubic_repo_url()

    def kubric_repo_gpgkey_path(self):
        return '/etc/apt/trusted.gpg.d/kubic.release.gpg'

    def add_kubic_repo(self):
        """Install the kubic GPG key and apt sources entry (for podman)."""
        url = self.kubric_repo_gpgkey_url()
        logger.info('Installing repo GPG key from %s...' % url)
        try:
            response = urlopen(url)
        except HTTPError as err:
            logger.error('failed to fetch GPG repo key from %s: %s' % (
                url, err))
            raise Error('failed to fetch GPG key')
        key = response.read().decode('utf-8')
        tmp_key = write_tmp(key, 0, 0)
        keyring = self.kubric_repo_gpgkey_path()
        call_throws(self.ctx, ['apt-key', '--keyring', keyring, 'add', tmp_key.name])

        logger.info('Installing repo file at %s...' % self.kubic_repo_path())
        content = 'deb %s /\n' % self.kubic_repo_url()
        with open(self.kubic_repo_path(), 'w') as f:
            f.write(content)

    def rm_kubic_repo(self):
        """Remove the kubic GPG key and apt sources entry."""
        keyring = self.kubric_repo_gpgkey_path()
        if os.path.exists(keyring):
            logger.info('Removing repo GPG key %s...' % keyring)
            os.unlink(keyring)

        p = self.kubic_repo_path()
        if os.path.exists(p):
            logger.info('Removing repo at %s...' % p)
            os.unlink(p)
5739 | ||
f6b5b4d7 | 5740 | |
9f95a23c TL |
class YumDnf(Packager):
    """Packager backend for yum/dnf based distros (CentOS/RHEL/Fedora)."""

    # map /etc/os-release ID -> (shaman distro name, repo code prefix)
    DISTRO_NAMES = {
        'centos': ('centos', 'el'),
        'rhel': ('centos', 'el'),
        'scientific': ('centos', 'el'),
        'fedora': ('fedora', 'fc'),
    }

    def __init__(self, ctx: CephadmContext,
                 stable, version, branch, commit,
                 distro, distro_version):
        super(YumDnf, self).__init__(ctx, stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.ctx = ctx
        self.major = int(distro_version.split('.')[0])
        self.distro_normalized = self.DISTRO_NAMES[distro][0]
        self.distro_code = self.DISTRO_NAMES[distro][1] + str(self.major)
        # dnf replaced yum on fedora >= 30 and el >= 8
        if (self.distro_code == 'fc' and self.major >= 30) or \
           (self.distro_code == 'el' and self.major >= 8):
            self.tool = 'dnf'
        else:
            self.tool = 'yum'

    def custom_repo(self, **kw):
        """
        Repo files need special care in that a whole line should not be present
        if there is no value for it. Because we were using `format()` we could
        not conditionally add a line for a repo file. So the end result would
        contain a key with a missing value (say if we were passing `None`).

        For example, it could look like::

            [ceph repo]
            name= ceph repo
            proxy=
            gpgcheck=

        Which breaks. This function allows us to conditionally add lines,
        preserving an order and be more careful.

        Previously, and for historical purposes, this is how the template used
        to look::

            custom_repo =
            [{repo_name}]
            name={name}
            baseurl={baseurl}
            enabled={enabled}
            gpgcheck={gpgcheck}
            type={_type}
            gpgkey={gpgkey}
            proxy={proxy}

        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self):
        # yum/dnf repo file location
        return '/etc/yum.repos.d/ceph.repo'

    def repo_baseurl(self):
        # only valid for stable/version installs; dev installs come
        # straight from shaman/chacra in add_repo()
        assert self.stable or self.version
        if self.version:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.version,
                                     self.distro_code)
        else:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url, self.stable,
                                     self.distro_code)

    def add_repo(self):
        """Write the ceph.repo file and (on el) enable EPEL."""
        if self.stable or self.version:
            # one section per arch flavor: binaries, noarch, and sources
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro_normalized, self.major,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

        if self.distro_code.startswith('el'):
            logger.info('Enabling EPEL...')
            call_throws(self.ctx, [self.tool, 'install', '-y', 'epel-release'])

    def rm_repo(self):
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, [self.tool, 'install', '-y'] + ls)

    def install_podman(self):
        self.install(['podman'])
5871 | ||
5872 | ||
class Zypper(Packager):
    """Packager backend for SUSE-family distros (SLES, openSUSE)."""

    DISTRO_NAMES = [
        'sles',
        'opensuse-tumbleweed',
        'opensuse-leap'
    ]

    def __init__(self, ctx: CephadmContext,
                 stable, version, branch, commit,
                 distro, distro_version):
        super(Zypper, self).__init__(ctx, stable=stable, version=version,
                                     branch=branch, commit=commit)
        self.ctx = ctx
        self.tool = 'zypper'
        self.distro = 'opensuse'
        # default to leap 15.1; tumbleweed has no meaningful version
        self.distro_version = '15.1'
        if 'tumbleweed' not in distro and distro_version is not None:
            self.distro_version = distro_version

    def custom_repo(self, **kw):
        """
        See YumDnf for format explanation.
        """
        lines = []

        # by using tuples (vs a dict) we preserve the order of what we want to
        # return, like starting with a [repo name]
        tmpl = (
            ('reponame', '[%s]'),
            ('name', 'name=%s'),
            ('baseurl', 'baseurl=%s'),
            ('enabled', 'enabled=%s'),
            ('gpgcheck', 'gpgcheck=%s'),
            ('_type', 'type=%s'),
            ('gpgkey', 'gpgkey=%s'),
            ('proxy', 'proxy=%s'),
            ('priority', 'priority=%s'),
        )

        for line in tmpl:
            tmpl_key, tmpl_value = line  # key values from tmpl

            # ensure that there is an actual value (not None nor empty string)
            if tmpl_key in kw and kw.get(tmpl_key) not in (None, ''):
                lines.append(tmpl_value % kw.get(tmpl_key))

        return '\n'.join(lines)

    def repo_path(self):
        # zypper repo file location
        return '/etc/zypp/repos.d/ceph.repo'

    def repo_baseurl(self):
        """Base URL for stable/version installs (dev uses shaman directly).

        BUGFIX: the version branch previously interpolated self.stable
        (a copy-paste of the else branch), so --version installs built a
        URL like .../rpm-None/... instead of .../rpm-<x.y.z>/...
        """
        assert self.stable or self.version
        if self.version:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url,
                                     self.version, self.distro)
        else:
            return '%s/rpm-%s/%s' % (self.ctx.repo_url,
                                     self.stable, self.distro)

    def add_repo(self):
        """Write the ceph.repo file for zypper."""
        if self.stable or self.version:
            # one section per arch flavor: binaries, noarch, and sources
            content = ''
            for n, t in {
                    'Ceph': '$basearch',
                    'Ceph-noarch': 'noarch',
                    'Ceph-source': 'SRPMS'}.items():
                content += '[%s]\n' % (n)
                content += self.custom_repo(
                    name='Ceph %s' % t,
                    baseurl=self.repo_baseurl() + '/' + t,
                    enabled=1,
                    gpgcheck=1,
                    gpgkey=self.repo_gpgkey()[0],
                )
                content += '\n\n'
        else:
            content = self.query_shaman(self.distro, self.distro_version,
                                        self.branch,
                                        self.commit)

        logger.info('Writing repo to %s...' % self.repo_path())
        with open(self.repo_path(), 'w') as f:
            f.write(content)

    def rm_repo(self):
        if os.path.exists(self.repo_path()):
            os.unlink(self.repo_path())

    def install(self, ls):
        logger.info('Installing packages %s...' % ls)
        call_throws(self.ctx, [self.tool, 'in', '-y'] + ls)

    def install_podman(self):
        self.install(['podman'])
5968 | ||
5969 | ||
f67539c2 TL |
def create_packager(ctx: CephadmContext,
                    stable=None, version=None, branch=None, commit=None):
    """Instantiate the Packager subclass matching the host's distro.

    Raises Error when the detected distro is not apt/yum-dnf/zypper based.
    """
    distro, distro_version, distro_codename = get_distro()
    source = dict(stable=stable, version=version, branch=branch, commit=commit)
    if distro in YumDnf.DISTRO_NAMES:
        return YumDnf(ctx, distro=distro, distro_version=distro_version,
                      **source)
    if distro in Apt.DISTRO_NAMES:
        return Apt(ctx, distro=distro, distro_version=distro_version,
                   distro_codename=distro_codename, **source)
    if distro in Zypper.DISTRO_NAMES:
        return Zypper(ctx, distro=distro, distro_version=distro_version,
                      **source)
    raise Error('Distro %s version %s not supported' % (distro, distro_version))
5987 | ||
5988 | ||
f67539c2 TL |
def command_add_repo(ctx: CephadmContext):
    """Configure a ceph package repo from --release/--version/--dev options."""
    if ctx.version and ctx.release:
        raise Error('you can specify either --release or --version but not both')
    if not ctx.version and not ctx.release and not ctx.dev and not ctx.dev_commit:
        raise Error('please supply a --release, --version, --dev or --dev-commit argument')
    if ctx.version:
        # require an x.y.z version string
        try:
            (major, minor, patch) = ctx.version.split('.')
        except Exception:
            raise Error('version must be in the form x.y.z (e.g., 15.2.0)')

    packager = create_packager(ctx, stable=ctx.release,
                               version=ctx.version,
                               branch=ctx.dev,
                               commit=ctx.dev_commit)
    packager.add_repo()
6005 | ||
f6b5b4d7 | 6006 | |
f67539c2 TL |
def command_rm_repo(ctx: CephadmContext):
    """Remove any ceph package repo previously configured on this host."""
    packager = create_packager(ctx)
    packager.rm_repo()
6010 | ||
f6b5b4d7 | 6011 | |
f67539c2 TL |
def command_install(ctx: CephadmContext):
    """Install the packages listed in ctx.packages via the distro packager."""
    packager = create_packager(ctx)
    packager.install(ctx.packages)
9f95a23c TL |
6015 | |
6016 | ################################## | |
6017 | ||
f67539c2 | 6018 | |
f91f0fd5 TL |
def get_ipv4_address(ifname):
    # type: (str) -> str
    """Return the IPv4 address of *ifname* in 'addr/prefixlen' form.

    Queries the kernel with the SIOCGIFADDR / SIOCGIFNETMASK ioctls on a
    throwaway UDP socket (no traffic is generated).  Returns '' when the
    interface has no IPv4 address or does not exist.
    """
    def _extract(sock, offset):
        # the ioctl fills a packed ifreq struct; bytes 20:24 are the in_addr
        return socket.inet_ntop(
            socket.AF_INET,
            fcntl.ioctl(
                sock.fileno(),
                offset,
                struct.pack('256s', bytes(ifname[:15], 'utf-8'))
            )[20:24])

    # BUGFIX: close the probe socket (the original leaked one fd per call)
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        try:
            addr = _extract(s, 35093)  # '0x8915' = SIOCGIFADDR
            dq_mask = _extract(s, 35099)  # 0x891b = SIOCGIFNETMASK
        except OSError:
            # interface does not have an ipv4 address
            return ''

    # count set bits of the dotted-quad netmask to get the prefix length
    dec_mask = sum([bin(int(i)).count('1')
                    for i in dq_mask.split('.')])
    return '{}/{}'.format(addr, dec_mask)
6041 | ||
6042 | ||
def get_ipv6_address(ifname):
    # type: (str) -> str
    """Return the interface's IPv6 address as 'addr/scope', or ''."""
    if not os.path.exists('/proc/net/if_inet6'):
        return ''

    # based on docs @ https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/ch11s04.html
    # field 0 is ipv6, field 2 is scope
    for entry in read_file(['/proc/net/if_inet6']).splitlines():
        fields = entry.split()
        if fields[-1] != ifname:
            continue
        raw_hex = fields[0]
        # re-insert the ':' separators every 4 hex digits
        grouped = ':'.join([raw_hex[pos:pos + 4] for pos in range(0, len(fields[0]), 4)])
        # apply naming rules using ipaddress module
        normalized = ipaddress.ip_address(grouped)
        return '{}/{}'.format(str(normalized), int('0x{}'.format(fields[2]), 16))
    return ''
6061 | ||
6062 | ||
def bytes_to_human(num, mode='decimal'):
    # type: (float, str) -> str
    """Convert a bytes value into its human-readable form.

    :param num: number, in bytes, to convert
    :param mode: Either decimal (default) or binary to determine divisor
    :returns: string representing the bytes value in a more readable format
    """
    if mode == 'binary':
        suffixes = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB']
        step = 1024.0
        top_unit = 'YiB'
    else:
        suffixes = ['', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
        step = 1000.0
        top_unit = 'YB'

    value = num
    for suffix in suffixes:
        if abs(value) < step:
            return '%3.1f%s' % (value, suffix)
        value /= step
    # anything beyond zetta lands in the yotta bucket
    return '%.1f%s' % (value, top_unit)
f91f0fd5 TL |
6085 | |
6086 | ||
def read_file(path_list, file_name=''):
    # type: (List[str], str) -> str
    """Returns the content of the first file found within the `path_list`

    :param path_list: list of file paths to search
    :param file_name: optional file_name to be applied to a file path
    :returns: content of the file or 'Unknown'
    """
    for base in path_list:
        candidate = os.path.join(base, file_name) if file_name else base
        if not os.path.exists(candidate):
            continue
        with open(candidate, 'r') as fh:
            try:
                return fh.read().strip()
            except OSError:
                # sysfs may populate the file, but for devices like
                # virtio reads can fail
                return 'Unknown'
    return 'Unknown'
f91f0fd5 TL |
6111 | |
6112 | ################################## | |
f67539c2 TL |
6113 | |
6114 | ||
f91f0fd5 TL |
class HostFacts():
    """Gather host-level metadata (cpu, memory, disks, nics, security).

    Values are read from /proc, /sys and DMI; anything that cannot be
    determined is reported as 'Unknown' (the contract of read_file).
    """

    _dmi_path_list = ['/sys/class/dmi/id']
    _nic_path_list = ['/sys/class/net']
    _selinux_path_list = ['/etc/selinux/config']
    _apparmor_path_list = ['/etc/apparmor']
    # Some (virtual) devices expose a raw PCI vendor id instead of a name
    _disk_vendor_workarounds = {
        '0x1af4': 'Virtio Block Device'
    }

    def __init__(self, ctx: CephadmContext):
        self.ctx: CephadmContext = ctx
        self.cpu_model: str = 'Unknown'
        self.cpu_count: int = 0
        self.cpu_cores: int = 0
        self.cpu_threads: int = 0
        self.interfaces: Dict[str, Any] = {}

        self._meminfo: List[str] = read_file(['/proc/meminfo']).splitlines()
        self._get_cpuinfo()
        self._process_nics()
        self.arch: str = platform.processor()
        self.kernel: str = platform.release()

    def _get_cpuinfo(self):
        # type: () -> None
        """Determine cpu information via /proc/cpuinfo"""
        raw = read_file(['/proc/cpuinfo'])
        output = raw.splitlines()
        cpu_set = set()

        for line in output:
            field = [f.strip() for f in line.split(':')]
            if 'model name' in line:
                self.cpu_model = field[1]
            if 'physical id' in line:
                # number of unique physical ids == socket count
                cpu_set.add(field[1])
            if 'siblings' in line:
                self.cpu_threads = int(field[1])
            if 'cpu cores' in line:
                self.cpu_cores = int(field[1])
        self.cpu_count = len(cpu_set)

    def _get_block_devs(self):
        # type: () -> List[str]
        """Determine the list of block devices by looking at /sys/block"""
        return [dev for dev in os.listdir('/sys/block')
                if not dev.startswith('dm')]

    def _get_devs_by_type(self, rota='0'):
        # type: (str) -> List[str]
        """Filter block devices by a given rotational attribute (0=flash, 1=spinner)"""
        devs = list()
        for blk_dev in self._get_block_devs():
            rot_path = '/sys/block/{}/queue/rotational'.format(blk_dev)
            rot_value = read_file([rot_path])
            if rot_value == rota:
                devs.append(blk_dev)
        return devs

    @property
    def operating_system(self):
        # type: () -> str
        """Determine OS version from /etc/os-release"""
        raw_info = read_file(['/etc/os-release'])
        os_release = raw_info.splitlines()
        rel_str = 'Unknown'
        rel_dict = dict()

        for line in os_release:
            if '=' in line:
                # split on the first '=' only - values may themselves
                # contain '=' characters
                var_name, var_value = line.split('=', 1)
                rel_dict[var_name] = var_value.strip('"')

        # Would normally use PRETTY_NAME, but NAME and VERSION are more
        # consistent
        if all(_v in rel_dict for _v in ['NAME', 'VERSION']):
            rel_str = '{} {}'.format(rel_dict['NAME'], rel_dict['VERSION'])
        return rel_str

    @property
    def hostname(self):
        # type: () -> str
        """Return the hostname"""
        return platform.node()

    @property
    def subscribed(self):
        # type: () -> str
        """Highlevel check to see if the host is subscribed to receive updates/support"""
        def _red_hat():
            # type: () -> str
            # RHEL 7 and RHEL 8: a subscribed host holds at least an
            # entitlement cert + key pair under /etc/pki/entitlement
            entitlements_dir = '/etc/pki/entitlement'
            if os.path.exists(entitlements_dir):
                pems = glob('{}/*.pem'.format(entitlements_dir))
                if len(pems) >= 2:
                    return 'Yes'

            return 'No'

        os_name = self.operating_system
        if os_name.upper().startswith('RED HAT'):
            return _red_hat()

        return 'Unknown'

    @property
    def hdd_count(self):
        # type: () -> int
        """Return a count of HDDs (spinners)"""
        return len(self._get_devs_by_type(rota='1'))

    def _get_capacity(self, dev):
        # type: (str) -> int
        """Determine the size of a given device (bytes)"""
        size_path = os.path.join('/sys/block', dev, 'size')
        size_blocks = int(read_file([size_path]))
        blk_path = os.path.join('/sys/block', dev, 'queue', 'logical_block_size')
        blk_count = int(read_file([blk_path]))
        return size_blocks * blk_count

    def _get_capacity_by_type(self, rota='0'):
        # type: (str) -> int
        """Return the total capacity of a category of device (flash or hdd)"""
        devs = self._get_devs_by_type(rota=rota)
        capacity = 0
        for dev in devs:
            capacity += self._get_capacity(dev)
        return capacity

    def _dev_list(self, dev_list):
        # type: (List[str]) -> List[Dict[str, object]]
        """Return a 'pretty' name list for each device in the `dev_list`"""
        disk_list = list()

        for dev in dev_list:
            disk_model = read_file(['/sys/block/{}/device/model'.format(dev)]).strip()
            disk_rev = read_file(['/sys/block/{}/device/rev'.format(dev)]).strip()
            disk_wwid = read_file(['/sys/block/{}/device/wwid'.format(dev)]).strip()
            vendor = read_file(['/sys/block/{}/device/vendor'.format(dev)]).strip()
            disk_vendor = HostFacts._disk_vendor_workarounds.get(vendor, vendor)
            disk_size_bytes = self._get_capacity(dev)
            disk_list.append({
                'description': '{} {} ({})'.format(disk_vendor, disk_model, bytes_to_human(disk_size_bytes)),
                'vendor': disk_vendor,
                'model': disk_model,
                'rev': disk_rev,
                'wwid': disk_wwid,
                'dev_name': dev,
                'disk_size_bytes': disk_size_bytes,
            })
        return disk_list

    @property
    def hdd_list(self):
        # type: () -> List[Dict[str, object]]
        """Return a list of devices that are HDDs (spinners)"""
        devs = self._get_devs_by_type(rota='1')
        return self._dev_list(devs)

    @property
    def flash_list(self):
        # type: () -> List[Dict[str, object]]
        """Return a list of devices that are flash based (SSD, NVMe)"""
        devs = self._get_devs_by_type(rota='0')
        return self._dev_list(devs)

    @property
    def hdd_capacity_bytes(self):
        # type: () -> int
        """Return the total capacity for all HDD devices (bytes)"""
        return self._get_capacity_by_type(rota='1')

    @property
    def hdd_capacity(self):
        # type: () -> str
        """Return the total capacity for all HDD devices (human readable format)"""
        return bytes_to_human(self.hdd_capacity_bytes)

    @property
    def cpu_load(self):
        # type: () -> Dict[str, float]
        """Return the cpu load average data for the host"""
        raw = read_file(['/proc/loadavg']).strip()
        data = raw.split()
        return {
            '1min': float(data[0]),
            '5min': float(data[1]),
            '15min': float(data[2]),
        }

    @property
    def flash_count(self):
        # type: () -> int
        """Return the number of flash devices in the system (SSD, NVMe)"""
        return len(self._get_devs_by_type(rota='0'))

    @property
    def flash_capacity_bytes(self):
        # type: () -> int
        """Return the total capacity for all flash devices (bytes)"""
        return self._get_capacity_by_type(rota='0')

    @property
    def flash_capacity(self):
        # type: () -> str
        """Return the total capacity for all Flash devices (human readable format)"""
        return bytes_to_human(self.flash_capacity_bytes)

    def _process_nics(self):
        # type: () -> None
        """Look at the NIC devices and extract network related metadata"""
        # from https://github.com/torvalds/linux/blob/master/include/uapi/linux/if_arp.h
        hw_lookup = {
            '1': 'ethernet',
            '32': 'infiniband',
            '772': 'loopback',
        }

        for nic_path in HostFacts._nic_path_list:
            if not os.path.exists(nic_path):
                continue
            for iface in os.listdir(nic_path):

                lower_devs_list = [os.path.basename(link.replace('lower_', '')) for link in glob(os.path.join(nic_path, iface, 'lower_*'))]
                upper_devs_list = [os.path.basename(link.replace('upper_', '')) for link in glob(os.path.join(nic_path, iface, 'upper_*'))]

                try:
                    mtu = int(read_file([os.path.join(nic_path, iface, 'mtu')]))
                except ValueError:
                    mtu = 0

                operstate = read_file([os.path.join(nic_path, iface, 'operstate')])
                try:
                    speed = int(read_file([os.path.join(nic_path, iface, 'speed')]))
                except (OSError, ValueError):
                    # OSError : device doesn't support the ethtool get_link_ksettings
                    # ValueError : raised when the read fails, and returns Unknown
                    #
                    # Either way, we show a -1 when speed isn't available
                    speed = -1

                if os.path.exists(os.path.join(nic_path, iface, 'bridge')):
                    nic_type = 'bridge'
                elif os.path.exists(os.path.join(nic_path, iface, 'bonding')):
                    nic_type = 'bonding'
                else:
                    nic_type = hw_lookup.get(read_file([os.path.join(nic_path, iface, 'type')]), 'Unknown')

                dev_link = os.path.join(nic_path, iface, 'device')
                if os.path.exists(dev_link):
                    iftype = 'physical'
                    driver_path = os.path.join(dev_link, 'driver')
                    if os.path.exists(driver_path):
                        driver = os.path.basename(os.path.realpath(driver_path))
                    else:
                        driver = 'Unknown'

                else:
                    iftype = 'logical'
                    driver = ''

                self.interfaces[iface] = {
                    'mtu': mtu,
                    'upper_devs_list': upper_devs_list,
                    'lower_devs_list': lower_devs_list,
                    'operstate': operstate,
                    'iftype': iftype,
                    'nic_type': nic_type,
                    'driver': driver,
                    'speed': speed,
                    'ipv4_address': get_ipv4_address(iface),
                    'ipv6_address': get_ipv6_address(iface),
                }

    @property
    def nic_count(self):
        # type: () -> int
        """Return a total count of all physical NICs detected in the host"""
        return sum(1 for iface in self.interfaces
                   if self.interfaces[iface]['iftype'] == 'physical')

    def _get_mem_data(self, field_name):
        # type: (str) -> int
        """Return the value (kB) of a given /proc/meminfo field, or 0 if absent"""
        for line in self._meminfo:
            if line.startswith(field_name):
                _d = line.split()
                return int(_d[1])
        return 0

    @property
    def memory_total_kb(self):
        # type: () -> int
        """Determine the memory installed (kb)"""
        return self._get_mem_data('MemTotal')

    @property
    def memory_free_kb(self):
        # type: () -> int
        """Determine the memory free (not cache, immediately usable)"""
        return self._get_mem_data('MemFree')

    @property
    def memory_available_kb(self):
        # type: () -> int
        """Determine the memory available to new applications without swapping"""
        return self._get_mem_data('MemAvailable')

    @property
    def vendor(self):
        # type: () -> str
        """Determine server vendor from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, 'sys_vendor')

    @property
    def model(self):
        # type: () -> str
        """Determine server model information from DMI data in sysfs"""
        family = read_file(HostFacts._dmi_path_list, 'product_family')
        product = read_file(HostFacts._dmi_path_list, 'product_name')
        if family == 'Unknown' and product:
            return '{}'.format(product)

        return '{} ({})'.format(family, product)

    @property
    def bios_version(self):
        # type: () -> str
        """Determine server BIOS version from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, 'bios_version')

    @property
    def bios_date(self):
        # type: () -> str
        """Determine server BIOS date from DMI data in sysfs"""
        return read_file(HostFacts._dmi_path_list, 'bios_date')

    @property
    def timestamp(self):
        # type: () -> float
        """Return the current time as Epoch seconds"""
        return time.time()

    @property
    def system_uptime(self):
        # type: () -> float
        """Return the system uptime (in secs)"""
        raw_time = read_file(['/proc/uptime'])
        up_secs, _ = raw_time.split()
        return float(up_secs)

    @property
    def kernel_security(self):
        # type: () -> Dict[str, str]
        """Determine the security features enabled in the kernel - SELinux, AppArmor"""
        def _fetch_selinux() -> Dict[str, str]:
            """Read the selinux config file to determine state"""
            security = {}
            for selinux_path in HostFacts._selinux_path_list:
                if os.path.exists(selinux_path):
                    selinux_config = read_file([selinux_path]).splitlines()
                    security['type'] = 'SELinux'
                    for line in selinux_config:
                        line = line.strip()
                        # skip comments and blank/malformed lines: the stock
                        # /etc/selinux/config contains blank lines, which
                        # would break the unpack below
                        if not line or line.startswith('#') or '=' not in line:
                            continue
                        # split on the first '=' only, in case the value
                        # itself contains '='
                        k, v = line.split('=', 1)
                        security[k] = v
                    # a missing key is treated as 'Unknown' rather than
                    # raising KeyError
                    sel_state = security.get('SELINUX', 'Unknown')
                    if sel_state.lower() == 'disabled':
                        security['description'] = 'SELinux: Disabled'
                    else:
                        security['description'] = 'SELinux: Enabled({}, {})'.format(
                            sel_state, security.get('SELINUXTYPE', 'Unknown'))
                    return security
            return {}

        def _fetch_apparmor() -> Dict[str, str]:
            """Read the apparmor profiles directly, returning an overview of AppArmor status"""
            security = {}
            for apparmor_path in HostFacts._apparmor_path_list:
                if os.path.exists(apparmor_path):
                    security['type'] = 'AppArmor'
                    security['description'] = 'AppArmor: Enabled'
                    try:
                        profiles = read_file(['/sys/kernel/security/apparmor/profiles'])
                    except OSError:
                        pass
                    else:
                        summary = {}  # type: Dict[str, int]
                        for line in profiles.split('\n'):
                            # blank lines would break the parse below
                            if not line.strip():
                                continue
                            # each line is '<profile name> (<mode>)'; take the
                            # last space-separated field so profile names that
                            # contain spaces are handled too
                            mode = line.rsplit(' ', 1)[-1].strip('()')
                            # count every profile - previously the first
                            # profile seen for a mode was initialised to 0,
                            # undercounting each mode by one
                            summary[mode] = summary.get(mode, 0) + 1
                        summary_str = ','.join(['{} {}'.format(v, k) for k, v in summary.items()])
                        security = {**security, **summary}  # type: ignore
                        security['description'] += '({})'.format(summary_str)

                    return security
            return {}

        ret = {}
        if os.path.exists('/sys/kernel/security/lsm'):
            lsm = read_file(['/sys/kernel/security/lsm']).strip()
            if 'selinux' in lsm:
                ret = _fetch_selinux()
            elif 'apparmor' in lsm:
                ret = _fetch_apparmor()
            else:
                return {
                    'type': 'Unknown',
                    'description': 'Linux Security Module framework is active, but is not using SELinux or AppArmor'
                }

        if ret:
            return ret

        return {
            'type': 'None',
            'description': 'Linux Security Module framework is not available'
        }

    @property
    def selinux_enabled(self):
        # type: () -> bool
        """True when the active LSM is SELinux and it is not disabled"""
        return (self.kernel_security['type'] == 'SELinux') and \
               (self.kernel_security['description'] != 'SELinux: Disabled')

    @property
    def kernel_parameters(self):
        # type: () -> Dict[str, str]
        """Get kernel parameters required/used in Ceph clusters"""

        k_param = {}
        out, _, _ = call_throws(self.ctx, ['sysctl', '-a'], verbosity=CallVerbosity.SILENT)
        if out:
            param_list = out.split('\n')
            param_dict = {param.split(' = ')[0]: param.split(' = ')[-1] for param in param_list}

            # return only desired parameters
            if 'net.ipv4.ip_nonlocal_bind' in param_dict:
                k_param['net.ipv4.ip_nonlocal_bind'] = param_dict['net.ipv4.ip_nonlocal_bind']

        return k_param

    def dump(self):
        # type: () -> str
        """Return the attributes of this HostFacts object as json"""
        data = {
            k: getattr(self, k) for k in dir(self)
            if not k.startswith('_')
            and isinstance(getattr(self, k), (float, int, str, list, dict, tuple))
        }
        return json.dumps(data, indent=2, sort_keys=True)
6572 | ||
6573 | ################################## | |
6574 | ||
f67539c2 TL |
6575 | |
def command_gather_facts(ctx: CephadmContext):
    """Print host related metadata, as JSON, for the caller (gather-facts)."""
    print(HostFacts(ctx).dump())
6580 | ||
f67539c2 TL |
6581 | ################################## |
6582 | ||
6583 | ||
def command_verify_prereqs(ctx: CephadmContext):
    """Check host prerequisites for the requested service type.

    haproxy/keepalived need net.ipv4.ip_nonlocal_bind enabled so they can
    bind to a VIP the host does not currently own.

    :raises Error: when the sysctl is not set to 1
    """
    if ctx.service_type not in ('haproxy', 'keepalived'):
        return
    out, _, _ = call(
        ctx, ['sysctl', '-n', 'net.ipv4.ip_nonlocal_bind']
    )
    if out.strip() != '1':
        raise Error('net.ipv4.ip_nonlocal_bind not set to 1')
6591 | ||
6592 | ################################## | |
6593 | ||
6594 | ||
class CephadmCache:
    """Thread-safe store for the metadata gathered by the exporter threads."""

    # the scraper/server threads this cache tracks state for
    task_types = ['disks', 'daemons', 'host', 'http_server']

    def __init__(self):
        self.started_epoch_secs = time.time()
        # every task starts out inactive until its thread reports in
        self.tasks = dict.fromkeys(
            ('daemons', 'disks', 'host', 'http_server'), 'inactive')
        self.errors = []
        self.disks = {}
        self.daemons = {}
        self.host = {}
        self.lock = RLock()

    @property
    def health(self):
        """Snapshot of start time, per-task states and accumulated errors."""
        return {
            'started_epoch_secs': self.started_epoch_secs,
            'tasks': self.tasks,
            'errors': self.errors,
        }

    def to_json(self):
        """Return the complete cache content as a plain dict."""
        return {
            'health': self.health,
            'host': self.host,
            'daemons': self.daemons,
            'disks': self.disks,
        }

    def update_health(self, task_type, task_status, error_msg=None):
        """Record a task's status and, optionally, an error message."""
        assert task_type in CephadmCache.task_types
        with self.lock:
            self.tasks[task_type] = task_status
            if error_msg:
                self.errors.append(error_msg)

    def update_task(self, task_type, content):
        """Merge `content` into the cached data for `task_type`."""
        assert task_type in CephadmCache.task_types
        assert isinstance(content, dict)
        with self.lock:
            target = getattr(self, task_type)
            target.update(content)
            setattr(self, task_type, target)
6644 | ||
6645 | ||
class CephadmHTTPServer(ThreadingMixIn, HTTPServer):
    """HTTP server for the cephadm-exporter: one daemon thread per request."""
    # allow quick restarts without waiting for TIME_WAIT on the listen port
    allow_reuse_address = True
    # request threads must not block interpreter shutdown
    daemon_threads = True
    # shared metadata cache, populated by the scraper threads
    cephadm_cache: CephadmCache
    # bearer token callers must present (checked by the request handler)
    token: str
6651 | ||
6652 | ||
class CephadmDaemonHandler(BaseHTTPRequestHandler):
    """Request handler serving the exporter's JSON metadata endpoints."""
    server: CephadmHTTPServer
    api_version = 'v1'
    # the only paths do_GET will serve (besides '/'); anything else is a 404
    valid_routes = [
        f'/{api_version}/metadata',
        f'/{api_version}/metadata/health',
        f'/{api_version}/metadata/disks',
        f'/{api_version}/metadata/daemons',
        f'/{api_version}/metadata/host',
    ]

    class Decorators:
        @classmethod
        def authorize(cls, f):
            """Implement a basic token check.

            The token is installed at deployment time and must be provided to
            ensure we only respond to callers who know our token i.e. mgr
            """
            def wrapper(self, *args, **kwargs):
                auth = self.headers.get('Authorization', None)
                # constant bearer token comparison; reject with 401 on mismatch
                if auth != 'Bearer ' + self.server.token:
                    self.send_error(401)
                    return
                f(self, *args, **kwargs)
            return wrapper

    def _help_page(self):
        # type: () -> str
        """Return a static HTML page documenting the available endpoints.

        The doubled braces ({{ }}) in the CSS survive str.format() as
        literal braces; only {api_version} is substituted.
        """
        return """<!DOCTYPE html>
<html>
<head><title>cephadm metadata exporter</title></head>
<style>
body {{
    font-family: sans-serif;
    font-size: 0.8em;
}}
table {{
    border-width: 0px;
    border-spacing: 0px;
    margin-left:20px;
}}
tr:hover {{
    background: PowderBlue;
}}
td,th {{
    padding: 5px;
}}
</style>
<body>
    <h1>cephadm metadata exporter {api_version}</h1>
    <table>
      <thead>
        <tr><th>Endpoint</th><th>Methods</th><th>Response</th><th>Description</th></tr>
      </thead>
      <tr><td><a href='{api_version}/metadata'>{api_version}/metadata</a></td><td>GET</td><td>JSON</td><td>Return <b>all</b> metadata for the host</td></tr>
      <tr><td><a href='{api_version}/metadata/daemons'>{api_version}/metadata/daemons</a></td><td>GET</td><td>JSON</td><td>Return daemon and systemd states for ceph daemons (ls)</td></tr>
      <tr><td><a href='{api_version}/metadata/disks'>{api_version}/metadata/disks</a></td><td>GET</td><td>JSON</td><td>show disk inventory (ceph-volume)</td></tr>
      <tr><td><a href='{api_version}/metadata/health'>{api_version}/metadata/health</a></td><td>GET</td><td>JSON</td><td>Show current health of the exporter sub-tasks</td></tr>
      <tr><td><a href='{api_version}/metadata/host'>{api_version}/metadata/host</a></td><td>GET</td><td>JSON</td><td>Show host metadata (gather-facts)</td></tr>
    </table>
</body>
</html>""".format(api_version=CephadmDaemonHandler.api_version)

    def _fetch_root(self):
        # type: () -> None
        """Serve the HTML help page for requests against '/'."""
        self.send_response(200)
        self.send_header('Content-type', 'text/html; charset=utf-8')
        self.end_headers()
        self.wfile.write(self._help_page().encode('utf-8'))

    @Decorators.authorize
    def do_GET(self):
        """Handle *all* GET requests"""

        if self.path == '/':
            # provide a html response if someone hits the root url, to document the
            # available api endpoints
            return self._fetch_root()
        elif self.path in CephadmDaemonHandler.valid_routes:
            u = self.path.split('/')[-1]
            data = json.dumps({})
            status_code = 200

            tasks = self.server.cephadm_cache.health.get('tasks', {})
            assert tasks

            # We're using the http status code to help indicate thread health
            # - 200 (OK): request successful
            # - 204 (No Content): access to a cache relating to a dead thread
            # - 206 (Partial content): one or more threads are inactive
            # - 500 (Server Error): all threads inactive
            if u == 'metadata':
                data = json.dumps(self.server.cephadm_cache.to_json())
                if all([tasks[task_name] == 'inactive' for task_name in tasks if task_name != 'http_server']):
                    # All the subtasks are dead!
                    status_code = 500
                elif any([tasks[task_name] == 'inactive' for task_name in tasks if task_name != 'http_server']):
                    status_code = 206

            # Individual GETs against a task's endpoint will also return a 204 if the corresponding thread is inactive
            elif u == 'daemons':
                data = json.dumps(self.server.cephadm_cache.daemons)
                if tasks['daemons'] == 'inactive':
                    status_code = 204
            elif u == 'disks':
                data = json.dumps(self.server.cephadm_cache.disks)
                if tasks['disks'] == 'inactive':
                    status_code = 204
            elif u == 'host':
                data = json.dumps(self.server.cephadm_cache.host)
                if tasks['host'] == 'inactive':
                    status_code = 204

            # a GET against health will always return a 200, since the op is always successful
            elif u == 'health':
                data = json.dumps(self.server.cephadm_cache.health)

            self.send_response(status_code)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(data.encode('utf-8'))
        else:
            # Invalid GET URL
            bad_request_msg = 'Valid URLs are: {}'.format(', '.join(CephadmDaemonHandler.valid_routes))
            self.send_response(404, message=bad_request_msg)  # reason
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(json.dumps({'message': bad_request_msg}).encode('utf-8'))

    def log_message(self, format, *args):
        # route BaseHTTPRequestHandler's stderr logging into our logger
        rqst = ' '.join(str(a) for a in args)
        logger.info(f'client:{self.address_string()} [{self.log_date_time_string()}] {rqst}')
6784 | ||
6785 | ||
6786 | class CephadmDaemon(): | |
6787 | ||
6788 | daemon_type = 'cephadm-exporter' | |
6789 | default_port = 9443 | |
6790 | key_name = 'key' | |
6791 | crt_name = 'crt' | |
6792 | token_name = 'token' | |
6793 | config_requirements = [ | |
6794 | key_name, | |
6795 | crt_name, | |
6796 | token_name, | |
6797 | ] | |
6798 | loop_delay = 1 | |
6799 | thread_check_interval = 5 | |
6800 | ||
6801 | def __init__(self, ctx: CephadmContext, fsid, daemon_id=None, port=None): | |
6802 | self.ctx = ctx | |
6803 | self.fsid = fsid | |
6804 | self.daemon_id = daemon_id | |
6805 | if not port: | |
6806 | self.port = CephadmDaemon.default_port | |
6807 | else: | |
6808 | self.port = port | |
6809 | self.workers: List[Thread] = [] | |
6810 | self.http_server: CephadmHTTPServer | |
6811 | self.stop = False | |
6812 | self.cephadm_cache = CephadmCache() | |
6813 | self.errors: List[str] = [] | |
6814 | self.token = read_file([os.path.join(self.daemon_path, CephadmDaemon.token_name)]) | |
6815 | ||
6816 | @classmethod | |
6817 | def validate_config(cls, config): | |
6818 | reqs = ', '.join(CephadmDaemon.config_requirements) | |
6819 | errors = [] | |
6820 | ||
6821 | if not config or not all([k_name in config for k_name in CephadmDaemon.config_requirements]): | |
6822 | raise Error(f'config must contain the following fields : {reqs}') | |
6823 | ||
6824 | if not all([isinstance(config[k_name], str) for k_name in CephadmDaemon.config_requirements]): | |
6825 | errors.append(f'the following fields must be strings: {reqs}') | |
6826 | ||
6827 | crt = config[CephadmDaemon.crt_name] | |
6828 | key = config[CephadmDaemon.key_name] | |
6829 | token = config[CephadmDaemon.token_name] | |
6830 | ||
6831 | if not crt.startswith('-----BEGIN CERTIFICATE-----') or not crt.endswith('-----END CERTIFICATE-----\n'): | |
6832 | errors.append('crt field is not a valid SSL certificate') | |
6833 | if not key.startswith('-----BEGIN PRIVATE KEY-----') or not key.endswith('-----END PRIVATE KEY-----\n'): | |
6834 | errors.append('key is not a valid SSL private key') | |
6835 | if len(token) < 8: | |
6836 | errors.append("'token' must be more than 8 characters long") | |
6837 | ||
6838 | if 'port' in config: | |
6839 | try: | |
6840 | p = int(config['port']) | |
6841 | if p <= 1024: | |
6842 | raise ValueError | |
6843 | except (TypeError, ValueError): | |
6844 | errors.append('port must be an integer > 1024') | |
6845 | ||
6846 | if errors: | |
6847 | raise Error('Parameter errors : {}'.format(', '.join(errors))) | |
6848 | ||
6849 | @property | |
6850 | def port_active(self): | |
6851 | return port_in_use(self.ctx, self.port) | |
6852 | ||
6853 | @property | |
6854 | def can_run(self): | |
6855 | # if port is in use | |
6856 | if self.port_active: | |
6857 | self.errors.append(f'TCP port {self.port} already in use, unable to bind') | |
6858 | if not os.path.exists(os.path.join(self.daemon_path, CephadmDaemon.key_name)): | |
6859 | self.errors.append(f"Key file '{CephadmDaemon.key_name}' is missing from {self.daemon_path}") | |
6860 | if not os.path.exists(os.path.join(self.daemon_path, CephadmDaemon.crt_name)): | |
6861 | self.errors.append(f"Certificate file '{CephadmDaemon.crt_name}' is missing from {self.daemon_path}") | |
6862 | if self.token == 'Unknown': | |
6863 | self.errors.append(f"Authentication token '{CephadmDaemon.token_name}' is missing from {self.daemon_path}") | |
6864 | return len(self.errors) == 0 | |
6865 | ||
6866 | @staticmethod | |
6867 | def _unit_name(fsid, daemon_id): | |
6868 | return '{}.service'.format(get_unit_name(fsid, CephadmDaemon.daemon_type, daemon_id)) | |
6869 | ||
6870 | @property | |
6871 | def unit_name(self): | |
6872 | return CephadmDaemon._unit_name(self.fsid, self.daemon_id) | |
6873 | ||
6874 | @property | |
6875 | def daemon_path(self): | |
6876 | return os.path.join( | |
6877 | self.ctx.data_dir, | |
6878 | self.fsid, | |
6879 | f'{self.daemon_type}.{self.daemon_id}' | |
6880 | ) | |
6881 | ||
6882 | @property | |
6883 | def binary_path(self): | |
6884 | path = os.path.realpath(__file__) | |
6885 | assert os.path.isfile(path) | |
6886 | return path | |
6887 | ||
6888 | def _handle_thread_exception(self, exc, thread_type): | |
6889 | e_msg = f'{exc.__class__.__name__} exception: {str(exc)}' | |
6890 | thread_info = getattr(self.cephadm_cache, thread_type) | |
6891 | errors = thread_info.get('scrape_errors', []) | |
6892 | errors.append(e_msg) | |
6893 | logger.error(e_msg) | |
6894 | logger.exception(exc) | |
6895 | self.cephadm_cache.update_task( | |
6896 | thread_type, | |
6897 | { | |
6898 | 'scrape_errors': errors, | |
6899 | 'data': None, | |
6900 | } | |
6901 | ) | |
6902 | ||
    def _scrape_host_facts(self, refresh_interval=10):
        # type: (int) -> None
        """Worker-thread loop: periodically refresh host facts in the cache.

        Sleeps in ``loop_delay`` increments (rather than the full interval)
        so a shutdown request is noticed quickly. The loop exits when
        ``self.stop`` is set or a scrape raises (the exception is recorded
        via _handle_thread_exception and the thread stops).
        """
        ctr = 0
        exception_encountered = False

        while True:

            if self.stop or exception_encountered:
                break

            if ctr >= refresh_interval:
                ctr = 0
                logger.debug('executing host-facts scrape')
                errors = []
                s_time = time.time()

                try:
                    facts = HostFacts(self.ctx)
                except Exception as e:
                    self._handle_thread_exception(e, 'host')
                    exception_encountered = True
                else:
                    elapsed = time.time() - s_time
                    try:
                        # round-trip through JSON so the cache holds plain dicts
                        data = json.loads(facts.dump())
                    except json.decoder.JSONDecodeError:
                        errors.append('host-facts provided invalid JSON')
                        logger.warning(errors[-1])
                        data = {}
                    self.cephadm_cache.update_task(
                        'host',
                        {
                            'scrape_timestamp': s_time,
                            'scrape_duration_secs': elapsed,
                            'scrape_errors': errors,
                            'data': data,
                        }
                    )
                    logger.debug(f'completed host-facts scrape - {elapsed}s')

            time.sleep(CephadmDaemon.loop_delay)
            ctr += CephadmDaemon.loop_delay
        logger.info('host-facts thread stopped')
6945 | ||
    def _scrape_ceph_volume(self, refresh_interval=15):
        # type: (int) -> None
        """Worker-thread loop: periodically refresh ceph-volume inventory.

        Captures the stdout of command_ceph_volume() and stores the parsed
        JSON in the cache. Exits when ``self.stop`` is set or a scrape
        raises. NOTE: mutates the shared ``self.ctx`` (command/fsid/
        log_output) so command_ceph_volume sees the args it expects.
        """
        # we're invoking the ceph_volume command, so we need to set the args that it
        # expects to use
        self.ctx.command = 'inventory --format=json'.split()
        self.ctx.fsid = self.fsid
        self.ctx.log_output = False

        ctr = 0
        exception_encountered = False

        while True:
            if self.stop or exception_encountered:
                break

            if ctr >= refresh_interval:
                ctr = 0
                logger.debug('executing ceph-volume scrape')
                errors = []
                s_time = time.time()
                stream = io.StringIO()
                try:
                    # ceph-volume writes to stdout; capture it in-process
                    with redirect_stdout(stream):
                        command_ceph_volume(self.ctx)
                except Exception as e:
                    self._handle_thread_exception(e, 'disks')
                    exception_encountered = True
                else:
                    elapsed = time.time() - s_time

                    # if the call to ceph-volume returns junk with the
                    # json, it won't parse
                    stdout = stream.getvalue()

                    data = []
                    if stdout:
                        try:
                            data = json.loads(stdout)
                        except json.decoder.JSONDecodeError:
                            errors.append('ceph-volume thread provided bad json data')
                            logger.warning(errors[-1])
                    else:
                        errors.append('ceph-volume did not return any data')
                        logger.warning(errors[-1])

                    self.cephadm_cache.update_task(
                        'disks',
                        {
                            'scrape_timestamp': s_time,
                            'scrape_duration_secs': elapsed,
                            'scrape_errors': errors,
                            'data': data,
                        }
                    )

                    logger.debug(f'completed ceph-volume scrape - {elapsed}s')
            time.sleep(CephadmDaemon.loop_delay)
            ctr += CephadmDaemon.loop_delay

        logger.info('ceph-volume thread stopped')
7005 | ||
7006 | def _scrape_list_daemons(self, refresh_interval=20): | |
7007 | ctr = 0 | |
7008 | exception_encountered = False | |
7009 | while True: | |
7010 | if self.stop or exception_encountered: | |
7011 | break | |
7012 | ||
7013 | if ctr >= refresh_interval: | |
7014 | ctr = 0 | |
7015 | logger.debug('executing list-daemons scrape') | |
7016 | errors = [] | |
7017 | s_time = time.time() | |
7018 | ||
7019 | try: | |
7020 | # list daemons should ideally be invoked with a fsid | |
7021 | data = list_daemons(self.ctx) | |
7022 | except Exception as e: | |
7023 | self._handle_thread_exception(e, 'daemons') | |
7024 | exception_encountered = True | |
7025 | else: | |
7026 | if not isinstance(data, list): | |
7027 | errors.append('list-daemons did not supply a list?') | |
7028 | logger.warning(errors[-1]) | |
7029 | data = [] | |
7030 | elapsed = time.time() - s_time | |
7031 | self.cephadm_cache.update_task( | |
7032 | 'daemons', | |
7033 | { | |
7034 | 'scrape_timestamp': s_time, | |
7035 | 'scrape_duration_secs': elapsed, | |
7036 | 'scrape_errors': errors, | |
7037 | 'data': data, | |
7038 | } | |
7039 | ) | |
7040 | logger.debug(f'completed list-daemons scrape - {elapsed}s') | |
7041 | ||
7042 | time.sleep(CephadmDaemon.loop_delay) | |
7043 | ctr += CephadmDaemon.loop_delay | |
7044 | logger.info('list-daemons thread stopped') | |
7045 | ||
7046 | def _create_thread(self, target, name, refresh_interval=None): | |
7047 | if refresh_interval: | |
7048 | t = Thread(target=target, args=(refresh_interval,)) | |
7049 | else: | |
7050 | t = Thread(target=target) | |
7051 | t.daemon = True | |
7052 | t.name = name | |
7053 | self.cephadm_cache.update_health(name, 'active') | |
7054 | t.start() | |
7055 | ||
7056 | start_msg = f'Started {name} thread' | |
7057 | if refresh_interval: | |
7058 | logger.info(f'{start_msg}, with a refresh interval of {refresh_interval}s') | |
7059 | else: | |
7060 | logger.info(f'{start_msg}') | |
7061 | return t | |
7062 | ||
    def reload(self, *args):
        # type: (Any) -> None
        """Signal handler for SIGHUP (reload).

        This is a placeholder function only, and serves to provide the hook that could
        be exploited later if the exporter evolves to incorporate a config file
        """
        logger.info('Reload request received - ignoring, no action needed')
7070 | ||
    def shutdown(self, *args):
        # type: (Any) -> None
        """Signal handler for SIGTERM/SIGINT: stop workers and the http server.

        Setting ``self.stop`` makes the scraper threads exit their loops;
        shutting the http server down ends the main wait loop in run().
        """
        logger.info('Shutdown request received')
        self.stop = True
        self.http_server.shutdown()
7075 | ||
7076 | def run(self): | |
7077 | logger.info(f"cephadm exporter starting for FSID '{self.fsid}'") | |
7078 | if not self.can_run: | |
7079 | logger.error('Unable to start the exporter daemon') | |
7080 | for e in self.errors: | |
7081 | logger.error(e) | |
7082 | return | |
7083 | ||
7084 | # register signal handlers for running under systemd control | |
7085 | signal.signal(signal.SIGTERM, self.shutdown) | |
7086 | signal.signal(signal.SIGINT, self.shutdown) | |
7087 | signal.signal(signal.SIGHUP, self.reload) | |
7088 | logger.debug('Signal handlers attached') | |
7089 | ||
7090 | host_facts = self._create_thread(self._scrape_host_facts, 'host', 5) | |
7091 | self.workers.append(host_facts) | |
7092 | ||
7093 | daemons = self._create_thread(self._scrape_list_daemons, 'daemons', 20) | |
7094 | self.workers.append(daemons) | |
7095 | ||
7096 | disks = self._create_thread(self._scrape_ceph_volume, 'disks', 20) | |
7097 | self.workers.append(disks) | |
7098 | ||
7099 | self.http_server = CephadmHTTPServer(('0.0.0.0', self.port), CephadmDaemonHandler) # IPv4 only | |
7100 | self.http_server.socket = ssl.wrap_socket(self.http_server.socket, | |
7101 | keyfile=os.path.join(self.daemon_path, CephadmDaemon.key_name), | |
7102 | certfile=os.path.join(self.daemon_path, CephadmDaemon.crt_name), | |
7103 | server_side=True) | |
7104 | ||
7105 | self.http_server.cephadm_cache = self.cephadm_cache | |
7106 | self.http_server.token = self.token | |
7107 | server_thread = self._create_thread(self.http_server.serve_forever, 'http_server') | |
7108 | logger.info(f'https server listening on {self.http_server.server_address[0]}:{self.http_server.server_port}') | |
7109 | ||
7110 | ctr = 0 | |
7111 | while server_thread.is_alive(): | |
7112 | if self.stop: | |
7113 | break | |
7114 | ||
7115 | if ctr >= CephadmDaemon.thread_check_interval: | |
7116 | ctr = 0 | |
7117 | for worker in self.workers: | |
7118 | if self.cephadm_cache.tasks[worker.name] == 'inactive': | |
7119 | continue | |
7120 | if not worker.is_alive(): | |
7121 | logger.warning(f'{worker.name} thread not running') | |
7122 | stop_time = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S') | |
7123 | self.cephadm_cache.update_health(worker.name, 'inactive', f'{worker.name} stopped at {stop_time}') | |
7124 | ||
7125 | time.sleep(CephadmDaemon.loop_delay) | |
7126 | ctr += CephadmDaemon.loop_delay | |
7127 | ||
7128 | logger.info('Main http server thread stopped') | |
7129 | ||
7130 | @property | |
7131 | def unit_run(self): | |
7132 | ||
7133 | return """set -e | |
7134 | {py3} {bin_path} exporter --fsid {fsid} --id {daemon_id} --port {port} &""".format( | |
7135 | py3=shutil.which('python3'), | |
7136 | bin_path=self.binary_path, | |
7137 | fsid=self.fsid, | |
7138 | daemon_id=self.daemon_id, | |
7139 | port=self.port | |
7140 | ) | |
7141 | ||
7142 | @property | |
7143 | def unit_file(self): | |
7144 | docker = isinstance(self.ctx.container_engine, Docker) | |
7145 | return """#generated by cephadm | |
7146 | [Unit] | |
7147 | Description=cephadm exporter service for cluster {fsid} | |
7148 | After=network-online.target{docker_after} | |
7149 | Wants=network-online.target | |
7150 | {docker_requires} | |
7151 | ||
7152 | PartOf=ceph-{fsid}.target | |
7153 | Before=ceph-{fsid}.target | |
7154 | ||
7155 | [Service] | |
7156 | Type=forking | |
7157 | ExecStart=/bin/bash {daemon_path}/unit.run | |
7158 | ExecReload=/bin/kill -HUP $MAINPID | |
7159 | Restart=on-failure | |
7160 | RestartSec=10s | |
7161 | ||
7162 | [Install] | |
7163 | WantedBy=ceph-{fsid}.target | |
7164 | """.format(fsid=self.fsid, | |
7165 | daemon_path=self.daemon_path, | |
7166 | # if docker, we depend on docker.service | |
7167 | docker_after=' docker.service' if docker else '', | |
7168 | docker_requires='Requires=docker.service\n' if docker else '') | |
7169 | ||
    def deploy_daemon_unit(self, config=None):
        # type: (Optional[dict]) -> None
        """deploy a specific unit file for cephadm

        The normal deploy_daemon_units doesn't apply for this
        daemon since it's not a container, so we just create a
        simple service definition and add it to the fsid's target

        :param config: dict of filename -> file content to write into the
                       daemon's data dir (mode 0600)
        :raises Error: if no config was supplied
        """
        if not config:
            raise Error('Attempting to deploy cephadm daemon without a config')
        assert isinstance(config, dict)

        # Create the required config files in the daemons dir, with restricted permissions
        for filename in config:
            with open(os.open(os.path.join(self.daemon_path, filename), os.O_CREAT | os.O_WRONLY, mode=0o600), 'w') as f:
                f.write(config[filename])

        # When __file__ is <stdin> we're being invoked over remoto via the orchestrator, so
        # we pick up the file from where the orchestrator placed it - otherwise we'll
        # copy it to the binary location for this cluster
        if not __file__ == '<stdin>':
            shutil.copy(__file__,
                        self.binary_path)

        with open(os.path.join(self.daemon_path, 'unit.run'), 'w') as f:
            f.write(self.unit_run)

        # write the unit to a .new file first, then rename into place so the
        # visible unit file is never half-written
        with open(
            os.path.join(self.ctx.unit_dir,
                         f'{self.unit_name}.new'),
            'w'
        ) as f:
            f.write(self.unit_file)
        os.rename(
            os.path.join(self.ctx.unit_dir, f'{self.unit_name}.new'),
            os.path.join(self.ctx.unit_dir, self.unit_name))

        # stop/reset-failed use call() (failure tolerated: the unit may not
        # exist yet); daemon-reload and enable --now must succeed
        call_throws(self.ctx, ['systemctl', 'daemon-reload'])
        call(self.ctx, ['systemctl', 'stop', self.unit_name],
             verbosity=CallVerbosity.DEBUG)
        call(self.ctx, ['systemctl', 'reset-failed', self.unit_name],
             verbosity=CallVerbosity.DEBUG)
        call_throws(self.ctx, ['systemctl', 'enable', '--now', self.unit_name])
7212 | ||
7213 | @classmethod | |
7214 | def uninstall(cls, ctx: CephadmContext, fsid, daemon_type, daemon_id): | |
7215 | unit_name = CephadmDaemon._unit_name(fsid, daemon_id) | |
7216 | unit_path = os.path.join(ctx.unit_dir, unit_name) | |
7217 | unit_run = os.path.join(ctx.data_dir, fsid, f'{daemon_type}.{daemon_id}', 'unit.run') | |
7218 | port = None | |
7219 | try: | |
7220 | with open(unit_run, 'r') as u: | |
7221 | contents = u.read().strip(' &') | |
7222 | except OSError: | |
7223 | logger.warning(f'Unable to access the unit.run file @ {unit_run}') | |
7224 | return | |
7225 | ||
7226 | port = None | |
7227 | for line in contents.split('\n'): | |
7228 | if '--port ' in line: | |
7229 | try: | |
7230 | port = int(line.split('--port ')[-1]) | |
7231 | except ValueError: | |
7232 | logger.warning('Unexpected format in unit.run file: port is not numeric') | |
7233 | logger.warning('Unable to remove the systemd file and close the port') | |
7234 | return | |
7235 | break | |
7236 | ||
7237 | if port: | |
7238 | fw = Firewalld(ctx) | |
7239 | try: | |
7240 | fw.close_ports([port]) | |
7241 | except RuntimeError: | |
7242 | logger.error(f'Unable to close port {port}') | |
7243 | ||
7244 | stdout, stderr, rc = call(ctx, ['rm', '-f', unit_path]) | |
7245 | if rc: | |
7246 | logger.error(f'Unable to remove the systemd file @ {unit_path}') | |
7247 | else: | |
7248 | logger.info(f'removed systemd unit file @ {unit_path}') | |
7249 | stdout, stderr, rc = call(ctx, ['systemctl', 'daemon-reload']) | |
7250 | ||
7251 | ||
def command_exporter(ctx: CephadmContext):
    # type: (CephadmContext) -> None
    """Entry point for the `cephadm exporter` subcommand.

    :raises Error: if the given fsid has no data directory on this host.
    """
    # Fail fast: validate the cluster exists on this host before building
    # the daemon object, rather than constructing it and checking after.
    if ctx.fsid not in os.listdir(ctx.data_dir):
        raise Error(f"cluster fsid '{ctx.fsid}' not found in '{ctx.data_dir}'")

    exporter = CephadmDaemon(ctx, ctx.fsid, daemon_id=ctx.id, port=ctx.port)
    exporter.run()
7259 | ||
7260 | ################################## | |
7261 | ||
7262 | ||
def systemd_target_state(target_name: str, subsystem: str = 'ceph') -> bool:
    """Return True if *target_name* is wanted by the subsystem's target.

    Checks for the symlink systemd creates under
    ``<UNIT_DIR>/<subsystem>.target.wants/`` when a unit is enabled.
    """
    # TODO: UNITTEST
    wants_path = os.path.join(UNIT_DIR, f'{subsystem}.target.wants', target_name)
    return os.path.exists(wants_path)
7272 | ||
7273 | ||
@infer_fsid
def command_maintenance(ctx: CephadmContext):
    # type: (CephadmContext) -> Optional[str]
    """Enter or exit host maintenance by disabling/enabling the cluster target.

    Returns a human-readable status string starting with 'success',
    'failed' or 'skipped' (consumed by the caller — do not change the
    wording).
    NOTE(review): the exit-maintenance path returns None when the target
    is already enabled — looks like a missing 'skipped' return; confirm
    against the caller.
    """
    if not ctx.fsid:
        raise Error('must pass --fsid to specify cluster')

    target = f'ceph-{ctx.fsid}.target'

    if ctx.maintenance_action.lower() == 'enter':
        logger.info('Requested to place host into maintenance')
        if systemd_target_state(target):
            _out, _err, code = call(ctx,
                                    ['systemctl', 'disable', target],
                                    verbosity=CallVerbosity.DEBUG)
            if code:
                logger.error(f'Failed to disable the {target} target')
                return 'failed - to disable the target'
            else:
                # stopping a target waits by default
                _out, _err, code = call(ctx,
                                        ['systemctl', 'stop', target],
                                        verbosity=CallVerbosity.DEBUG)
                if code:
                    logger.error(f'Failed to stop the {target} target')
                    return 'failed - to disable the target'
                else:
                    return f'success - systemd target {target} disabled'

        else:
            return 'skipped - target already disabled'

    else:
        logger.info('Requested to exit maintenance state')
        # exit maintenance request
        if not systemd_target_state(target):
            _out, _err, code = call(ctx,
                                    ['systemctl', 'enable', target],
                                    verbosity=CallVerbosity.DEBUG)
            if code:
                logger.error(f'Failed to enable the {target} target')
                return 'failed - unable to enable the target'
            else:
                # starting a target waits by default
                _out, _err, code = call(ctx,
                                        ['systemctl', 'start', target],
                                        verbosity=CallVerbosity.DEBUG)
                if code:
                    logger.error(f'Failed to start the {target} target')
                    return 'failed - unable to start the target'
                else:
                    return f'success - systemd target {target} enabled and started'
f91f0fd5 TL |
7324 | |
7325 | ################################## | |
7326 | ||
f6b5b4d7 | 7327 | |
9f95a23c TL |
7328 | def _get_parser(): |
7329 | # type: () -> argparse.ArgumentParser | |
7330 | parser = argparse.ArgumentParser( | |
7331 | description='Bootstrap Ceph daemons with systemd and containers.', | |
7332 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) | |
7333 | parser.add_argument( | |
7334 | '--image', | |
7335 | help='container image. Can also be set via the "CEPHADM_IMAGE" ' | |
7336 | 'env var') | |
7337 | parser.add_argument( | |
7338 | '--docker', | |
7339 | action='store_true', | |
7340 | help='use docker instead of podman') | |
7341 | parser.add_argument( | |
7342 | '--data-dir', | |
7343 | default=DATA_DIR, | |
7344 | help='base directory for daemon data') | |
7345 | parser.add_argument( | |
7346 | '--log-dir', | |
7347 | default=LOG_DIR, | |
7348 | help='base directory for daemon logs') | |
7349 | parser.add_argument( | |
7350 | '--logrotate-dir', | |
7351 | default=LOGROTATE_DIR, | |
7352 | help='location of logrotate configuration files') | |
7353 | parser.add_argument( | |
7354 | '--unit-dir', | |
7355 | default=UNIT_DIR, | |
7356 | help='base directory for systemd units') | |
7357 | parser.add_argument( | |
7358 | '--verbose', '-v', | |
7359 | action='store_true', | |
7360 | help='Show debug-level log messages') | |
7361 | parser.add_argument( | |
7362 | '--timeout', | |
7363 | type=int, | |
7364 | default=DEFAULT_TIMEOUT, | |
7365 | help='timeout in seconds') | |
7366 | parser.add_argument( | |
7367 | '--retry', | |
7368 | type=int, | |
7369 | default=DEFAULT_RETRY, | |
7370 | help='max number of retries') | |
e306af50 TL |
7371 | parser.add_argument( |
7372 | '--env', '-e', | |
7373 | action='append', | |
7374 | default=[], | |
7375 | help='set environment variable') | |
f67539c2 TL |
7376 | parser.add_argument( |
7377 | '--no-container-init', | |
7378 | action='store_true', | |
7379 | default=not CONTAINER_INIT, | |
7380 | help='Do not run podman/docker with `--init`') | |
9f95a23c TL |
7381 | |
7382 | subparsers = parser.add_subparsers(help='sub-command') | |
7383 | ||
7384 | parser_version = subparsers.add_parser( | |
7385 | 'version', help='get ceph version from container') | |
7386 | parser_version.set_defaults(func=command_version) | |
7387 | ||
7388 | parser_pull = subparsers.add_parser( | |
7389 | 'pull', help='pull latest image version') | |
7390 | parser_pull.set_defaults(func=command_pull) | |
7391 | ||
7392 | parser_inspect_image = subparsers.add_parser( | |
7393 | 'inspect-image', help='inspect local container image') | |
7394 | parser_inspect_image.set_defaults(func=command_inspect_image) | |
7395 | ||
7396 | parser_ls = subparsers.add_parser( | |
7397 | 'ls', help='list daemon instances on this host') | |
7398 | parser_ls.set_defaults(func=command_ls) | |
7399 | parser_ls.add_argument( | |
7400 | '--no-detail', | |
7401 | action='store_true', | |
7402 | help='Do not include daemon status') | |
7403 | parser_ls.add_argument( | |
7404 | '--legacy-dir', | |
7405 | default='/', | |
7406 | help='base directory for legacy daemon data') | |
7407 | ||
7408 | parser_list_networks = subparsers.add_parser( | |
7409 | 'list-networks', help='list IP networks') | |
7410 | parser_list_networks.set_defaults(func=command_list_networks) | |
7411 | ||
7412 | parser_adopt = subparsers.add_parser( | |
7413 | 'adopt', help='adopt daemon deployed with a different tool') | |
7414 | parser_adopt.set_defaults(func=command_adopt) | |
7415 | parser_adopt.add_argument( | |
7416 | '--name', '-n', | |
7417 | required=True, | |
7418 | help='daemon name (type.id)') | |
7419 | parser_adopt.add_argument( | |
7420 | '--style', | |
7421 | required=True, | |
7422 | help='deployment style (legacy, ...)') | |
7423 | parser_adopt.add_argument( | |
7424 | '--cluster', | |
7425 | default='ceph', | |
7426 | help='cluster name') | |
7427 | parser_adopt.add_argument( | |
7428 | '--legacy-dir', | |
7429 | default='/', | |
7430 | help='base directory for legacy daemon data') | |
7431 | parser_adopt.add_argument( | |
7432 | '--config-json', | |
7433 | help='Additional configuration information in JSON format') | |
7434 | parser_adopt.add_argument( | |
7435 | '--skip-firewalld', | |
7436 | action='store_true', | |
7437 | help='Do not configure firewalld') | |
7438 | parser_adopt.add_argument( | |
7439 | '--skip-pull', | |
7440 | action='store_true', | |
7441 | help='do not pull the latest image before adopting') | |
1911f103 TL |
7442 | parser_adopt.add_argument( |
7443 | '--force-start', | |
7444 | action='store_true', | |
f67539c2 | 7445 | help='start newly adoped daemon, even if it was not running previously') |
f91f0fd5 TL |
7446 | parser_adopt.add_argument( |
7447 | '--container-init', | |
7448 | action='store_true', | |
f67539c2 TL |
7449 | default=CONTAINER_INIT, |
7450 | help=argparse.SUPPRESS) | |
9f95a23c TL |
7451 | |
7452 | parser_rm_daemon = subparsers.add_parser( | |
7453 | 'rm-daemon', help='remove daemon instance') | |
7454 | parser_rm_daemon.set_defaults(func=command_rm_daemon) | |
7455 | parser_rm_daemon.add_argument( | |
7456 | '--name', '-n', | |
7457 | required=True, | |
7458 | action=CustomValidation, | |
7459 | help='daemon name (type.id)') | |
7460 | parser_rm_daemon.add_argument( | |
7461 | '--fsid', | |
7462 | required=True, | |
7463 | help='cluster FSID') | |
7464 | parser_rm_daemon.add_argument( | |
7465 | '--force', | |
7466 | action='store_true', | |
7467 | help='proceed, even though this may destroy valuable data') | |
7468 | parser_rm_daemon.add_argument( | |
7469 | '--force-delete-data', | |
7470 | action='store_true', | |
7471 | help='delete valuable daemon data instead of making a backup') | |
7472 | ||
7473 | parser_rm_cluster = subparsers.add_parser( | |
7474 | 'rm-cluster', help='remove all daemons for a cluster') | |
7475 | parser_rm_cluster.set_defaults(func=command_rm_cluster) | |
7476 | parser_rm_cluster.add_argument( | |
7477 | '--fsid', | |
7478 | required=True, | |
7479 | help='cluster FSID') | |
7480 | parser_rm_cluster.add_argument( | |
7481 | '--force', | |
7482 | action='store_true', | |
7483 | help='proceed, even though this may destroy valuable data') | |
f67539c2 TL |
7484 | parser_rm_cluster.add_argument( |
7485 | '--keep-logs', | |
7486 | action='store_true', | |
7487 | help='do not remove log files') | |
9f95a23c TL |
7488 | |
7489 | parser_run = subparsers.add_parser( | |
7490 | 'run', help='run a ceph daemon, in a container, in the foreground') | |
7491 | parser_run.set_defaults(func=command_run) | |
7492 | parser_run.add_argument( | |
7493 | '--name', '-n', | |
7494 | required=True, | |
7495 | help='daemon name (type.id)') | |
7496 | parser_run.add_argument( | |
7497 | '--fsid', | |
7498 | required=True, | |
7499 | help='cluster FSID') | |
7500 | ||
7501 | parser_shell = subparsers.add_parser( | |
7502 | 'shell', help='run an interactive shell inside a daemon container') | |
7503 | parser_shell.set_defaults(func=command_shell) | |
7504 | parser_shell.add_argument( | |
7505 | '--fsid', | |
7506 | help='cluster FSID') | |
7507 | parser_shell.add_argument( | |
7508 | '--name', '-n', | |
7509 | help='daemon name (type.id)') | |
7510 | parser_shell.add_argument( | |
7511 | '--config', '-c', | |
7512 | help='ceph.conf to pass through to the container') | |
7513 | parser_shell.add_argument( | |
7514 | '--keyring', '-k', | |
7515 | help='ceph.keyring to pass through to the container') | |
e306af50 TL |
7516 | parser_shell.add_argument( |
7517 | '--mount', '-m', | |
f67539c2 TL |
7518 | help=('mount a file or directory in the container. ' |
7519 | 'Support multiple mounts. ' | |
7520 | 'ie: `--mount /foo /bar:/bar`. ' | |
7521 | 'When no destination is passed, default is /mnt'), | |
7522 | nargs='+') | |
9f95a23c TL |
7523 | parser_shell.add_argument( |
7524 | '--env', '-e', | |
7525 | action='append', | |
7526 | default=[], | |
7527 | help='set environment variable') | |
7528 | parser_shell.add_argument( | |
e306af50 | 7529 | 'command', nargs=argparse.REMAINDER, |
9f95a23c TL |
7530 | help='command (optional)') |
7531 | ||
7532 | parser_enter = subparsers.add_parser( | |
7533 | 'enter', help='run an interactive shell inside a running daemon container') | |
7534 | parser_enter.set_defaults(func=command_enter) | |
7535 | parser_enter.add_argument( | |
7536 | '--fsid', | |
7537 | help='cluster FSID') | |
7538 | parser_enter.add_argument( | |
7539 | '--name', '-n', | |
7540 | required=True, | |
7541 | help='daemon name (type.id)') | |
7542 | parser_enter.add_argument( | |
e306af50 | 7543 | 'command', nargs=argparse.REMAINDER, |
9f95a23c TL |
7544 | help='command') |
7545 | ||
7546 | parser_ceph_volume = subparsers.add_parser( | |
7547 | 'ceph-volume', help='run ceph-volume inside a container') | |
7548 | parser_ceph_volume.set_defaults(func=command_ceph_volume) | |
7549 | parser_ceph_volume.add_argument( | |
7550 | '--fsid', | |
7551 | help='cluster FSID') | |
7552 | parser_ceph_volume.add_argument( | |
7553 | '--config-json', | |
7554 | help='JSON file with config and (client.bootrap-osd) key') | |
801d1391 TL |
7555 | parser_ceph_volume.add_argument( |
7556 | '--config', '-c', | |
7557 | help='ceph conf file') | |
7558 | parser_ceph_volume.add_argument( | |
7559 | '--keyring', '-k', | |
7560 | help='ceph.keyring to pass through to the container') | |
f67539c2 TL |
7561 | parser_ceph_volume.add_argument( |
7562 | '--log-output', | |
7563 | action='store_true', | |
7564 | default=True, | |
7565 | help='suppress ceph volume output from the log') | |
9f95a23c | 7566 | parser_ceph_volume.add_argument( |
e306af50 | 7567 | 'command', nargs=argparse.REMAINDER, |
9f95a23c TL |
7568 | help='command') |
7569 | ||
7570 | parser_unit = subparsers.add_parser( | |
f67539c2 | 7571 | 'unit', help="operate on the daemon's systemd unit") |
9f95a23c TL |
7572 | parser_unit.set_defaults(func=command_unit) |
7573 | parser_unit.add_argument( | |
7574 | 'command', | |
7575 | help='systemd command (start, stop, restart, enable, disable, ...)') | |
7576 | parser_unit.add_argument( | |
7577 | '--fsid', | |
7578 | help='cluster FSID') | |
7579 | parser_unit.add_argument( | |
7580 | '--name', '-n', | |
7581 | required=True, | |
7582 | help='daemon name (type.id)') | |
7583 | ||
7584 | parser_logs = subparsers.add_parser( | |
7585 | 'logs', help='print journald logs for a daemon container') | |
7586 | parser_logs.set_defaults(func=command_logs) | |
7587 | parser_logs.add_argument( | |
7588 | '--fsid', | |
7589 | help='cluster FSID') | |
7590 | parser_logs.add_argument( | |
7591 | '--name', '-n', | |
7592 | required=True, | |
7593 | help='daemon name (type.id)') | |
7594 | parser_logs.add_argument( | |
7595 | 'command', nargs='*', | |
7596 | help='additional journalctl args') | |
7597 | ||
7598 | parser_bootstrap = subparsers.add_parser( | |
7599 | 'bootstrap', help='bootstrap a cluster (mon + mgr daemons)') | |
7600 | parser_bootstrap.set_defaults(func=command_bootstrap) | |
7601 | parser_bootstrap.add_argument( | |
7602 | '--config', '-c', | |
7603 | help='ceph conf file to incorporate') | |
7604 | parser_bootstrap.add_argument( | |
7605 | '--mon-id', | |
7606 | required=False, | |
7607 | help='mon id (default: local hostname)') | |
7608 | parser_bootstrap.add_argument( | |
7609 | '--mon-addrv', | |
7610 | help='mon IPs (e.g., [v2:localipaddr:3300,v1:localipaddr:6789])') | |
7611 | parser_bootstrap.add_argument( | |
7612 | '--mon-ip', | |
7613 | help='mon IP') | |
7614 | parser_bootstrap.add_argument( | |
7615 | '--mgr-id', | |
7616 | required=False, | |
7617 | help='mgr id (default: randomly generated)') | |
7618 | parser_bootstrap.add_argument( | |
7619 | '--fsid', | |
7620 | help='cluster FSID') | |
7621 | parser_bootstrap.add_argument( | |
7622 | '--output-dir', | |
7623 | default='/etc/ceph', | |
7624 | help='directory to write config, keyring, and pub key files') | |
7625 | parser_bootstrap.add_argument( | |
7626 | '--output-keyring', | |
7627 | help='location to write keyring file with new cluster admin and mon keys') | |
7628 | parser_bootstrap.add_argument( | |
7629 | '--output-config', | |
7630 | help='location to write conf file to connect to new cluster') | |
7631 | parser_bootstrap.add_argument( | |
7632 | '--output-pub-ssh-key', | |
f67539c2 | 7633 | help="location to write the cluster's public SSH key") |
9f95a23c TL |
7634 | parser_bootstrap.add_argument( |
7635 | '--skip-ssh', | |
7636 | action='store_true', | |
7637 | help='skip setup of ssh key on local host') | |
7638 | parser_bootstrap.add_argument( | |
7639 | '--initial-dashboard-user', | |
7640 | default='admin', | |
7641 | help='Initial user for the dashboard') | |
7642 | parser_bootstrap.add_argument( | |
7643 | '--initial-dashboard-password', | |
7644 | help='Initial password for the initial dashboard user') | |
f6b5b4d7 TL |
7645 | parser_bootstrap.add_argument( |
7646 | '--ssl-dashboard-port', | |
7647 | type=int, | |
f67539c2 | 7648 | default=8443, |
f6b5b4d7 | 7649 | help='Port number used to connect with dashboard using SSL') |
9f95a23c TL |
7650 | parser_bootstrap.add_argument( |
7651 | '--dashboard-key', | |
e306af50 | 7652 | type=argparse.FileType('r'), |
9f95a23c TL |
7653 | help='Dashboard key') |
7654 | parser_bootstrap.add_argument( | |
7655 | '--dashboard-crt', | |
e306af50 | 7656 | type=argparse.FileType('r'), |
9f95a23c TL |
7657 | help='Dashboard certificate') |
7658 | ||
e306af50 TL |
7659 | parser_bootstrap.add_argument( |
7660 | '--ssh-config', | |
7661 | type=argparse.FileType('r'), | |
7662 | help='SSH config') | |
7663 | parser_bootstrap.add_argument( | |
7664 | '--ssh-private-key', | |
7665 | type=argparse.FileType('r'), | |
7666 | help='SSH private key') | |
7667 | parser_bootstrap.add_argument( | |
7668 | '--ssh-public-key', | |
7669 | type=argparse.FileType('r'), | |
7670 | help='SSH public key') | |
f6b5b4d7 TL |
7671 | parser_bootstrap.add_argument( |
7672 | '--ssh-user', | |
7673 | default='root', | |
7674 | help='set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users') | |
e306af50 | 7675 | |
9f95a23c TL |
7676 | parser_bootstrap.add_argument( |
7677 | '--skip-mon-network', | |
7678 | action='store_true', | |
7679 | help='set mon public_network based on bootstrap mon ip') | |
7680 | parser_bootstrap.add_argument( | |
7681 | '--skip-dashboard', | |
7682 | action='store_true', | |
7683 | help='do not enable the Ceph Dashboard') | |
7684 | parser_bootstrap.add_argument( | |
7685 | '--dashboard-password-noupdate', | |
7686 | action='store_true', | |
7687 | help='stop forced dashboard password change') | |
7688 | parser_bootstrap.add_argument( | |
7689 | '--no-minimize-config', | |
7690 | action='store_true', | |
7691 | help='do not assimilate and minimize the config file') | |
7692 | parser_bootstrap.add_argument( | |
7693 | '--skip-ping-check', | |
7694 | action='store_true', | |
7695 | help='do not verify that mon IP is pingable') | |
7696 | parser_bootstrap.add_argument( | |
7697 | '--skip-pull', | |
7698 | action='store_true', | |
7699 | help='do not pull the latest image before bootstrapping') | |
7700 | parser_bootstrap.add_argument( | |
7701 | '--skip-firewalld', | |
7702 | action='store_true', | |
7703 | help='Do not configure firewalld') | |
7704 | parser_bootstrap.add_argument( | |
7705 | '--allow-overwrite', | |
7706 | action='store_true', | |
7707 | help='allow overwrite of existing --output-* config/keyring/ssh files') | |
7708 | parser_bootstrap.add_argument( | |
7709 | '--allow-fqdn-hostname', | |
7710 | action='store_true', | |
7711 | help='allow hostname that is fully-qualified (contains ".")') | |
f67539c2 TL |
7712 | parser_bootstrap.add_argument( |
7713 | '--allow-mismatched-release', | |
7714 | action='store_true', | |
7715 | help="allow bootstrap of ceph that doesn't match this version of cephadm") | |
9f95a23c TL |
7716 | parser_bootstrap.add_argument( |
7717 | '--skip-prepare-host', | |
7718 | action='store_true', | |
7719 | help='Do not prepare host') | |
7720 | parser_bootstrap.add_argument( | |
7721 | '--orphan-initial-daemons', | |
7722 | action='store_true', | |
f67539c2 | 7723 | help='Set mon and mgr service to `unmanaged`, Do not create the crash service') |
9f95a23c TL |
7724 | parser_bootstrap.add_argument( |
7725 | '--skip-monitoring-stack', | |
7726 | action='store_true', | |
7727 | help='Do not automatically provision monitoring stack (prometheus, grafana, alertmanager, node-exporter)') | |
e306af50 TL |
7728 | parser_bootstrap.add_argument( |
7729 | '--apply-spec', | |
7730 | help='Apply cluster spec after bootstrap (copy ssh key, add hosts and apply services)') | |
7731 | ||
e306af50 TL |
7732 | parser_bootstrap.add_argument( |
7733 | '--shared_ceph_folder', | |
7734 | metavar='CEPH_SOURCE_FOLDER', | |
7735 | help='Development mode. Several folders in containers are volumes mapped to different sub-folders in the ceph source folder') | |
9f95a23c | 7736 | |
f6b5b4d7 TL |
7737 | parser_bootstrap.add_argument( |
7738 | '--registry-url', | |
7739 | help='url for custom registry') | |
7740 | parser_bootstrap.add_argument( | |
7741 | '--registry-username', | |
7742 | help='username for custom registry') | |
7743 | parser_bootstrap.add_argument( | |
7744 | '--registry-password', | |
7745 | help='password for custom registry') | |
7746 | parser_bootstrap.add_argument( | |
7747 | '--registry-json', | |
7748 | help='json file with custom registry login info (URL, Username, Password)') | |
f91f0fd5 TL |
7749 | parser_bootstrap.add_argument( |
7750 | '--container-init', | |
7751 | action='store_true', | |
f67539c2 TL |
7752 | default=CONTAINER_INIT, |
7753 | help=argparse.SUPPRESS) | |
7754 | parser_bootstrap.add_argument( | |
7755 | '--with-exporter', | |
7756 | action='store_true', | |
7757 | help='Automatically deploy cephadm metadata exporter to each node') | |
7758 | parser_bootstrap.add_argument( | |
7759 | '--exporter-config', | |
7760 | action=CustomValidation, | |
7761 | help=f'Exporter configuration information in JSON format (providing: {", ".join(CephadmDaemon.config_requirements)}, port information)') | |
7762 | parser_bootstrap.add_argument( | |
7763 | '--cluster-network', | |
7764 | help='subnet to use for cluster replication, recovery and heartbeats (in CIDR notation network/mask)') | |
f6b5b4d7 | 7765 | |
9f95a23c TL |
7766 | parser_deploy = subparsers.add_parser( |
7767 | 'deploy', help='deploy a daemon') | |
7768 | parser_deploy.set_defaults(func=command_deploy) | |
7769 | parser_deploy.add_argument( | |
7770 | '--name', | |
7771 | required=True, | |
7772 | action=CustomValidation, | |
7773 | help='daemon name (type.id)') | |
7774 | parser_deploy.add_argument( | |
7775 | '--fsid', | |
7776 | required=True, | |
7777 | help='cluster FSID') | |
7778 | parser_deploy.add_argument( | |
7779 | '--config', '-c', | |
7780 | help='config file for new daemon') | |
7781 | parser_deploy.add_argument( | |
7782 | '--config-json', | |
7783 | help='Additional configuration information in JSON format') | |
7784 | parser_deploy.add_argument( | |
7785 | '--keyring', | |
7786 | help='keyring for new daemon') | |
7787 | parser_deploy.add_argument( | |
7788 | '--key', | |
7789 | help='key for new daemon') | |
7790 | parser_deploy.add_argument( | |
7791 | '--osd-fsid', | |
7792 | help='OSD uuid, if creating an OSD container') | |
7793 | parser_deploy.add_argument( | |
7794 | '--skip-firewalld', | |
7795 | action='store_true', | |
7796 | help='Do not configure firewalld') | |
f6b5b4d7 TL |
7797 | parser_deploy.add_argument( |
7798 | '--tcp-ports', | |
7799 | help='List of tcp ports to open in the host firewall') | |
9f95a23c TL |
7800 | parser_deploy.add_argument( |
7801 | '--reconfig', | |
7802 | action='store_true', | |
7803 | help='Reconfigure a previously deployed daemon') | |
7804 | parser_deploy.add_argument( | |
7805 | '--allow-ptrace', | |
7806 | action='store_true', | |
7807 | help='Allow SYS_PTRACE on daemon container') | |
f91f0fd5 TL |
7808 | parser_deploy.add_argument( |
7809 | '--container-init', | |
7810 | action='store_true', | |
f67539c2 TL |
7811 | default=CONTAINER_INIT, |
7812 | help=argparse.SUPPRESS) | |
7813 | parser_deploy.add_argument( | |
7814 | '--memory-request', | |
7815 | help='Container memory request/target' | |
7816 | ) | |
7817 | parser_deploy.add_argument( | |
7818 | '--memory-limit', | |
7819 | help='Container memory hard limit' | |
7820 | ) | |
7821 | parser_deploy.add_argument( | |
7822 | '--meta-json', | |
7823 | help='JSON dict of additional metadata' | |
7824 | ) | |
9f95a23c TL |
7825 | |
7826 | parser_check_host = subparsers.add_parser( | |
7827 | 'check-host', help='check host configuration') | |
7828 | parser_check_host.set_defaults(func=command_check_host) | |
7829 | parser_check_host.add_argument( | |
7830 | '--expect-hostname', | |
7831 | help='Check that hostname matches an expected value') | |
7832 | ||
7833 | parser_prepare_host = subparsers.add_parser( | |
7834 | 'prepare-host', help='prepare a host for cephadm use') | |
7835 | parser_prepare_host.set_defaults(func=command_prepare_host) | |
7836 | parser_prepare_host.add_argument( | |
7837 | '--expect-hostname', | |
7838 | help='Set hostname') | |
7839 | ||
7840 | parser_add_repo = subparsers.add_parser( | |
7841 | 'add-repo', help='configure package repository') | |
7842 | parser_add_repo.set_defaults(func=command_add_repo) | |
7843 | parser_add_repo.add_argument( | |
7844 | '--release', | |
1911f103 | 7845 | help='use latest version of a named release (e.g., {})'.format(LATEST_STABLE_RELEASE)) |
9f95a23c TL |
7846 | parser_add_repo.add_argument( |
7847 | '--version', | |
7848 | help='use specific upstream version (x.y.z)') | |
7849 | parser_add_repo.add_argument( | |
7850 | '--dev', | |
7851 | help='use specified bleeding edge build from git branch or tag') | |
7852 | parser_add_repo.add_argument( | |
7853 | '--dev-commit', | |
7854 | help='use specified bleeding edge build from git commit') | |
7855 | parser_add_repo.add_argument( | |
7856 | '--gpg-url', | |
7857 | help='specify alternative GPG key location') | |
7858 | parser_add_repo.add_argument( | |
7859 | '--repo-url', | |
7860 | default='https://download.ceph.com', | |
7861 | help='specify alternative repo location') | |
7862 | # TODO: proxy? | |
7863 | ||
7864 | parser_rm_repo = subparsers.add_parser( | |
7865 | 'rm-repo', help='remove package repository configuration') | |
7866 | parser_rm_repo.set_defaults(func=command_rm_repo) | |
7867 | ||
7868 | parser_install = subparsers.add_parser( | |
7869 | 'install', help='install ceph package(s)') | |
7870 | parser_install.set_defaults(func=command_install) | |
7871 | parser_install.add_argument( | |
7872 | 'packages', nargs='*', | |
7873 | default=['cephadm'], | |
7874 | help='packages') | |
7875 | ||
f6b5b4d7 TL |
7876 | parser_registry_login = subparsers.add_parser( |
7877 | 'registry-login', help='log host into authenticated registry') | |
7878 | parser_registry_login.set_defaults(func=command_registry_login) | |
7879 | parser_registry_login.add_argument( | |
7880 | '--registry-url', | |
7881 | help='url for custom registry') | |
7882 | parser_registry_login.add_argument( | |
7883 | '--registry-username', | |
7884 | help='username for custom registry') | |
7885 | parser_registry_login.add_argument( | |
7886 | '--registry-password', | |
7887 | help='password for custom registry') | |
7888 | parser_registry_login.add_argument( | |
7889 | '--registry-json', | |
7890 | help='json file with custom registry login info (URL, Username, Password)') | |
7891 | parser_registry_login.add_argument( | |
7892 | '--fsid', | |
7893 | help='cluster FSID') | |
7894 | ||
f91f0fd5 TL |
7895 | parser_gather_facts = subparsers.add_parser( |
7896 | 'gather-facts', help='gather and return host related information (JSON format)') | |
7897 | parser_gather_facts.set_defaults(func=command_gather_facts) | |
7898 | ||
f67539c2 TL |
7899 | parser_exporter = subparsers.add_parser( |
7900 | 'exporter', help='Start cephadm in exporter mode (web service), providing host/daemon/disk metadata') | |
7901 | parser_exporter.add_argument( | |
7902 | '--fsid', | |
7903 | required=True, | |
7904 | type=str, | |
7905 | help='fsid of the cephadm exporter to run against') | |
7906 | parser_exporter.add_argument( | |
7907 | '--port', | |
7908 | type=int, | |
7909 | default=int(CephadmDaemon.default_port), | |
7910 | help='port number for the cephadm exporter service') | |
7911 | parser_exporter.add_argument( | |
7912 | '--id', | |
7913 | type=str, | |
7914 | default=get_hostname().split('.')[0], | |
7915 | help='daemon identifer for the exporter') | |
7916 | parser_exporter.set_defaults(func=command_exporter) | |
7917 | ||
7918 | parser_maintenance = subparsers.add_parser( | |
7919 | 'host-maintenance', help='Manage the maintenance state of a host') | |
7920 | parser_maintenance.add_argument( | |
7921 | '--fsid', | |
7922 | help='cluster FSID') | |
7923 | parser_maintenance.add_argument( | |
7924 | 'maintenance_action', | |
7925 | type=str, | |
7926 | choices=['enter', 'exit'], | |
7927 | help='Maintenance action - enter maintenance, or exit maintenance') | |
7928 | parser_maintenance.set_defaults(func=command_maintenance) | |
7929 | ||
7930 | parser_verify_prereqs = subparsers.add_parser( | |
7931 | 'verify-prereqs', | |
7932 | help='verify system prerequisites for a given service are met on this host') | |
7933 | parser_verify_prereqs.set_defaults(func=command_verify_prereqs) | |
7934 | parser_verify_prereqs.add_argument( | |
7935 | '--daemon-type', | |
7936 | required=True, | |
7937 | help='service type of service to whose prereqs will be checked') | |
7938 | ||
9f95a23c TL |
7939 | return parser |
7940 | ||
f6b5b4d7 | 7941 | |
9f95a23c TL |
def _parse_args(av):
    """Parse the cephadm command line *av* and return the args namespace.

    Also resolves the deprecated subparser ``--container-init`` flag so that
    ``container_init`` and ``no_container_init`` always end up as exact
    boolean opposites.
    """
    parser = _get_parser()

    parsed = parser.parse_args(av)
    # Drop a leading '--' separator from the positional command list, if any.
    if 'command' in parsed and parsed.command and parsed.command[0] == '--':
        parsed.command.pop(0)

    # workaround argparse to deprecate the subparser `--container-init` flag
    # container_init and no_container_init must always be mutually exclusive
    init_flag = '--container-init'
    no_init_flag = '--no-container-init'
    if init_flag in av and no_init_flag in av:
        parser.error('argument %s: not allowed with argument %s' % (init_flag, no_init_flag))
    if init_flag in av:
        parsed.no_container_init = not parsed.container_init
    else:
        parsed.container_init = not parsed.no_container_init
    assert parsed.container_init is not parsed.no_container_init

    return parsed
9f95a23c | 7961 | |
f6b5b4d7 | 7962 | |
def cephadm_init_ctx(args: List[str]) -> Optional[CephadmContext]:
    """Create a CephadmContext populated from the parsed command line."""
    context = CephadmContext()
    context.set_args(_parse_args(args))
    return context
7968 | ||
7969 | ||
def cephadm_init(args: List[str]) -> Optional[CephadmContext]:
    """Set up logging and build the command context.

    Returns the ready :class:`CephadmContext`, or ``None`` when no
    subcommand was supplied on the command line (an error message is
    written to stderr in that case).
    """
    global logger

    ctx = cephadm_init_ctx(args)
    assert ctx is not None

    # Logger configuration: the log directory must exist before dictConfig
    # installs the file handler.
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    dictConfig(logging_config)
    logger = logging.getLogger()

    if ctx.verbose:
        # Raise only the console handler to DEBUG; other handlers keep
        # their configured levels.
        console_handlers = [h for h in logger.handlers if h.name == 'console']
        for handler in console_handlers:
            handler.setLevel(logging.DEBUG)

    if not ctx.has_function():
        sys.stderr.write('No command specified; pass -h or --help for usage\n')
        return None

    return ctx
7992 | ||
7993 | ||
def main():
    # type: () -> None
    """cephadm entry point: verify privileges, build the context, dispatch
    the selected subcommand, and exit with its return code."""

    # cephadm manages host-level state (containers, systemd, /var/lib),
    # so it must run as root.
    if os.geteuid() != 0:
        sys.stderr.write('ERROR: cephadm should be run as root\n')
        sys.exit(1)

    # Fix: the original initialized av to [] and immediately overwrote it;
    # a single annotated assignment expresses the same thing.
    av: List[str] = sys.argv[1:]

    ctx = cephadm_init(av)
    if not ctx:  # error, exit
        sys.exit(1)

    try:
        # podman or docker?
        ctx.container_engine = find_container_engine(ctx)
        # These commands must work even without a usable container engine
        # (e.g. to prepare the host or configure repos in the first place).
        if ctx.func not in \
                [command_check_host, command_prepare_host, command_add_repo]:
            check_container_engine(ctx)
        # command handler
        r = ctx.func(ctx)
    except Error as e:
        if ctx.verbose:
            raise
        logger.error('ERROR: %s' % e)
        sys.exit(1)
    # Handlers may return None to signal success; normalize to 0.
    if not r:
        r = 0
    sys.exit(r)
f67539c2 TL |
8024 | |
8025 | ||
# Allow cephadm to be executed directly as a script.
if __name__ == '__main__':
    main()