Commit | Line | Data |
---|---|---|
9f95a23c TL |
1 | """ |
2 | Ceph cluster task, deployed via cephadm orchestrator | |
3 | """ | |
9f95a23c TL |
4 | import argparse |
5 | import configobj | |
6 | import contextlib | |
7 | import logging | |
8 | import os | |
9 | import json | |
10 | import re | |
11 | import uuid | |
f91f0fd5 | 12 | import yaml |
9f95a23c | 13 | |
20effc67 | 14 | from copy import deepcopy |
f67539c2 | 15 | from io import BytesIO, StringIO |
9f95a23c | 16 | from tarfile import ReadError |
e306af50 | 17 | from tasks.ceph_manager import CephManager |
9f95a23c TL |
18 | from teuthology import misc as teuthology |
19 | from teuthology import contextutil | |
1e59de90 | 20 | from teuthology import packaging |
9f95a23c TL |
21 | from teuthology.orchestra import run |
22 | from teuthology.orchestra.daemon import DaemonGroup | |
23 | from teuthology.config import config as teuth_config | |
20effc67 TL |
24 | from textwrap import dedent |
25 | from tasks.cephfs.filesystem import MDSCluster, Filesystem | |
1e59de90 | 26 | from tasks.util import chacra |
9f95a23c TL |
27 | |
28 | # these items we use from ceph.py should probably eventually move elsewhere | |
29 | from tasks.ceph import get_mons, healthy | |
f67539c2 | 30 | from tasks.vip import subst_vip |
9f95a23c TL |
31 | |
32 | CEPH_ROLE_TYPES = ['mon', 'mgr', 'osd', 'mds', 'rgw', 'prometheus'] | |
33 | ||
34 | log = logging.getLogger(__name__) | |
35 | ||
36 | ||
37 | def _shell(ctx, cluster_name, remote, args, extra_cephadm_args=[], **kwargs): | |
f67539c2 | 38 | teuthology.get_testdir(ctx) |
9f95a23c TL |
39 | return remote.run( |
40 | args=[ | |
41 | 'sudo', | |
42 | ctx.cephadm, | |
43 | '--image', ctx.ceph[cluster_name].image, | |
44 | 'shell', | |
45 | '-c', '/etc/ceph/{}.conf'.format(cluster_name), | |
46 | '-k', '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), | |
47 | '--fsid', ctx.ceph[cluster_name].fsid, | |
48 | ] + extra_cephadm_args + [ | |
49 | '--', | |
50 | ] + args, | |
51 | **kwargs | |
52 | ) | |
53 | ||
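# Illustrative (not part of the original task) sketch of how callers in this
# file invoke _shell(); the cluster name 'ceph' and the command are examples:
#
#   _shell(ctx, 'ceph', ctx.ceph['ceph'].bootstrap_remote,
#          ['ceph', 'orch', 'ls', '--format=json'],
#          stdout=StringIO())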
b3b6e05e | 54 | |
9f95a23c TL |
55 | def build_initial_config(ctx, config): |
56 | cluster_name = config['cluster'] | |
57 | ||
58 | path = os.path.join(os.path.dirname(__file__), 'cephadm.conf') | |
59 | conf = configobj.ConfigObj(path, file_error=True) | |
60 | ||
61 | conf.setdefault('global', {}) | |
62 | conf['global']['fsid'] = ctx.ceph[cluster_name].fsid | |
63 | ||
64 | # overrides | |
65 | for section, keys in config.get('conf',{}).items(): | |
66 | for key, value in keys.items(): | |
67 | log.info(" override: [%s] %s = %s" % (section, key, value)) | |
68 | if section not in conf: | |
69 | conf[section] = {} | |
70 | conf[section][key] = value | |
71 | ||
72 | return conf | |
73 | ||
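# Illustrative sketch of the 'conf' overrides consumed above, as they would
# appear in a teuthology job yaml (the section and option shown are examples):
#
#   tasks:
#   - cephadm:
#       conf:
#         mon:
#           mon warn on pool no app: false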
b3b6e05e | 74 | |
20effc67 TL |
75 | def distribute_iscsi_gateway_cfg(ctx, conf_data): |
76 | """ | |
77 | Distribute a common gateway config that records the gateway IPs. | |
78 | This helps iSCSI clients find the trusted_ip_list. | |
79 | """ | |
80 | log.info('Distributing iscsi-gateway.cfg...') | |
81 | for remote, roles in ctx.cluster.remotes.items(): | |
82 | remote.write_file( | |
83 | path='/etc/ceph/iscsi-gateway.cfg', | |
84 | data=conf_data, | |
85 | sudo=True) | |
86 | ||
f67539c2 TL |
87 | def update_archive_setting(ctx, key, value): |
88 | """ | |
89 | Record a key/value pair under 'archive' in the job's info.yaml file | |
90 | """ | |
b3b6e05e TL |
91 | if ctx.archive is None: |
92 | return | |
f67539c2 TL |
93 | with open(os.path.join(ctx.archive, 'info.yaml'), 'r+') as info_file: |
94 | info_yaml = yaml.safe_load(info_file) | |
95 | info_file.seek(0) | |
96 | if 'archive' in info_yaml: | |
97 | info_yaml['archive'][key] = value | |
98 | else: | |
99 | info_yaml['archive'] = {key: value} | |
100 | yaml.safe_dump(info_yaml, info_file, default_flow_style=False) | |
101 | ||
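# Illustrative: after the calls made from ceph_log() and ceph_crash() below,
# the job's info.yaml is expected to contain an 'archive' section roughly like:
#
#   archive:
#     log: /var/log/ceph
#     crash: /var/lib/ceph/crash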
b3b6e05e | 102 | |
9f95a23c TL |
103 | @contextlib.contextmanager |
104 | def normalize_hostnames(ctx): | |
105 | """ | |
106 | Ensure we have short hostnames throughout, for consistency between | |
107 | remote.shortname and socket.gethostname() in cephadm. | |
108 | """ | |
109 | log.info('Normalizing hostnames...') | |
1e59de90 TL |
110 | cluster = ctx.cluster.filter(lambda r: '.' in r.hostname) |
111 | cluster.run(args=[ | |
9f95a23c TL |
112 | 'sudo', |
113 | 'hostname', | |
114 | run.Raw('$(hostname -s)'), | |
115 | ]) | |
116 | ||
117 | try: | |
118 | yield | |
119 | finally: | |
120 | pass | |
121 | ||
b3b6e05e | 122 | |
9f95a23c TL |
123 | @contextlib.contextmanager |
124 | def download_cephadm(ctx, config, ref): | |
125 | cluster_name = config['cluster'] | |
126 | ||
127 | if config.get('cephadm_mode') != 'cephadm-package': | |
b3b6e05e | 128 | if ctx.config.get('redhat'): |
1e59de90 TL |
129 | _fetch_cephadm_from_rpm(ctx) |
130 | # TODO: come up with a sensible way to detect if we need an "old, uncompiled" | |
131 | # cephadm | |
132 | elif 'cephadm_git_url' in config and 'cephadm_branch' in config: | |
133 | _fetch_cephadm_from_github(ctx, config, ref) | |
9f95a23c | 134 | else: |
1e59de90 TL |
135 | _fetch_cephadm_from_chachra(ctx, config, cluster_name) |
136 | ||
137 | try: | |
138 | yield | |
139 | finally: | |
140 | _rm_cluster(ctx, cluster_name) | |
141 | if config.get('cephadm_mode') == 'root': | |
142 | _rm_cephadm(ctx) | |
143 | ||
144 | ||
145 | def _fetch_cephadm_from_rpm(ctx): | |
146 | log.info("Copying cephadm installed from an RPM package") | |
147 | # cephadm already installed from redhat.install task | |
148 | ctx.cluster.run( | |
149 | args=[ | |
150 | 'cp', | |
151 | run.Raw('$(which cephadm)'), | |
152 | ctx.cephadm, | |
153 | run.Raw('&&'), | |
154 | 'ls', '-l', | |
155 | ctx.cephadm, | |
156 | ] | |
157 | ) | |
158 | ||
159 | ||
160 | def _fetch_cephadm_from_github(ctx, config, ref): | |
161 | ref = config.get('cephadm_branch', ref) | |
162 | git_url = config.get('cephadm_git_url', teuth_config.get_ceph_git_url()) | |
163 | log.info('Downloading cephadm (repo %s ref %s)...' % (git_url, ref)) | |
164 | if git_url.startswith('https://github.com/'): | |
165 | # git archive doesn't like https:// URLs, which we use with github. | |
166 | rest = git_url.split('https://github.com/', 1)[1] | |
167 | rest = re.sub(r'\.git/?$', '', rest).strip() # no .git suffix | |
168 | ctx.cluster.run( | |
169 | args=[ | |
170 | 'curl', '--silent', | |
171 | 'https://raw.githubusercontent.com/' + rest + '/' + ref + '/src/cephadm/cephadm', | |
172 | run.Raw('>'), | |
173 | ctx.cephadm, | |
174 | run.Raw('&&'), | |
175 | 'ls', '-l', | |
176 | ctx.cephadm, | |
177 | ], | |
178 | ) | |
179 | else: | |
9f95a23c TL |
180 | ctx.cluster.run( |
181 | args=[ | |
1e59de90 | 182 | 'git', 'clone', git_url, 'testrepo', |
9f95a23c | 183 | run.Raw('&&'), |
1e59de90 | 184 | 'cd', 'testrepo', |
9f95a23c | 185 | run.Raw('&&'), |
1e59de90 TL |
186 | 'git', 'show', f'{ref}:src/cephadm/cephadm', |
187 | run.Raw('>'), | |
188 | ctx.cephadm, | |
189 | run.Raw('&&'), | |
190 | 'ls', '-l', ctx.cephadm, | |
9f95a23c TL |
191 | ], |
192 | ) | |
1e59de90 TL |
193 | # sanity-check the resulting file and set executable bit |
194 | cephadm_file_size = '$(stat -c%s {})'.format(ctx.cephadm) | |
195 | ctx.cluster.run( | |
196 | args=[ | |
197 | 'test', '-s', ctx.cephadm, | |
198 | run.Raw('&&'), | |
199 | 'test', run.Raw(cephadm_file_size), "-gt", run.Raw('1000'), | |
200 | run.Raw('&&'), | |
201 | 'chmod', '+x', ctx.cephadm, | |
202 | ], | |
203 | ) | |
9f95a23c | 204 | |
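# Illustrative job config that selects the git fetch path above; the branch
# value here is only an example:
#
#   tasks:
#   - cephadm:
#       cephadm_git_url: https://github.com/ceph/ceph
#       cephadm_branch: main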
1e59de90 TL |
205 | |
206 | def _fetch_cephadm_from_chachra(ctx, config, cluster_name): | |
207 | log.info('Downloading "compiled" cephadm from chacra') | |
208 | bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote | |
209 | bp = packaging.get_builder_project()( | |
210 | config.get('project', 'ceph'), | |
211 | config, | |
212 | ctx=ctx, | |
213 | remote=bootstrap_remote, | |
214 | ) | |
215 | log.info('builder_project result: %s' % (bp._result.json())) | |
216 | ||
217 | flavor = config.get('flavor', 'default') | |
218 | branch = config.get('branch') | |
219 | sha1 = config.get('sha1') | |
220 | ||
221 | # pull the cephadm binary from chacra | |
222 | url = chacra.get_binary_url( | |
223 | 'cephadm', | |
224 | project=bp.project, | |
225 | distro=bp.distro.split('/')[0], | |
226 | release=bp.distro.split('/')[1], | |
227 | arch=bp.arch, | |
228 | flavor=flavor, | |
229 | branch=branch, | |
230 | sha1=sha1, | |
231 | ) | |
232 | log.info("Discovered cachra url: %s", url) | |
233 | ctx.cluster.run( | |
234 | args=[ | |
235 | 'curl', '--silent', '-L', url, | |
236 | run.Raw('>'), | |
9f95a23c | 237 | ctx.cephadm, |
1e59de90 TL |
238 | run.Raw('&&'), |
239 | 'ls', '-l', | |
240 | ctx.cephadm, | |
241 | ], | |
242 | ) | |
9f95a23c | 243 | |
1e59de90 TL |
244 | # sanity-check the resulting file and set executable bit |
245 | cephadm_file_size = '$(stat -c%s {})'.format(ctx.cephadm) | |
246 | ctx.cluster.run( | |
247 | args=[ | |
248 | 'test', '-s', ctx.cephadm, | |
249 | run.Raw('&&'), | |
250 | 'test', run.Raw(cephadm_file_size), "-gt", run.Raw('1000'), | |
251 | run.Raw('&&'), | |
252 | 'chmod', '+x', ctx.cephadm, | |
253 | ], | |
254 | ) | |
255 | ||
256 | ||
257 | def _rm_cluster(ctx, cluster_name): | |
258 | log.info('Removing cluster...') | |
259 | ctx.cluster.run(args=[ | |
260 | 'sudo', | |
261 | ctx.cephadm, | |
262 | 'rm-cluster', | |
263 | '--fsid', ctx.ceph[cluster_name].fsid, | |
264 | '--force', | |
265 | ]) | |
266 | ||
267 | ||
268 | def _rm_cephadm(ctx): | |
269 | log.info('Removing cephadm ...') | |
270 | ctx.cluster.run( | |
271 | args=[ | |
272 | 'rm', | |
273 | '-rf', | |
274 | ctx.cephadm, | |
275 | ], | |
276 | ) | |
9f95a23c | 277 | |
b3b6e05e | 278 | |
9f95a23c TL |
279 | @contextlib.contextmanager |
280 | def ceph_log(ctx, config): | |
281 | cluster_name = config['cluster'] | |
282 | fsid = ctx.ceph[cluster_name].fsid | |
283 | ||
f67539c2 TL |
284 | update_archive_setting(ctx, 'log', '/var/log/ceph') |
285 | ||
f91f0fd5 | 286 | |
9f95a23c TL |
287 | try: |
288 | yield | |
289 | ||
290 | except Exception: | |
291 | # we need to know this below | |
292 | ctx.summary['success'] = False | |
293 | raise | |
294 | ||
295 | finally: | |
296 | log.info('Checking cluster log for badness...') | |
297 | def first_in_ceph_log(pattern, excludes): | |
298 | """ | |
299 | Find the first occurrence of the pattern specified in the Ceph log. | |
300 | Returns None if none found. | |
301 | ||
302 | :param pattern: Pattern scanned for. | |
303 | :param excludes: Patterns to ignore. | |
304 | :return: First line of text (or None if not found) | |
305 | """ | |
306 | args = [ | |
307 | 'sudo', | |
308 | 'egrep', pattern, | |
309 | '/var/log/ceph/{fsid}/ceph.log'.format( | |
310 | fsid=fsid), | |
311 | ] | |
312 | if excludes: | |
313 | for exclude in excludes: | |
314 | args.extend([run.Raw('|'), 'egrep', '-v', exclude]) | |
315 | args.extend([ | |
316 | run.Raw('|'), 'head', '-n', '1', | |
317 | ]) | |
318 | r = ctx.ceph[cluster_name].bootstrap_remote.run( | |
e306af50 | 319 | stdout=StringIO(), |
9f95a23c TL |
320 | args=args, |
321 | ) | |
322 | stdout = r.stdout.getvalue() | |
323 | if stdout != '': | |
324 | return stdout | |
325 | return None | |
326 | ||
327 | if first_in_ceph_log(r'\[ERR\]|\[WRN\]|\[SEC\]', |
cd265ab1 | 328 | config.get('log-ignorelist')) is not None: |
9f95a23c TL |
329 | log.warning('Found errors (ERR|WRN|SEC) in cluster log') |
330 | ctx.summary['success'] = False | |
331 | # use the most severe problem as the failure reason | |
332 | if 'failure_reason' not in ctx.summary: | |
333 | for pattern in [r'\[SEC\]', r'\[ERR\]', r'\[WRN\]']: | |
cd265ab1 | 334 | match = first_in_ceph_log(pattern, config['log-ignorelist']) |
9f95a23c TL |
335 | if match is not None: |
336 | ctx.summary['failure_reason'] = \ | |
337 | '"{match}" in cluster log'.format( | |
338 | match=match.rstrip('\n'), | |
339 | ) | |
340 | break | |
341 | ||
342 | if ctx.archive is not None and \ | |
343 | not (ctx.config.get('archive-on-error') and ctx.summary['success']): | |
344 | # and logs | |
345 | log.info('Compressing logs...') | |
346 | run.wait( | |
347 | ctx.cluster.run( | |
348 | args=[ | |
349 | 'sudo', | |
350 | 'find', | |
351 | '/var/log/ceph', # all logs, not just for the cluster | |
f91f0fd5 | 352 | '/var/log/rbd-target-api', # ceph-iscsi |
9f95a23c TL |
353 | '-name', |
354 | '*.log', | |
355 | '-print0', | |
356 | run.Raw('|'), | |
357 | 'sudo', | |
358 | 'xargs', | |
359 | '-0', | |
360 | '--no-run-if-empty', | |
361 | '--', | |
362 | 'gzip', | |
363 | '--', | |
364 | ], | |
365 | wait=False, | |
366 | ), | |
367 | ) | |
368 | ||
369 | log.info('Archiving logs...') | |
370 | path = os.path.join(ctx.archive, 'remote') | |
371 | try: | |
372 | os.makedirs(path) | |
373 | except OSError: | |
374 | pass | |
375 | for remote in ctx.cluster.remotes.keys(): | |
20effc67 | 376 | sub = os.path.join(path, remote.shortname) |
9f95a23c TL |
377 | try: |
378 | os.makedirs(sub) | |
379 | except OSError: | |
380 | pass | |
e306af50 TL |
381 | try: |
382 | teuthology.pull_directory(remote, '/var/log/ceph', # everything | |
383 | os.path.join(sub, 'log')) | |
384 | except ReadError: | |
385 | pass | |
9f95a23c | 386 | |
b3b6e05e | 387 | |
9f95a23c TL |
388 | @contextlib.contextmanager |
389 | def ceph_crash(ctx, config): | |
390 | """ | |
391 | Gather crash dumps from /var/lib/ceph/$fsid/crash | |
392 | """ | |
393 | cluster_name = config['cluster'] | |
394 | fsid = ctx.ceph[cluster_name].fsid | |
395 | ||
f67539c2 | 396 | update_archive_setting(ctx, 'crash', '/var/lib/ceph/crash') |
f91f0fd5 | 397 | |
9f95a23c TL |
398 | try: |
399 | yield | |
400 | ||
401 | finally: | |
402 | if ctx.archive is not None: | |
403 | log.info('Archiving crash dumps...') | |
404 | path = os.path.join(ctx.archive, 'remote') | |
405 | try: | |
406 | os.makedirs(path) | |
407 | except OSError: | |
408 | pass | |
409 | for remote in ctx.cluster.remotes.keys(): | |
20effc67 | 410 | sub = os.path.join(path, remote.shortname) |
9f95a23c TL |
411 | try: |
412 | os.makedirs(sub) | |
413 | except OSError: | |
414 | pass | |
415 | try: | |
416 | teuthology.pull_directory(remote, | |
417 | '/var/lib/ceph/%s/crash' % fsid, | |
418 | os.path.join(sub, 'crash')) | |
419 | except ReadError: | |
420 | pass | |
421 | ||
b3b6e05e | 422 | |
20effc67 TL |
423 | @contextlib.contextmanager |
424 | def pull_image(ctx, config): | |
425 | cluster_name = config['cluster'] | |
426 | log.info(f'Pulling image {ctx.ceph[cluster_name].image} on all hosts...') | |
427 | run.wait( | |
428 | ctx.cluster.run( | |
429 | args=[ | |
430 | 'sudo', | |
431 | ctx.cephadm, | |
432 | '--image', ctx.ceph[cluster_name].image, | |
433 | 'pull', | |
434 | ], | |
435 | wait=False, | |
436 | ) | |
437 | ) | |
438 | ||
439 | try: | |
440 | yield | |
441 | finally: | |
442 | pass | |
443 | ||
aee94f69 TL |
444 | @contextlib.contextmanager |
445 | def setup_ca_signed_keys(ctx, config): | |
446 | # generate our ca key | |
447 | cluster_name = config['cluster'] | |
448 | bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote | |
449 | bootstrap_remote.run(args=[ | |
450 | 'sudo', 'ssh-keygen', '-t', 'rsa', '-f', '/root/ca-key', '-N', '' | |
451 | ]) | |
452 | ||
453 | # not using read_file here because it runs dd as a non-root | |
454 | # user and would hit permission issues | |
455 | r = bootstrap_remote.run(args=[ | |
456 | 'sudo', 'cat', '/root/ca-key.pub' | |
457 | ], stdout=StringIO()) | |
458 | ca_key_pub_contents = r.stdout.getvalue() | |
459 | ||
460 | # make CA key accepted on each host | |
461 | for remote in ctx.cluster.remotes.keys(): | |
462 | # write key to each host's /etc/ssh dir | |
463 | remote.run(args=[ | |
464 | 'sudo', 'echo', ca_key_pub_contents, | |
465 | run.Raw('|'), | |
466 | 'sudo', 'tee', '-a', '/etc/ssh/ca-key.pub', | |
467 | ]) | |
468 | # make sshd accept the CA signed key | |
469 | remote.run(args=[ | |
470 | 'sudo', 'echo', 'TrustedUserCAKeys /etc/ssh/ca-key.pub', | |
471 | run.Raw('|'), | |
472 | 'sudo', 'tee', '-a', '/etc/ssh/sshd_config', | |
473 | run.Raw('&&'), | |
474 | 'sudo', 'systemctl', 'restart', 'sshd', | |
475 | ]) | |
476 | ||
477 | # generate a new key pair and sign the pub key to make a cert | |
478 | bootstrap_remote.run(args=[ | |
479 | 'sudo', 'ssh-keygen', '-t', 'rsa', '-f', '/root/cephadm-ssh-key', '-N', '', | |
480 | run.Raw('&&'), | |
481 | 'sudo', 'ssh-keygen', '-s', '/root/ca-key', '-I', 'user_root', '-n', 'root', '-V', '+52w', '/root/cephadm-ssh-key', | |
482 | ]) | |
483 | ||
484 | # for debugging, to make sure this setup has worked as intended | |
485 | for remote in ctx.cluster.remotes.keys(): | |
486 | remote.run(args=[ | |
487 | 'sudo', 'cat', '/etc/ssh/ca-key.pub' | |
488 | ]) | |
489 | remote.run(args=[ | |
490 | 'sudo', 'cat', '/etc/ssh/sshd_config', | |
491 | run.Raw('|'), | |
492 | 'grep', 'TrustedUserCAKeys' | |
493 | ]) | |
494 | bootstrap_remote.run(args=[ | |
495 | 'sudo', 'ls', '/root/' | |
496 | ]) | |
497 | ||
498 | ctx.ca_signed_key_info = {} | |
499 | ctx.ca_signed_key_info['ca-key'] = '/root/ca-key' | |
500 | ctx.ca_signed_key_info['ca-key-pub'] = '/root/ca-key.pub' | |
501 | ctx.ca_signed_key_info['private-key'] = '/root/cephadm-ssh-key' | |
502 | ctx.ca_signed_key_info['ca-signed-cert'] = '/root/cephadm-ssh-key-cert.pub' | |
503 | ||
504 | try: | |
505 | yield | |
506 | finally: | |
507 | pass | |
20effc67 | 508 | |
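# Illustrative sketch: the CA-signed key material prepared above is consumed
# by ceph_bootstrap() only when the job config enables it, e.g.:
#
#   tasks:
#   - cephadm:
#       use-ca-signed-key: true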
9f95a23c | 509 | @contextlib.contextmanager |
f67539c2 | 510 | def ceph_bootstrap(ctx, config): |
e306af50 | 511 | """ |
f67539c2 | 512 | Bootstrap ceph cluster. |
e306af50 TL |
513 | |
514 | :param ctx: the argparse.Namespace object | |
515 | :param config: the config dict | |
e306af50 | 516 | """ |
9f95a23c TL |
517 | cluster_name = config['cluster'] |
518 | testdir = teuthology.get_testdir(ctx) | |
519 | fsid = ctx.ceph[cluster_name].fsid | |
520 | ||
1911f103 TL |
521 | bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote |
522 | first_mon = ctx.ceph[cluster_name].first_mon | |
523 | first_mon_role = ctx.ceph[cluster_name].first_mon_role | |
9f95a23c | 524 | mons = ctx.ceph[cluster_name].mons |
f91f0fd5 | 525 | |
9f95a23c TL |
526 | ctx.cluster.run(args=[ |
527 | 'sudo', 'mkdir', '-p', '/etc/ceph', | |
528 | ]) | |
529 | ctx.cluster.run(args=[ | |
530 | 'sudo', 'chmod', '777', '/etc/ceph', | |
531 | ]) | |
532 | try: | |
533 | # write seed config | |
534 | log.info('Writing seed config...') | |
535 | conf_fp = BytesIO() | |
536 | seed_config = build_initial_config(ctx, config) | |
537 | seed_config.write(conf_fp) | |
f67539c2 | 538 | bootstrap_remote.write_file( |
9f95a23c TL |
539 | path='{}/seed.{}.conf'.format(testdir, cluster_name), |
540 | data=conf_fp.getvalue()) | |
e306af50 | 541 | log.debug('Final config:\n' + conf_fp.getvalue().decode()) |
9f95a23c TL |
542 | ctx.ceph[cluster_name].conf = seed_config |
543 | ||
544 | # register initial daemons | |
545 | ctx.daemons.register_daemon( | |
546 | bootstrap_remote, 'mon', first_mon, | |
547 | cluster=cluster_name, | |
548 | fsid=fsid, | |
549 | logger=log.getChild('mon.' + first_mon), | |
550 | wait=False, | |
551 | started=True, | |
552 | ) | |
1911f103 TL |
553 | if not ctx.ceph[cluster_name].roleless: |
554 | first_mgr = ctx.ceph[cluster_name].first_mgr | |
555 | ctx.daemons.register_daemon( | |
556 | bootstrap_remote, 'mgr', first_mgr, | |
557 | cluster=cluster_name, | |
558 | fsid=fsid, | |
559 | logger=log.getChild('mgr.' + first_mgr), | |
560 | wait=False, | |
561 | started=True, | |
562 | ) | |
9f95a23c TL |
563 | |
564 | # bootstrap | |
565 | log.info('Bootstrapping...') | |
566 | cmd = [ | |
567 | 'sudo', | |
568 | ctx.cephadm, | |
569 | '--image', ctx.ceph[cluster_name].image, | |
570 | '-v', | |
571 | 'bootstrap', | |
572 | '--fsid', fsid, | |
9f95a23c TL |
573 | '--config', '{}/seed.{}.conf'.format(testdir, cluster_name), |
574 | '--output-config', '/etc/ceph/{}.conf'.format(cluster_name), | |
575 | '--output-keyring', | |
576 | '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), | |
9f95a23c | 577 | ] |
aee94f69 TL |
578 | |
579 | if not config.get("use-ca-signed-key", False): | |
580 | cmd += ['--output-pub-ssh-key', '{}/{}.pub'.format(testdir, cluster_name)] | |
581 | else: | |
582 | # ctx.ca_signed_key_info should have been set up in | |
583 | # setup_ca_signed_keys function which we expect to have | |
584 | # run before bootstrap if use-ca-signed-key is true | |
585 | signed_key_info = ctx.ca_signed_key_info | |
586 | cmd += [ | |
587 | "--ssh-private-key", signed_key_info['private-key'], | |
588 | "--ssh-signed-cert", signed_key_info['ca-signed-cert'], | |
589 | ] | |
590 | ||
1e59de90 TL |
591 | if config.get("no_cgroups_split") is True: |
592 | cmd.insert(cmd.index("bootstrap"), "--no-cgroups-split") | |
b3b6e05e TL |
593 | |
594 | if config.get('registry-login'): | |
595 | registry = config['registry-login'] | |
596 | cmd += [ | |
597 | "--registry-url", registry['url'], | |
598 | "--registry-username", registry['username'], | |
599 | "--registry-password", registry['password'], | |
600 | ] | |
601 | ||
1911f103 TL |
602 | if not ctx.ceph[cluster_name].roleless: |
603 | cmd += [ | |
604 | '--mon-id', first_mon, | |
605 | '--mgr-id', first_mgr, | |
606 | '--orphan-initial-daemons', # we will do it explicitly! | |
607 | '--skip-monitoring-stack', # we'll provision these explicitly | |
608 | ] | |
b3b6e05e | 609 | |
9f95a23c TL |
610 | if mons[first_mon_role].startswith('['): |
611 | cmd += ['--mon-addrv', mons[first_mon_role]] | |
612 | else: | |
613 | cmd += ['--mon-ip', mons[first_mon_role]] | |
614 | if config.get('skip_dashboard'): | |
615 | cmd += ['--skip-dashboard'] | |
f67539c2 TL |
616 | if config.get('skip_monitoring_stack'): |
617 | cmd += ['--skip-monitoring-stack'] | |
b3b6e05e TL |
618 | if config.get('single_host_defaults'): |
619 | cmd += ['--single-host-defaults'] | |
620 | if not config.get('avoid_pacific_features', False): | |
621 | cmd += ['--skip-admin-label'] | |
9f95a23c TL |
622 | # bootstrap makes the keyring root 0600, so +r it for our purposes |
623 | cmd += [ | |
624 | run.Raw('&&'), | |
625 | 'sudo', 'chmod', '+r', | |
626 | '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), | |
627 | ] | |
628 | bootstrap_remote.run(args=cmd) | |
629 | ||
630 | # fetch keys and configs | |
631 | log.info('Fetching config...') | |
f67539c2 TL |
632 | ctx.ceph[cluster_name].config_file = \ |
633 | bootstrap_remote.read_file(f'/etc/ceph/{cluster_name}.conf') | |
9f95a23c | 634 | log.info('Fetching client.admin keyring...') |
f67539c2 TL |
635 | ctx.ceph[cluster_name].admin_keyring = \ |
636 | bootstrap_remote.read_file(f'/etc/ceph/{cluster_name}.client.admin.keyring') | |
9f95a23c | 637 | log.info('Fetching mon keyring...') |
f67539c2 TL |
638 | ctx.ceph[cluster_name].mon_keyring = \ |
639 | bootstrap_remote.read_file(f'/var/lib/ceph/{fsid}/mon.{first_mon}/keyring', sudo=True) | |
9f95a23c | 640 | |
aee94f69 TL |
641 | if not config.get("use-ca-signed-key", False): |
642 | # fetch ssh key, distribute to additional nodes | |
643 | log.info('Fetching pub ssh key...') | |
644 | ssh_pub_key = bootstrap_remote.read_file( | |
645 | f'{testdir}/{cluster_name}.pub').decode('ascii').strip() | |
9f95a23c | 646 | |
aee94f69 TL |
647 | log.info('Installing pub ssh key for root users...') |
648 | ctx.cluster.run(args=[ | |
649 | 'sudo', 'install', '-d', '-m', '0700', '/root/.ssh', | |
650 | run.Raw('&&'), | |
651 | 'echo', ssh_pub_key, | |
652 | run.Raw('|'), | |
653 | 'sudo', 'tee', '-a', '/root/.ssh/authorized_keys', | |
654 | run.Raw('&&'), | |
655 | 'sudo', 'chmod', '0600', '/root/.ssh/authorized_keys', | |
656 | ]) | |
9f95a23c TL |
657 | |
658 | # set options | |
f67539c2 TL |
659 | if config.get('allow_ptrace', True): |
660 | _shell(ctx, cluster_name, bootstrap_remote, | |
661 | ['ceph', 'config', 'set', 'mgr', 'mgr/cephadm/allow_ptrace', 'true']) | |
9f95a23c | 662 | |
b3b6e05e TL |
663 | if not config.get('avoid_pacific_features', False): |
664 | log.info('Distributing conf and client.admin keyring to all hosts + 0755') | |
665 | _shell(ctx, cluster_name, bootstrap_remote, | |
666 | ['ceph', 'orch', 'client-keyring', 'set', 'client.admin', | |
667 | '*', '--mode', '0755'], | |
668 | check_status=False) | |
669 | ||
9f95a23c TL |
670 | # add other hosts |
671 | for remote in ctx.cluster.remotes.keys(): | |
672 | if remote == bootstrap_remote: | |
673 | continue | |
b3b6e05e TL |
674 | |
675 | # note: this may be redundant (see above), but it avoids | |
676 | # us having to wait for cephadm to do it. | |
1911f103 | 677 | log.info('Writing (initial) conf and keyring to %s' % remote.shortname) |
f67539c2 | 678 | remote.write_file( |
9f95a23c TL |
679 | path='/etc/ceph/{}.conf'.format(cluster_name), |
680 | data=ctx.ceph[cluster_name].config_file) | |
f67539c2 | 681 | remote.write_file( |
9f95a23c TL |
682 | path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name), |
683 | data=ctx.ceph[cluster_name].admin_keyring) | |
684 | ||
685 | log.info('Adding host %s to orchestrator...' % remote.shortname) | |
20effc67 | 686 | _shell(ctx, cluster_name, bootstrap_remote, [ |
9f95a23c TL |
687 | 'ceph', 'orch', 'host', 'add', |
688 | remote.shortname | |
689 | ]) | |
20effc67 | 690 | r = _shell(ctx, cluster_name, bootstrap_remote, |
9f95a23c | 691 | ['ceph', 'orch', 'host', 'ls', '--format=json'], |
e306af50 | 692 | stdout=StringIO()) |
9f95a23c TL |
693 | hosts = [node['hostname'] for node in json.loads(r.stdout.getvalue())] |
694 | assert remote.shortname in hosts | |
695 | ||
696 | yield | |
697 | ||
698 | finally: | |
699 | log.info('Cleaning up testdir ceph.* files...') | |
700 | ctx.cluster.run(args=[ | |
701 | 'rm', '-f', | |
702 | '{}/seed.{}.conf'.format(testdir, cluster_name), | |
703 | '{}/{}.pub'.format(testdir, cluster_name), | |
704 | ]) | |
705 | ||
706 | log.info('Stopping all daemons...') | |
707 | ||
708 | # this doesn't block until they are all stopped... | |
709 | #ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target']) | |
710 | ||
f67539c2 | 711 | # stop the daemons we know |
e306af50 | 712 | for role in ctx.daemons.resolve_role_list(None, CEPH_ROLE_TYPES, True): |
9f95a23c | 713 | cluster, type_, id_ = teuthology.split_role(role) |
e306af50 TL |
714 | try: |
715 | ctx.daemons.get_daemon(type_, id_, cluster).stop() | |
716 | except Exception: | |
f67539c2 | 717 | log.exception(f'Failed to stop "{role}"') |
f91f0fd5 | 718 | raise |
9f95a23c | 719 | |
f67539c2 TL |
720 | # tear down anything left (but leave the logs behind) |
721 | ctx.cluster.run( | |
722 | args=[ | |
723 | 'sudo', | |
724 | ctx.cephadm, | |
725 | 'rm-cluster', | |
726 | '--fsid', fsid, | |
727 | '--force', | |
728 | '--keep-logs', | |
729 | ], | |
730 | check_status=False, # may fail if upgrading from old cephadm | |
731 | ) | |
732 | ||
9f95a23c TL |
733 | # clean up /etc/ceph |
734 | ctx.cluster.run(args=[ | |
735 | 'sudo', 'rm', '-f', | |
736 | '/etc/ceph/{}.conf'.format(cluster_name), | |
737 | '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), | |
738 | ]) | |
739 | ||
b3b6e05e | 740 | |
9f95a23c TL |
741 | @contextlib.contextmanager |
742 | def ceph_mons(ctx, config): | |
743 | """ | |
744 | Deploy any additional mons | |
745 | """ | |
746 | cluster_name = config['cluster'] | |
747 | fsid = ctx.ceph[cluster_name].fsid | |
9f95a23c TL |
748 | |
749 | try: | |
f67539c2 TL |
750 | daemons = {} |
751 | if config.get('add_mons_via_daemon_add'): | |
752 | # This is the old way of adding mons that works with the (early) octopus | |
753 | # cephadm scheduler. | |
754 | num_mons = 1 | |
755 | for remote, roles in ctx.cluster.remotes.items(): | |
756 | for mon in [r for r in roles | |
757 | if teuthology.is_type('mon', cluster_name)(r)]: | |
758 | c_, _, id_ = teuthology.split_role(mon) | |
759 | if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon: | |
760 | continue | |
761 | log.info('Adding %s on %s' % (mon, remote.shortname)) | |
762 | num_mons += 1 | |
763 | _shell(ctx, cluster_name, remote, [ | |
764 | 'ceph', 'orch', 'daemon', 'add', 'mon', | |
765 | remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] + '=' + id_, | |
766 | ]) | |
767 | ctx.daemons.register_daemon( | |
768 | remote, 'mon', id_, | |
769 | cluster=cluster_name, | |
770 | fsid=fsid, | |
771 | logger=log.getChild(mon), | |
772 | wait=False, | |
773 | started=True, | |
774 | ) | |
775 | daemons[mon] = (remote, id_) | |
776 | ||
777 | with contextutil.safe_while(sleep=1, tries=180) as proceed: | |
778 | while proceed(): | |
779 | log.info('Waiting for %d mons in monmap...' % (num_mons)) | |
780 | r = _shell( | |
781 | ctx=ctx, | |
782 | cluster_name=cluster_name, | |
783 | remote=remote, | |
784 | args=[ | |
785 | 'ceph', 'mon', 'dump', '-f', 'json', | |
786 | ], | |
787 | stdout=StringIO(), | |
788 | ) | |
789 | j = json.loads(r.stdout.getvalue()) | |
790 | if len(j['mons']) == num_mons: | |
791 | break | |
792 | else: | |
793 | nodes = [] | |
794 | for remote, roles in ctx.cluster.remotes.items(): | |
795 | for mon in [r for r in roles | |
796 | if teuthology.is_type('mon', cluster_name)(r)]: | |
797 | c_, _, id_ = teuthology.split_role(mon) | |
798 | log.info('Adding %s on %s' % (mon, remote.shortname)) | |
799 | nodes.append(remote.shortname | |
800 | + ':' + ctx.ceph[cluster_name].mons[mon] | |
801 | + '=' + id_) | |
802 | if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon: | |
803 | continue | |
804 | daemons[mon] = (remote, id_) | |
805 | ||
806 | _shell(ctx, cluster_name, remote, [ | |
807 | 'ceph', 'orch', 'apply', 'mon', | |
808 | str(len(nodes)) + ';' + ';'.join(nodes)] | |
809 | ) | |
810 | for mon, i in daemons.items(): | |
811 | remote, id_ = i | |
9f95a23c TL |
812 | ctx.daemons.register_daemon( |
813 | remote, 'mon', id_, | |
814 | cluster=cluster_name, | |
815 | fsid=fsid, | |
816 | logger=log.getChild(mon), | |
817 | wait=False, | |
818 | started=True, | |
819 | ) | |
820 | ||
f67539c2 TL |
821 | with contextutil.safe_while(sleep=1, tries=180) as proceed: |
822 | while proceed(): | |
823 | log.info('Waiting for %d mons in monmap...' % (len(nodes))) | |
824 | r = _shell( | |
825 | ctx=ctx, | |
826 | cluster_name=cluster_name, | |
827 | remote=remote, | |
828 | args=[ | |
829 | 'ceph', 'mon', 'dump', '-f', 'json', | |
830 | ], | |
831 | stdout=StringIO(), | |
832 | ) | |
833 | j = json.loads(r.stdout.getvalue()) | |
834 | if len(j['mons']) == len(nodes): | |
835 | break | |
9f95a23c | 836 | |
1911f103 | 837 | # refresh our (final) ceph.conf file |
f91f0fd5 | 838 | bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote |
1911f103 TL |
839 | log.info('Generating final ceph.conf file...') |
840 | r = _shell( | |
841 | ctx=ctx, | |
842 | cluster_name=cluster_name, | |
f91f0fd5 | 843 | remote=bootstrap_remote, |
1911f103 TL |
844 | args=[ |
845 | 'ceph', 'config', 'generate-minimal-conf', | |
846 | ], | |
e306af50 | 847 | stdout=StringIO(), |
1911f103 TL |
848 | ) |
849 | ctx.ceph[cluster_name].config_file = r.stdout.getvalue() | |
9f95a23c TL |
850 | |
851 | yield | |
852 | ||
853 | finally: | |
854 | pass | |
855 | ||
b3b6e05e | 856 | |
9f95a23c TL |
857 | @contextlib.contextmanager |
858 | def ceph_mgrs(ctx, config): | |
859 | """ | |
860 | Deploy any additional mgrs | |
861 | """ | |
862 | cluster_name = config['cluster'] | |
863 | fsid = ctx.ceph[cluster_name].fsid | |
864 | ||
865 | try: | |
866 | nodes = [] | |
867 | daemons = {} | |
868 | for remote, roles in ctx.cluster.remotes.items(): | |
869 | for mgr in [r for r in roles | |
870 | if teuthology.is_type('mgr', cluster_name)(r)]: | |
871 | c_, _, id_ = teuthology.split_role(mgr) | |
9f95a23c TL |
872 | log.info('Adding %s on %s' % (mgr, remote.shortname)) |
873 | nodes.append(remote.shortname + '=' + id_) | |
f67539c2 TL |
874 | if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mgr: |
875 | continue | |
9f95a23c TL |
876 | daemons[mgr] = (remote, id_) |
877 | if nodes: | |
878 | _shell(ctx, cluster_name, remote, [ | |
879 | 'ceph', 'orch', 'apply', 'mgr', | |
f67539c2 | 880 | str(len(nodes)) + ';' + ';'.join(nodes)] |
9f95a23c TL |
881 | ) |
882 | for mgr, i in daemons.items(): | |
883 | remote, id_ = i | |
884 | ctx.daemons.register_daemon( | |
885 | remote, 'mgr', id_, | |
886 | cluster=cluster_name, | |
887 | fsid=fsid, | |
888 | logger=log.getChild(mgr), | |
889 | wait=False, | |
890 | started=True, | |
891 | ) | |
892 | ||
893 | yield | |
894 | ||
895 | finally: | |
896 | pass | |
897 | ||
b3b6e05e | 898 | |
9f95a23c TL |
899 | @contextlib.contextmanager |
900 | def ceph_osds(ctx, config): | |
901 | """ | |
902 | Deploy OSDs | |
903 | """ | |
904 | cluster_name = config['cluster'] | |
905 | fsid = ctx.ceph[cluster_name].fsid | |
1911f103 | 906 | |
9f95a23c TL |
907 | try: |
908 | log.info('Deploying OSDs...') | |
909 | ||
910 | # provision OSDs in numeric order | |
911 | id_to_remote = {} | |
912 | devs_by_remote = {} | |
913 | for remote, roles in ctx.cluster.remotes.items(): | |
914 | devs_by_remote[remote] = teuthology.get_scratch_devices(remote) | |
915 | for osd in [r for r in roles | |
916 | if teuthology.is_type('osd', cluster_name)(r)]: | |
917 | _, _, id_ = teuthology.split_role(osd) | |
918 | id_to_remote[int(id_)] = (osd, remote) | |
919 | ||
920 | cur = 0 | |
921 | for osd_id in sorted(id_to_remote.keys()): | |
922 | osd, remote = id_to_remote[osd_id] | |
923 | _, _, id_ = teuthology.split_role(osd) | |
924 | assert int(id_) == cur | |
925 | devs = devs_by_remote[remote] | |
926 | assert devs ## FIXME ## | |
927 | dev = devs.pop() | |
e306af50 TL |
928 | if all(_ in dev for _ in ('lv', 'vg')): |
929 | short_dev = dev.replace('/dev/', '') | |
930 | else: | |
931 | short_dev = dev | |
9f95a23c TL |
932 | log.info('Deploying %s on %s with %s...' % ( |
933 | osd, remote.shortname, dev)) | |
934 | _shell(ctx, cluster_name, remote, [ | |
935 | 'ceph-volume', 'lvm', 'zap', dev]) | |
1e59de90 TL |
936 | add_osd_args = ['ceph', 'orch', 'daemon', 'add', 'osd', |
937 | remote.shortname + ':' + short_dev] | |
938 | osd_method = config.get('osd_method') | |
939 | if osd_method: | |
940 | add_osd_args.append(osd_method) | |
941 | _shell(ctx, cluster_name, remote, add_osd_args) | |
9f95a23c TL |
942 | ctx.daemons.register_daemon( |
943 | remote, 'osd', id_, | |
944 | cluster=cluster_name, | |
945 | fsid=fsid, | |
946 | logger=log.getChild(osd), | |
947 | wait=False, | |
948 | started=True, | |
949 | ) | |
950 | cur += 1 | |
951 | ||
20effc67 TL |
952 | if cur == 0: |
953 | _shell(ctx, cluster_name, remote, [ | |
954 | 'ceph', 'orch', 'apply', 'osd', '--all-available-devices', | |
955 | ]) | |
956 | # expect the number of scratch devs | |
957 | num_osds = sum(map(len, devs_by_remote.values())) | |
958 | assert num_osds | |
959 | else: | |
960 | # expect the number of OSDs we created | |
961 | num_osds = cur | |
962 | ||
963 | log.info(f'Waiting for {num_osds} OSDs to come up...') | |
964 | with contextutil.safe_while(sleep=1, tries=120) as proceed: | |
965 | while proceed(): | |
966 | p = _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, | |
967 | ['ceph', 'osd', 'stat', '-f', 'json'], stdout=StringIO()) | |
968 | j = json.loads(p.stdout.getvalue()) | |
969 | if int(j.get('num_up_osds', 0)) == num_osds: | |
970 | break | |
971 | ||
972 | if not hasattr(ctx, 'managers'): | |
973 | ctx.managers = {} | |
974 | ctx.managers[cluster_name] = CephManager( | |
975 | ctx.ceph[cluster_name].bootstrap_remote, | |
976 | ctx=ctx, | |
977 | logger=log.getChild('ceph_manager.' + cluster_name), | |
978 | cluster=cluster_name, | |
979 | cephadm=True, | |
980 | ) | |
981 | ||
9f95a23c TL |
982 | yield |
983 | finally: | |
984 | pass | |
985 | ||
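# Illustrative sketch of the 'osd_method' option consumed above; the value is
# appended to 'ceph orch daemon add osd <host>:<dev>':
#
#   tasks:
#   - cephadm:
#       osd_method: raw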
b3b6e05e | 986 | |
9f95a23c TL |
987 | @contextlib.contextmanager |
988 | def ceph_mdss(ctx, config): | |
989 | """ | |
990 | Deploy MDSs | |
991 | """ | |
992 | cluster_name = config['cluster'] | |
993 | fsid = ctx.ceph[cluster_name].fsid | |
994 | ||
995 | nodes = [] | |
996 | daemons = {} | |
997 | for remote, roles in ctx.cluster.remotes.items(): | |
998 | for role in [r for r in roles | |
999 | if teuthology.is_type('mds', cluster_name)(r)]: | |
1000 | c_, _, id_ = teuthology.split_role(role) | |
1001 | log.info('Adding %s on %s' % (role, remote.shortname)) | |
1002 | nodes.append(remote.shortname + '=' + id_) | |
1003 | daemons[role] = (remote, id_) | |
1004 | if nodes: | |
1005 | _shell(ctx, cluster_name, remote, [ | |
1006 | 'ceph', 'orch', 'apply', 'mds', | |
1007 | 'all', | |
1008 | str(len(nodes)) + ';' + ';'.join(nodes)] | |
1009 | ) | |
1010 | for role, i in daemons.items(): | |
1011 | remote, id_ = i | |
1012 | ctx.daemons.register_daemon( | |
1013 | remote, 'mds', id_, | |
1014 | cluster=cluster_name, | |
1015 | fsid=fsid, | |
1016 | logger=log.getChild(role), | |
1017 | wait=False, | |
1018 | started=True, | |
1019 | ) | |
1020 | ||
1021 | yield | |
1022 | ||
20effc67 TL |
1023 | @contextlib.contextmanager |
1024 | def cephfs_setup(ctx, config): | |
1025 | mdss = list(teuthology.all_roles_of_type(ctx.cluster, 'mds')) | |
1026 | ||
1027 | # If there are any MDSs, then create a filesystem for them to use | |
1028 | # Do this last because requires mon cluster to be up and running | |
1029 | if len(mdss) > 0: | |
1030 | log.info('Setting up CephFS filesystem(s)...') | |
1031 | cephfs_config = config.get('cephfs', {}) | |
1032 | fs_configs = cephfs_config.pop('fs', [{'name': 'cephfs'}]) | |
1033 | set_allow_multifs = len(fs_configs) > 1 | |
1034 | ||
1035 | # wait for standbys to become available (slow due to valgrind, perhaps) | |
1036 | mdsc = MDSCluster(ctx) | |
1037 | with contextutil.safe_while(sleep=2,tries=150) as proceed: | |
1038 | while proceed(): | |
1039 | if len(mdsc.get_standby_daemons()) >= len(mdss): | |
1040 | break | |
1041 | ||
1042 | fss = [] | |
1043 | for fs_config in fs_configs: | |
1044 | assert isinstance(fs_config, dict) | |
1045 | name = fs_config.pop('name') | |
1046 | temp = deepcopy(cephfs_config) | |
1047 | teuthology.deep_merge(temp, fs_config) | |
1e59de90 TL |
1048 | subvols = config.get('subvols', None) |
1049 | if subvols: | |
1050 | teuthology.deep_merge(temp, {'subvols': subvols}) | |
20effc67 TL |
1051 | fs = Filesystem(ctx, fs_config=temp, name=name, create=True) |
1052 | if set_allow_multifs: | |
1053 | fs.set_allow_multifs() | |
1054 | set_allow_multifs = False | |
1055 | fss.append(fs) | |
1056 | ||
1057 | yield | |
1058 | ||
1059 | for fs in fss: | |
1060 | fs.destroy() | |
1061 | else: | |
1062 | yield | |
b3b6e05e | 1063 | |
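# Illustrative sketch of the 'cephfs' configuration consumed above; listing
# more than one entry under 'fs' triggers set_allow_multifs():
#
#   tasks:
#   - cephadm:
#       cephfs:
#         fs:
#         - name: a
#         - name: b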
9f95a23c TL |
1064 | @contextlib.contextmanager |
1065 | def ceph_monitoring(daemon_type, ctx, config): | |
1066 | """ | |
1067 | Deploy prometheus, node-exporter, etc. | |
1068 | """ | |
1069 | cluster_name = config['cluster'] | |
1070 | fsid = ctx.ceph[cluster_name].fsid | |
1071 | ||
1072 | nodes = [] | |
1073 | daemons = {} | |
1074 | for remote, roles in ctx.cluster.remotes.items(): | |
1075 | for role in [r for r in roles | |
1076 | if teuthology.is_type(daemon_type, cluster_name)(r)]: | |
1077 | c_, _, id_ = teuthology.split_role(role) | |
1078 | log.info('Adding %s on %s' % (role, remote.shortname)) | |
1079 | nodes.append(remote.shortname + '=' + id_) | |
1080 | daemons[role] = (remote, id_) | |
1081 | if nodes: | |
1082 | _shell(ctx, cluster_name, remote, [ | |
1083 | 'ceph', 'orch', 'apply', daemon_type, | |
1084 | str(len(nodes)) + ';' + ';'.join(nodes)] | |
1085 | ) | |
1086 | for role, i in daemons.items(): | |
1087 | remote, id_ = i | |
1088 | ctx.daemons.register_daemon( | |
1089 | remote, daemon_type, id_, | |
1090 | cluster=cluster_name, | |
1091 | fsid=fsid, | |
1092 | logger=log.getChild(role), | |
1093 | wait=False, | |
1094 | started=True, | |
1095 | ) | |
1096 | ||
1097 | yield | |
1098 | ||
b3b6e05e | 1099 | |
9f95a23c TL |
1100 | @contextlib.contextmanager |
1101 | def ceph_rgw(ctx, config): | |
1102 | """ | |
1103 | Deploy rgw | |
1104 | """ | |
1105 | cluster_name = config['cluster'] | |
1106 | fsid = ctx.ceph[cluster_name].fsid | |
1107 | ||
1108 | nodes = {} | |
1109 | daemons = {} | |
1110 | for remote, roles in ctx.cluster.remotes.items(): | |
1111 | for role in [r for r in roles | |
1112 | if teuthology.is_type('rgw', cluster_name)(r)]: | |
1113 | c_, _, id_ = teuthology.split_role(role) | |
1114 | log.info('Adding %s on %s' % (role, remote.shortname)) | |
f67539c2 TL |
1115 | svc = '.'.join(id_.split('.')[0:2]) |
1116 | if svc not in nodes: | |
1117 | nodes[svc] = [] | |
1118 | nodes[svc].append(remote.shortname + '=' + id_) | |
9f95a23c | 1119 | daemons[role] = (remote, id_) |
e306af50 | 1120 | |
f67539c2 | 1121 | for svc, nodes in nodes.items(): |
9f95a23c | 1122 | _shell(ctx, cluster_name, remote, [ |
f67539c2 | 1123 | 'ceph', 'orch', 'apply', 'rgw', svc, |
e306af50 TL |
1124 | '--placement', |
1125 | str(len(nodes)) + ';' + ';'.join(nodes)] | |
9f95a23c TL |
1126 | ) |
1127 | for role, i in daemons.items(): | |
1128 | remote, id_ = i | |
1129 | ctx.daemons.register_daemon( | |
1130 | remote, 'rgw', id_, | |
1131 | cluster=cluster_name, | |
1132 | fsid=fsid, | |
1133 | logger=log.getChild(role), | |
1134 | wait=False, | |
1135 | started=True, | |
1136 | ) | |
1137 | ||
1138 | yield | |
1139 | ||
f91f0fd5 TL |
1140 | |
1141 | @contextlib.contextmanager | |
1142 | def ceph_iscsi(ctx, config): | |
1143 | """ | |
1144 | Deploy iSCSIs | |
1145 | """ | |
1146 | cluster_name = config['cluster'] | |
1147 | fsid = ctx.ceph[cluster_name].fsid | |
1148 | ||
1149 | nodes = [] | |
1150 | daemons = {} | |
20effc67 TL |
1151 | ips = [] |
1152 | ||
f91f0fd5 TL |
1153 | for remote, roles in ctx.cluster.remotes.items(): |
1154 | for role in [r for r in roles | |
20effc67 | 1155 | if teuthology.is_type('iscsi', cluster_name)(r)]: |
f91f0fd5 TL |
1156 | c_, _, id_ = teuthology.split_role(role) |
1157 | log.info('Adding %s on %s' % (role, remote.shortname)) | |
1158 | nodes.append(remote.shortname + '=' + id_) | |
1159 | daemons[role] = (remote, id_) | |
20effc67 TL |
1160 | ips.append(remote.ip_address) |
1161 | trusted_ip_list = ','.join(ips) | |
f91f0fd5 | 1162 | if nodes: |
20effc67 TL |
1163 | poolname = 'datapool' |
1164 | # ceph osd pool create datapool 3 3 replicated | |
f91f0fd5 TL |
1165 | _shell(ctx, cluster_name, remote, [ |
1166 | 'ceph', 'osd', 'pool', 'create', | |
1167 | poolname, '3', '3', 'replicated'] | |
1168 | ) | |
1169 | ||
1170 | _shell(ctx, cluster_name, remote, [ | |
20effc67 | 1171 | 'rbd', 'pool', 'init', poolname] |
f91f0fd5 TL |
1172 | ) |
1173 | ||
20effc67 | 1174 | # ceph orch apply iscsi datapool (admin)user (admin)password |
f91f0fd5 TL |
1175 | _shell(ctx, cluster_name, remote, [ |
1176 | 'ceph', 'orch', 'apply', 'iscsi', | |
20effc67 TL |
1177 | poolname, 'admin', 'admin', |
1178 | '--trusted_ip_list', trusted_ip_list, | |
f91f0fd5 TL |
1179 | '--placement', str(len(nodes)) + ';' + ';'.join(nodes)] |
1180 | ) | |
20effc67 TL |
1181 | |
1182 | # used by iscsi clients to identify valid gateway IPs | |
1183 | conf_data = dedent(f""" | |
1184 | [config] | |
1185 | trusted_ip_list = {trusted_ip_list} | |
1186 | """) | |
1187 | distribute_iscsi_gateway_cfg(ctx, conf_data) | |
1188 | ||
f91f0fd5 TL |
1189 | for role, i in daemons.items(): |
1190 | remote, id_ = i | |
1191 | ctx.daemons.register_daemon( | |
1192 | remote, 'iscsi', id_, | |
1193 | cluster=cluster_name, | |
1194 | fsid=fsid, | |
1195 | logger=log.getChild(role), | |
1196 | wait=False, | |
1197 | started=True, | |
1198 | ) | |
1199 | ||
1200 | yield | |
1201 | ||
b3b6e05e | 1202 | |
9f95a23c TL |
1203 | @contextlib.contextmanager |
1204 | def ceph_clients(ctx, config): | |
1205 | cluster_name = config['cluster'] | |
9f95a23c TL |
1206 | |
1207 | log.info('Setting up client nodes...') | |
1208 | clients = ctx.cluster.only(teuthology.is_type('client', cluster_name)) | |
9f95a23c TL |
1209 | for remote, roles_for_host in clients.remotes.items(): |
1210 | for role in teuthology.cluster_roles_of_type(roles_for_host, 'client', | |
1211 | cluster_name): | |
1212 | name = teuthology.ceph_role(role) | |
1213 | client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name, | |
1214 | name) | |
1215 | r = _shell( | |
1216 | ctx=ctx, | |
1217 | cluster_name=cluster_name, | |
1218 | remote=remote, | |
1219 | args=[ | |
1220 | 'ceph', 'auth', | |
1221 | 'get-or-create', name, | |
1222 | 'mon', 'allow *', | |
1223 | 'osd', 'allow *', | |
1224 | 'mds', 'allow *', | |
1225 | 'mgr', 'allow *', | |
1226 | ], | |
e306af50 | 1227 | stdout=StringIO(), |
9f95a23c TL |
1228 | ) |
1229 | keyring = r.stdout.getvalue() | |
f67539c2 | 1230 | remote.sudo_write_file(client_keyring, keyring, mode='0644') |
9f95a23c TL |
1231 | yield |
1232 | ||
b3b6e05e | 1233 | |
9f95a23c TL |
1234 | @contextlib.contextmanager |
1235 | def ceph_initial(): | |
1236 | try: | |
1237 | yield | |
1238 | finally: | |
1239 | log.info('Teardown complete') | |
1240 | ||
b3b6e05e | 1241 | |
9f95a23c TL |
1242 | ## public methods |
1243 | @contextlib.contextmanager | |
1244 | def stop(ctx, config): | |
1245 | """ | |
1246 | Stop ceph daemons | |
1247 | ||
1248 | For example:: | |
1249 | tasks: | |
1250 | - ceph.stop: [mds.*] | |
1251 | ||
1252 | tasks: | |
1253 | - ceph.stop: [osd.0, osd.2] | |
1254 | ||
1255 | tasks: | |
1256 | - ceph.stop: | |
1257 | daemons: [osd.0, osd.2] | |
1258 | ||
1259 | """ | |
1260 | if config is None: | |
1261 | config = {} | |
1262 | elif isinstance(config, list): | |
1263 | config = {'daemons': config} | |
1264 | ||
1265 | daemons = ctx.daemons.resolve_role_list( | |
1266 | config.get('daemons', None), CEPH_ROLE_TYPES, True) | |
1267 | clusters = set() | |
1268 | ||
1269 | for role in daemons: | |
1270 | cluster, type_, id_ = teuthology.split_role(role) | |
1271 | ctx.daemons.get_daemon(type_, id_, cluster).stop() | |
1272 | clusters.add(cluster) | |
1273 | ||
1274 | # for cluster in clusters: | |
1275 | # ctx.ceph[cluster].watchdog.stop() | |
1276 | # ctx.ceph[cluster].watchdog.join() | |
1277 | ||
1278 | yield | |
1279 | ||
b3b6e05e | 1280 | |
9f95a23c TL |
1281 | def shell(ctx, config): |
1282 | """ | |
1283 | Execute (shell) commands | |
1284 | """ | |
1285 | cluster_name = config.get('cluster', 'ceph') | |
1286 | ||
b3b6e05e TL |
1287 | args = [] |
1288 | for k in config.pop('env', []): | |
1289 | args.extend(['-e', k + '=' + ctx.config.get(k, '')]) | |
1290 | for k in config.pop('volumes', []): | |
1291 | args.extend(['-v', k]) | |
9f95a23c | 1292 | |
f67539c2 TL |
1293 | if 'all-roles' in config and len(config) == 1: |
1294 | a = config['all-roles'] | |
9f95a23c | 1295 | roles = teuthology.all_roles(ctx.cluster) |
f67539c2 TL |
1296 | config = dict((id_, a) for id_ in roles if not id_.startswith('host.')) |
1297 | elif 'all-hosts' in config and len(config) == 1: | |
1298 | a = config['all-hosts'] | |
1299 | roles = teuthology.all_roles(ctx.cluster) | |
1300 | config = dict((id_, a) for id_ in roles if id_.startswith('host.')) | |
9f95a23c | 1301 | |
f67539c2 | 1302 | for role, cmd in config.items(): |
9f95a23c TL |
1303 | (remote,) = ctx.cluster.only(role).remotes.keys() |
1304 | log.info('Running commands on role %s host %s', role, remote.name) | |
f67539c2 TL |
1305 | if isinstance(cmd, list): |
1306 | for c in cmd: | |
1307 | _shell(ctx, cluster_name, remote, | |
1308 | ['bash', '-c', subst_vip(ctx, c)], | |
b3b6e05e | 1309 | extra_cephadm_args=args) |
f67539c2 TL |
1310 | else: |
1311 | assert isinstance(cmd, str) | |
9f95a23c | 1312 | _shell(ctx, cluster_name, remote, |
f67539c2 | 1313 | ['bash', '-ex', '-c', subst_vip(ctx, cmd)], |
b3b6e05e | 1314 | extra_cephadm_args=args) |
9f95a23c | 1315 | |
f67539c2 TL |
1316 | |
1317 | def apply(ctx, config): | |
1318 | """ | |
1319 | Apply spec | |
1320 | ||
1321 | tasks: | |
1322 | - cephadm.apply: | |
1323 | specs: | |
1324 | - service_type: rgw | |
1325 | service_id: foo | |
1326 | spec: | |
1327 | rgw_frontend_port: 8000 | |
1328 | - service_type: rgw | |
1329 | service_id: bar | |
1330 | spec: | |
1331 | rgw_frontend_port: 9000 | |
1332 | zone: bar | |
1333 | realm: asdf | |
1334 | ||
1335 | """ | |
1336 | cluster_name = config.get('cluster', 'ceph') | |
1337 | ||
1338 | specs = config.get('specs', []) | |
1339 | y = subst_vip(ctx, yaml.dump_all(specs)) | |
1340 | ||
1341 | log.info(f'Applying spec(s):\n{y}') | |
1342 | _shell( | |
1343 | ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, | |
1344 | ['ceph', 'orch', 'apply', '-i', '-'], | |
1345 | stdin=y, | |
1346 | ) | |
1347 | ||
1348 | ||
1349 | def wait_for_service(ctx, config): | |
1350 | """ | |
1351 | Wait for a service to be fully started | |
1352 | ||
1353 | tasks: | |
1354 | - cephadm.wait_for_service: | |
1355 | service: rgw.foo | |
1356 | timeout: 60 # defaults to 300 | |
1357 | ||
1358 | """ | |
1359 | cluster_name = config.get('cluster', 'ceph') | |
1360 | timeout = config.get('timeout', 300) | |
1361 | service = config.get('service') | |
1362 | assert service | |
1363 | ||
1364 | log.info( | |
1365 | f'Waiting for {cluster_name} service {service} to start (timeout {timeout})...' | |
1366 | ) | |
1367 | with contextutil.safe_while(sleep=1, tries=timeout) as proceed: | |
1368 | while proceed(): | |
1369 | r = _shell( | |
1370 | ctx=ctx, | |
1371 | cluster_name=cluster_name, | |
1372 | remote=ctx.ceph[cluster_name].bootstrap_remote, | |
1373 | args=[ | |
1374 | 'ceph', 'orch', 'ls', '-f', 'json', | |
1375 | ], | |
1376 | stdout=StringIO(), | |
1377 | ) | |
1378 | j = json.loads(r.stdout.getvalue()) | |
1379 | svc = None | |
1380 | for s in j: | |
1381 | if s['service_name'] == service: | |
1382 | svc = s | |
1383 | break | |
1384 | if svc: | |
1385 | log.info( | |
1386 | f"{service} has {s['status']['running']}/{s['status']['size']}" | |
1387 | ) | |
1388 | if s['status']['running'] == s['status']['size']: | |
1389 | break | |
1390 | ||
1391 | ||
9f95a23c TL |
1392 | @contextlib.contextmanager |
1393 | def tweaked_option(ctx, config): | |
1394 | """ | |
1395 | Set an option, and then restore its original value on exit | |
1396 | ||
1397 | Note: due to the way tasks are executed/nested, it is not advisable to | |
1398 | use this method as a standalone task; otherwise, it is likely to restore | |
1399 | the tweaked option only at the /end/ of the 'tasks' block. | |
1400 | """ | |
1401 | saved_options = {} | |
1402 | # we can complicate this when necessary | |
1403 | options = ['mon-health-to-clog'] | |
1404 | type_, id_ = 'mon', '*' | |
1405 | cluster = config.get('cluster', 'ceph') | |
1406 | manager = ctx.managers[cluster] | |
1407 | if id_ == '*': | |
1408 | get_from = next(teuthology.all_roles_of_type(ctx.cluster, type_)) | |
1409 | else: | |
1410 | get_from = id_ | |
1411 | for option in options: | |
1412 | if option not in config: | |
1413 | continue | |
1414 | value = 'true' if config[option] else 'false' | |
1415 | option = option.replace('-', '_') | |
1416 | old_value = manager.get_config(type_, get_from, option) | |
1417 | if value != old_value: | |
1418 | saved_options[option] = old_value | |
1419 | manager.inject_args(type_, id_, option, value) | |
1420 | yield | |
1421 | for option, value in saved_options.items(): | |
1422 | manager.inject_args(type_, id_, option, value) | |
1423 | ||
b3b6e05e | 1424 | |
9f95a23c TL |
1425 | @contextlib.contextmanager |
1426 | def restart(ctx, config): | |
1427 | """ | |
1428 | restart ceph daemons | |
1429 | ||
1430 | For example:: | |
1431 | tasks: | |
1432 | - ceph.restart: [all] | |
1433 | ||
1434 | For example:: | |
1435 | tasks: | |
1436 | - ceph.restart: [osd.0, mon.1, mds.*] | |
1437 | ||
1438 | or:: | |
1439 | ||
1440 | tasks: | |
1441 | - ceph.restart: | |
1442 | daemons: [osd.0, mon.1] | |
1443 | wait-for-healthy: false | |
1444 | wait-for-osds-up: true | |
1445 | ||
1446 | :param ctx: Context | |
1447 | :param config: Configuration | |
1448 | """ | |
1449 | if config is None: | |
1450 | config = {} | |
1451 | elif isinstance(config, list): | |
1452 | config = {'daemons': config} | |
1453 | ||
1454 | daemons = ctx.daemons.resolve_role_list( | |
1455 | config.get('daemons', None), CEPH_ROLE_TYPES, True) | |
1456 | clusters = set() | |
1457 | ||
1458 | log.info('daemons %s' % daemons) | |
1459 | with tweaked_option(ctx, config): | |
1460 | for role in daemons: | |
1461 | cluster, type_, id_ = teuthology.split_role(role) | |
1462 | d = ctx.daemons.get_daemon(type_, id_, cluster) | |
1463 | assert d, 'daemon %s does not exist' % role | |
1464 | d.stop() | |
1465 | if type_ == 'osd': | |
1466 | ctx.managers[cluster].mark_down_osd(id_) | |
1467 | d.restart() | |
1468 | clusters.add(cluster) | |
1469 | ||
1470 | if config.get('wait-for-healthy', True): | |
1471 | for cluster in clusters: | |
1472 | healthy(ctx=ctx, config=dict(cluster=cluster)) | |
1473 | if config.get('wait-for-osds-up', False): | |
1474 | for cluster in clusters: | |
1475 | ctx.managers[cluster].wait_for_all_osds_up() | |
1476 | yield | |
1477 | ||
b3b6e05e | 1478 | |
1911f103 TL |
1479 | @contextlib.contextmanager |
1480 | def distribute_config_and_admin_keyring(ctx, config): | |
1481 | """ | |
1482 | Distribute a sufficient config and keyring for clients | |
1483 | """ | |
1484 | cluster_name = config['cluster'] | |
1485 | log.info('Distributing (final) config and client.admin keyring...') | |
1486 | for remote, roles in ctx.cluster.remotes.items(): | |
f67539c2 TL |
1487 | remote.write_file( |
1488 | '/etc/ceph/{}.conf'.format(cluster_name), | |
1489 | ctx.ceph[cluster_name].config_file, | |
1490 | sudo=True) | |
1491 | remote.write_file( | |
1911f103 | 1492 | path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name), |
f67539c2 TL |
1493 | data=ctx.ceph[cluster_name].admin_keyring, |
1494 | sudo=True) | |
1911f103 TL |
1495 | try: |
1496 | yield | |
1497 | finally: | |
1498 | ctx.cluster.run(args=[ | |
1499 | 'sudo', 'rm', '-f', | |
1500 | '/etc/ceph/{}.conf'.format(cluster_name), | |
1501 | '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), | |
1502 | ]) | |
1503 | ||
b3b6e05e | 1504 | |
9f95a23c TL |
1505 | @contextlib.contextmanager |
1506 | def crush_setup(ctx, config): | |
1507 | cluster_name = config['cluster'] | |
9f95a23c TL |
1508 | |
1509 | profile = config.get('crush_tunables', 'default') | |
1510 | log.info('Setting crush tunables to %s', profile) | |
1511 | _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, | |
1512 | args=['ceph', 'osd', 'crush', 'tunables', profile]) | |
1513 | yield | |
1514 | ||
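# Illustrative sketch of the 'crush_tunables' option consumed above; any
# profile accepted by 'ceph osd crush tunables' can be named:
#
#   tasks:
#   - cephadm:
#       crush_tunables: jewel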
b3b6e05e | 1515 | |
f67539c2 TL |
1516 | @contextlib.contextmanager |
1517 | def create_rbd_pool(ctx, config): | |
1518 | if config.get('create_rbd_pool', False): | |
1519 | cluster_name = config['cluster'] | |
1520 | log.info('Waiting for OSDs to come up') | |
1521 | teuthology.wait_until_osds_up( | |
1522 | ctx, | |
1523 | cluster=ctx.cluster, | |
1524 | remote=ctx.ceph[cluster_name].bootstrap_remote, | |
1525 | ceph_cluster=cluster_name, | |
1526 | ) | |
1527 | log.info('Creating RBD pool') | |
1528 | _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, | |
1529 | args=['sudo', 'ceph', '--cluster', cluster_name, | |
1530 | 'osd', 'pool', 'create', 'rbd', '8']) | |
1531 | _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, | |
1532 | args=['sudo', 'ceph', '--cluster', cluster_name, | |
1533 | 'osd', 'pool', 'application', 'enable', | |
1534 | 'rbd', 'rbd', '--yes-i-really-mean-it' | |
1535 | ]) | |
1536 | yield | |
1537 | ||
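# Illustrative sketch: the pool creation above is opt-in from the job config:
#
#   tasks:
#   - cephadm:
#       create_rbd_pool: true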
b3b6e05e | 1538 | |
9f95a23c | 1539 | @contextlib.contextmanager |
e306af50 TL |
1540 | def _bypass(): |
1541 | yield | |
9f95a23c | 1542 | |
b3b6e05e | 1543 | |
e306af50 TL |
1544 | @contextlib.contextmanager |
1545 | def initialize_config(ctx, config): | |
9f95a23c | 1546 | cluster_name = config['cluster'] |
e306af50 | 1547 | testdir = teuthology.get_testdir(ctx) |
9f95a23c TL |
1548 | |
1549 | ctx.ceph[cluster_name].thrashers = [] | |
1550 | # fixme: setup watchdog, ala ceph.py | |
1551 | ||
1911f103 TL |
1552 | ctx.ceph[cluster_name].roleless = False # see below |
1553 | ||
e306af50 TL |
1554 | first_ceph_cluster = False |
1555 | if not hasattr(ctx, 'daemons'): | |
1556 | first_ceph_cluster = True | |
1557 | ||
9f95a23c TL |
1558 | # cephadm mode? |
1559 | if 'cephadm_mode' not in config: | |
1560 | config['cephadm_mode'] = 'root' | |
1561 | assert config['cephadm_mode'] in ['root', 'cephadm-package'] | |
1562 | if config['cephadm_mode'] == 'root': | |
1563 | ctx.cephadm = testdir + '/cephadm' | |
1564 | else: | |
1565 | ctx.cephadm = 'cephadm' # in the path | |
1566 | ||
1567 | if first_ceph_cluster: | |
1568 | # FIXME: this is global for all clusters | |
1569 | ctx.daemons = DaemonGroup( | |
1570 | use_cephadm=ctx.cephadm) | |
1571 | ||
9f95a23c TL |
1572 | # uuid |
1573 | fsid = str(uuid.uuid1()) | |
1574 | log.info('Cluster fsid is %s' % fsid) | |
1575 | ctx.ceph[cluster_name].fsid = fsid | |
1576 | ||
1577 | # mon ips | |
1578 | log.info('Choosing monitor IPs and ports...') | |
1579 | remotes_and_roles = ctx.cluster.remotes.items() | |
9f95a23c TL |
1580 | ips = [host for (host, port) in |
1581 | (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)] | |
1911f103 TL |
1582 | |
1583 | if config.get('roleless', False): | |
1584 | # mons will be named after hosts | |
1911f103 | 1585 | first_mon = None |
20effc67 | 1586 | max_mons = config.get('max_mons', 5) |
1911f103 | 1587 | for remote, _ in remotes_and_roles: |
e306af50 | 1588 | ctx.cluster.remotes[remote].append('mon.' + remote.shortname) |
1911f103 TL |
1589 | if not first_mon: |
1590 | first_mon = remote.shortname | |
1591 | bootstrap_remote = remote | |
20effc67 TL |
1592 | max_mons -= 1 |
1593 | if not max_mons: | |
1594 | break | |
e306af50 TL |
1595 | log.info('No mon roles; fabricating mons') |
1596 | ||
1597 | roles = [role_list for (remote, role_list) in ctx.cluster.remotes.items()] | |
1598 | ||
9f95a23c TL |
1599 | ctx.ceph[cluster_name].mons = get_mons( |
1600 | roles, ips, cluster_name, | |
1601 | mon_bind_msgr2=config.get('mon_bind_msgr2', True), | |
1602 | mon_bind_addrvec=config.get('mon_bind_addrvec', True), | |
1911f103 | 1603 | ) |
9f95a23c TL |
1604 | log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons) |
1605 | ||
1911f103 TL |
1606 | if config.get('roleless', False): |
1607 | ctx.ceph[cluster_name].roleless = True | |
1608 | ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote | |
1609 | ctx.ceph[cluster_name].first_mon = first_mon | |
1610 | ctx.ceph[cluster_name].first_mon_role = 'mon.' + first_mon | |
1611 | else: | |
1612 | first_mon_role = sorted(ctx.ceph[cluster_name].mons.keys())[0] | |
1613 | _, _, first_mon = teuthology.split_role(first_mon_role) | |
1614 | (bootstrap_remote,) = ctx.cluster.only(first_mon_role).remotes.keys() | |
1615 | log.info('First mon is mon.%s on %s' % (first_mon, | |
1616 | bootstrap_remote.shortname)) | |
1617 | ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote | |
1618 | ctx.ceph[cluster_name].first_mon = first_mon | |
1619 | ctx.ceph[cluster_name].first_mon_role = first_mon_role | |
1620 | ||
1621 | others = ctx.cluster.remotes[bootstrap_remote] | |
1622 | mgrs = sorted([r for r in others | |
1623 | if teuthology.is_type('mgr', cluster_name)(r)]) | |
1624 | if not mgrs: | |
1625 | raise RuntimeError('no mgrs on the same host as first mon %s' % first_mon) | |
1626 | _, _, first_mgr = teuthology.split_role(mgrs[0]) | |
1627 | log.info('First mgr is %s' % (first_mgr)) | |
1628 | ctx.ceph[cluster_name].first_mgr = first_mgr | |
e306af50 TL |
1629 | yield |
1630 | ||
b3b6e05e | 1631 | |
e306af50 TL |
1632 | @contextlib.contextmanager |
1633 | def task(ctx, config): | |
1634 | """ | |
1635 | Deploy ceph cluster using cephadm | |
1636 | ||
e306af50 TL |
1637 | For example, teuthology.yaml can contain the 'defaults' section: |
1638 | ||
1639 | defaults: | |
1640 | cephadm: | |
1641 | containers: | |
e306af50 TL |
1642 | image: 'quay.io/ceph-ci/ceph' |
1643 | ||
1644 | Using 'overrides' makes it possible to customize these settings per run. | |
1645 | The equivalent 'overrides' section looks like: | |
1646 | ||
1647 | overrides: | |
1648 | cephadm: | |
1649 | containers: | |
e306af50 | 1650 | image: 'quay.io/ceph-ci/ceph' |
b3b6e05e TL |
1651 | registry-login: |
1652 | url: registry-url | |
1653 | username: registry-user | |
1654 | password: registry-password | |
e306af50 TL |
1655 | |
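| A job yaml then selects the task itself. A minimal sketch follows; | |
| the keys shown (cephadm_mode, image, conf) are ones this task reads, | |
| and the values are placeholders only: | |
| | |
| tasks: | |
| - cephadm: | |
|     cephadm_mode: root | |
|     image: 'quay.io/ceph-ci/ceph:main' | |
|     conf: | |
|       global: | |
|         osd pool default size: 2 | |
| | |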
1656 | :param ctx: the argparse.Namespace object | |
1657 | :param config: the config dict | |
1658 | """ | |
1659 | if config is None: | |
1660 | config = {} | |
1661 | ||
1662 | assert isinstance(config, dict), \ | |
1663 | "task only supports a dictionary for configuration" | |
1664 | ||
1665 | overrides = ctx.config.get('overrides', {}) | |
1666 | teuthology.deep_merge(config, overrides.get('ceph', {})) | |
1667 | teuthology.deep_merge(config, overrides.get('cephadm', {})) | |
1668 | log.info('Config: ' + str(config)) | |
1669 | ||
e306af50 TL |
1670 | # set up cluster context |
1671 | if not hasattr(ctx, 'ceph'): | |
1672 | ctx.ceph = {} | |
e306af50 TL |
1673 | if 'cluster' not in config: |
1674 | config['cluster'] = 'ceph' | |
1675 | cluster_name = config['cluster'] | |
1676 | if cluster_name not in ctx.ceph: | |
1677 | ctx.ceph[cluster_name] = argparse.Namespace() | |
1678 | ctx.ceph[cluster_name].bootstrapped = False | |
f91f0fd5 | 1679 | |
e306af50 TL |
1680 | # image |
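| # Image selection precedence: an explicit 'image' in the task config | |
| # wins; otherwise use '<containers.image>:<sha1>' (with a '-crimson' | |
| # suffix for the crimson flavor); otherwise fall back to | |
| # '<containers.image>:<branch>'. | |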
1681 | teuth_defaults = teuth_config.get('defaults', {}) | |
1682 | cephadm_defaults = teuth_defaults.get('cephadm', {}) | |
1683 | containers_defaults = cephadm_defaults.get('containers', {}) | |
e306af50 TL |
1684 | container_image_name = containers_defaults.get('image', None) |
1685 | ||
1686 | containers = config.get('containers', {}) | |
e306af50 | 1687 | container_image_name = containers.get('image', container_image_name) |
e306af50 | 1688 | |
e306af50 TL |
1689 | if not hasattr(ctx.ceph[cluster_name], 'image'): |
1690 | ctx.ceph[cluster_name].image = config.get('image') | |
1e59de90 | 1691 | ref = ctx.config.get("branch", "main") |
e306af50 | 1692 | if not ctx.ceph[cluster_name].image: |
f6b5b4d7 TL |
1693 | if not container_image_name: |
1694 | raise Exception("Configuration error occurred. " | |
1695 | "The 'image' value is undefined for 'cephadm' task. " | |
1696 | "Please provide corresponding options in the task's " | |
1697 | "config, task 'overrides', or teuthology 'defaults' " | |
1698 | "section.") | |
e306af50 | 1699 | sha1 = config.get('sha1') |
f6b5b4d7 TL |
1700 | flavor = config.get('flavor', 'default') |
1701 | ||
e306af50 | 1702 | if sha1: |
f6b5b4d7 TL |
1703 | if flavor == "crimson": |
1704 | ctx.ceph[cluster_name].image = container_image_name + ':' + sha1 + '-' + flavor | |
1705 | else: | |
1706 | ctx.ceph[cluster_name].image = container_image_name + ':' + sha1 | |
e306af50 TL |
1707 | ref = sha1 |
1708 | else: | |
1e59de90 TL |
1709 | # fall back to using the branch value |
1710 | ctx.ceph[cluster_name].image = container_image_name + ':' + ref | |
e306af50 TL |
1711 | log.info('Cluster image is %s' % ctx.ceph[cluster_name].image) |
1712 | ||
1911f103 | 1713 | |
9f95a23c | 1714 | with contextutil.nested( |
e306af50 | 1715 | #if the cluster is already bootstrapped bypass corresponding methods |
aee94f69 | 1716 | lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped) \ |
e306af50 | 1717 | else initialize_config(ctx=ctx, config=config), |
9f95a23c TL |
1718 | lambda: ceph_initial(), |
1719 | lambda: normalize_hostnames(ctx=ctx), | |
aee94f69 | 1720 | lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped) \ |
e306af50 | 1721 | else download_cephadm(ctx=ctx, config=config, ref=ref), |
9f95a23c TL |
1722 | lambda: ceph_log(ctx=ctx, config=config), |
1723 | lambda: ceph_crash(ctx=ctx, config=config), | |
20effc67 | 1724 | lambda: pull_image(ctx=ctx, config=config), |
aee94f69 TL |
1725 | lambda: _bypass() if not (config.get('use-ca-signed-key', False)) \ |
1726 | else setup_ca_signed_keys(ctx, config), | |
1727 | lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped) \ | |
f67539c2 | 1728 | else ceph_bootstrap(ctx, config), |
9f95a23c TL |
1729 | lambda: crush_setup(ctx=ctx, config=config), |
1730 | lambda: ceph_mons(ctx=ctx, config=config), | |
1911f103 | 1731 | lambda: distribute_config_and_admin_keyring(ctx=ctx, config=config), |
9f95a23c TL |
1732 | lambda: ceph_mgrs(ctx=ctx, config=config), |
1733 | lambda: ceph_osds(ctx=ctx, config=config), | |
1734 | lambda: ceph_mdss(ctx=ctx, config=config), | |
20effc67 | 1735 | lambda: cephfs_setup(ctx=ctx, config=config), |
9f95a23c | 1736 | lambda: ceph_rgw(ctx=ctx, config=config), |
f91f0fd5 | 1737 | lambda: ceph_iscsi(ctx=ctx, config=config), |
9f95a23c TL |
1738 | lambda: ceph_monitoring('prometheus', ctx=ctx, config=config), |
1739 | lambda: ceph_monitoring('node-exporter', ctx=ctx, config=config), | |
1740 | lambda: ceph_monitoring('alertmanager', ctx=ctx, config=config), | |
1741 | lambda: ceph_monitoring('grafana', ctx=ctx, config=config), | |
1742 | lambda: ceph_clients(ctx=ctx, config=config), | |
f67539c2 | 1743 | lambda: create_rbd_pool(ctx=ctx, config=config), |
9f95a23c | 1744 | ): |
9f95a23c TL |
1745 | try: |
1746 | if config.get('wait-for-healthy', True): | |
1747 | healthy(ctx=ctx, config=config) | |
1748 | ||
1749 | log.info('Setup complete, yielding') | |
1750 | yield | |
1751 | ||
1752 | finally: | |
1753 | log.info('Teardown begin') | |
e306af50 | 1754 |