1 | """ |
2 | Ceph cluster task, deployed via cephadm orchestrator | |
3 | """ | |
9f95a23c TL |
4 | import argparse |
5 | import configobj | |
6 | import contextlib | |
e306af50 | 7 | import errno |
9f95a23c TL |
8 | import logging |
9 | import os | |
10 | import json | |
11 | import re | |
12 | import uuid | |
13 | ||
e306af50 TL |
14 | import six |
15 | import toml | |
16 | from io import BytesIO | |
17 | from six import StringIO | |
9f95a23c | 18 | from tarfile import ReadError |
e306af50 | 19 | from tasks.ceph_manager import CephManager |
9f95a23c TL |
20 | from teuthology import misc as teuthology |
21 | from teuthology import contextutil | |
22 | from teuthology.orchestra import run | |
23 | from teuthology.orchestra.daemon import DaemonGroup | |
24 | from teuthology.config import config as teuth_config | |
25 | ||
26 | # these items we use from ceph.py should probably eventually move elsewhere | |
27 | from tasks.ceph import get_mons, healthy | |
28 | ||
29 | CEPH_ROLE_TYPES = ['mon', 'mgr', 'osd', 'mds', 'rgw', 'prometheus'] | |
30 | ||
31 | log = logging.getLogger(__name__) | |
32 | ||
33 | ||
def _shell(ctx, cluster_name, remote, args, extra_cephadm_args=[], **kwargs):
    testdir = teuthology.get_testdir(ctx)
    return remote.run(
        args=[
            'sudo',
            ctx.cephadm,
            '--image', ctx.ceph[cluster_name].image,
            'shell',
            '-c', '/etc/ceph/{}.conf'.format(cluster_name),
            '-k', '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
            '--fsid', ctx.ceph[cluster_name].fsid,
        ] + extra_cephadm_args + [
            '--',
        ] + args,
        **kwargs
    )

def build_initial_config(ctx, config):
    cluster_name = config['cluster']

    path = os.path.join(os.path.dirname(__file__), 'cephadm.conf')
    conf = configobj.ConfigObj(path, file_error=True)

    conf.setdefault('global', {})
    conf['global']['fsid'] = ctx.ceph[cluster_name].fsid

    # overrides
    for section, keys in config.get('conf', {}).items():
        for key, value in keys.items():
            log.info(" override: [%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value

    return conf

@contextlib.contextmanager
def normalize_hostnames(ctx):
    """
    Ensure we have short hostnames throughout, for consistency between
    remote.shortname and socket.gethostname() in cephadm.
    """
    log.info('Normalizing hostnames...')
    ctx.cluster.run(args=[
        'sudo',
        'hostname',
        run.Raw('$(hostname -s)'),
    ])

    try:
        yield
    finally:
        pass

@contextlib.contextmanager
def download_cephadm(ctx, config, ref):
    cluster_name = config['cluster']

    if config.get('cephadm_mode') != 'cephadm-package':
        ref = config.get('cephadm_branch', ref)
        git_url = teuth_config.get_ceph_git_url()
        log.info('Downloading cephadm (repo %s ref %s)...' % (git_url, ref))
        if git_url.startswith('https://github.com/'):
            # git archive doesn't like https:// URLs, which we use with github.
            rest = git_url.split('https://github.com/', 1)[1]
            rest = re.sub(r'\.git/?$', '', rest).strip()  # no .git suffix
            ctx.cluster.run(
                args=[
                    'curl', '--silent',
                    'https://raw.githubusercontent.com/' + rest + '/' + ref + '/src/cephadm/cephadm',
                    run.Raw('>'),
                    ctx.cephadm,
                    run.Raw('&&'),
                    'ls', '-l',
                    ctx.cephadm,
                ],
            )
        else:
            ctx.cluster.run(
                args=[
                    'git', 'archive',
                    '--remote=' + git_url,
                    ref,
                    'src/cephadm/cephadm',
                    run.Raw('|'),
                    'tar', '-xO', 'src/cephadm/cephadm',
                    run.Raw('>'),
                    ctx.cephadm,
                ],
            )
        # sanity-check the resulting file and set executable bit
        cephadm_file_size = '$(stat -c%s {})'.format(ctx.cephadm)
        ctx.cluster.run(
            args=[
                'test', '-s', ctx.cephadm,
                run.Raw('&&'),
                'test', run.Raw(cephadm_file_size), "-gt", run.Raw('1000'),
                run.Raw('&&'),
                'chmod', '+x', ctx.cephadm,
            ],
        )

    try:
        yield
    finally:
        log.info('Removing cluster...')
        ctx.cluster.run(args=[
            'sudo',
            ctx.cephadm,
            'rm-cluster',
            '--fsid', ctx.ceph[cluster_name].fsid,
            '--force',
        ])

        if config.get('cephadm_mode') == 'root':
            log.info('Removing cephadm ...')
            ctx.cluster.run(
                args=[
                    'rm',
                    '-rf',
                    ctx.cephadm,
                ],
            )

@contextlib.contextmanager
def ceph_log(ctx, config):
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    try:
        yield

    except Exception:
        # we need to know this below
        ctx.summary['success'] = False
        raise

    finally:
        log.info('Checking cluster log for badness...')
        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurrence of the pattern specified in the Ceph log.
            Returns None if none found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = [
                'sudo',
                'egrep', pattern,
                '/var/log/ceph/{fsid}/ceph.log'.format(
                    fsid=fsid),
            ]
            if excludes:
                for exclude in excludes:
                    args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                run.Raw('|'), 'head', '-n', '1',
            ])
            r = ctx.ceph[cluster_name].bootstrap_remote.run(
                stdout=StringIO(),
                args=args,
            )
            stdout = r.stdout.getvalue()
            if stdout != '':
                return stdout
            return None

        if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]',
                             config.get('log-whitelist')) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']:
                    match = first_in_ceph_log(pattern, config['log-whitelist'])
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                                match=match.rstrip('\n'),
                            )
                        break

        if ctx.archive is not None and \
                not (ctx.config.get('archive-on-error') and ctx.summary['success']):
            # and logs
            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',   # all logs, not just for the cluster
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            try:
                os.makedirs(path)
            except OSError:
                pass
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.name)
                try:
                    os.makedirs(sub)
                except OSError:
                    pass
                try:
                    teuthology.pull_directory(remote, '/var/log/ceph',  # everything
                                              os.path.join(sub, 'log'))
                except ReadError:
                    pass

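# The first_in_ceph_log() helper inside ceph_log() above builds a shell
# pipeline roughly equivalent to (illustrative):
#
#   sudo egrep PATTERN /var/log/ceph/$FSID/ceph.log \
#       | egrep -v EXCLUDE_1 | egrep -v EXCLUDE_2 | head -n 1
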
@contextlib.contextmanager
def ceph_crash(ctx, config):
    """
    Gather crash dumps from /var/lib/ceph/$fsid/crash
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    try:
        yield

    finally:
        if ctx.archive is not None:
            log.info('Archiving crash dumps...')
            path = os.path.join(ctx.archive, 'remote')
            try:
                os.makedirs(path)
            except OSError:
                pass
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.name)
                try:
                    os.makedirs(sub)
                except OSError:
                    pass
                try:
                    teuthology.pull_directory(remote,
                                              '/var/lib/ceph/%s/crash' % fsid,
                                              os.path.join(sub, 'crash'))
                except ReadError:
                    pass

@contextlib.contextmanager
def ceph_bootstrap(ctx, config, registry):
    """
    Bootstrap the Ceph cluster; if a registry is provided, set up the
    containers' registry mirror before bootstrapping.

    :param ctx: the argparse.Namespace object
    :param config: the config dict
    :param registry: url to containers' mirror registry
    """
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)
    fsid = ctx.ceph[cluster_name].fsid

    bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote
    first_mon = ctx.ceph[cluster_name].first_mon
    first_mon_role = ctx.ceph[cluster_name].first_mon_role
    mons = ctx.ceph[cluster_name].mons

    ctx.cluster.run(args=[
        'sudo', 'mkdir', '-p', '/etc/ceph',
    ])
    ctx.cluster.run(args=[
        'sudo', 'chmod', '777', '/etc/ceph',
    ])
    if registry:
        add_mirror_to_cluster(ctx, registry)
    try:
        # write seed config
        log.info('Writing seed config...')
        conf_fp = BytesIO()
        seed_config = build_initial_config(ctx, config)
        seed_config.write(conf_fp)
        teuthology.write_file(
            remote=bootstrap_remote,
            path='{}/seed.{}.conf'.format(testdir, cluster_name),
            data=conf_fp.getvalue())
        log.debug('Final config:\n' + conf_fp.getvalue().decode())
        ctx.ceph[cluster_name].conf = seed_config

        # register initial daemons
        ctx.daemons.register_daemon(
            bootstrap_remote, 'mon', first_mon,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild('mon.' + first_mon),
            wait=False,
            started=True,
        )
        if not ctx.ceph[cluster_name].roleless:
            first_mgr = ctx.ceph[cluster_name].first_mgr
            ctx.daemons.register_daemon(
                bootstrap_remote, 'mgr', first_mgr,
                cluster=cluster_name,
                fsid=fsid,
                logger=log.getChild('mgr.' + first_mgr),
                wait=False,
                started=True,
            )

        # bootstrap
        log.info('Bootstrapping...')
        cmd = [
            'sudo',
            ctx.cephadm,
            '--image', ctx.ceph[cluster_name].image,
            '-v',
            'bootstrap',
            '--fsid', fsid,
            '--config', '{}/seed.{}.conf'.format(testdir, cluster_name),
            '--output-config', '/etc/ceph/{}.conf'.format(cluster_name),
            '--output-keyring',
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
            '--output-pub-ssh-key', '{}/{}.pub'.format(testdir, cluster_name),
        ]
        if not ctx.ceph[cluster_name].roleless:
            cmd += [
                '--mon-id', first_mon,
                '--mgr-id', first_mgr,
                '--orphan-initial-daemons',   # we will do it explicitly!
                '--skip-monitoring-stack',    # we'll provision these explicitly
            ]
        if mons[first_mon_role].startswith('['):
            cmd += ['--mon-addrv', mons[first_mon_role]]
        else:
            cmd += ['--mon-ip', mons[first_mon_role]]
        if config.get('skip_dashboard'):
            cmd += ['--skip-dashboard']
        # bootstrap makes the keyring root 0600, so +r it for our purposes
        cmd += [
            run.Raw('&&'),
            'sudo', 'chmod', '+r',
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
        ]
        bootstrap_remote.run(args=cmd)

        # fetch keys and configs
        log.info('Fetching config...')
        ctx.ceph[cluster_name].config_file = teuthology.get_file(
            remote=bootstrap_remote,
            path='/etc/ceph/{}.conf'.format(cluster_name))
        log.info('Fetching client.admin keyring...')
        ctx.ceph[cluster_name].admin_keyring = teuthology.get_file(
            remote=bootstrap_remote,
            path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name))
        log.info('Fetching mon keyring...')
        ctx.ceph[cluster_name].mon_keyring = teuthology.get_file(
            remote=bootstrap_remote,
            path='/var/lib/ceph/%s/mon.%s/keyring' % (fsid, first_mon),
            sudo=True)

        # fetch ssh key, distribute to additional nodes
        log.info('Fetching pub ssh key...')
        ssh_pub_key = teuthology.get_file(
            remote=bootstrap_remote,
            path='{}/{}.pub'.format(testdir, cluster_name)
        ).decode('ascii').strip()

        log.info('Installing pub ssh key for root users...')
        ctx.cluster.run(args=[
            'sudo', 'install', '-d', '-m', '0700', '/root/.ssh',
            run.Raw('&&'),
            'echo', ssh_pub_key,
            run.Raw('|'),
            'sudo', 'tee', '-a', '/root/.ssh/authorized_keys',
            run.Raw('&&'),
            'sudo', 'chmod', '0600', '/root/.ssh/authorized_keys',
        ])

        # set options
        _shell(ctx, cluster_name, bootstrap_remote,
               ['ceph', 'config', 'set', 'mgr', 'mgr/cephadm/allow_ptrace', 'true'])

        # add other hosts
        for remote in ctx.cluster.remotes.keys():
            if remote == bootstrap_remote:
                continue
            log.info('Writing (initial) conf and keyring to %s' % remote.shortname)
            teuthology.write_file(
                remote=remote,
                path='/etc/ceph/{}.conf'.format(cluster_name),
                data=ctx.ceph[cluster_name].config_file)
            teuthology.write_file(
                remote=remote,
                path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
                data=ctx.ceph[cluster_name].admin_keyring)

            log.info('Adding host %s to orchestrator...' % remote.shortname)
            _shell(ctx, cluster_name, remote, [
                'ceph', 'orch', 'host', 'add',
                remote.shortname
            ])
            r = _shell(ctx, cluster_name, remote,
                       ['ceph', 'orch', 'host', 'ls', '--format=json'],
                       stdout=StringIO())
            hosts = [node['hostname'] for node in json.loads(r.stdout.getvalue())]
            assert remote.shortname in hosts

        yield

    finally:
        log.info('Cleaning up testdir ceph.* files...')
        ctx.cluster.run(args=[
            'rm', '-f',
            '{}/seed.{}.conf'.format(testdir, cluster_name),
            '{}/{}.pub'.format(testdir, cluster_name),
        ])

        log.info('Stopping all daemons...')

        # this doesn't block until they are all stopped...
        #ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'])

        # so, stop them individually
        for role in ctx.daemons.resolve_role_list(None, CEPH_ROLE_TYPES, True):
            cluster, type_, id_ = teuthology.split_role(role)
            try:
                ctx.daemons.get_daemon(type_, id_, cluster).stop()
            except Exception:
                log.exception('Failed to stop "{role}"'.format(role=role))
                raise

        # clean up /etc/ceph
        ctx.cluster.run(args=[
            'sudo', 'rm', '-f',
            '/etc/ceph/{}.conf'.format(cluster_name),
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
        ])

@contextlib.contextmanager
def ceph_mons(ctx, config):
    """
    Deploy any additional mons
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid
    num_mons = 1

    try:
        for remote, roles in ctx.cluster.remotes.items():
            for mon in [r for r in roles
                        if teuthology.is_type('mon', cluster_name)(r)]:
                c_, _, id_ = teuthology.split_role(mon)
                if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
                    continue
                log.info('Adding %s on %s' % (mon, remote.shortname))
                num_mons += 1
                _shell(ctx, cluster_name, remote, [
                    'ceph', 'orch', 'daemon', 'add', 'mon',
                    remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] + '=' + id_,
                ])
                ctx.daemons.register_daemon(
                    remote, 'mon', id_,
                    cluster=cluster_name,
                    fsid=fsid,
                    logger=log.getChild(mon),
                    wait=False,
                    started=True,
                )

                with contextutil.safe_while(sleep=1, tries=180) as proceed:
                    while proceed():
                        log.info('Waiting for %d mons in monmap...' % (num_mons))
                        r = _shell(
                            ctx=ctx,
                            cluster_name=cluster_name,
                            remote=remote,
                            args=[
                                'ceph', 'mon', 'dump', '-f', 'json',
                            ],
                            stdout=StringIO(),
                        )
                        j = json.loads(r.stdout.getvalue())
                        if len(j['mons']) == num_mons:
                            break

        # refresh our (final) ceph.conf file
        log.info('Generating final ceph.conf file...')
        r = _shell(
            ctx=ctx,
            cluster_name=cluster_name,
            remote=remote,
            args=[
                'ceph', 'config', 'generate-minimal-conf',
            ],
            stdout=StringIO(),
        )
        ctx.ceph[cluster_name].config_file = r.stdout.getvalue()

        yield

    finally:
        pass

@contextlib.contextmanager
def ceph_mgrs(ctx, config):
    """
    Deploy any additional mgrs
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    try:
        nodes = []
        daemons = {}
        for remote, roles in ctx.cluster.remotes.items():
            for mgr in [r for r in roles
                        if teuthology.is_type('mgr', cluster_name)(r)]:
                c_, _, id_ = teuthology.split_role(mgr)
                if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mgr:
                    continue
                log.info('Adding %s on %s' % (mgr, remote.shortname))
                nodes.append(remote.shortname + '=' + id_)
                daemons[mgr] = (remote, id_)
        if nodes:
            _shell(ctx, cluster_name, remote, [
                'ceph', 'orch', 'apply', 'mgr',
                str(len(nodes) + 1) + ';' + ';'.join(nodes)]
            )
        for mgr, i in daemons.items():
            remote, id_ = i
            ctx.daemons.register_daemon(
                remote, 'mgr', id_,
                cluster=cluster_name,
                fsid=fsid,
                logger=log.getChild(mgr),
                wait=False,
                started=True,
            )

        yield

    finally:
        pass

@contextlib.contextmanager
def ceph_osds(ctx, config):
    """
    Deploy OSDs
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    try:
        log.info('Deploying OSDs...')

        # provision OSDs in numeric order
        id_to_remote = {}
        devs_by_remote = {}
        for remote, roles in ctx.cluster.remotes.items():
            devs_by_remote[remote] = teuthology.get_scratch_devices(remote)
            for osd in [r for r in roles
                        if teuthology.is_type('osd', cluster_name)(r)]:
                _, _, id_ = teuthology.split_role(osd)
                id_to_remote[int(id_)] = (osd, remote)

        cur = 0
        for osd_id in sorted(id_to_remote.keys()):
            osd, remote = id_to_remote[osd_id]
            _, _, id_ = teuthology.split_role(osd)
            assert int(id_) == cur
            devs = devs_by_remote[remote]
            assert devs   ## FIXME ##
            dev = devs.pop()
            if all(_ in dev for _ in ('lv', 'vg')):
                short_dev = dev.replace('/dev/', '')
            else:
                short_dev = dev
            log.info('Deploying %s on %s with %s...' % (
                osd, remote.shortname, dev))
            _shell(ctx, cluster_name, remote, [
                'ceph-volume', 'lvm', 'zap', dev])
            _shell(ctx, cluster_name, remote, [
                'ceph', 'orch', 'daemon', 'add', 'osd',
                remote.shortname + ':' + short_dev
            ])
            ctx.daemons.register_daemon(
                remote, 'osd', id_,
                cluster=cluster_name,
                fsid=fsid,
                logger=log.getChild(osd),
                wait=False,
                started=True,
            )
            cur += 1

        yield
    finally:
        pass

@contextlib.contextmanager
def ceph_mdss(ctx, config):
    """
    Deploy MDSs
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = []
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type('mds', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            nodes.append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)
    if nodes:
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', 'mds',
            'all',
            str(len(nodes)) + ';' + ';'.join(nodes)]
        )
    for role, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, 'mds', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield

@contextlib.contextmanager
def ceph_monitoring(daemon_type, ctx, config):
    """
    Deploy prometheus, node-exporter, etc.
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = []
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type(daemon_type, cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            nodes.append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)
    if nodes:
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', daemon_type,
            str(len(nodes)) + ';' + ';'.join(nodes)]
        )
    for role, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, daemon_type, id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield

@contextlib.contextmanager
def ceph_rgw(ctx, config):
    """
    Deploy rgw
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = {}
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type('rgw', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            realmzone = '.'.join(id_.split('.')[0:2])
            if realmzone not in nodes:
                nodes[realmzone] = []
            nodes[realmzone].append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)

    for realmzone in nodes.keys():
        (realm, zone) = realmzone.split('.', 1)

        # TODO: those should be moved to mgr/cephadm
        _shell(ctx, cluster_name, remote,
               ['radosgw-admin', 'realm', 'create', '--rgw-realm', realm, '--default']
        )
        _shell(ctx, cluster_name, remote,
               ['radosgw-admin', 'zonegroup', 'create', '--rgw-zonegroup=default', '--master', '--default']
        )
        _shell(ctx, cluster_name, remote,
               ['radosgw-admin', 'zone', 'create', '--rgw-zonegroup=default', '--rgw-zone', zone, '--master', '--default']
        )

    for realmzone, nodes in nodes.items():
        (realm, zone) = realmzone.split('.', 1)
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', 'rgw', realm, zone,
            '--placement',
            str(len(nodes)) + ';' + ';'.join(nodes)]
        )
    for role, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, 'rgw', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield

@contextlib.contextmanager
def ceph_clients(ctx, config):
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)

    log.info('Setting up client nodes...')
    clients = ctx.cluster.only(teuthology.is_type('client', cluster_name))
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
    for remote, roles_for_host in clients.remotes.items():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'client',
                                                     cluster_name):
            name = teuthology.ceph_role(role)
            client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name,
                                                                name)
            r = _shell(
                ctx=ctx,
                cluster_name=cluster_name,
                remote=remote,
                args=[
                    'ceph', 'auth',
                    'get-or-create', name,
                    'mon', 'allow *',
                    'osd', 'allow *',
                    'mds', 'allow *',
                    'mgr', 'allow *',
                ],
                stdout=StringIO(),
            )
            keyring = r.stdout.getvalue()
            teuthology.sudo_write_file(
                remote=remote,
                path=client_keyring,
                data=keyring,
                perms='0644'
            )
    yield

@contextlib.contextmanager
def ceph_initial():
    try:
        yield
    finally:
        log.info('Teardown complete')

## public methods
@contextlib.contextmanager
def stop(ctx, config):
    """
    Stop ceph daemons

    For example::
      tasks:
      - ceph.stop: [mds.*]

      tasks:
      - ceph.stop: [osd.0, osd.2]

      tasks:
      - ceph.stop:
          daemons: [osd.0, osd.2]

    """
    if config is None:
        config = {}
    elif isinstance(config, list):
        config = {'daemons': config}

    daemons = ctx.daemons.resolve_role_list(
        config.get('daemons', None), CEPH_ROLE_TYPES, True)
    clusters = set()

    for role in daemons:
        cluster, type_, id_ = teuthology.split_role(role)
        ctx.daemons.get_daemon(type_, id_, cluster).stop()
        clusters.add(cluster)

    # for cluster in clusters:
    #     ctx.ceph[cluster].watchdog.stop()
    #     ctx.ceph[cluster].watchdog.join()

    yield

def shell(ctx, config):
    """
    Execute (shell) commands
    """
    cluster_name = config.get('cluster', 'ceph')

    env = []
    if 'env' in config:
        for k in config['env']:
            env.extend(['-e', k + '=' + ctx.config.get(k, '')])
        del config['env']

    if 'all' in config and len(config) == 1:
        a = config['all']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles)

    for role, ls in config.items():
        (remote,) = ctx.cluster.only(role).remotes.keys()
        log.info('Running commands on role %s host %s', role, remote.name)
        for c in ls:
            _shell(ctx, cluster_name, remote,
                   ['bash', '-c', c],
                   extra_cephadm_args=env)

@contextlib.contextmanager
def tweaked_option(ctx, config):
    """
    Set an option, and then restore it to its original value.

    Note: due to the way tasks are executed/nested, it is not suggested to
    use this method as a standalone task; otherwise, it is likely to
    restore the tweaked option at the /end/ of the 'tasks' block.
    """
    saved_options = {}
    # we can complicate this when necessary
    options = ['mon-health-to-clog']
    type_, id_ = 'mon', '*'
    cluster = config.get('cluster', 'ceph')
    manager = ctx.managers[cluster]
    if id_ == '*':
        get_from = next(teuthology.all_roles_of_type(ctx.cluster, type_))
    else:
        get_from = id_
    for option in options:
        if option not in config:
            continue
        value = 'true' if config[option] else 'false'
        option = option.replace('-', '_')
        old_value = manager.get_config(type_, get_from, option)
        if value != old_value:
            saved_options[option] = old_value
            manager.inject_args(type_, id_, option, value)
    yield
    for option, value in saved_options.items():
        manager.inject_args(type_, id_, option, value)

@contextlib.contextmanager
def restart(ctx, config):
    """
    restart ceph daemons

    For example::
      tasks:
      - ceph.restart: [all]

    For example::
      tasks:
      - ceph.restart: [osd.0, mon.1, mds.*]

    or::

      tasks:
      - ceph.restart:
          daemons: [osd.0, mon.1]
          wait-for-healthy: false
          wait-for-osds-up: true

    :param ctx: Context
    :param config: Configuration
    """
    if config is None:
        config = {}
    elif isinstance(config, list):
        config = {'daemons': config}

    daemons = ctx.daemons.resolve_role_list(
        config.get('daemons', None), CEPH_ROLE_TYPES, True)
    clusters = set()

    log.info('daemons %s' % daemons)
    with tweaked_option(ctx, config):
        for role in daemons:
            cluster, type_, id_ = teuthology.split_role(role)
            d = ctx.daemons.get_daemon(type_, id_, cluster)
            assert d, 'daemon %s does not exist' % role
            d.stop()
            if type_ == 'osd':
                ctx.managers[cluster].mark_down_osd(id_)
            d.restart()
            clusters.add(cluster)

    if config.get('wait-for-healthy', True):
        for cluster in clusters:
            healthy(ctx=ctx, config=dict(cluster=cluster))
    if config.get('wait-for-osds-up', False):
        for cluster in clusters:
            ctx.managers[cluster].wait_for_all_osds_up()
    yield

@contextlib.contextmanager
def distribute_config_and_admin_keyring(ctx, config):
    """
    Distribute a sufficient config and keyring for clients
    """
    cluster_name = config['cluster']
    log.info('Distributing (final) config and client.admin keyring...')
    for remote, roles in ctx.cluster.remotes.items():
        teuthology.sudo_write_file(
            remote=remote,
            path='/etc/ceph/{}.conf'.format(cluster_name),
            data=ctx.ceph[cluster_name].config_file)
        teuthology.sudo_write_file(
            remote=remote,
            path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
            data=ctx.ceph[cluster_name].admin_keyring)
    try:
        yield
    finally:
        ctx.cluster.run(args=[
            'sudo', 'rm', '-f',
            '/etc/ceph/{}.conf'.format(cluster_name),
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
        ])

@contextlib.contextmanager
def crush_setup(ctx, config):
    cluster_name = config['cluster']

    profile = config.get('crush_tunables', 'default')
    log.info('Setting crush tunables to %s', profile)
    _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote,
           args=['ceph', 'osd', 'crush', 'tunables', profile])
    yield

@contextlib.contextmanager
def _bypass():
    yield

@contextlib.contextmanager
def initialize_config(ctx, config):
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)

    ctx.ceph[cluster_name].thrashers = []
    # fixme: setup watchdog, ala ceph.py

    ctx.ceph[cluster_name].roleless = False  # see below

    first_ceph_cluster = False
    if not hasattr(ctx, 'daemons'):
        first_ceph_cluster = True

    # cephadm mode?
    if 'cephadm_mode' not in config:
        config['cephadm_mode'] = 'root'
    assert config['cephadm_mode'] in ['root', 'cephadm-package']
    if config['cephadm_mode'] == 'root':
        ctx.cephadm = testdir + '/cephadm'
    else:
        ctx.cephadm = 'cephadm'  # in the path

    if first_ceph_cluster:
        # FIXME: this is global for all clusters
        ctx.daemons = DaemonGroup(
            use_cephadm=ctx.cephadm)

    # uuid
    fsid = str(uuid.uuid1())
    log.info('Cluster fsid is %s' % fsid)
    ctx.ceph[cluster_name].fsid = fsid

    # mon ips
    log.info('Choosing monitor IPs and ports...')
    remotes_and_roles = ctx.cluster.remotes.items()
    ips = [host for (host, port) in
           (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)]

    if config.get('roleless', False):
        # mons will be named after hosts
        first_mon = None
        for remote, _ in remotes_and_roles:
            ctx.cluster.remotes[remote].append('mon.' + remote.shortname)
            if not first_mon:
                first_mon = remote.shortname
                bootstrap_remote = remote
        log.info('No mon roles; fabricating mons')

    roles = [role_list for (remote, role_list) in ctx.cluster.remotes.items()]

    ctx.ceph[cluster_name].mons = get_mons(
        roles, ips, cluster_name,
        mon_bind_msgr2=config.get('mon_bind_msgr2', True),
        mon_bind_addrvec=config.get('mon_bind_addrvec', True),
    )
    log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons)

    if config.get('roleless', False):
        ctx.ceph[cluster_name].roleless = True
        ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
        ctx.ceph[cluster_name].first_mon = first_mon
        ctx.ceph[cluster_name].first_mon_role = 'mon.' + first_mon
    else:
        first_mon_role = sorted(ctx.ceph[cluster_name].mons.keys())[0]
        _, _, first_mon = teuthology.split_role(first_mon_role)
        (bootstrap_remote,) = ctx.cluster.only(first_mon_role).remotes.keys()
        log.info('First mon is mon.%s on %s' % (first_mon,
                                                bootstrap_remote.shortname))
        ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
        ctx.ceph[cluster_name].first_mon = first_mon
        ctx.ceph[cluster_name].first_mon_role = first_mon_role

        others = ctx.cluster.remotes[bootstrap_remote]
        mgrs = sorted([r for r in others
                       if teuthology.is_type('mgr', cluster_name)(r)])
        if not mgrs:
            raise RuntimeError('no mgrs on the same host as first mon %s' % first_mon)
        _, _, first_mgr = teuthology.split_role(mgrs[0])
        log.info('First mgr is %s' % (first_mgr))
        ctx.ceph[cluster_name].first_mgr = first_mgr
    yield

@contextlib.contextmanager
def task(ctx, config):
    """
    Deploy ceph cluster using cephadm

    Set up containers' registry mirrors before the bootstrap, if the
    corresponding config is provided in the teuthology server config yaml file.

    For example, teuthology.yaml can contain the 'defaults' section:

        defaults:
          cephadm:
            containers:
              registry_mirrors:
                docker.io: 'registry.mirror.example.com:5000'
              image: 'quay.io/ceph-ci/ceph'

    Using overrides makes it possible to customize it per run.
    The equivalent 'overrides' section looks like:

        overrides:
          cephadm:
            containers:
              registry_mirrors:
                docker.io: 'registry.mirror.example.com:5000'
              image: 'quay.io/ceph-ci/ceph'

    :param ctx: the argparse.Namespace object
    :param config: the config dict
    """
    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task only supports a dictionary for configuration"

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph', {}))
    teuthology.deep_merge(config, overrides.get('cephadm', {}))
    log.info('Config: ' + str(config))

    testdir = teuthology.get_testdir(ctx)

    # set up cluster context
    if not hasattr(ctx, 'ceph'):
        ctx.ceph = {}
        ctx.managers = {}
    if 'cluster' not in config:
        config['cluster'] = 'ceph'
    cluster_name = config['cluster']
    if cluster_name not in ctx.ceph:
        ctx.ceph[cluster_name] = argparse.Namespace()
        ctx.ceph[cluster_name].bootstrapped = False

    # image
    teuth_defaults = teuth_config.get('defaults', {})
    cephadm_defaults = teuth_defaults.get('cephadm', {})
    containers_defaults = cephadm_defaults.get('containers', {})
    mirrors_defaults = containers_defaults.get('registry_mirrors', {})
    container_registry_mirror = mirrors_defaults.get('docker.io', None)
    container_image_name = containers_defaults.get('image', None)

    containers = config.get('containers', {})
    mirrors = containers.get('registry_mirrors', {})
    container_image_name = containers.get('image', container_image_name)
    container_registry_mirror = mirrors.get('docker.io',
                                            container_registry_mirror)

    if not container_image_name:
        raise Exception("Configuration error occurred. "
                        "The 'image' value is undefined for 'cephadm' task. "
                        "Please provide corresponding options in the task's "
                        "config, task 'overrides', or teuthology 'defaults' "
                        "section.")

    if not hasattr(ctx.ceph[cluster_name], 'image'):
        ctx.ceph[cluster_name].image = config.get('image')
    ref = None
    if not ctx.ceph[cluster_name].image:
        sha1 = config.get('sha1')
        if sha1:
            ctx.ceph[cluster_name].image = container_image_name + ':' + sha1
            ref = sha1
        else:
            # hmm, fall back to branch?
            branch = config.get('branch', 'master')
            ref = branch
            ctx.ceph[cluster_name].image = container_image_name + ':' + branch
    log.info('Cluster image is %s' % ctx.ceph[cluster_name].image)

    with contextutil.nested(
            # if the cluster is already bootstrapped, bypass the corresponding methods
            lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)
                else initialize_config(ctx=ctx, config=config),
            lambda: ceph_initial(),
            lambda: normalize_hostnames(ctx=ctx),
            lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)
                else download_cephadm(ctx=ctx, config=config, ref=ref),
            lambda: ceph_log(ctx=ctx, config=config),
            lambda: ceph_crash(ctx=ctx, config=config),
            lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)
                else ceph_bootstrap(ctx, config,
                                    container_registry_mirror),
            lambda: crush_setup(ctx=ctx, config=config),
            lambda: ceph_mons(ctx=ctx, config=config),
            lambda: distribute_config_and_admin_keyring(ctx=ctx, config=config),
            lambda: ceph_mgrs(ctx=ctx, config=config),
            lambda: ceph_osds(ctx=ctx, config=config),
            lambda: ceph_mdss(ctx=ctx, config=config),
            lambda: ceph_rgw(ctx=ctx, config=config),
            lambda: ceph_monitoring('prometheus', ctx=ctx, config=config),
            lambda: ceph_monitoring('node-exporter', ctx=ctx, config=config),
            lambda: ceph_monitoring('alertmanager', ctx=ctx, config=config),
            lambda: ceph_monitoring('grafana', ctx=ctx, config=config),
            lambda: ceph_clients(ctx=ctx, config=config),
    ):
        ctx.managers[cluster_name] = CephManager(
            ctx.ceph[cluster_name].bootstrap_remote,
            ctx=ctx,
            logger=log.getChild('ceph_manager.' + cluster_name),
            cluster=cluster_name,
            cephadm=True,
        )

        try:
            if config.get('wait-for-healthy', True):
                healthy(ctx=ctx, config=config)

            log.info('Setup complete, yielding')
            yield

        finally:
            log.info('Teardown begin')

def registries_add_mirror_to_docker_io(conf, mirror):
    config = toml.loads(conf)
    is_v1 = 'registries' in config
    if is_v1:
        search = config.get('registries', {}).get('search', {}).get('registries', [])
        insecure = config.get('registries', {}).get('search', {}).get('insecure', [])
        # v2: MutableMapping[str, Any] = {  # needs Python 3
        v2 = {
            'unqualified-search-registries': search,
            'registry': [
                {
                    'prefix': reg,
                    'location': reg,
                    'insecure': reg in insecure,
                    'blocked': False,
                } for reg in search
            ]
        }
    else:
        v2 = config  # type: ignore
    dockers = [r for r in v2['registry'] if r['prefix'] == 'docker.io']
    if dockers:
        docker = dockers[0]
        docker['mirror'] = [{
            "location": mirror,
            "insecure": True,
        }]
    return v2

1250 | ||
1251 | def add_mirror_to_cluster(ctx, mirror): | |
1252 | log.info('Adding local image mirror %s' % mirror) | |
1253 | ||
1254 | registries_conf = '/etc/containers/registries.conf' | |
1255 | ||
1256 | for remote in ctx.cluster.remotes.keys(): | |
1257 | try: | |
1258 | config = teuthology.get_file( | |
1259 | remote=remote, | |
1260 | path=registries_conf | |
1261 | ) | |
1262 | new_config = toml.dumps(registries_add_mirror_to_docker_io(config.decode('utf-8'), mirror)) | |
1263 | ||
1264 | teuthology.sudo_write_file( | |
1265 | remote=remote, | |
1266 | path=registries_conf, | |
1267 | data=six.ensure_str(new_config), | |
1268 | ) | |
1269 | except IOError as e: # py3: use FileNotFoundError instead. | |
1270 | if e.errno != errno.ENOENT: | |
1271 | raise | |
1272 | ||
1273 | # Docker doesn't ship a registries.conf | |
1274 | log.info('Failed to add mirror: %s' % str(e)) |