1 | """ |
2 | Ceph cluster task, deployed via cephadm orchestrator | |
3 | """ | |
9f95a23c TL |
4 | import argparse |
5 | import configobj | |
6 | import contextlib | |
e306af50 | 7 | import errno |
9f95a23c TL |
8 | import logging |
9 | import os | |
10 | import json | |
11 | import re | |
12 | import uuid | |
f91f0fd5 | 13 | import yaml |
9f95a23c | 14 | |
e306af50 TL |
15 | import six |
16 | import toml | |
17 | from io import BytesIO | |
18 | from six import StringIO | |
9f95a23c | 19 | from tarfile import ReadError |
e306af50 | 20 | from tasks.ceph_manager import CephManager |
9f95a23c TL |
21 | from teuthology import misc as teuthology |
22 | from teuthology import contextutil | |
23 | from teuthology.orchestra import run | |
24 | from teuthology.orchestra.daemon import DaemonGroup | |
25 | from teuthology.config import config as teuth_config | |
26 | ||
27 | # these items we use from ceph.py should probably eventually move elsewhere | |
28 | from tasks.ceph import get_mons, healthy | |
29 | ||
30 | CEPH_ROLE_TYPES = ['mon', 'mgr', 'osd', 'mds', 'rgw', 'prometheus'] | |
31 | ||
32 | log = logging.getLogger(__name__) | |
33 | ||
34 | ||
def _shell(ctx, cluster_name, remote, args, extra_cephadm_args=[], **kwargs):
    testdir = teuthology.get_testdir(ctx)
    return remote.run(
        args=[
            'sudo',
            ctx.cephadm,
            '--image', ctx.ceph[cluster_name].image,
            'shell',
            '-c', '/etc/ceph/{}.conf'.format(cluster_name),
            '-k', '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
            '--fsid', ctx.ceph[cluster_name].fsid,
        ] + extra_cephadm_args + [
            '--',
        ] + args,
        **kwargs
    )
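# Note (illustrative): a call such as
#   _shell(ctx, 'ceph', remote, ['ceph', 'orch', 'ls'])
# ends up running, on the remote host, roughly
#   sudo <ctx.cephadm> --image <image> shell \
#       -c /etc/ceph/ceph.conf -k /etc/ceph/ceph.client.admin.keyring \
#       --fsid <fsid> -- ceph orch ls
# where the <...> placeholders are filled in from ctx at runtime.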

def build_initial_config(ctx, config):
    cluster_name = config['cluster']

    path = os.path.join(os.path.dirname(__file__), 'cephadm.conf')
    conf = configobj.ConfigObj(path, file_error=True)

    conf.setdefault('global', {})
    conf['global']['fsid'] = ctx.ceph[cluster_name].fsid

    # overrides
    for section, keys in config.get('conf', {}).items():
        for key, value in keys.items():
            log.info(" override: [%s] %s = %s" % (section, key, value))
            if section not in conf:
                conf[section] = {}
            conf[section][key] = value

    return conf
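# Note (illustrative): the 'conf' overrides consumed above use the usual
# teuthology layout, e.g. a task config of
#   conf:
#     global:
#       osd pool default size: 2
# adds 'osd pool default size = 2' to the [global] section of the seed config.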

@contextlib.contextmanager
def normalize_hostnames(ctx):
    """
    Ensure we have short hostnames throughout, for consistency between
    remote.shortname and socket.gethostname() in cephadm.
    """
    log.info('Normalizing hostnames...')
    ctx.cluster.run(args=[
        'sudo',
        'hostname',
        run.Raw('$(hostname -s)'),
    ])

    try:
        yield
    finally:
        pass

@contextlib.contextmanager
def download_cephadm(ctx, config, ref):
    cluster_name = config['cluster']

    if config.get('cephadm_mode') != 'cephadm-package':
        ref = config.get('cephadm_branch', ref)
        git_url = teuth_config.get_ceph_git_url()
        log.info('Downloading cephadm (repo %s ref %s)...' % (git_url, ref))
        if git_url.startswith('https://github.com/'):
            # git archive doesn't like https:// URLs, which we use with github.
            rest = git_url.split('https://github.com/', 1)[1]
            rest = re.sub(r'\.git/?$', '', rest).strip()  # no .git suffix
            ctx.cluster.run(
                args=[
                    'curl', '--silent',
                    'https://raw.githubusercontent.com/' + rest + '/' + ref + '/src/cephadm/cephadm',
                    run.Raw('>'),
                    ctx.cephadm,
                    run.Raw('&&'),
                    'ls', '-l',
                    ctx.cephadm,
                ],
            )
        else:
            ctx.cluster.run(
                args=[
                    'git', 'archive',
                    '--remote=' + git_url,
                    ref,
                    'src/cephadm/cephadm',
                    run.Raw('|'),
                    'tar', '-xO', 'src/cephadm/cephadm',
                    run.Raw('>'),
                    ctx.cephadm,
                ],
            )
        # sanity-check the resulting file and set executable bit
        cephadm_file_size = '$(stat -c%s {})'.format(ctx.cephadm)
        ctx.cluster.run(
            args=[
                'test', '-s', ctx.cephadm,
                run.Raw('&&'),
                'test', run.Raw(cephadm_file_size), "-gt", run.Raw('1000'),
                run.Raw('&&'),
                'chmod', '+x', ctx.cephadm,
            ],
        )

    try:
        yield
    finally:
        log.info('Removing cluster...')
        ctx.cluster.run(args=[
            'sudo',
            ctx.cephadm,
            'rm-cluster',
            '--fsid', ctx.ceph[cluster_name].fsid,
            '--force',
        ])

        if config.get('cephadm_mode') == 'root':
            log.info('Removing cephadm ...')
            ctx.cluster.run(
                args=[
                    'rm',
                    '-rf',
                    ctx.cephadm,
                ],
            )

@contextlib.contextmanager
def ceph_log(ctx, config):
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    # Add logs directory to job's info log file
    with open(os.path.join(ctx.archive, 'info.yaml'), 'r+') as info_file:
        info_yaml = yaml.safe_load(info_file)
        info_file.seek(0)
        if 'archive' not in info_yaml:
            info_yaml['archive'] = {'log': '/var/log/ceph'}
        else:
            info_yaml['archive']['log'] = '/var/log/ceph'
        yaml.safe_dump(info_yaml, info_file, default_flow_style=False)

    try:
        yield

    except Exception:
        # we need to know this below
        ctx.summary['success'] = False
        raise

    finally:
        log.info('Checking cluster log for badness...')

        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurrence of the pattern specified in the Ceph log;
            return None if it is not found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = [
                'sudo',
                'egrep', pattern,
                '/var/log/ceph/{fsid}/ceph.log'.format(
                    fsid=fsid),
            ]
            if excludes:
                for exclude in excludes:
                    args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                run.Raw('|'), 'head', '-n', '1',
            ])
            r = ctx.ceph[cluster_name].bootstrap_remote.run(
                stdout=StringIO(),
                args=args,
            )
            stdout = r.stdout.getvalue()
            if stdout != '':
                return stdout
            return None

        if first_in_ceph_log(r'\[ERR\]|\[WRN\]|\[SEC\]',
                             config.get('log-ignorelist')) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in [r'\[SEC\]', r'\[ERR\]', r'\[WRN\]']:
                    match = first_in_ceph_log(pattern, config['log-ignorelist'])
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                                match=match.rstrip('\n'),
                            )
                        break

        if ctx.archive is not None and \
                not (ctx.config.get('archive-on-error') and ctx.summary['success']):
            # and logs
            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',            # all logs, not just for the cluster
                        '/var/log/rbd-target-api',  # ceph-iscsi
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            try:
                os.makedirs(path)
            except OSError:
                pass
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.name)
                try:
                    os.makedirs(sub)
                except OSError:
                    pass
                try:
                    teuthology.pull_directory(remote, '/var/log/ceph',  # everything
                                              os.path.join(sub, 'log'))
                except ReadError:
                    pass
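# Note (illustrative): the log scan above can be relaxed per-suite with a
# 'log-ignorelist' list of regexes in the task config, e.g.
#   log-ignorelist:
#     - \(MON_DOWN\)
#     - slow request
# each entry is passed verbatim to 'egrep -v', so regex syntax applies.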

@contextlib.contextmanager
def ceph_crash(ctx, config):
    """
    Gather crash dumps from /var/lib/ceph/$fsid/crash
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    # Add logs directory to job's info log file
    with open(os.path.join(ctx.archive, 'info.yaml'), 'r+') as info_file:
        info_yaml = yaml.safe_load(info_file)
        info_file.seek(0)
        if 'archive' not in info_yaml:
            info_yaml['archive'] = {'crash': '/var/lib/ceph/%s/crash' % fsid}
        else:
            info_yaml['archive']['crash'] = '/var/lib/ceph/%s/crash' % fsid
        yaml.safe_dump(info_yaml, info_file, default_flow_style=False)

    try:
        yield

    finally:
        if ctx.archive is not None:
            log.info('Archiving crash dumps...')
            path = os.path.join(ctx.archive, 'remote')
            try:
                os.makedirs(path)
            except OSError:
                pass
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.name)
                try:
                    os.makedirs(sub)
                except OSError:
                    pass
                try:
                    teuthology.pull_directory(remote,
                                              '/var/lib/ceph/%s/crash' % fsid,
                                              os.path.join(sub, 'crash'))
                except ReadError:
                    pass

@contextlib.contextmanager
def ceph_bootstrap(ctx, config, registry):
    """
    Bootstrap the ceph cluster; set up the containers' registry mirror before
    bootstrapping if a registry is provided.

    :param ctx: the argparse.Namespace object
    :param config: the config dict
    :param registry: url to containers' mirror registry
    """
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)
    fsid = ctx.ceph[cluster_name].fsid

    bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote
    first_mon = ctx.ceph[cluster_name].first_mon
    first_mon_role = ctx.ceph[cluster_name].first_mon_role
    mons = ctx.ceph[cluster_name].mons

    ctx.cluster.run(args=[
        'sudo', 'mkdir', '-p', '/etc/ceph',
    ])
    ctx.cluster.run(args=[
        'sudo', 'chmod', '777', '/etc/ceph',
    ])
    if registry:
        add_mirror_to_cluster(ctx, registry)
    try:
        # write seed config
        log.info('Writing seed config...')
        conf_fp = BytesIO()
        seed_config = build_initial_config(ctx, config)
        seed_config.write(conf_fp)
        teuthology.write_file(
            remote=bootstrap_remote,
            path='{}/seed.{}.conf'.format(testdir, cluster_name),
            data=conf_fp.getvalue())
        log.debug('Final config:\n' + conf_fp.getvalue().decode())
        ctx.ceph[cluster_name].conf = seed_config

        # register initial daemons
        ctx.daemons.register_daemon(
            bootstrap_remote, 'mon', first_mon,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild('mon.' + first_mon),
            wait=False,
            started=True,
        )
        if not ctx.ceph[cluster_name].roleless:
            first_mgr = ctx.ceph[cluster_name].first_mgr
            ctx.daemons.register_daemon(
                bootstrap_remote, 'mgr', first_mgr,
                cluster=cluster_name,
                fsid=fsid,
                logger=log.getChild('mgr.' + first_mgr),
                wait=False,
                started=True,
            )

        # bootstrap
        log.info('Bootstrapping...')
        cmd = [
            'sudo',
            ctx.cephadm,
            '--image', ctx.ceph[cluster_name].image,
            '-v',
            'bootstrap',
            '--fsid', fsid,
            '--config', '{}/seed.{}.conf'.format(testdir, cluster_name),
            '--output-config', '/etc/ceph/{}.conf'.format(cluster_name),
            '--output-keyring',
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
            '--output-pub-ssh-key', '{}/{}.pub'.format(testdir, cluster_name),
        ]
        if not ctx.ceph[cluster_name].roleless:
            cmd += [
                '--mon-id', first_mon,
                '--mgr-id', first_mgr,
                '--orphan-initial-daemons',   # we will do it explicitly!
                '--skip-monitoring-stack',    # we'll provision these explicitly
            ]
        if mons[first_mon_role].startswith('['):
            cmd += ['--mon-addrv', mons[first_mon_role]]
        else:
            cmd += ['--mon-ip', mons[first_mon_role]]
        if config.get('skip_dashboard'):
            cmd += ['--skip-dashboard']
        # bootstrap makes the keyring root 0600, so +r it for our purposes
        cmd += [
            run.Raw('&&'),
            'sudo', 'chmod', '+r',
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
        ]
        bootstrap_remote.run(args=cmd)

        # fetch keys and configs
        log.info('Fetching config...')
        ctx.ceph[cluster_name].config_file = teuthology.get_file(
            remote=bootstrap_remote,
            path='/etc/ceph/{}.conf'.format(cluster_name))
        log.info('Fetching client.admin keyring...')
        ctx.ceph[cluster_name].admin_keyring = teuthology.get_file(
            remote=bootstrap_remote,
            path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name))
        log.info('Fetching mon keyring...')
        ctx.ceph[cluster_name].mon_keyring = teuthology.get_file(
            remote=bootstrap_remote,
            path='/var/lib/ceph/%s/mon.%s/keyring' % (fsid, first_mon),
            sudo=True)

        # fetch ssh key, distribute to additional nodes
        log.info('Fetching pub ssh key...')
        ssh_pub_key = teuthology.get_file(
            remote=bootstrap_remote,
            path='{}/{}.pub'.format(testdir, cluster_name)
        ).decode('ascii').strip()

        log.info('Installing pub ssh key for root users...')
        ctx.cluster.run(args=[
            'sudo', 'install', '-d', '-m', '0700', '/root/.ssh',
            run.Raw('&&'),
            'echo', ssh_pub_key,
            run.Raw('|'),
            'sudo', 'tee', '-a', '/root/.ssh/authorized_keys',
            run.Raw('&&'),
            'sudo', 'chmod', '0600', '/root/.ssh/authorized_keys',
        ])

        # set options
        _shell(ctx, cluster_name, bootstrap_remote,
               ['ceph', 'config', 'set', 'mgr', 'mgr/cephadm/allow_ptrace', 'true'])

        # add other hosts
        for remote in ctx.cluster.remotes.keys():
            if remote == bootstrap_remote:
                continue
            log.info('Writing (initial) conf and keyring to %s' % remote.shortname)
            teuthology.write_file(
                remote=remote,
                path='/etc/ceph/{}.conf'.format(cluster_name),
                data=ctx.ceph[cluster_name].config_file)
            teuthology.write_file(
                remote=remote,
                path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
                data=ctx.ceph[cluster_name].admin_keyring)

            log.info('Adding host %s to orchestrator...' % remote.shortname)
            _shell(ctx, cluster_name, remote, [
                'ceph', 'orch', 'host', 'add',
                remote.shortname
            ])
            r = _shell(ctx, cluster_name, remote,
                       ['ceph', 'orch', 'host', 'ls', '--format=json'],
                       stdout=StringIO())
            hosts = [node['hostname'] for node in json.loads(r.stdout.getvalue())]
            assert remote.shortname in hosts

        yield

    finally:
        log.info('Cleaning up testdir ceph.* files...')
        ctx.cluster.run(args=[
            'rm', '-f',
            '{}/seed.{}.conf'.format(testdir, cluster_name),
            '{}/{}.pub'.format(testdir, cluster_name),
        ])

        log.info('Stopping all daemons...')

        # this doesn't block until they are all stopped...
        #ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'])

        # so, stop them individually
        for role in ctx.daemons.resolve_role_list(None, CEPH_ROLE_TYPES, True):
            cluster, type_, id_ = teuthology.split_role(role)
            try:
                ctx.daemons.get_daemon(type_, id_, cluster).stop()
            except Exception:
                log.exception('Failed to stop "{role}"'.format(role=role))
                raise

        # clean up /etc/ceph
        ctx.cluster.run(args=[
            'sudo', 'rm', '-f',
            '/etc/ceph/{}.conf'.format(cluster_name),
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
        ])
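# Note (illustrative): the bootstrap invocation assembled above expands to
# roughly
#   sudo <ctx.cephadm> --image <image> -v bootstrap --fsid <fsid> \
#       --config <testdir>/seed.ceph.conf \
#       --output-config /etc/ceph/ceph.conf \
#       --output-keyring /etc/ceph/ceph.client.admin.keyring \
#       --output-pub-ssh-key <testdir>/ceph.pub \
#       [--mon-id <first_mon> --mgr-id <first_mgr> \
#        --orphan-initial-daemons --skip-monitoring-stack] \
#       --mon-ip <addr>
# where the bracketed flags are only added for role-based (non-roleless) runs
# and --mon-addrv replaces --mon-ip when an address vector is configured.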

@contextlib.contextmanager
def ceph_mons(ctx, config):
    """
    Deploy any additional mons
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid
    num_mons = 1

    try:
        for remote, roles in ctx.cluster.remotes.items():
            for mon in [r for r in roles
                        if teuthology.is_type('mon', cluster_name)(r)]:
                c_, _, id_ = teuthology.split_role(mon)
                if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
                    continue
                log.info('Adding %s on %s' % (mon, remote.shortname))
                num_mons += 1
                _shell(ctx, cluster_name, remote, [
                    'ceph', 'orch', 'daemon', 'add', 'mon',
                    remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] + '=' + id_,
                ])
                ctx.daemons.register_daemon(
                    remote, 'mon', id_,
                    cluster=cluster_name,
                    fsid=fsid,
                    logger=log.getChild(mon),
                    wait=False,
                    started=True,
                )

                with contextutil.safe_while(sleep=1, tries=180) as proceed:
                    while proceed():
                        log.info('Waiting for %d mons in monmap...' % (num_mons))
                        r = _shell(
                            ctx=ctx,
                            cluster_name=cluster_name,
                            remote=remote,
                            args=[
                                'ceph', 'mon', 'dump', '-f', 'json',
                            ],
                            stdout=StringIO(),
                        )
                        j = json.loads(r.stdout.getvalue())
                        if len(j['mons']) == num_mons:
                            break

        # refresh our (final) ceph.conf file
        bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote
        log.info('Generating final ceph.conf file...')
        r = _shell(
            ctx=ctx,
            cluster_name=cluster_name,
            remote=bootstrap_remote,
            args=[
                'ceph', 'config', 'generate-minimal-conf',
            ],
            stdout=StringIO(),
        )
        ctx.ceph[cluster_name].config_file = r.stdout.getvalue()

        yield

    finally:
        pass
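# Note: 'ceph orch daemon add mon' above takes a '<host>:<addr>=<id>' spec,
# e.g. (illustrative) 'smithi001:172.21.15.1=b', or with msgr2 address vectors
# 'smithi001:[v2:172.21.15.1:3300,v1:172.21.15.1:6789]=b'.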

@contextlib.contextmanager
def ceph_mgrs(ctx, config):
    """
    Deploy any additional mgrs
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    try:
        nodes = []
        daemons = {}
        for remote, roles in ctx.cluster.remotes.items():
            for mgr in [r for r in roles
                        if teuthology.is_type('mgr', cluster_name)(r)]:
                c_, _, id_ = teuthology.split_role(mgr)
                if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mgr:
                    continue
                log.info('Adding %s on %s' % (mgr, remote.shortname))
                nodes.append(remote.shortname + '=' + id_)
                daemons[mgr] = (remote, id_)
        if nodes:
            _shell(ctx, cluster_name, remote, [
                'ceph', 'orch', 'apply', 'mgr',
                str(len(nodes) + 1) + ';' + ';'.join(nodes)]
            )
        for mgr, i in daemons.items():
            remote, id_ = i
            ctx.daemons.register_daemon(
                remote, 'mgr', id_,
                cluster=cluster_name,
                fsid=fsid,
                logger=log.getChild(mgr),
                wait=False,
                started=True,
            )

        yield

    finally:
        pass
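# Note: the placement argument built above has the form '<count>;<host>=<id>;...',
# e.g. (illustrative) '3;smithi001=y;smithi002=z'; the count is len(nodes) + 1
# because the mgr deployed at bootstrap time is already running.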

@contextlib.contextmanager
def ceph_osds(ctx, config):
    """
    Deploy OSDs
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    try:
        log.info('Deploying OSDs...')

        # provision OSDs in numeric order
        id_to_remote = {}
        devs_by_remote = {}
        for remote, roles in ctx.cluster.remotes.items():
            devs_by_remote[remote] = teuthology.get_scratch_devices(remote)
            for osd in [r for r in roles
                        if teuthology.is_type('osd', cluster_name)(r)]:
                _, _, id_ = teuthology.split_role(osd)
                id_to_remote[int(id_)] = (osd, remote)

        cur = 0
        for osd_id in sorted(id_to_remote.keys()):
            osd, remote = id_to_remote[osd_id]
            _, _, id_ = teuthology.split_role(osd)
            assert int(id_) == cur
            devs = devs_by_remote[remote]
            assert devs   ## FIXME ##
            dev = devs.pop()
            if all(_ in dev for _ in ('lv', 'vg')):
                short_dev = dev.replace('/dev/', '')
            else:
                short_dev = dev
            log.info('Deploying %s on %s with %s...' % (
                osd, remote.shortname, dev))
            _shell(ctx, cluster_name, remote, [
                'ceph-volume', 'lvm', 'zap', dev])
            _shell(ctx, cluster_name, remote, [
                'ceph', 'orch', 'daemon', 'add', 'osd',
                remote.shortname + ':' + short_dev
            ])
            ctx.daemons.register_daemon(
                remote, 'osd', id_,
                cluster=cluster_name,
                fsid=fsid,
                logger=log.getChild(osd),
                wait=False,
                started=True,
            )
            cur += 1

        yield
    finally:
        pass
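# Note: each OSD above is created with 'ceph orch daemon add osd <host>:<device>',
# e.g. (illustrative) 'smithi001:vg_nvme/lv_1'; devices come from
# teuthology.get_scratch_devices() and are zapped with ceph-volume beforehand.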

@contextlib.contextmanager
def ceph_mdss(ctx, config):
    """
    Deploy MDSs
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = []
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type('mds', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            nodes.append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)
    if nodes:
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', 'mds',
            'all',
            str(len(nodes)) + ';' + ';'.join(nodes)]
        )
    for role, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, 'mds', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield

@contextlib.contextmanager
def ceph_monitoring(daemon_type, ctx, config):
    """
    Deploy prometheus, node-exporter, etc.
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = []
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type(daemon_type, cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            nodes.append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)
    if nodes:
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', daemon_type,
            str(len(nodes)) + ';' + ';'.join(nodes)]
        )
    for role, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, daemon_type, id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield

@contextlib.contextmanager
def ceph_rgw(ctx, config):
    """
    Deploy rgw
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = {}
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type('rgw', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            realmzone = '.'.join(id_.split('.')[0:2])
            if realmzone not in nodes:
                nodes[realmzone] = []
            nodes[realmzone].append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)

    for realmzone in nodes.keys():
        (realm, zone) = realmzone.split('.', 1)

        # TODO: those should be moved to mgr/cephadm
        _shell(ctx, cluster_name, remote,
               ['radosgw-admin', 'realm', 'create', '--rgw-realm', realm, '--default']
        )
        _shell(ctx, cluster_name, remote,
               ['radosgw-admin', 'zonegroup', 'create', '--rgw-zonegroup=default', '--master', '--default']
        )
        _shell(ctx, cluster_name, remote,
               ['radosgw-admin', 'zone', 'create', '--rgw-zonegroup=default', '--rgw-zone', zone, '--master', '--default']
        )

    for realmzone, nodes in nodes.items():
        (realm, zone) = realmzone.split('.', 1)
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', 'rgw', realm, zone,
            '--placement',
            str(len(nodes)) + ';' + ';'.join(nodes)]
        )
    for role, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, 'rgw', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield
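# Note: the first two dot-separated components of each rgw id are treated as
# '<realm>.<zone>' above, e.g. (illustrative) an id of 'myrealm.myzone.a'
# groups its daemon under realm 'myrealm' and zone 'myzone'.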


@contextlib.contextmanager
def ceph_iscsi(ctx, config):
    """
    Deploy iSCSIs
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    nodes = []
    daemons = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in [r for r in roles
                     if teuthology.is_type('iscsi', cluster_name)(r)]:
            c_, _, id_ = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            nodes.append(remote.shortname + '=' + id_)
            daemons[role] = (remote, id_)
    if nodes:
        poolname = 'iscsi'
        # ceph osd pool create iscsi 3 3 replicated
        _shell(ctx, cluster_name, remote, [
            'ceph', 'osd', 'pool', 'create',
            poolname, '3', '3', 'replicated']
        )

        _shell(ctx, cluster_name, remote, [
            'ceph', 'osd', 'pool', 'application', 'enable',
            poolname, 'rbd']
        )

        # ceph orch apply iscsi iscsi user password
        _shell(ctx, cluster_name, remote, [
            'ceph', 'orch', 'apply', 'iscsi',
            poolname, 'user', 'password',
            '--placement', str(len(nodes)) + ';' + ';'.join(nodes)]
        )
    for role, i in daemons.items():
        remote, id_ = i
        ctx.daemons.register_daemon(
            remote, 'iscsi', id_,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield

@contextlib.contextmanager
def ceph_clients(ctx, config):
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)

    log.info('Setting up client nodes...')
    clients = ctx.cluster.only(teuthology.is_type('client', cluster_name))
    testdir = teuthology.get_testdir(ctx)
    coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
    for remote, roles_for_host in clients.remotes.items():
        for role in teuthology.cluster_roles_of_type(roles_for_host, 'client',
                                                     cluster_name):
            name = teuthology.ceph_role(role)
            client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name,
                                                                name)
            r = _shell(
                ctx=ctx,
                cluster_name=cluster_name,
                remote=remote,
                args=[
                    'ceph', 'auth',
                    'get-or-create', name,
                    'mon', 'allow *',
                    'osd', 'allow *',
                    'mds', 'allow *',
                    'mgr', 'allow *',
                ],
                stdout=StringIO(),
            )
            keyring = r.stdout.getvalue()
            teuthology.sudo_write_file(
                remote=remote,
                path=client_keyring,
                data=keyring,
                perms='0644'
            )
    yield

@contextlib.contextmanager
def ceph_initial():
    try:
        yield
    finally:
        log.info('Teardown complete')

## public methods
@contextlib.contextmanager
def stop(ctx, config):
    """
    Stop ceph daemons

    For example::
      tasks:
      - ceph.stop: [mds.*]

      tasks:
      - ceph.stop: [osd.0, osd.2]

      tasks:
      - ceph.stop:
          daemons: [osd.0, osd.2]

    """
    if config is None:
        config = {}
    elif isinstance(config, list):
        config = {'daemons': config}

    daemons = ctx.daemons.resolve_role_list(
        config.get('daemons', None), CEPH_ROLE_TYPES, True)
    clusters = set()

    for role in daemons:
        cluster, type_, id_ = teuthology.split_role(role)
        ctx.daemons.get_daemon(type_, id_, cluster).stop()
        clusters.add(cluster)

    # for cluster in clusters:
    #     ctx.ceph[cluster].watchdog.stop()
    #     ctx.ceph[cluster].watchdog.join()

    yield

def shell(ctx, config):
    """
    Execute (shell) commands
    """
    cluster_name = config.get('cluster', 'ceph')

    env = []
    if 'env' in config:
        for k in config['env']:
            env.extend(['-e', k + '=' + ctx.config.get(k, '')])
        del config['env']

    if 'all' in config and len(config) == 1:
        a = config['all']
        roles = teuthology.all_roles(ctx.cluster)
        config = dict((id_, a) for id_ in roles)

    for role, ls in config.items():
        (remote,) = ctx.cluster.only(role).remotes.keys()
        log.info('Running commands on role %s host %s', role, remote.name)
        for c in ls:
            _shell(ctx, cluster_name, remote,
                   ['bash', '-c', c],
                   extra_cephadm_args=env)
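# Note (illustrative): given the parsing above, a suite would typically use
# this as, e.g.
#   tasks:
#   - cephadm.shell:
#       mon.a:
#         - ceph orch status
#         - ceph orch ps
# The special 'all' key runs the same commands on every role, and entries in an
# optional 'env' list are forwarded to cephadm as '-e NAME=<value from ctx.config>'.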

@contextlib.contextmanager
def tweaked_option(ctx, config):
    """
    Set an option, and then restore it to its original value.

    Note: due to the way tasks are executed/nested, it is not advisable to
    use this method as a standalone task; otherwise it is likely to restore
    the tweaked option at the /end/ of the 'tasks' block.
    """
    saved_options = {}
    # we can complicate this when necessary
    options = ['mon-health-to-clog']
    type_, id_ = 'mon', '*'
    cluster = config.get('cluster', 'ceph')
    manager = ctx.managers[cluster]
    if id_ == '*':
        get_from = next(teuthology.all_roles_of_type(ctx.cluster, type_))
    else:
        get_from = id_
    for option in options:
        if option not in config:
            continue
        value = 'true' if config[option] else 'false'
        option = option.replace('-', '_')
        old_value = manager.get_config(type_, get_from, option)
        if value != old_value:
            saved_options[option] = old_value
            manager.inject_args(type_, id_, option, value)
    yield
    for option, value in saved_options.items():
        manager.inject_args(type_, id_, option, value)

@contextlib.contextmanager
def restart(ctx, config):
    """
    restart ceph daemons

    For example::
      tasks:
      - ceph.restart: [all]

    For example::
      tasks:
      - ceph.restart: [osd.0, mon.1, mds.*]

    or::

      tasks:
      - ceph.restart:
          daemons: [osd.0, mon.1]
          wait-for-healthy: false
          wait-for-osds-up: true

    :param ctx: Context
    :param config: Configuration
    """
    if config is None:
        config = {}
    elif isinstance(config, list):
        config = {'daemons': config}

    daemons = ctx.daemons.resolve_role_list(
        config.get('daemons', None), CEPH_ROLE_TYPES, True)
    clusters = set()

    log.info('daemons %s' % daemons)
    with tweaked_option(ctx, config):
        for role in daemons:
            cluster, type_, id_ = teuthology.split_role(role)
            d = ctx.daemons.get_daemon(type_, id_, cluster)
            assert d, 'daemon %s does not exist' % role
            d.stop()
            if type_ == 'osd':
                ctx.managers[cluster].mark_down_osd(id_)
            d.restart()
            clusters.add(cluster)

    if config.get('wait-for-healthy', True):
        for cluster in clusters:
            healthy(ctx=ctx, config=dict(cluster=cluster))
    if config.get('wait-for-osds-up', False):
        for cluster in clusters:
            ctx.managers[cluster].wait_for_all_osds_up()
    yield

@contextlib.contextmanager
def distribute_config_and_admin_keyring(ctx, config):
    """
    Distribute a sufficient config and keyring for clients
    """
    cluster_name = config['cluster']
    log.info('Distributing (final) config and client.admin keyring...')
    for remote, roles in ctx.cluster.remotes.items():
        teuthology.sudo_write_file(
            remote=remote,
            path='/etc/ceph/{}.conf'.format(cluster_name),
            data=ctx.ceph[cluster_name].config_file)
        teuthology.sudo_write_file(
            remote=remote,
            path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
            data=ctx.ceph[cluster_name].admin_keyring)
    try:
        yield
    finally:
        ctx.cluster.run(args=[
            'sudo', 'rm', '-f',
            '/etc/ceph/{}.conf'.format(cluster_name),
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
        ])

@contextlib.contextmanager
def crush_setup(ctx, config):
    cluster_name = config['cluster']

    profile = config.get('crush_tunables', 'default')
    log.info('Setting crush tunables to %s', profile)
    _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote,
           args=['ceph', 'osd', 'crush', 'tunables', profile])
    yield

@contextlib.contextmanager
def _bypass():
    yield

@contextlib.contextmanager
def initialize_config(ctx, config):
    cluster_name = config['cluster']
    testdir = teuthology.get_testdir(ctx)

    ctx.ceph[cluster_name].thrashers = []
    # fixme: setup watchdog, ala ceph.py

    ctx.ceph[cluster_name].roleless = False  # see below

    first_ceph_cluster = False
    if not hasattr(ctx, 'daemons'):
        first_ceph_cluster = True

    # cephadm mode?
    if 'cephadm_mode' not in config:
        config['cephadm_mode'] = 'root'
    assert config['cephadm_mode'] in ['root', 'cephadm-package']
    if config['cephadm_mode'] == 'root':
        ctx.cephadm = testdir + '/cephadm'
    else:
        ctx.cephadm = 'cephadm'  # in the path

    if first_ceph_cluster:
        # FIXME: this is global for all clusters
        ctx.daemons = DaemonGroup(
            use_cephadm=ctx.cephadm)

    # uuid
    fsid = str(uuid.uuid1())
    log.info('Cluster fsid is %s' % fsid)
    ctx.ceph[cluster_name].fsid = fsid

    # mon ips
    log.info('Choosing monitor IPs and ports...')
    remotes_and_roles = ctx.cluster.remotes.items()
    ips = [host for (host, port) in
           (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)]

    if config.get('roleless', False):
        # mons will be named after hosts
        first_mon = None
        for remote, _ in remotes_and_roles:
            ctx.cluster.remotes[remote].append('mon.' + remote.shortname)
            if not first_mon:
                first_mon = remote.shortname
                bootstrap_remote = remote
        log.info('No mon roles; fabricating mons')

    roles = [role_list for (remote, role_list) in ctx.cluster.remotes.items()]

    ctx.ceph[cluster_name].mons = get_mons(
        roles, ips, cluster_name,
        mon_bind_msgr2=config.get('mon_bind_msgr2', True),
        mon_bind_addrvec=config.get('mon_bind_addrvec', True),
    )
    log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons)

    if config.get('roleless', False):
        ctx.ceph[cluster_name].roleless = True
        ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
        ctx.ceph[cluster_name].first_mon = first_mon
        ctx.ceph[cluster_name].first_mon_role = 'mon.' + first_mon
    else:
        first_mon_role = sorted(ctx.ceph[cluster_name].mons.keys())[0]
        _, _, first_mon = teuthology.split_role(first_mon_role)
        (bootstrap_remote,) = ctx.cluster.only(first_mon_role).remotes.keys()
        log.info('First mon is mon.%s on %s' % (first_mon,
                                                bootstrap_remote.shortname))
        ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote
        ctx.ceph[cluster_name].first_mon = first_mon
        ctx.ceph[cluster_name].first_mon_role = first_mon_role

        others = ctx.cluster.remotes[bootstrap_remote]
        mgrs = sorted([r for r in others
                       if teuthology.is_type('mgr', cluster_name)(r)])
        if not mgrs:
            raise RuntimeError('no mgrs on the same host as first mon %s' % first_mon)
        _, _, first_mgr = teuthology.split_role(mgrs[0])
        log.info('First mgr is %s' % (first_mgr))
        ctx.ceph[cluster_name].first_mgr = first_mgr
    yield
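# Note: in roleless mode (config 'roleless: true'), no mon roles need to be
# listed; a 'mon.<shortname>' role is fabricated for every host above, and the
# first host doubles as the bootstrap node (first_mon / bootstrap_remote).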

@contextlib.contextmanager
def task(ctx, config):
    """
    Deploy ceph cluster using cephadm

    Set up container registry mirrors before the bootstrap, if the
    corresponding config is provided in the teuthology server config yaml file.

    For example, teuthology.yaml can contain the 'defaults' section:

        defaults:
          cephadm:
            containers:
              registry_mirrors:
                docker.io: 'registry.mirror.example.com:5000'
              image: 'quay.io/ceph-ci/ceph'

    Using overrides makes it possible to customize it per run.
    The equivalent 'overrides' section looks like:

        overrides:
          cephadm:
            containers:
              registry_mirrors:
                docker.io: 'registry.mirror.example.com:5000'
              image: 'quay.io/ceph-ci/ceph'

    :param ctx: the argparse.Namespace object
    :param config: the config dict
    """
    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task only supports a dictionary for configuration"

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph', {}))
    teuthology.deep_merge(config, overrides.get('cephadm', {}))
    log.info('Config: ' + str(config))

    testdir = teuthology.get_testdir(ctx)

    # set up cluster context
    if not hasattr(ctx, 'ceph'):
        ctx.ceph = {}
        ctx.managers = {}
    if 'cluster' not in config:
        config['cluster'] = 'ceph'
    cluster_name = config['cluster']
    if cluster_name not in ctx.ceph:
        ctx.ceph[cluster_name] = argparse.Namespace()
        ctx.ceph[cluster_name].bootstrapped = False

    # image
    teuth_defaults = teuth_config.get('defaults', {})
    cephadm_defaults = teuth_defaults.get('cephadm', {})
    containers_defaults = cephadm_defaults.get('containers', {})
    mirrors_defaults = containers_defaults.get('registry_mirrors', {})
    container_registry_mirror = mirrors_defaults.get('docker.io', None)
    container_image_name = containers_defaults.get('image', None)

    containers = config.get('containers', {})
    mirrors = containers.get('registry_mirrors', {})
    container_image_name = containers.get('image', container_image_name)
    container_registry_mirror = mirrors.get('docker.io',
                                            container_registry_mirror)

    if not hasattr(ctx.ceph[cluster_name], 'image'):
        ctx.ceph[cluster_name].image = config.get('image')
    ref = None
    if not ctx.ceph[cluster_name].image:
        if not container_image_name:
            raise Exception("Configuration error occurred. "
                            "The 'image' value is undefined for 'cephadm' task. "
                            "Please provide corresponding options in the task's "
                            "config, task 'overrides', or teuthology 'defaults' "
                            "section.")
        sha1 = config.get('sha1')
        flavor = config.get('flavor', 'default')

        if sha1:
            if flavor == "crimson":
                ctx.ceph[cluster_name].image = container_image_name + ':' + sha1 + '-' + flavor
            else:
                ctx.ceph[cluster_name].image = container_image_name + ':' + sha1
            ref = sha1
        else:
            # hmm, fall back to branch?
            branch = config.get('branch', 'master')
            ref = branch
            ctx.ceph[cluster_name].image = container_image_name + ':' + branch
    log.info('Cluster image is %s' % ctx.ceph[cluster_name].image)

    with contextutil.nested(
            # if the cluster is already bootstrapped, bypass the corresponding steps
            lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)
                else initialize_config(ctx=ctx, config=config),
            lambda: ceph_initial(),
            lambda: normalize_hostnames(ctx=ctx),
            lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)
                else download_cephadm(ctx=ctx, config=config, ref=ref),
            lambda: ceph_log(ctx=ctx, config=config),
            lambda: ceph_crash(ctx=ctx, config=config),
            lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)
                else ceph_bootstrap(ctx, config,
                                    container_registry_mirror),
            lambda: crush_setup(ctx=ctx, config=config),
            lambda: ceph_mons(ctx=ctx, config=config),
            lambda: distribute_config_and_admin_keyring(ctx=ctx, config=config),
            lambda: ceph_mgrs(ctx=ctx, config=config),
            lambda: ceph_osds(ctx=ctx, config=config),
            lambda: ceph_mdss(ctx=ctx, config=config),
            lambda: ceph_rgw(ctx=ctx, config=config),
            lambda: ceph_iscsi(ctx=ctx, config=config),
            lambda: ceph_monitoring('prometheus', ctx=ctx, config=config),
            lambda: ceph_monitoring('node-exporter', ctx=ctx, config=config),
            lambda: ceph_monitoring('alertmanager', ctx=ctx, config=config),
            lambda: ceph_monitoring('grafana', ctx=ctx, config=config),
            lambda: ceph_clients(ctx=ctx, config=config),
    ):
        ctx.managers[cluster_name] = CephManager(
            ctx.ceph[cluster_name].bootstrap_remote,
            ctx=ctx,
            logger=log.getChild('ceph_manager.' + cluster_name),
            cluster=cluster_name,
            cephadm=True,
        )

        try:
            if config.get('wait-for-healthy', True):
                healthy(ctx=ctx, config=config)

            log.info('Setup complete, yielding')
            yield

        finally:
            log.info('Teardown begin')

def registries_add_mirror_to_docker_io(conf, mirror):
    config = toml.loads(conf)
    is_v1 = 'registries' in config
    if is_v1:
        search = config.get('registries', {}).get('search', {}).get('registries', [])
        insecure = config.get('registries', {}).get('search', {}).get('insecure', [])
        # v2: MutableMapping[str, Any] = { needs Python 3
        v2 = {
            'unqualified-search-registries': search,
            'registry': [
                {
                    'prefix': reg,
                    'location': reg,
                    'insecure': reg in insecure,
                    'blocked': False,
                } for reg in search
            ]
        }
    else:
        v2 = config  # type: ignore
    dockers = [
        r for r in v2['registry'] if
        r.get('prefix') == 'docker.io' or r.get('location') == 'docker.io'
    ]
    if dockers:
        docker = dockers[0]
        if 'mirror' not in docker:
            docker['mirror'] = [{
                "location": mirror,
                "insecure": True,
            }]
    return v2
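# Sketch of the conversion above (values illustrative): a v1 registries.conf like
#   [registries.search]
#   registries = ['docker.io']
# is rewritten to a v2 layout along the lines of
#   unqualified-search-registries = ['docker.io']
#   [[registry]]
#   prefix = "docker.io"
#   location = "docker.io"
#   [[registry.mirror]]
#   location = "<mirror url>"
#   insecure = true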


def add_mirror_to_cluster(ctx, mirror):
    log.info('Adding local image mirror %s' % mirror)

    registries_conf = '/etc/containers/registries.conf'

    for remote in ctx.cluster.remotes.keys():
        try:
            config = teuthology.get_file(
                remote=remote,
                path=registries_conf
            )
            new_config = toml.dumps(registries_add_mirror_to_docker_io(config.decode('utf-8'), mirror))

            teuthology.sudo_write_file(
                remote=remote,
                path=registries_conf,
                data=six.ensure_str(new_config),
            )
        except IOError as e:  # py3: use FileNotFoundError instead.
            if e.errno != errno.ENOENT:
                raise

            # Docker doesn't ship a registries.conf
            log.info('Failed to add mirror: %s' % str(e))