]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | """ |
2 | Execute ceph-deploy as a task | |
3 | """ | |
4 | from cStringIO import StringIO | |
5 | ||
6 | import contextlib | |
7 | import os | |
8 | import time | |
9 | import logging | |
10 | import traceback | |
11 | ||
12 | from teuthology import misc as teuthology | |
13 | from teuthology import contextutil | |
14 | from teuthology.config import config as teuth_config | |
15 | from teuthology.task import install as install_fn | |
16 | from teuthology.orchestra import run | |
17 | from tasks.cephfs.filesystem import Filesystem | |
3efd9988 | 18 | from teuthology.misc import wait_until_healthy |
7c673cae FG |
19 | |
20 | log = logging.getLogger(__name__) | |
21 | ||
22 | ||
@contextlib.contextmanager
def download_ceph_deploy(ctx, config):
    """
    Downloads ceph-deploy from the ceph.com git mirror and (by default)
    switches to the master branch. If the `ceph-deploy-branch` is specified, it
    will use that instead. The `bootstrap` script is ran, with the argument
    obtained from `python_version`, if specified.

    On exit the cloned ceph-deploy checkout is removed from the test dir.

    :param ctx: teuthology context (provides ctx.cluster)
    :param config: task config; honours 'python_version' ('2' or '3')
                   and 'ceph-deploy-branch' keys
    :raises ValueError: if 'python_version' is neither '2' nor '3'
    """
    # use mon.a for ceph_admin
    (ceph_admin,) = ctx.cluster.only('mon.a').remotes.iterkeys()

    # EAFP: 'python_version' is optional; only validate/install when present
    try:
        py_ver = str(config['python_version'])
    except KeyError:
        pass
    else:
        supported_versions = ['2', '3']
        if py_ver not in supported_versions:
            raise ValueError("python_version must be: {}, not {}".format(
                ' or '.join(supported_versions), py_ver
            ))

        log.info("Installing Python")
        system_type = teuthology.get_system_type(ceph_admin)

        if system_type == 'rpm':
            # 'python34' is the py3 package name on the RPM distros in use here
            package = 'python34' if py_ver == '3' else 'python'
            ctx.cluster.run(args=[
                'sudo', 'yum', '-y', 'install',
                package, 'python-virtualenv'
            ])
        else:
            package = 'python3' if py_ver == '3' else 'python'
            ctx.cluster.run(args=[
                'sudo', 'apt-get', '-y', '--force-yes', 'install',
                package, 'python-virtualenv'
            ])

    log.info('Downloading ceph-deploy...')
    testdir = teuthology.get_testdir(ctx)
    ceph_deploy_branch = config.get('ceph-deploy-branch', 'master')

    # clone only on the admin node; all ceph-deploy commands run from there
    ceph_admin.run(
        args=[
            'git', 'clone', '-b', ceph_deploy_branch,
            teuth_config.ceph_git_base_url + 'ceph-deploy.git',
            '{tdir}/ceph-deploy'.format(tdir=testdir),
        ],
    )
    args = [
        'cd',
        '{tdir}/ceph-deploy'.format(tdir=testdir),
        run.Raw('&&'),
        './bootstrap',
    ]
    # bootstrap optionally takes the python major version as its argument
    try:
        args.append(str(config['python_version']))
    except KeyError:
        pass
    ceph_admin.run(args=args)

    try:
        yield
    finally:
        # always clean up the checkout, even if the wrapped tasks failed
        log.info('Removing ceph-deploy ...')
        ceph_admin.run(
            args=[
                'rm',
                '-rf',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
            ],
        )
95 | ||
96 | ||
def is_healthy(ctx, config):
    """Wait until a Ceph cluster is healthy.

    Polls ``sudo ceph health`` on the first mon every 10 seconds.  Returns
    as soon as the status token is ``HEALTH_OK``; after 90 attempts
    (~15 minutes) a full ``ceph report`` is dumped to the logs for
    post-mortem debugging and RuntimeError is raised.

    :param ctx: teuthology context
    :param config: passed through to get_first_mon (may be None)
    :raises RuntimeError: if the cluster never reaches HEALTH_OK
    """
    testdir = teuthology.get_testdir(ctx)
    ceph_admin = teuthology.get_first_mon(ctx, config)
    (remote,) = ctx.cluster.only(ceph_admin).remotes.keys()
    max_tries = 90  # 90 tries * 10 secs --> 15 minutes
    tries = 0
    while True:
        tries += 1
        if tries >= max_tries:
            msg = "ceph health was unable to get 'HEALTH_OK' after waiting 15 minutes"
            # capture a full cluster report before giving up
            remote.run(
                args=[
                    'cd',
                    '{tdir}'.format(tdir=testdir),
                    run.Raw('&&'),
                    'sudo', 'ceph',
                    'report',
                ],
            )
            raise RuntimeError(msg)

        r = remote.run(
            args=[
                'cd',
                '{tdir}'.format(tdir=testdir),
                run.Raw('&&'),
                'sudo', 'ceph',
                'health',
            ],
            stdout=StringIO(),
            logger=log.getChild('health'),
        )
        out = r.stdout.getvalue()
        log.info('Ceph health: %s', out.rstrip('\n'))
        # first whitespace-separated token of the output is the status code
        if out.split(None, 1)[0] == 'HEALTH_OK':
            break
        time.sleep(10)
135 | ||
136 | ||
def get_nodes_using_role(ctx, target_role):
    """
    Extract the names of nodes that match a given role from a cluster, and modify the
    cluster's service IDs to match the resulting node-based naming scheme that ceph-deploy
    uses, such that if "mon.a" is on host "foo23", it'll be renamed to "mon.foo23".

    Side effects on ``ctx.cluster``:
      * ``remotes`` is replaced with a copy whose matching roles use the
        ceph-deploy node-based names.
      * ``mapped_role`` is set to the original->mapped role dict
        (e.g. ``mon.a`` -> ``mon.foo23``) for later use (e.g. by upgrade).

    :param ctx: teuthology context holding ``ctx.cluster``
    :param target_role: role type to look for, e.g. ``'mon'``, ``'mds'``
    :returns: list of node names — FQDNs for ``'mon'``, short hostnames
              for every other role
    """
    # Nodes containing a service of the specified role
    nodes_of_interest = []

    # Prepare a modified version of cluster.remotes with ceph-deploy-ized names
    modified_remotes = {}
    # original svc_id -> ceph-deploy-ized role, eg. mon.a => mon.node1
    ceph_deploy_mapped = dict()

    # .items() instead of the py2-only .iteritems(): works on both py2 and
    # py3 and matches the .keys() usage elsewhere in this module
    for _remote, roles_for_host in ctx.cluster.remotes.items():
        modified_remotes[_remote] = []
        for svc_id in roles_for_host:
            if svc_id.startswith("{0}.".format(target_role)):
                # str(_remote) looks like "user@host.domain..."
                fqdn = str(_remote).split('@')[-1]
                # short hostname: drop the domain, then the "user@" prefix
                # (split('.')[0] already yields a str; redundant str() removed)
                nodename = str(_remote).split('.')[0].split('@')[1]
                if target_role == 'mon':
                    # ceph-deploy's mon commands are fed FQDNs
                    nodes_of_interest.append(fqdn)
                else:
                    nodes_of_interest.append(nodename)

                mapped_role = "{0}.{1}".format(target_role, nodename)
                modified_remotes[_remote].append(mapped_role)
                # keep dict of mapped role for later use by tasks
                # eg. mon.a => mon.node1
                ceph_deploy_mapped[svc_id] = mapped_role
            else:
                modified_remotes[_remote].append(svc_id)

    ctx.cluster.remotes = modified_remotes
    ctx.cluster.mapped_role = ceph_deploy_mapped

    return nodes_of_interest
172 | ||
173 | ||
def get_dev_for_osd(ctx, config):
    """Get a list of all osd device names.

    Scans every remote's scratch devices and allots them to the OSD roles
    hosted there.  With ``separate_journal_disk`` set, devices are consumed
    in (data, journal) pairs, yielding ``(host, data_dev, journal_dev)``
    tuples; otherwise one device per OSD, yielding ``(host, dev)`` tuples.
    Device names are shortened to their basename (e.g. ``sdb``), hosts to
    their short hostname.

    :param ctx: teuthology context (provides ctx.cluster)
    :param config: task config; only 'separate_journal_disk' is consulted
    :returns: list of (shortname, dev[, journal_dev]) tuples
    :raises AssertionError: if a host has fewer scratch devices than needed
    """
    osd_devs = []
    # .items() instead of py2-only .iteritems() for py2/py3 compatibility
    for remote, roles_for_host in ctx.cluster.remotes.items():
        host = remote.name.split('@')[-1]
        shortname = host.split('.')[0]
        devs = teuthology.get_scratch_devices(remote)
        # roles_of_type yields the osd roles on this host; we only need the count
        osd_roles_on_host = list(
            teuthology.roles_of_type(
                roles_for_host, 'osd'))
        num_osds = len(osd_roles_on_host)
        if config.get('separate_journal_disk') is not None:
            # two devices per osd: even index = data, odd index = journal
            num_devs_reqd = 2 * num_osds
            assert num_devs_reqd <= len(
                devs), 'fewer data and journal disks than required ' + shortname
            for dindex in range(0, num_devs_reqd, 2):
                jd_index = dindex + 1
                dev_short = devs[dindex].split('/')[-1]
                jdev_short = devs[jd_index].split('/')[-1]
                osd_devs.append((shortname, dev_short, jdev_short))
        else:
            assert num_osds <= len(devs), 'fewer disks than osds ' + shortname
            for dev in devs[:num_osds]:
                dev_short = dev.split('/')[-1]
                osd_devs.append((shortname, dev_short))
    return osd_devs
200 | ||
201 | ||
def get_all_nodes(ctx, config):
    """Return a string of node names separated by blanks.

    Node names come from the keys of ``ctx.config['targets']`` (which look
    like ``user@host.domain``); the user part and domain suffix are
    stripped, leaving the short hostname.

    :param ctx: teuthology context (reads ctx.config['targets'])
    :param config: unused; kept for interface compatibility with callers
    :returns: space-separated string of short hostnames
    """
    nodelist = []
    # iterate keys directly: the py2-only .iteritems() also bound the
    # target value, which was never used
    for t in ctx.config['targets']:
        host = t.split('@')[-1]
        simple_host = host.split('.')[0]
        nodelist.append(simple_host)
    return " ".join(nodelist)
211 | ||
212 | ||
@contextlib.contextmanager
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster.

    Drives ceph-deploy from mon.a to: create the initial mons, append the
    test's conf overrides, install ceph (+ceph-test), gather keys, create
    mgr (unless 'skip-mgr'), deploy mds, zap disks and create OSDs, then
    distribute client keyrings/conf and (optionally) wait for HEALTH_OK.
    On teardown, unless 'keep_running' is set: stop services, archive mon
    data and logs (when ctx.archive is set), and purge packages and data.
    """

    # Expect to find ceph_admin on the first mon by ID, same place that the download task
    # puts it. Remember this here, because subsequently IDs will change from those in
    # the test config to those that ceph-deploy invents.

    (ceph_admin,) = ctx.cluster.only('mon.a').remotes.iterkeys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command"""
        # check_status=False: callers inspect the exit status themselves
        return ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(cmd),
            ],
            check_status=False,
        ).exitstatus

    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            # e.g. {'stable': 'jewel'} -> '--stable=jewel'; only the last
            # entry wins if several are given
            for var, val in cbranch.iteritems():
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        # NOTE: get_nodes_using_role mutates ctx.cluster.remotes to the
        # ceph-deploy node-based naming as a side effect
        mds_nodes = get_nodes_using_role(ctx, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, 'mon')
        mon_nodes = " ".join(mon_node)
        # skip mgr based on config item
        # this is needed when test uses latest code to install old ceph
        # versions
        skip_mgr = config.get('skip-mgr', False)
        if not skip_mgr:
            mgr_nodes = get_nodes_using_role(ctx, 'mgr')
            mgr_nodes = " ".join(mgr_nodes)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        if not skip_mgr:
            mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        # gatherkeys only needs one mon; use the first
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes
        no_of_osds = 0

        # NOTE(review): mon_nodes is a str here, so this None check can
        # never fire; an empty-string check was probably intended
        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)

        # append the task's conf overrides to the generated ceph.conf so
        # 'ceph-deploy install' pushes them to all nodes
        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.iteritems():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(ceph_admin, conf_path, lines,
                                                sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(
                        ceph_admin, conf_path, lines, sudo=True)

        # install ceph
        # task-level 'branch' wins over the job-level branch default
        dev_branch = ctx.config['branch']
        branch = '--dev={branch}'.format(branch=dev_branch)
        if ceph_branch:
            option = ceph_branch
        else:
            option = branch
        install_nodes = './ceph-deploy install ' + option + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install ceph-test package too
        install_nodes2 = './ceph-deploy install --tests ' + option + \
                         " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so lets
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)

        # create-keys is explicit now
        # http://tracker.ceph.com/issues/16036
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote in mons.remotes.iterkeys():
            remote.run(args=['sudo', 'ceph-create-keys', '--cluster', 'ceph',
                             '--id', remote.shortname])

        # NOTE(review): exit status of gatherkeys is captured but never
        # checked — failures here surface later instead
        estatus_gather = execute_ceph_deploy(gather_keys)

        if not skip_mgr:
            execute_ceph_deploy(mgr_create)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            # destroy every mon except the first (index 0 stays up)
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + \
                    " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        node_dev_list = get_dev_for_osd(ctx, config)
        # each d is (host, dev) or (host, dev, journal_dev)
        for d in node_dev_list:
            node = d[0]
            for disk in d[1:]:
                zap = './ceph-deploy disk zap ' + node + ':' + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = './ceph-deploy osd create '
            # first check for filestore, default is bluestore with ceph-deploy
            if config.get('filestore') is not None:
                osd_create_cmd += '--filestore '
            elif config.get('bluestore') is not None:
                osd_create_cmd += '--bluestore '
            if config.get('dmcrypt') is not None:
                osd_create_cmd += '--dmcrypt '
            # ceph-deploy expects host:dev[:journal]
            osd_create_cmd += ":".join(d)
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info('Setting up client nodes...')
            conf_path = '/etc/ceph/ceph.conf'
            admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = teuthology.get_file(
                remote=mon0_remote,
                path=conf_path,
                sudo=True,
            )
            admin_keyring = teuthology.get_file(
                remote=mon0_remote,
                path=admin_keyring_path,
                sudo=True,
            )

            clients = ctx.cluster.only(teuthology.is_type('client'))
            for remot, roles_for_host in clients.remotes.iteritems():
                for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                    client_keyring = \
                        '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                    # create the client key on the mon, redirecting into the
                    # keyring file (hence the quoted 'bash -c' shell pipeline)
                    mon0_remote.run(
                        args=[
                            'cd',
                            '{tdir}'.format(tdir=testdir),
                            run.Raw('&&'),
                            'sudo', 'bash', '-c',
                            run.Raw('"'), 'ceph',
                            'auth',
                            'get-or-create',
                            'client.{id}'.format(id=id_),
                            'mds', 'allow',
                            'mon', 'allow *',
                            'osd', 'allow *',
                            run.Raw('>'),
                            client_keyring,
                            run.Raw('"'),
                        ],
                    )
                    key_data = teuthology.get_file(
                        remote=mon0_remote,
                        path=client_keyring,
                        sudo=True,
                    )
                    # copy client key, admin keyring and conf to the client
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=client_keyring,
                        data=key_data,
                        perms='0644'
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=admin_keyring_path,
                        data=admin_keyring,
                        perms='0644'
                    )
                    teuthology.sudo_write_file(
                        remote=remot,
                        path=conf_path,
                        data=conf_data,
                        perms='0644'
                    )

            if mds_nodes:
                log.info('Configuring CephFS...')
                Filesystem(ctx, create=True)
        elif not config.get('only_mon'):
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        yield

    except Exception:
        log.info(
            "Error encountered, logging exception before tearing down ceph-deploy")
        log.info(traceback.format_exc())
        raise
    finally:
        if config.get('keep_running'):
            return
        log.info('Stopping ceph...')
        # try upstart, sysvinit and systemd in turn; one of them applies
        ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'),
                              'sudo', 'service', 'ceph', 'stop', run.Raw('||'),
                              'sudo', 'systemctl', 'stop', 'ceph.target'])

        # Are you really not running anymore?
        # try first with the init tooling
        # ignoring the status so this becomes informational only
        ctx.cluster.run(
            args=[
                'sudo', 'status', 'ceph-all', run.Raw('||'),
                'sudo', 'service', 'ceph', 'status', run.Raw('||'),
                'sudo', 'systemctl', 'status', 'ceph.target'],
            check_status=False)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
                              'grep', '-v', 'grep', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            # gzip logs on all nodes in parallel (wait=False), then wait
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(purgedata_nodes)
518 | ||
519 | ||
@contextlib.contextmanager
def cli_test(ctx, config):
    """
    ceph-deploy cli to exercise most commonly use cli's and ensure
    all commands works and also startup the init system.

    Runs the full ceph-deploy subcommand sequence (new, install, mon
    create-initial, admin, osd prepare, rgw create) against a single mon
    node, waits for the cluster to report HEALTH_OK, and on teardown
    purges the node and removes the temporary conf dir.
    """
    log.info('Ceph-deploy Test')
    if config is None:
        config = {}
    test_branch = ''
    conf_dir = teuthology.get_testdir(ctx) + "/cdtest"

    def execute_cdeploy(admin, cmd, path):
        """Execute a ceph-deploy command on *admin*.

        Uses the git-checkout binary when *path* is set, otherwise the
        packaged 'ceph-deploy' (rhbuild case).  Raises RuntimeError on a
        non-zero exit status.
        """
        args = ['cd', conf_dir, run.Raw(';')]
        if path:
            args.append('{path}/ceph-deploy/ceph-deploy'.format(path=path))
        else:
            args.append('ceph-deploy')
        args.append(run.Raw(cmd))
        ec = admin.run(args=args, check_status=False).exitstatus
        if ec != 0:
            raise RuntimeError(
                "failed during ceph-deploy cmd: {cmd} , ec={ec}".format(cmd=cmd, ec=ec))

    if config.get('rhbuild'):
        # rhbuild: ceph-deploy comes from distro packages, no git path
        path = None
    else:
        path = teuthology.get_testdir(ctx)
        # test on branch from config eg: wip-* , master or next etc
        # packages for all distro's should exist for wip*
        if ctx.config.get('branch'):
            branch = ctx.config.get('branch')
            test_branch = ' --dev={branch} '.format(branch=branch)
    mons = ctx.cluster.only(teuthology.is_type('mon'))
    # single-node test: the loop leaves 'admin' bound to the (only) mon
    for node, role in mons.remotes.iteritems():
        admin = node
        admin.run(args=['mkdir', conf_dir], check_status=False)
        nodename = admin.shortname
    system_type = teuthology.get_system_type(admin)
    if config.get('rhbuild'):
        admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y'])
    log.info('system type is %s', system_type)
    osds = ctx.cluster.only(teuthology.is_type('osd'))

    # likewise, 'remote' and 'devs' from the last osd host are used below
    for remote, roles in osds.remotes.iteritems():
        devs = teuthology.get_scratch_devices(remote)
        log.info("roles %s", roles)
        if (len(devs) < 3):
            log.error(
                'Test needs minimum of 3 devices, only found %s',
                str(devs))
            raise RuntimeError("Needs minimum of 3 devices ")

    conf_path = '{conf_dir}/ceph.conf'.format(conf_dir=conf_dir)
    new_cmd = 'new ' + nodename
    execute_cdeploy(admin, new_cmd, path)
    # append the task's conf overrides to the generated ceph.conf
    if config.get('conf') is not None:
        confp = config.get('conf')
        for section, keys in confp.iteritems():
            lines = '[{section}]\n'.format(section=section)
            teuthology.append_lines_to_file(admin, conf_path, lines,
                                            sudo=True)
            for key, value in keys.iteritems():
                log.info("[%s] %s = %s" % (section, key, value))
                lines = '{key} = {value}\n'.format(key=key, value=value)
                teuthology.append_lines_to_file(admin, conf_path, lines,
                                                sudo=True)
    new_mon_install = 'install {branch} --mon '.format(
        branch=test_branch) + nodename
    new_mgr_install = 'install {branch} --mgr '.format(
        branch=test_branch) + nodename
    new_osd_install = 'install {branch} --osd '.format(
        branch=test_branch) + nodename
    new_admin = 'install {branch} --cli '.format(branch=test_branch) + nodename
    create_initial = 'mon create-initial '
    # either use create-keys or push command
    push_keys = 'admin ' + nodename
    execute_cdeploy(admin, new_mon_install, path)
    execute_cdeploy(admin, new_mgr_install, path)
    execute_cdeploy(admin, new_osd_install, path)
    execute_cdeploy(admin, new_admin, path)
    execute_cdeploy(admin, create_initial, path)
    execute_cdeploy(admin, push_keys, path)

    # zap and prepare the first three scratch devices as OSDs
    for i in range(3):
        zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i])
        prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i])
        execute_cdeploy(admin, zap_disk, path)
        execute_cdeploy(admin, prepare, path)

    log.info("list files for debugging purpose to check file permissions")
    admin.run(args=['ls', run.Raw('-lt'), conf_dir])
    remote.run(args=['sudo', 'ceph', '-s'], check_status=False)
    r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO())
    out = r.stdout.getvalue()
    log.info('Ceph health: %s', out.rstrip('\n'))
    log.info("Waiting for cluster to become healthy")
    # poll up to 6 times, 10s apart, for HEALTH_OK
    with contextutil.safe_while(sleep=10, tries=6,
                                action='check health') as proceed:
        while proceed():
            r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO())
            out = r.stdout.getvalue()
            if (out.split(None, 1)[0] == 'HEALTH_OK'):
                break
    rgw_install = 'install {branch} --rgw {node}'.format(
        branch=test_branch,
        node=nodename,
    )
    rgw_create = 'rgw create ' + nodename
    execute_cdeploy(admin, rgw_install, path)
    execute_cdeploy(admin, rgw_create, path)
    log.info('All ceph-deploy cli tests passed')
    try:
        yield
    finally:
        log.info("cleaning up")
        # stop services via whichever init system is present
        ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'),
                              'sudo', 'service', 'ceph', 'stop', run.Raw('||'),
                              'sudo', 'systemctl', 'stop', 'ceph.target'],
                        check_status=False)
        time.sleep(4)
        # unmount the first partition of each osd device before purging
        for i in range(3):
            umount_dev = "{d}1".format(d=devs[i])
            r = remote.run(args=['sudo', 'umount', run.Raw(umount_dev)])
        cmd = 'purge ' + nodename
        execute_cdeploy(admin, cmd, path)
        cmd = 'purgedata ' + nodename
        execute_cdeploy(admin, cmd, path)
        log.info("Removing temporary dir")
        admin.run(
            args=[
                'rm',
                run.Raw('-rf'),
                run.Raw(conf_dir)],
            check_status=False)
        if config.get('rhbuild'):
            admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y'])
660 | ||
661 | ||
@contextlib.contextmanager
def single_node_test(ctx, config):
    """
    Run the ceph-deploy cli test on a single node.

    Example configurations::

        - ceph-deploy.single_node_test: null

        #rhbuild testing
        - ceph-deploy.single_node_test:
            rhbuild: 1.2.3

    For an rhbuild the download step is skipped (ceph-deploy comes from
    distro packages); otherwise utilities are shipped and ceph-deploy is
    downloaded before the cli test runs.
    """
    log.info("Testing ceph-deploy on single node")
    if config is None:
        config = {}
    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))

    # Assemble the stack of context managers to enter before yielding.
    if config.get('rhbuild'):
        log.info("RH Build, Skip Download")
        steps = [
            lambda: cli_test(ctx=ctx, config=config),
        ]
    else:
        steps = [
            lambda: install_fn.ship_utilities(ctx=ctx, config=None),
            lambda: download_ceph_deploy(ctx=ctx, config=config),
            lambda: cli_test(ctx=ctx, config=config),
        ]
    with contextutil.nested(*steps):
        yield
691 | ||
692 | ||
3efd9988 FG |
@contextlib.contextmanager
def upgrade(ctx, config):
    """
    Upgrade using ceph-deploy
    eg:
      ceph-deploy.upgrade:
        # to upgrade to specific branch, use
        branch:
           stable: jewel
        # to setup mgr node, use
        setup-mgr-node: True
        # to wait for cluster to be healthy after all upgrade, use
        wait-for-healthy: True
        role: (upgrades the below roles serially)
           mon.a
           mon.b
           osd.0

    Requires a prior build_ceph_cluster run: relies on the role mapping
    stored in ``ctx.cluster.mapped_role`` by get_nodes_using_role.
    """
    roles = config.get('roles')
    # get the roles that are mapped as per ceph-deploy
    # roles are mapped for mon/mds eg: mon.a  => mon.host_short_name
    mapped_role = ctx.cluster.mapped_role
    if config.get('branch'):
        branch = config.get('branch')
        # single-entry dict, e.g. {'stable': 'jewel'} -> '--stable=jewel'
        (var, val) = branch.items()[0]
        ceph_branch = '--{var}={val}'.format(var=var, val=val)
    else:
        # default to master
        ceph_branch = '--dev=master'
    # get the node used for initial deployment which is mon.a
    mon_a = mapped_role.get('mon.a')
    (ceph_admin,) = ctx.cluster.only(mon_a).remotes.iterkeys()
    testdir = teuthology.get_testdir(ctx)
    cmd = './ceph-deploy install ' + ceph_branch
    for role in roles:
        # check if this role is mapped (mon or mds)
        if mapped_role.get(role):
            role = mapped_role.get(role)
        remotes_and_roles = ctx.cluster.only(role).remotes
        for remote, roles in remotes_and_roles.iteritems():
            nodename = remote.shortname
            # NOTE: cmd accumulates node names across iterations, so each
            # install re-covers the previously upgraded nodes as well
            cmd = cmd + ' ' + nodename
            log.info("Upgrading ceph on %s", nodename)
            ceph_admin.run(
                args=[
                    'cd',
                    '{tdir}/ceph-deploy'.format(tdir=testdir),
                    run.Raw('&&'),
                    run.Raw(cmd),
                ],
            )
            # restart all ceph services, ideally upgrade should but it does not
            remote.run(
                args=[
                    'sudo', 'systemctl', 'restart', 'ceph.target'
                ]
            )
            ceph_admin.run(args=['sudo', 'ceph', '-s'])

    # workaround for http://tracker.ceph.com/issues/20950
    # write the correct mgr key to disk
    if config.get('setup-mgr-node', None):
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote, roles in mons.remotes.iteritems():
            remote.run(
                args=[
                    run.Raw('sudo ceph auth get client.bootstrap-mgr'),
                    run.Raw('|'),
                    run.Raw('sudo tee'),
                    run.Raw('/var/lib/ceph/bootstrap-mgr/ceph.keyring')
                ]
            )

    if config.get('setup-mgr-node', None):
        mgr_nodes = get_nodes_using_role(ctx, 'mgr')
        mgr_nodes = " ".join(mgr_nodes)
        mgr_install = './ceph-deploy install --mgr ' + ceph_branch + " " + mgr_nodes
        mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        # install mgr
        ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(mgr_install),
            ],
        )
        # create mgr
        ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(mgr_create),
            ],
        )
        ceph_admin.run(args=['sudo', 'ceph', '-s'])
    if config.get('wait-for-healthy', None):
        wait_until_healthy(ctx, ceph_admin, use_sudo=True)
    yield
793 | ||
794 | ||
7c673cae FG |
@contextlib.contextmanager
def task(ctx, config):
    """
    Set up and tear down a Ceph cluster.

    For example::

        tasks:
        - install:
             extras: yes
        - ssh_keys:
        - ceph-deploy:
             branch:
                stable: bobtail
             mon_initial_members: 1
             ceph-deploy-branch: my-ceph-deploy-branch
             only_mon: true
             keep_running: true
             # either choose bluestore or filestore, default is bluestore
             bluestore: True
             # or
             filestore: True
             # skip install of mgr for old release using below flag
             skip-mgr: True  ( default is False )

        tasks:
        - install:
             extras: yes
        - ssh_keys:
        - ceph-deploy:
             branch:
                dev: master
             conf:
                mon:
                   debug mon = 20

        tasks:
        - install:
             extras: yes
        - ssh_keys:
        - ceph-deploy:
             branch:
                testing:
             dmcrypt: yes
             separate_journal_disk: yes

    """
    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task ceph-deploy only supports a dictionary for configuration"

    # merge job-level overrides into the task config before anything runs
    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))

    if config.get('branch') is not None:
        assert isinstance(
            config['branch'], dict), 'branch must be a dictionary'

    log.info('task ceph-deploy with config ' + str(config))

    # ship utilities, fetch ceph-deploy, then build (and finally tear down)
    # the cluster — each step cleans up after itself on exit
    with contextutil.nested(
            lambda: install_fn.ship_utilities(ctx=ctx, config=None),
            lambda: download_ceph_deploy(ctx=ctx, config=config),
            lambda: build_ceph_cluster(ctx=ctx, config=config),
    ):
        yield