1 | """ |
2 | Execute ceph-deploy as a task | |
3 | """ | |
4 | from cStringIO import StringIO | |
5 | ||
6 | import contextlib | |
7 | import os | |
8 | import time | |
9 | import logging | |
10 | import traceback | |
11 | ||
12 | from teuthology import misc as teuthology | |
13 | from teuthology import contextutil | |
14 | from teuthology.config import config as teuth_config | |
15 | from teuthology.task import install as install_fn | |
16 | from teuthology.orchestra import run | |
17 | from tasks.cephfs.filesystem import Filesystem | |
3efd9988 | 18 | from teuthology.misc import wait_until_healthy |
7c673cae FG |
19 | |
20 | log = logging.getLogger(__name__) | |
21 | ||
22 | ||
@contextlib.contextmanager
def download_ceph_deploy(ctx, config):
    """
    Downloads ceph-deploy from the ceph.com git mirror and (by default)
    switches to the master branch. If `ceph-deploy-branch` is specified, it
    will use that instead. The `bootstrap` script is run with the argument
    obtained from `python_version`, if specified.
    """
    # use mon.a for ceph_admin
    (ceph_admin,) = ctx.cluster.only('mon.a').remotes.iterkeys()

    try:
        py_ver = str(config['python_version'])
    except KeyError:
        pass
    else:
        supported_versions = ['2', '3']
        if py_ver not in supported_versions:
            raise ValueError("python_version must be: {}, not {}".format(
                ' or '.join(supported_versions), py_ver
            ))

        log.info("Installing Python")
        system_type = teuthology.get_system_type(ceph_admin)

        if system_type == 'rpm':
            package = 'python34' if py_ver == '3' else 'python'
            ctx.cluster.run(args=[
                'sudo', 'yum', '-y', 'install',
                package, 'python-virtualenv'
            ])
        else:
            package = 'python3' if py_ver == '3' else 'python'
            ctx.cluster.run(args=[
                'sudo', 'apt-get', '-y', '--force-yes', 'install',
                package, 'python-virtualenv'
            ])

    log.info('Downloading ceph-deploy...')
    testdir = teuthology.get_testdir(ctx)
    ceph_deploy_branch = config.get('ceph-deploy-branch', 'master')

    ceph_admin.run(
        args=[
            'git', 'clone', '-b', ceph_deploy_branch,
            teuth_config.ceph_git_base_url + 'ceph-deploy.git',
            '{tdir}/ceph-deploy'.format(tdir=testdir),
        ],
    )
    args = [
        'cd',
        '{tdir}/ceph-deploy'.format(tdir=testdir),
        run.Raw('&&'),
        './bootstrap',
    ]
    try:
        args.append(str(config['python_version']))
    except KeyError:
        pass
    ceph_admin.run(args=args)

    try:
        yield
    finally:
        log.info('Removing ceph-deploy ...')
        ceph_admin.run(
            args=[
                'rm',
                '-rf',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
            ],
        )


def is_healthy(ctx, config):
    """Wait until a Ceph cluster is healthy."""
    testdir = teuthology.get_testdir(ctx)
    ceph_admin = teuthology.get_first_mon(ctx, config)
    (remote,) = ctx.cluster.only(ceph_admin).remotes.keys()
    max_tries = 90  # 90 tries * 10 secs --> 15 minutes
    tries = 0
    while True:
        tries += 1
        if tries >= max_tries:
            msg = "ceph health was unable to get 'HEALTH_OK' after waiting 15 minutes"
            remote.run(
                args=[
                    'cd',
                    '{tdir}'.format(tdir=testdir),
                    run.Raw('&&'),
                    'sudo', 'ceph',
                    'report',
                ],
            )
            raise RuntimeError(msg)

        r = remote.run(
            args=[
                'cd',
                '{tdir}'.format(tdir=testdir),
                run.Raw('&&'),
                'sudo', 'ceph',
                'health',
            ],
            stdout=StringIO(),
            logger=log.getChild('health'),
        )
        out = r.stdout.getvalue()
        log.info('Ceph health: %s', out.rstrip('\n'))
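        # only the leading status token is checked, e.g. 'HEALTH_OK' or
        # 'HEALTH_WARN 1 pgs degraded'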
        if out.split(None, 1)[0] == 'HEALTH_OK':
            break
        time.sleep(10)


def get_nodes_using_role(ctx, target_role):
    """
    Extract the names of nodes that match a given role from a cluster, and modify the
    cluster's service IDs to match the resulting node-based naming scheme that ceph-deploy
    uses, such that if "mon.a" is on host "foo23", it'll be renamed to "mon.foo23".
    """

    # Nodes containing a service of the specified role
    nodes_of_interest = []

    # Prepare a modified version of cluster.remotes with ceph-deploy-ized names
    modified_remotes = {}
    ceph_deploy_mapped = dict()

    for _remote, roles_for_host in ctx.cluster.remotes.iteritems():
        modified_remotes[_remote] = []
        for svc_id in roles_for_host:
            if svc_id.startswith("{0}.".format(target_role)):
                fqdn = str(_remote).split('@')[-1]
                nodename = str(str(_remote).split('.')[0]).split('@')[1]
                if target_role == 'mon':
                    nodes_of_interest.append(fqdn)
                else:
                    nodes_of_interest.append(nodename)
                mapped_role = "{0}.{1}".format(target_role, nodename)
                modified_remotes[_remote].append(mapped_role)
                # keep a dict of mapped roles for later use by tasks,
                # e.g. mon.a => mon.node1
                ceph_deploy_mapped[svc_id] = mapped_role
            else:
                modified_remotes[_remote].append(svc_id)

    ctx.cluster.remotes = modified_remotes
    ctx.cluster.mapped_role = ceph_deploy_mapped

    return nodes_of_interest

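# For illustration (hypothetical host): with target_role='mds' and a remote
# 'ubuntu@node1.example.com' carrying 'mds.a', the service ID is rewritten to
# 'mds.node1', ctx.cluster.mapped_role gains {'mds.a': 'mds.node1'}, and
# 'node1' appears in the returned list (for mons the FQDN is returned instead).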

def get_dev_for_osd(ctx, config):
    """Get a list of all osd device names."""
    osd_devs = []
    for remote, roles_for_host in ctx.cluster.remotes.iteritems():
        host = remote.name.split('@')[-1]
        shortname = host.split('.')[0]
        devs = teuthology.get_scratch_devices(remote)
        num_osd_per_host = list(
            teuthology.roles_of_type(
                roles_for_host, 'osd'))
        num_osds = len(num_osd_per_host)
        if config.get('separate_journal_disk') is not None:
            num_devs_reqd = 2 * num_osds
            assert num_devs_reqd <= len(
                devs), 'fewer data and journal disks than required ' + shortname
            for dindex in range(0, num_devs_reqd, 2):
                jd_index = dindex + 1
                dev_short = devs[dindex].split('/')[-1]
                jdev_short = devs[jd_index].split('/')[-1]
                osd_devs.append((shortname, dev_short, jdev_short))
        else:
            assert num_osds <= len(devs), 'fewer disks than osds ' + shortname
            for dev in devs[:num_osds]:
                dev_short = dev.split('/')[-1]
                osd_devs.append((shortname, dev_short))
    return osd_devs

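# Each entry is (host_shortname, data_dev[, journal_dev]); e.g. a hypothetical
# ('node1', 'sdb', 'sdc') when separate_journal_disk is set, or
# ('node1', 'sdb') otherwise.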

def get_all_nodes(ctx, config):
    """Return a string of node names separated by blanks"""
    nodelist = []
    for t, k in ctx.config['targets'].iteritems():
        host = t.split('@')[-1]
        simple_host = host.split('.')[0]
        nodelist.append(simple_host)
    nodelist = " ".join(nodelist)
    return nodelist

@contextlib.contextmanager
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    # Expect to find ceph_admin on the first mon by ID, same place that the
    # download task puts it. Remember this here, because subsequently IDs will
    # change from those in the test config to those that ceph-deploy invents.

    (ceph_admin,) = ctx.cluster.only('mon.a').remotes.iterkeys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command"""
        return ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(cmd),
            ],
            check_status=False,
        ).exitstatus

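    # e.g. execute_ceph_deploy('./ceph-deploy new node1') runs the command
    # from the ceph-deploy checkout and returns its exit status ('node1'
    # being a hypothetical node name); callers decide if non-zero is fatal.
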
    def ceph_disk_osd_create(ctx, config):
        node_dev_list = get_dev_for_osd(ctx, config)
        no_of_osds = 0
        for d in node_dev_list:
            node = d[0]
            for disk in d[1:]:
                zap = './ceph-deploy disk zap ' + node + ':' + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = './ceph-deploy osd create '
            # first check for filestore, default is bluestore with ceph-deploy
            if config.get('filestore') is not None:
                osd_create_cmd += '--filestore '
            elif config.get('bluestore') is not None:
                osd_create_cmd += '--bluestore '
            if config.get('dmcrypt') is not None:
                osd_create_cmd += '--dmcrypt '
            osd_create_cmd += ":".join(d)
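            # osd_create_cmd now looks like, e.g. (hypothetical node/disk):
            # './ceph-deploy osd create --bluestore node1:sdb'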
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")
        return no_of_osds

    def ceph_volume_osd_create(ctx, config):
        osds = ctx.cluster.only(teuthology.is_type('osd'))
        no_of_osds = 0
        for remote in osds.remotes.iterkeys():
            # all devs should be lvm
            osd_create_cmd = './ceph-deploy osd create --debug ' + remote.shortname + ' '
            # default is bluestore so we just need config item for filestore
            roles = ctx.cluster.remotes[remote]
            dev_needed = len([role for role in roles
                              if role.startswith('osd')])
            all_devs = teuthology.get_scratch_devices(remote)
            log.info("node={n}, need_devs={d}, available={a}".format(
                n=remote.shortname,
                d=dev_needed,
                a=all_devs,
            ))
            devs = all_devs[0:dev_needed]
            # rest of the devices can be used for journal if required
            jdevs = dev_needed
            for device in devs:
                device_split = device.split('/')
                lv_device = device_split[-2] + '/' + device_split[-1]
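                # assuming the scratch devices are LVM paths, a hypothetical
                # '/dev/vg0/lv1' becomes 'vg0/lv1', the vg/lv form that
                # ceph-deploy passes through to ceph-volume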
                if config.get('filestore') is not None:
                    osd_create_cmd += '--filestore --data ' + lv_device + ' '
                    # filestore with ceph-volume also needs a journal disk
                    try:
                        jdevice = all_devs.pop(jdevs)
                    except IndexError:
                        raise RuntimeError(
                            "No device available for journal configuration")
                    jdevice_split = jdevice.split('/')
                    j_lv = jdevice_split[-2] + '/' + jdevice_split[-1]
                    osd_create_cmd += '--journal ' + j_lv
                else:
                    osd_create_cmd += ' --data ' + lv_device
                estatus_osd = execute_ceph_deploy(osd_create_cmd)
                if estatus_osd == 0:
                    log.info('successfully created osd')
                    no_of_osds += 1
                else:
                    raise RuntimeError("ceph-deploy: Failed to create osds")
        return no_of_osds

    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.iteritems():
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_role(ctx, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, 'mon')
        mon_nodes = " ".join(mon_node)
        # skip mgr based on config item
        # this is needed when test uses latest code to install old ceph
        # versions
        skip_mgr = config.get('skip-mgr', False)
        if not skip_mgr:
            mgr_nodes = get_nodes_using_role(ctx, 'mgr')
            mgr_nodes = " ".join(mgr_nodes)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        if not skip_mgr:
            mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes
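        # hypothetical examples of the command strings built above:
        #   './ceph-deploy new mon1.example.com'
        #   './ceph-deploy gatherkeys mon1.example.com'
        #   './ceph-deploy mds create node1'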

        if not mon_nodes:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)

        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.iteritems():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(ceph_admin, conf_path, lines,
                                                sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(
                        ceph_admin, conf_path, lines, sudo=True)

        # install ceph
        dev_branch = ctx.config['branch']
        branch = '--dev={branch}'.format(branch=dev_branch)
        if ceph_branch:
            option = ceph_branch
        else:
            option = branch
        install_nodes = './ceph-deploy install ' + option + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install ceph-test package too
        install_nodes2 = './ceph-deploy install --tests ' + option + \
                         " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so let's
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)

        # create-keys is explicit now
        # http://tracker.ceph.com/issues/16036
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote in mons.remotes.iterkeys():
            remote.run(args=['sudo', 'ceph-create-keys', '--cluster', 'ceph',
                             '--id', remote.shortname])

        estatus_gather = execute_ceph_deploy(gather_keys)
        if estatus_gather != 0:
            raise RuntimeError("ceph-deploy: Failed during gather keys")
        # create osds
        if config.get('use-ceph-volume', False):
            no_of_osds = ceph_volume_osd_create(ctx, config)
        else:
            # this method will only work with ceph-deploy v1.5.39 or older
            no_of_osds = ceph_disk_osd_create(ctx, config)

        if not skip_mgr:
            execute_ceph_deploy(mgr_create)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + \
                    " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

        log.info('Setting up client nodes...')
        conf_path = '/etc/ceph/ceph.conf'
        admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
        first_mon = teuthology.get_first_mon(ctx, config)
        (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
        conf_data = teuthology.get_file(
            remote=mon0_remote,
            path=conf_path,
            sudo=True,
        )
        admin_keyring = teuthology.get_file(
            remote=mon0_remote,
            path=admin_keyring_path,
            sudo=True,
        )

        clients = ctx.cluster.only(teuthology.is_type('client'))
        for remot, roles_for_host in clients.remotes.iteritems():
            for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                client_keyring = \
                    '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                mon0_remote.run(
                    args=[
                        'cd',
                        '{tdir}'.format(tdir=testdir),
                        run.Raw('&&'),
                        'sudo', 'bash', '-c',
                        run.Raw('"'), 'ceph',
                        'auth',
                        'get-or-create',
                        'client.{id}'.format(id=id_),
                        'mds', 'allow',
                        'mon', 'allow *',
                        'osd', 'allow *',
                        run.Raw('>'),
                        client_keyring,
                        run.Raw('"'),
                    ],
                )
                key_data = teuthology.get_file(
                    remote=mon0_remote,
                    path=client_keyring,
                    sudo=True,
                )
                teuthology.sudo_write_file(
                    remote=remot,
                    path=client_keyring,
                    data=key_data,
                    perms='0644'
                )
                teuthology.sudo_write_file(
                    remote=remot,
                    path=admin_keyring_path,
                    data=admin_keyring,
                    perms='0644'
                )
                teuthology.sudo_write_file(
                    remote=remot,
                    path=conf_path,
                    data=conf_data,
                    perms='0644'
                )

        if mds_nodes:
            log.info('Configuring CephFS...')
            Filesystem(ctx, create=True)
        elif not config.get('only_mon'):
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        # create rbd pool
        ceph_admin.run(
            args=[
                'sudo', 'ceph', '--cluster', 'ceph',
                'osd', 'pool', 'create', 'rbd', '128', '128'],
            check_status=False)
        ceph_admin.run(
            args=[
                'sudo', 'ceph', '--cluster', 'ceph',
                'osd', 'pool', 'application', 'enable',
                'rbd', 'rbd', '--yes-i-really-mean-it'
            ],
            check_status=False)
        yield

    except Exception:
        log.info(
            "Error encountered, logging exception before tearing down ceph-deploy")
        log.info(traceback.format_exc())
        raise
    finally:
        if config.get('keep_running'):
            return
        log.info('Stopping ceph...')
        ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'),
                              'sudo', 'service', 'ceph', 'stop', run.Raw('||'),
                              'sudo', 'systemctl', 'stop', 'ceph.target'])

        # Are you really not running anymore?
        # try first with the init tooling
        # ignoring the status so this becomes informational only
        ctx.cluster.run(
            args=[
                'sudo', 'status', 'ceph-all', run.Raw('||'),
                'sudo', 'service', 'ceph', 'status', run.Raw('||'),
                'sudo', 'systemctl', 'status', 'ceph.target'],
            check_status=False)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
                              'grep', '-v', 'grep', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(purgedata_nodes)


@contextlib.contextmanager
def cli_test(ctx, config):
    """
    Exercise the most commonly used ceph-deploy CLI commands, ensure they
    all work, and also start up the cluster via the init system.
    """
    log.info('Ceph-deploy Test')
    if config is None:
        config = {}
    test_branch = ''
    conf_dir = teuthology.get_testdir(ctx) + "/cdtest"

    def execute_cdeploy(admin, cmd, path):
        """Execute ceph-deploy commands, using either the git checkout
        (when path is set) or the installed ceph-deploy binary."""
        args = ['cd', conf_dir, run.Raw(';')]
        if path:
            args.append('{path}/ceph-deploy/ceph-deploy'.format(path=path))
        else:
            args.append('ceph-deploy')
        args.append(run.Raw(cmd))
        ec = admin.run(args=args, check_status=False).exitstatus
        if ec != 0:
            raise RuntimeError(
                "failed during ceph-deploy cmd: {cmd} , ec={ec}".format(cmd=cmd, ec=ec))

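    # e.g. execute_cdeploy(admin, 'new node1', path) runs either
    # '{path}/ceph-deploy/ceph-deploy new node1' or 'ceph-deploy new node1'
    # from conf_dir ('node1' being a hypothetical node name)
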
    if config.get('rhbuild'):
        path = None
    else:
        path = teuthology.get_testdir(ctx)
        # test on branch from config eg: wip-* , master or next etc
        # packages for all distro's should exist for wip*
        if ctx.config.get('branch'):
            branch = ctx.config.get('branch')
            test_branch = ' --dev={branch} '.format(branch=branch)
    mons = ctx.cluster.only(teuthology.is_type('mon'))
    for node, role in mons.remotes.iteritems():
        admin = node
        admin.run(args=['mkdir', conf_dir], check_status=False)
        nodename = admin.shortname
    system_type = teuthology.get_system_type(admin)
    if config.get('rhbuild'):
        admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y'])
    log.info('system type is %s', system_type)
    osds = ctx.cluster.only(teuthology.is_type('osd'))

    for remote, roles in osds.remotes.iteritems():
        devs = teuthology.get_scratch_devices(remote)
        log.info("roles %s", roles)
        if (len(devs) < 3):
            log.error(
                'Test needs minimum of 3 devices, only found %s',
                str(devs))
            raise RuntimeError("Needs minimum of 3 devices ")

    conf_path = '{conf_dir}/ceph.conf'.format(conf_dir=conf_dir)
    new_cmd = 'new ' + nodename
    execute_cdeploy(admin, new_cmd, path)
    if config.get('conf') is not None:
        confp = config.get('conf')
        for section, keys in confp.iteritems():
            lines = '[{section}]\n'.format(section=section)
            teuthology.append_lines_to_file(admin, conf_path, lines,
                                            sudo=True)
            for key, value in keys.iteritems():
                log.info("[%s] %s = %s" % (section, key, value))
                lines = '{key} = {value}\n'.format(key=key, value=value)
                teuthology.append_lines_to_file(admin, conf_path, lines,
                                                sudo=True)
    new_mon_install = 'install {branch} --mon '.format(
        branch=test_branch) + nodename
    new_mgr_install = 'install {branch} --mgr '.format(
        branch=test_branch) + nodename
    new_osd_install = 'install {branch} --osd '.format(
        branch=test_branch) + nodename
    new_admin = 'install {branch} --cli '.format(branch=test_branch) + nodename
    create_initial = 'mon create-initial '
    # either use create-keys or push command
    push_keys = 'admin ' + nodename
    execute_cdeploy(admin, new_mon_install, path)
    execute_cdeploy(admin, new_mgr_install, path)
    execute_cdeploy(admin, new_osd_install, path)
    execute_cdeploy(admin, new_admin, path)
    execute_cdeploy(admin, create_initial, path)
    execute_cdeploy(admin, push_keys, path)

    for i in range(3):
        zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i])
        prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i])
        execute_cdeploy(admin, zap_disk, path)
        execute_cdeploy(admin, prepare, path)

    log.info("list files for debugging purpose to check file permissions")
    admin.run(args=['ls', run.Raw('-lt'), conf_dir])
    remote.run(args=['sudo', 'ceph', '-s'], check_status=False)
    r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO())
    out = r.stdout.getvalue()
    log.info('Ceph health: %s', out.rstrip('\n'))
    log.info("Waiting for cluster to become healthy")
    with contextutil.safe_while(sleep=10, tries=6,
                                action='check health') as proceed:
        while proceed():
            r = remote.run(args=['sudo', 'ceph', 'health'],
                           stdout=StringIO())
            out = r.stdout.getvalue()
            if (out.split(None, 1)[0] == 'HEALTH_OK'):
                break
694 | rgw_install = 'install {branch} --rgw {node}'.format( |
695 | branch=test_branch, | |
696 | node=nodename, | |
697 | ) | |
698 | rgw_create = 'rgw create ' + nodename | |
699 | execute_cdeploy(admin, rgw_install, path) | |
700 | execute_cdeploy(admin, rgw_create, path) | |
701 | log.info('All ceph-deploy cli tests passed') | |
702 | try: | |
703 | yield | |
704 | finally: | |
705 | log.info("cleaning up") | |
706 | ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'), | |
707 | 'sudo', 'service', 'ceph', 'stop', run.Raw('||'), | |
708 | 'sudo', 'systemctl', 'stop', 'ceph.target'], | |
709 | check_status=False) | |
710 | time.sleep(4) | |
711 | for i in range(3): | |
712 | umount_dev = "{d}1".format(d=devs[i]) | |
713 | r = remote.run(args=['sudo', 'umount', run.Raw(umount_dev)]) | |
714 | cmd = 'purge ' + nodename | |
715 | execute_cdeploy(admin, cmd, path) | |
716 | cmd = 'purgedata ' + nodename | |
717 | execute_cdeploy(admin, cmd, path) | |
718 | log.info("Removing temporary dir") | |
719 | admin.run( | |
720 | args=[ | |
721 | 'rm', | |
722 | run.Raw('-rf'), | |
723 | run.Raw(conf_dir)], | |
724 | check_status=False) | |
725 | if config.get('rhbuild'): | |
726 | admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y']) | |
727 | ||
728 | ||
@contextlib.contextmanager
def single_node_test(ctx, config):
    """
    - ceph-deploy.single_node_test: null

    #rhbuild testing
    - ceph-deploy.single_node_test:
        rhbuild: 1.2.3

    """
    log.info("Testing ceph-deploy on single node")
    if config is None:
        config = {}
    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))

    if config.get('rhbuild'):
        log.info("RH Build, Skip Download")
        with contextutil.nested(
                lambda: cli_test(ctx=ctx, config=config),
        ):
            yield
    else:
        with contextutil.nested(
                lambda: install_fn.ship_utilities(ctx=ctx, config=None),
                lambda: download_ceph_deploy(ctx=ctx, config=config),
                lambda: cli_test(ctx=ctx, config=config),
        ):
            yield


@contextlib.contextmanager
def upgrade(ctx, config):
    """
    Upgrade using ceph-deploy
    eg:
      ceph-deploy.upgrade:
        # to upgrade to a specific branch, use
        branch:
          stable: jewel
        # to set up the mgr node, use
        setup-mgr-node: True
        # to wait for the cluster to be healthy after the upgrade, use
        wait-for-healthy: True
        roles: (upgrades the below roles serially)
          mon.a
          mon.b
          osd.0
    """
    roles = config.get('roles')
    # get the roles that are mapped as per ceph-deploy
    # roles are mapped for mon/mds eg: mon.a => mon.host_short_name
    mapped_role = ctx.cluster.mapped_role
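    # e.g. a hypothetical mapping: {'mon.a': 'mon.node1', 'mds.a': 'mds.node1'}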
    if config.get('branch'):
        branch = config.get('branch')
        (var, val) = branch.items()[0]
        ceph_branch = '--{var}={val}'.format(var=var, val=val)
    else:
        # default to wip-branch under test
        dev_branch = ctx.config['branch']
        ceph_branch = '--dev={branch}'.format(branch=dev_branch)
    # get the node used for initial deployment which is mon.a
    mon_a = mapped_role.get('mon.a')
    (ceph_admin,) = ctx.cluster.only(mon_a).remotes.iterkeys()
    testdir = teuthology.get_testdir(ctx)
    cmd = './ceph-deploy install ' + ceph_branch
    for role in roles:
        # check if this role is mapped (mon or mds)
        if mapped_role.get(role):
            role = mapped_role.get(role)
        remotes_and_roles = ctx.cluster.only(role).remotes
        for remote, roles in remotes_and_roles.iteritems():
            nodename = remote.shortname
            cmd = cmd + ' ' + nodename
            log.info("Upgrading ceph on %s", nodename)
            ceph_admin.run(
                args=[
                    'cd',
                    '{tdir}/ceph-deploy'.format(tdir=testdir),
                    run.Raw('&&'),
                    run.Raw(cmd),
                ],
            )
            # restart all ceph services; ideally upgrade should do this,
            # but it does not
            remote.run(
                args=[
                    'sudo', 'systemctl', 'restart', 'ceph.target'
                ]
            )
            ceph_admin.run(args=['sudo', 'ceph', '-s'])

    # workaround for http://tracker.ceph.com/issues/20950
    # write the correct mgr key to disk
    if config.get('setup-mgr-node', None):
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote, roles in mons.remotes.iteritems():
            remote.run(
                args=[
                    run.Raw('sudo ceph auth get client.bootstrap-mgr'),
                    run.Raw('|'),
                    run.Raw('sudo tee'),
                    run.Raw('/var/lib/ceph/bootstrap-mgr/ceph.keyring')
                ]
            )

    if config.get('setup-mgr-node', None):
        mgr_nodes = get_nodes_using_role(ctx, 'mgr')
        mgr_nodes = " ".join(mgr_nodes)
        mgr_install = './ceph-deploy install --mgr ' + ceph_branch + " " + mgr_nodes
        mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        # install mgr
        ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(mgr_install),
            ],
        )
        # create mgr
        ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(mgr_create),
            ],
        )
        ceph_admin.run(args=['sudo', 'ceph', '-s'])
    if config.get('wait-for-healthy', None):
        wait_until_healthy(ctx, ceph_admin, use_sudo=True)
    yield


863 | @contextlib.contextmanager |
864 | def task(ctx, config): | |
865 | """ | |
866 | Set up and tear down a Ceph cluster. | |
867 | ||
868 | For example:: | |
869 | ||
870 | tasks: | |
871 | - install: | |
872 | extras: yes | |
873 | - ssh_keys: | |
874 | - ceph-deploy: | |
875 | branch: | |
876 | stable: bobtail | |
877 | mon_initial_members: 1 | |
3efd9988 | 878 | ceph-deploy-branch: my-ceph-deploy-branch |
7c673cae FG |
879 | only_mon: true |
880 | keep_running: true | |
c07f9fc5 FG |
881 | # either choose bluestore or filestore, default is bluestore |
882 | bluestore: True | |
883 | # or | |
884 | filestore: True | |
3efd9988 FG |
885 | # skip install of mgr for old release using below flag |
886 | skip-mgr: True ( default is False ) | |
b32b8144 FG |
887 | # to use ceph-volume instead of ceph-disk |
888 | # ceph-disk can only be used with old ceph-deploy release from pypi | |
889 | use-ceph-volume: true | |
7c673cae FG |
890 | |
891 | tasks: | |
892 | - install: | |
893 | extras: yes | |
894 | - ssh_keys: | |
895 | - ceph-deploy: | |
896 | branch: | |
897 | dev: master | |
898 | conf: | |
899 | mon: | |
900 | debug mon = 20 | |
901 | ||
902 | tasks: | |
903 | - install: | |
904 | extras: yes | |
905 | - ssh_keys: | |
906 | - ceph-deploy: | |
907 | branch: | |
908 | testing: | |
909 | dmcrypt: yes | |
910 | separate_journal_disk: yes | |
911 | ||
912 | """ | |
    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task ceph-deploy only supports a dictionary for configuration"

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))

    if config.get('branch') is not None:
        assert isinstance(
            config['branch'], dict), 'branch must be a dictionary'

    log.info('task ceph-deploy with config ' + str(config))

    # we need to use 1.5.39-stable for testing jewel or master branch with
    # ceph-disk
    if config.get('use-ceph-volume', False) is False:
        # check we are not testing a specific branch
        if config.get('ceph-deploy-branch', False) is False:
            config['ceph-deploy-branch'] = '1.5.39-stable'

    with contextutil.nested(
            lambda: install_fn.ship_utilities(ctx=ctx, config=None),
            lambda: download_ceph_deploy(ctx=ctx, config=config),
            lambda: build_ceph_cluster(ctx=ctx, config=config),
    ):
        yield