]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | """ |
2 | Execute ceph-deploy as a task | |
3 | """ | |
4 | from cStringIO import StringIO | |
5 | ||
6 | import contextlib | |
7 | import os | |
8 | import time | |
9 | import logging | |
10 | import traceback | |
11 | ||
12 | from teuthology import misc as teuthology | |
13 | from teuthology import contextutil | |
14 | from teuthology.config import config as teuth_config | |
15 | from teuthology.task import install as install_fn | |
16 | from teuthology.orchestra import run | |
17 | from tasks.cephfs.filesystem import Filesystem | |
3efd9988 | 18 | from teuthology.misc import wait_until_healthy |
7c673cae FG |
19 | |
20 | log = logging.getLogger(__name__) | |
21 | ||
22 | ||
@contextlib.contextmanager
def download_ceph_deploy(ctx, config):
    """
    Downloads ceph-deploy from the ceph.com git mirror and (by default)
    switches to the master branch. If the `ceph-deploy-branch` is specified, it
    will use that instead. The `bootstrap` script is ran, with the argument
    obtained from `python_version`, if specified.

    :param ctx: teuthology run context (provides the cluster of remotes)
    :param config: task config dict; honours 'python_version' ('2' or '3')
        and 'ceph-deploy-branch' (defaults to 'master')
    :raises ValueError: if 'python_version' is set to anything but '2' or '3'

    On exit (context-manager cleanup) the cloned ceph-deploy checkout is
    removed from the admin node's test dir.
    """
    # use mon.a for ceph_admin
    (ceph_admin,) = ctx.cluster.only('mon.a').remotes.iterkeys()

    # EAFP: only validate/install python when 'python_version' was given
    try:
        py_ver = str(config['python_version'])
    except KeyError:
        pass
    else:
        supported_versions = ['2', '3']
        if py_ver not in supported_versions:
            raise ValueError("python_version must be: {}, not {}".format(
                ' or '.join(supported_versions), py_ver
            ))

        log.info("Installing Python")
        system_type = teuthology.get_system_type(ceph_admin)

        if system_type == 'rpm':
            # python36 is the py3 package name on the RPM distros in use here
            package = 'python36' if py_ver == '3' else 'python'
            ctx.cluster.run(args=[
                'sudo', 'yum', '-y', 'install',
                package, 'python-virtualenv'
            ])
        else:
            package = 'python3' if py_ver == '3' else 'python'
            ctx.cluster.run(args=[
                'sudo', 'apt-get', '-y', '--force-yes', 'install',
                package, 'python-virtualenv'
            ])

    log.info('Downloading ceph-deploy...')
    testdir = teuthology.get_testdir(ctx)
    ceph_deploy_branch = config.get('ceph-deploy-branch', 'master')

    # clone only on the admin node; the bootstrap script sets up a
    # virtualenv inside the checkout
    ceph_admin.run(
        args=[
            'git', 'clone', '-b', ceph_deploy_branch,
            teuth_config.ceph_git_base_url + 'ceph-deploy.git',
            '{tdir}/ceph-deploy'.format(tdir=testdir),
        ],
    )
    args = [
        'cd',
        '{tdir}/ceph-deploy'.format(tdir=testdir),
        run.Raw('&&'),
        './bootstrap',
    ]
    # bootstrap optionally takes the python major version as its argument
    try:
        args.append(str(config['python_version']))
    except KeyError:
        pass
    ceph_admin.run(args=args)

    try:
        yield
    finally:
        log.info('Removing ceph-deploy ...')
        ceph_admin.run(
            args=[
                'rm',
                '-rf',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
            ],
        )
95 | ||
96 | ||
def is_healthy(ctx, config):
    """Wait until a Ceph cluster is healthy.

    Polls ``sudo ceph health`` on the first mon, sleeping 10 seconds
    between attempts, until the first token of the output is HEALTH_OK.

    :param ctx: teuthology run context
    :param config: task config; passed through to get_first_mon()
    :raises RuntimeError: if HEALTH_OK is not reached in time; a full
        ``ceph report`` is dumped first to aid debugging.
    """
    testdir = teuthology.get_testdir(ctx)
    ceph_admin = teuthology.get_first_mon(ctx, config)
    (remote,) = ctx.cluster.only(ceph_admin).remotes.keys()
    max_tries = 90  # 90 tries * 10 secs --> 15 minutes
    # NOTE(review): the >= check fires before the 90th health poll, so at
    # most 89 polls actually happen (~14m50s), slightly less than the
    # "15 minutes" in the error message.
    tries = 0
    while True:
        tries += 1
        if tries >= max_tries:
            msg = "ceph health was unable to get 'HEALTH_OK' after waiting 15 minutes"
            # dump a full cluster report before giving up, for debugging
            remote.run(
                args=[
                    'cd',
                    '{tdir}'.format(tdir=testdir),
                    run.Raw('&&'),
                    'sudo', 'ceph',
                    'report',
                ],
            )
            raise RuntimeError(msg)

        r = remote.run(
            args=[
                'cd',
                '{tdir}'.format(tdir=testdir),
                run.Raw('&&'),
                'sudo', 'ceph',
                'health',
            ],
            stdout=StringIO(),
            logger=log.getChild('health'),
        )
        out = r.stdout.getvalue()
        log.info('Ceph health: %s', out.rstrip('\n'))
        # first whitespace-separated token is the overall status code
        if out.split(None, 1)[0] == 'HEALTH_OK':
            break
        time.sleep(10)
135 | ||
136 | ||
def get_nodes_using_role(ctx, target_role):
    """
    Extract the names of nodes that match a given role from a cluster, and modify the
    cluster's service IDs to match the resulting node-based naming scheme that ceph-deploy
    uses, such that if "mon.a" is on host "foo23", it'll be renamed to "mon.foo23".

    For 'mon' the returned names are FQDNs; for every other role the short
    hostname is returned.

    Side effects: rewrites ctx.cluster.remotes with the renamed service IDs
    and records the old->new mapping in ctx.cluster.mapped_role (merged
    across repeated calls with different target roles).

    :param ctx: teuthology run context
    :param target_role: role prefix to look for, e.g. 'mon', 'mds', 'mgr'
    :returns: list of node names hosting the role
    """
    # Nodes containing a service of the specified role
    nodes_of_interest = []

    # Prepare a modified version of cluster.remotes with ceph-deploy-ized names
    modified_remotes = {}
    # eg. mon.a => mon.node1; kept for later use by tasks (e.g. upgrade)
    ceph_deploy_mapped = dict()

    # .items() rather than the Python-2-only .iteritems() so this helper
    # works under both Python 2 and Python 3
    for _remote, roles_for_host in ctx.cluster.remotes.items():
        modified_remotes[_remote] = []
        for svc_id in roles_for_host:
            if svc_id.startswith("{0}.".format(target_role)):
                # a remote prints as user@host.fq.dn
                fqdn = str(_remote).split('@')[-1]
                nodename = str(str(_remote).split('.')[0]).split('@')[1]
                if target_role == 'mon':
                    nodes_of_interest.append(fqdn)
                else:
                    nodes_of_interest.append(nodename)

                mapped_role = "{0}.{1}".format(target_role, nodename)
                modified_remotes[_remote].append(mapped_role)
                # keep dict of mapped role for later use by tasks
                # eg. mon.a => mon.node1
                ceph_deploy_mapped[svc_id] = mapped_role
            else:
                modified_remotes[_remote].append(svc_id)

    ctx.cluster.remotes = modified_remotes

    # since the function is called multiple times for target roles
    # append new mapped roles
    if not hasattr(ctx.cluster, 'mapped_role'):
        ctx.cluster.mapped_role = ceph_deploy_mapped
    else:
        ctx.cluster.mapped_role.update(ceph_deploy_mapped)
    # getLogger(__name__) returns the very same logger object as the
    # module-level `log`, but keeps this function self-contained
    logging.getLogger(__name__).info(
        "New mapped_role={mr}".format(mr=ctx.cluster.mapped_role))
    return nodes_of_interest
177 | ||
178 | ||
def get_dev_for_osd(ctx, config):
    """Get a list of all osd device names.

    Returns one tuple per osd role: (host_shortname, data_dev) or, when
    'separate_journal_disk' is set in config, (host_shortname, data_dev,
    journal_dev).  Device names are stripped of their directory prefix
    (e.g. '/dev/sdb' -> 'sdb').

    :param ctx: teuthology run context
    :param config: task config dict; reads 'separate_journal_disk'
    :raises AssertionError: if a host has fewer scratch devices than its
        osd roles require.
    """
    osd_devs = []
    # .items() instead of the Python-2-only .iteritems(); works on 2 and 3
    for remote, roles_for_host in ctx.cluster.remotes.items():
        host = remote.name.split('@')[-1]
        shortname = host.split('.')[0]
        devs = teuthology.get_scratch_devices(remote)
        num_osd_per_host = list(
            teuthology.roles_of_type(
                roles_for_host, 'osd'))
        num_osds = len(num_osd_per_host)
        if config.get('separate_journal_disk') is not None:
            # each osd consumes one data disk plus one journal disk,
            # taken as consecutive pairs from the scratch-device list
            num_devs_reqd = 2 * num_osds
            assert num_devs_reqd <= len(
                devs), 'fewer data and journal disks than required ' + shortname
            for dindex in range(0, num_devs_reqd, 2):
                jd_index = dindex + 1
                dev_short = devs[dindex].split('/')[-1]
                jdev_short = devs[jd_index].split('/')[-1]
                osd_devs.append((shortname, dev_short, jdev_short))
        else:
            assert num_osds <= len(devs), 'fewer disks than osds ' + shortname
            for dev in devs[:num_osds]:
                dev_short = dev.split('/')[-1]
                osd_devs.append((shortname, dev_short))
    return osd_devs
205 | ||
206 | ||
def get_all_nodes(ctx, config):
    """Return a string of short node names separated by blanks.

    :param ctx: teuthology run context; reads ctx.config['targets'],
        whose keys look like 'user@host.fq.dn'
    :param config: unused; kept so all task helpers share a signature
    :returns: e.g. "host1 host2 host3"
    """
    # Iterate the dict directly -- the values were never used, and this
    # also avoids the Python-2-only iteritems().
    nodelist = [t.split('@')[-1].split('.')[0] for t in ctx.config['targets']]
    return " ".join(nodelist)
216 | ||
7c673cae FG |
@contextlib.contextmanager
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster using ceph-deploy.

    Deploys mons, mgrs (unless 'skip-mgr'), osds (ceph-volume when
    'use-ceph-volume' is set, ceph-disk otherwise), optional mds and
    client keys, then yields with the cluster running.  On teardown the
    cluster is stopped, optionally archived (when ctx.archive is set),
    and purged -- unless 'keep_running' is set in config.
    """

    # Expect to find ceph_admin on the first mon by ID, same place that the download task
    # puts it. Remember this here, because subsequently IDs will change from those in
    # the test config to those that ceph-deploy invents.

    (ceph_admin,) = ctx.cluster.only('mon.a').remotes.iterkeys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command"""
        # check_status=False: callers inspect the returned exit status
        return ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(cmd),
            ],
            check_status=False,
        ).exitstatus

    def ceph_disk_osd_create(ctx, config):
        """Create osds with ceph-disk style `osd create node:disk[:journal]`.

        Returns the number of osds successfully created; raises
        RuntimeError on any zap/create failure.
        """
        node_dev_list = get_dev_for_osd(ctx, config)
        no_of_osds = 0
        for d in node_dev_list:
            node = d[0]
            # zap every disk in the tuple before creating the osd
            for disk in d[1:]:
                zap = './ceph-deploy disk zap ' + node + ':' + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = './ceph-deploy osd create '
            # first check for filestore, default is bluestore with ceph-deploy
            if config.get('filestore') is not None:
                osd_create_cmd += '--filestore '
            elif config.get('bluestore') is not None:
                osd_create_cmd += '--bluestore '
            if config.get('dmcrypt') is not None:
                osd_create_cmd += '--dmcrypt '
            osd_create_cmd += ":".join(d)
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")
        return no_of_osds

    def ceph_volume_osd_create(ctx, config):
        """Create osds with ceph-volume (`osd create --data ...`).

        Returns the number of osds successfully created; raises
        RuntimeError on create failure or when filestore needs a journal
        device and none is left.
        """
        osds = ctx.cluster.only(teuthology.is_type('osd'))
        no_of_osds = 0
        for remote in osds.remotes.iterkeys():
            # all devs should be lvm
            osd_create_cmd = './ceph-deploy osd create --debug ' + remote.shortname + ' '
            # default is bluestore so we just need config item for filestore
            roles = ctx.cluster.remotes[remote]
            dev_needed = len([role for role in roles
                              if role.startswith('osd')])
            all_devs = teuthology.get_scratch_devices(remote)
            log.info("node={n}, need_devs={d}, available={a}".format(
                n=remote.shortname,
                d=dev_needed,
                a=all_devs,
            ))
            devs = all_devs[0:dev_needed]
            # rest of the devices can be used for journal if required
            jdevs = dev_needed
            for device in devs:
                device_split = device.split('/')
                # use the last two path components, e.g. vg/lv
                lv_device = device_split[-2] + '/' + device_split[-1]
                if config.get('filestore') is not None:
                    osd_create_cmd += '--filestore --data ' + lv_device + ' '
                    # filestore with ceph-volume also needs journal disk
                    try:
                        jdevice = all_devs.pop(jdevs)
                    except IndexError:
                        raise RuntimeError("No device available for \
                                            journal configuration")
                    jdevice_split = jdevice.split('/')
                    j_lv = jdevice_split[-2] + '/' + jdevice_split[-1]
                    osd_create_cmd += '--journal ' + j_lv
                else:
                    osd_create_cmd += ' --data ' + lv_device
                estatus_osd = execute_ceph_deploy(osd_create_cmd)
                if estatus_osd == 0:
                    log.info('successfully created osd')
                    no_of_osds += 1
                else:
                    raise RuntimeError("ceph-deploy: Failed to create osds")
        return no_of_osds

    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.iteritems():
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        # NOTE: get_nodes_using_role also renames the cluster's service
        # IDs to ceph-deploy's node-based scheme as a side effect
        mds_nodes = get_nodes_using_role(ctx, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, 'mon')
        mon_nodes = " ".join(mon_node)
        # skip mgr based on config item
        # this is needed when test uses latest code to install old ceph
        # versions
        skip_mgr = config.get('skip-mgr', False)
        if not skip_mgr:
            mgr_nodes = get_nodes_using_role(ctx, 'mgr')
            mgr_nodes = " ".join(mgr_nodes)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        if not skip_mgr:
            mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)

        # append any extra [section] key = value pairs from the task
        # config to the ceph.conf that `ceph-deploy new` just generated
        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.iteritems():
                lines = '[{section}]\n'.format(section=section)
                teuthology.append_lines_to_file(ceph_admin, conf_path, lines,
                                                sudo=True)
                for key, value in keys.iteritems():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    teuthology.append_lines_to_file(
                        ceph_admin, conf_path, lines, sudo=True)

        # install ceph
        dev_branch = ctx.config['branch']
        branch = '--dev={branch}'.format(branch=dev_branch)
        if ceph_branch:
            option = ceph_branch
        else:
            option = branch
        install_nodes = './ceph-deploy install ' + option + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install ceph-test package too
        install_nodes2 = './ceph-deploy install --tests ' + option + \
                         " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so lets
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)

        estatus_gather = execute_ceph_deploy(gather_keys)
        if estatus_gather != 0:
            raise RuntimeError("ceph-deploy: Failed during gather keys")

        # install admin key on mons (ceph-create-keys doesn't do this any more)
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote in mons.remotes.iterkeys():
            execute_ceph_deploy('./ceph-deploy admin ' + remote.shortname)

        # create osd's
        if config.get('use-ceph-volume', False):
            no_of_osds = ceph_volume_osd_create(ctx, config)
        else:
            # this method will only work with ceph-deploy v1.5.39 or older
            no_of_osds = ceph_disk_osd_create(ctx, config)

        if not skip_mgr:
            execute_ceph_deploy(mgr_create)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            # destroy all mons except the first one
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + \
                    " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

        log.info('Setting up client nodes...')
        conf_path = '/etc/ceph/ceph.conf'
        admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
        first_mon = teuthology.get_first_mon(ctx, config)
        (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
        conf_data = teuthology.get_file(
            remote=mon0_remote,
            path=conf_path,
            sudo=True,
        )
        admin_keyring = teuthology.get_file(
            remote=mon0_remote,
            path=admin_keyring_path,
            sudo=True,
        )

        clients = ctx.cluster.only(teuthology.is_type('client'))
        for remot, roles_for_host in clients.remotes.iteritems():
            for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                client_keyring = \
                    '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                # create the client key on the mon, then copy key, admin
                # keyring and conf over to the client node
                mon0_remote.run(
                    args=[
                        'cd',
                        '{tdir}'.format(tdir=testdir),
                        run.Raw('&&'),
                        'sudo', 'bash', '-c',
                        run.Raw('"'), 'ceph',
                        'auth',
                        'get-or-create',
                        'client.{id}'.format(id=id_),
                        'mds', 'allow',
                        'mon', 'allow *',
                        'osd', 'allow *',
                        run.Raw('>'),
                        client_keyring,
                        run.Raw('"'),
                    ],
                )
                key_data = teuthology.get_file(
                    remote=mon0_remote,
                    path=client_keyring,
                    sudo=True,
                )
                teuthology.sudo_write_file(
                    remote=remot,
                    path=client_keyring,
                    data=key_data,
                    perms='0644'
                )
                teuthology.sudo_write_file(
                    remote=remot,
                    path=admin_keyring_path,
                    data=admin_keyring,
                    perms='0644'
                )
                teuthology.sudo_write_file(
                    remote=remot,
                    path=conf_path,
                    data=conf_data,
                    perms='0644'
                )

        if mds_nodes:
            log.info('Configuring CephFS...')
            Filesystem(ctx, create=True)
        elif not config.get('only_mon'):
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        # create rbd pool
        ceph_admin.run(
            args=[
                'sudo', 'ceph', '--cluster', 'ceph',
                'osd', 'pool', 'create', 'rbd', '128', '128'],
            check_status=False)
        ceph_admin.run(
            args=[
                'sudo', 'ceph', '--cluster', 'ceph',
                'osd', 'pool', 'application', 'enable',
                'rbd', 'rbd', '--yes-i-really-mean-it'
            ],
            check_status=False)
        yield

    except Exception:
        log.info(
            "Error encountered, logging exception before tearing down ceph-deploy")
        log.info(traceback.format_exc())
        raise
    finally:
        if config.get('keep_running'):
            return
        log.info('Stopping ceph...')
        ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'],
                        check_status=False)
        time.sleep(4)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
                              'grep', '-v', 'grep', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)
        ctx.cluster.run(args=['sudo', 'systemctl', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.iteritems():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.iterkeys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(purgedata_nodes)
581 | ||
582 | ||
@contextlib.contextmanager
def cli_test(ctx, config):
    """
    ceph-deploy cli to exercise most commonly use cli's and ensure
    all commands works and also startup the init system.

    Exercises `ceph-deploy new/install/mon create-initial/mgr create/
    osd prepare/rgw create` on a single mon node, waits for HEALTH_OK,
    then yields; on teardown stops ceph, unmounts the osd partitions and
    purges the node.  With 'rhbuild' in config the distro package is
    used instead of the git checkout.
    """
    log.info('Ceph-deploy Test')
    if config is None:
        config = {}
    test_branch = ''
    # scratch dir for the generated ceph.conf and keyrings
    conf_dir = teuthology.get_testdir(ctx) + "/cdtest"

    def execute_cdeploy(admin, cmd, path):
        """Execute ceph-deploy commands """
        """Either use git path or repo path """
        args = ['cd', conf_dir, run.Raw(';')]
        if path:
            # run the ceph-deploy script from the git checkout
            args.append('{path}/ceph-deploy/ceph-deploy'.format(path=path))
        else:
            # rely on the packaged ceph-deploy being on PATH
            args.append('ceph-deploy')
        args.append(run.Raw(cmd))
        ec = admin.run(args=args, check_status=False).exitstatus
        if ec != 0:
            raise RuntimeError(
                "failed during ceph-deploy cmd: {cmd} , ec={ec}".format(cmd=cmd, ec=ec))

    if config.get('rhbuild'):
        path = None
    else:
        path = teuthology.get_testdir(ctx)
        # test on branch from config eg: wip-* , master or next etc
        # packages for all distro's should exist for wip*
        if ctx.config.get('branch'):
            branch = ctx.config.get('branch')
            test_branch = ' --dev={branch} '.format(branch=branch)
    mons = ctx.cluster.only(teuthology.is_type('mon'))
    # single-node test: `admin`/`nodename` end up as the (only) mon node
    for node, role in mons.remotes.iteritems():
        admin = node
        admin.run(args=['mkdir', conf_dir], check_status=False)
        nodename = admin.shortname
    system_type = teuthology.get_system_type(admin)
    if config.get('rhbuild'):
        admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y'])
    log.info('system type is %s', system_type)
    osds = ctx.cluster.only(teuthology.is_type('osd'))

    for remote, roles in osds.remotes.iteritems():
        devs = teuthology.get_scratch_devices(remote)
        log.info("roles %s", roles)
        if (len(devs) < 3):
            log.error(
                'Test needs minimum of 3 devices, only found %s',
                str(devs))
            raise RuntimeError("Needs minimum of 3 devices ")

    conf_path = '{conf_dir}/ceph.conf'.format(conf_dir=conf_dir)
    new_cmd = 'new ' + nodename
    execute_cdeploy(admin, new_cmd, path)
    # append any extra conf sections/keys from the task config
    if config.get('conf') is not None:
        confp = config.get('conf')
        for section, keys in confp.iteritems():
            lines = '[{section}]\n'.format(section=section)
            teuthology.append_lines_to_file(admin, conf_path, lines,
                                            sudo=True)
            for key, value in keys.iteritems():
                log.info("[%s] %s = %s" % (section, key, value))
                lines = '{key} = {value}\n'.format(key=key, value=value)
                teuthology.append_lines_to_file(admin, conf_path, lines,
                                                sudo=True)
    new_mon_install = 'install {branch} --mon '.format(
        branch=test_branch) + nodename
    new_mgr_install = 'install {branch} --mgr '.format(
        branch=test_branch) + nodename
    new_osd_install = 'install {branch} --osd '.format(
        branch=test_branch) + nodename
    new_admin = 'install {branch} --cli '.format(branch=test_branch) + nodename
    create_initial = 'mon create-initial '
    mgr_create = 'mgr create ' + nodename
    # either use create-keys or push command
    push_keys = 'admin ' + nodename
    execute_cdeploy(admin, new_mon_install, path)
    execute_cdeploy(admin, new_mgr_install, path)
    execute_cdeploy(admin, new_osd_install, path)
    execute_cdeploy(admin, new_admin, path)
    execute_cdeploy(admin, create_initial, path)
    execute_cdeploy(admin, mgr_create, path)
    execute_cdeploy(admin, push_keys, path)

    # zap and prepare the first three scratch devices as osds
    for i in range(3):
        zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i])
        prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i])
        execute_cdeploy(admin, zap_disk, path)
        execute_cdeploy(admin, prepare, path)

    log.info("list files for debugging purpose to check file permissions")
    admin.run(args=['ls', run.Raw('-lt'), conf_dir])
    remote.run(args=['sudo', 'ceph', '-s'], check_status=False)
    r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO())
    out = r.stdout.getvalue()
    log.info('Ceph health: %s', out.rstrip('\n'))
    log.info("Waiting for cluster to become healthy")
    # poll health up to 6 times, 10s apart
    with contextutil.safe_while(sleep=10, tries=6,
                                action='check health') as proceed:
        while proceed():
            r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO())
            out = r.stdout.getvalue()
            if (out.split(None, 1)[0] == 'HEALTH_OK'):
                break
    rgw_install = 'install {branch} --rgw {node}'.format(
        branch=test_branch,
        node=nodename,
    )
    rgw_create = 'rgw create ' + nodename
    execute_cdeploy(admin, rgw_install, path)
    execute_cdeploy(admin, rgw_create, path)
    log.info('All ceph-deploy cli tests passed')
    try:
        yield
    finally:
        log.info("cleaning up")
        ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'],
                        check_status=False)
        time.sleep(4)
        # unmount the data partition of each osd device prepared above
        for i in range(3):
            umount_dev = "{d}1".format(d=devs[i])
            r = remote.run(args=['sudo', 'umount', run.Raw(umount_dev)])
        cmd = 'purge ' + nodename
        execute_cdeploy(admin, cmd, path)
        cmd = 'purgedata ' + nodename
        execute_cdeploy(admin, cmd, path)
        log.info("Removing temporary dir")
        admin.run(
            args=[
                'rm',
                run.Raw('-rf'),
                run.Raw(conf_dir)],
            check_status=False)
        if config.get('rhbuild'):
            admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y'])
723 | ||
724 | ||
@contextlib.contextmanager
def single_node_test(ctx, config):
    """
    Run the ceph-deploy cli test on a single node.

    - ceph-deploy.single_node_test: null

    #rhbuild testing
    - ceph-deploy.single_node_test:
        rhbuild: 1.2.3

    With 'rhbuild' set the distro ceph-deploy package is used and the
    download step is skipped; otherwise utilities are shipped and
    ceph-deploy is cloned before running the cli test.
    """
    log.info("Testing ceph-deploy on single node")
    if config is None:
        config = {}
    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))

    # assemble the nested subtasks first, then enter them all at once
    if config.get('rhbuild'):
        log.info("RH Build, Skip Download")
        subtasks = [
            lambda: cli_test(ctx=ctx, config=config),
        ]
    else:
        subtasks = [
            lambda: install_fn.ship_utilities(ctx=ctx, config=None),
            lambda: download_ceph_deploy(ctx=ctx, config=config),
            lambda: cli_test(ctx=ctx, config=config),
        ]
    with contextutil.nested(*subtasks):
        yield
754 | ||
755 | ||
3efd9988 FG |
@contextlib.contextmanager
def upgrade(ctx, config):
    """
    Upgrade using ceph-deploy
    eg:
      ceph-deploy.upgrade:
        # to upgrade to specific branch, use
        branch:
           stable: jewel
        # to setup mgr node, use
        setup-mgr-node: True
        # to wait for cluster to be healthy after all upgrade, use
        wait-for-healthy: True
        role: (upgrades the below roles serially)
           mon.a
           mon.b
           osd.0
    """
    roles = config.get('roles')
    # get the roles that are mapped as per ceph-deploy
    # roles are mapped for mon/mds eg: mon.a  => mon.host_short_name
    mapped_role = ctx.cluster.mapped_role
    log.info("roles={r}, mapped_roles={mr}".format(r=roles, mr=mapped_role))
    if config.get('branch'):
        branch = config.get('branch')
        # NOTE(review): dict.items()[0] only works on Python 2, where
        # items() returns a list; py3 would need list(branch.items())[0]
        (var, val) = branch.items()[0]
        ceph_branch = '--{var}={val}'.format(var=var, val=val)
    else:
        # default to wip-branch under test
        dev_branch = ctx.config['branch']
        ceph_branch = '--dev={branch}'.format(branch=dev_branch)
    # get the node used for initial deployment which is mon.a
    mon_a = mapped_role.get('mon.a')
    (ceph_admin,) = ctx.cluster.only(mon_a).remotes.iterkeys()
    testdir = teuthology.get_testdir(ctx)
    cmd = './ceph-deploy install ' + ceph_branch
    # upgrade the requested roles serially, one node at a time; note the
    # install command accumulates node names across iterations
    for role in roles:
        # check if this role is mapped (mon or mds)
        if mapped_role.get(role):
            role = mapped_role.get(role)
        remotes_and_roles = ctx.cluster.only(role).remotes
        for remote, roles in remotes_and_roles.iteritems():
            nodename = remote.shortname
            cmd = cmd + ' ' + nodename
            log.info("Upgrading ceph on %s", nodename)
            ceph_admin.run(
                args=[
                    'cd',
                    '{tdir}/ceph-deploy'.format(tdir=testdir),
                    run.Raw('&&'),
                    run.Raw(cmd),
                ],
            )
            # restart all ceph services, ideally upgrade should but it does not
            remote.run(
                args=[
                    'sudo', 'systemctl', 'restart', 'ceph.target'
                ]
            )
            ceph_admin.run(args=['sudo', 'ceph', '-s'])

    # workaround for http://tracker.ceph.com/issues/20950
    # write the correct mgr key to disk
    if config.get('setup-mgr-node', None):
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote, roles in mons.remotes.iteritems():
            remote.run(
                args=[
                    run.Raw('sudo ceph auth get client.bootstrap-mgr'),
                    run.Raw('|'),
                    run.Raw('sudo tee'),
                    run.Raw('/var/lib/ceph/bootstrap-mgr/ceph.keyring')
                ]
            )

    if config.get('setup-mgr-node', None):
        mgr_nodes = get_nodes_using_role(ctx, 'mgr')
        mgr_nodes = " ".join(mgr_nodes)
        mgr_install = './ceph-deploy install --mgr ' + ceph_branch + " " + mgr_nodes
        mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        # install mgr
        ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(mgr_install),
            ],
        )
        # create mgr
        ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(mgr_create),
            ],
        )
        ceph_admin.run(args=['sudo', 'ceph', '-s'])
    if config.get('wait-for-healthy', None):
        wait_until_healthy(ctx, ceph_admin, use_sudo=True)
    yield
858 | ||
859 | ||
7c673cae FG |
@contextlib.contextmanager
def task(ctx, config):
    """
    Set up and tear down a Ceph cluster deployed with ceph-deploy.

    :param ctx: teuthology context (provides the cluster, run config and
                overrides).
    :param config: task configuration dict (or None for all defaults); see
                   the examples below for the recognized keys.

    For example::

        tasks:
        - install:
            extras: yes
        - ssh_keys:
        - ceph-deploy:
            branch:
                stable: bobtail
            mon_initial_members: 1
            ceph-deploy-branch: my-ceph-deploy-branch
            only_mon: true
            keep_running: true
            # either choose bluestore or filestore, default is bluestore
            bluestore: True
            # or
            filestore: True
            # skip install of mgr for old release using below flag
            skip-mgr: True ( default is False )
            # to use ceph-volume instead of ceph-disk
            # ceph-disk can only be used with old ceph-deploy release from pypi
            use-ceph-volume: true

        tasks:
        - install:
            extras: yes
        - ssh_keys:
        - ceph-deploy:
            branch:
                dev: master
            conf:
                mon:
                    debug mon = 20

        tasks:
        - install:
            extras: yes
        - ssh_keys:
        - ceph-deploy:
            branch:
                testing:
            dmcrypt: yes
            separate_journal_disk: yes

    """
    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task ceph-deploy only supports a dictionary for configuration"

    # fold in any suite-wide overrides before validating/using the config
    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))

    if config.get('branch') is not None:
        assert isinstance(
            config['branch'], dict), 'branch must be a dictionary'

    # lazy %-style args: let the logging module do the formatting only if
    # the record is actually emitted (avoids eager str(config) concatenation)
    log.info('task ceph-deploy with config %s', config)

    # we need to use 1.5.39-stable for testing jewel or master branch with
    # ceph-disk
    if config.get('use-ceph-volume', False) is False:
        # check we are not testing a specific ceph-deploy branch already;
        # 'is False' (not truthiness) is deliberate so only a missing key /
        # explicit False triggers the pin, never other falsy values
        if config.get('ceph-deploy-branch', False) is False:
            config['ceph-deploy-branch'] = '1.5.39-stable'

    # ship helper utilities, fetch ceph-deploy, then build (and on exit,
    # tear down) the cluster; yield control to nested tasks in between
    with contextutil.nested(
            lambda: install_fn.ship_utilities(ctx=ctx, config=None),
            lambda: download_ceph_deploy(ctx=ctx, config=config),
            lambda: build_ceph_cluster(ctx=ctx, config=config),
    ):
        yield