"""
Execute ceph-deploy as a task
"""

import contextlib
import os
import time
import logging
import traceback

from teuthology import misc as teuthology
from teuthology import contextutil
from teuthology.config import config as teuth_config
from teuthology.task import install as install_fn
from teuthology.orchestra import run
from tasks.cephfs.filesystem import Filesystem
from teuthology.misc import wait_until_healthy

log = logging.getLogger(__name__)

@contextlib.contextmanager
def download_ceph_deploy(ctx, config):
    """
    Downloads ceph-deploy from the ceph.com git mirror and (by default)
    switches to the master branch. If `ceph-deploy-branch` is specified, it
    will use that instead. The `bootstrap` script is run with the argument
    obtained from `python_version`, if specified.
    """
    # use mon.a for ceph_admin
    (ceph_admin,) = ctx.cluster.only('mon.a').remotes.keys()

    try:
        py_ver = str(config['python_version'])
    except KeyError:
        pass
    else:
        supported_versions = ['2', '3']
        if py_ver not in supported_versions:
            raise ValueError("python_version must be: {}, not {}".format(
                ' or '.join(supported_versions), py_ver
            ))

        log.info("Installing Python")
        system_type = teuthology.get_system_type(ceph_admin)

        if system_type == 'rpm':
            package = 'python36' if py_ver == '3' else 'python'
            ctx.cluster.run(args=[
                'sudo', 'yum', '-y', 'install',
                package, 'python-virtualenv'
            ])
        else:
            package = 'python3' if py_ver == '3' else 'python'
            ctx.cluster.run(args=[
                'sudo', 'apt-get', '-y', '--force-yes', 'install',
                package, 'python-virtualenv'
            ])

    log.info('Downloading ceph-deploy...')
    testdir = teuthology.get_testdir(ctx)
    ceph_deploy_branch = config.get('ceph-deploy-branch', 'master')

    ceph_admin.run(
        args=[
            'git', 'clone', '-b', ceph_deploy_branch,
            teuth_config.ceph_git_base_url + 'ceph-deploy.git',
            '{tdir}/ceph-deploy'.format(tdir=testdir),
        ],
    )
    args = [
        'cd',
        '{tdir}/ceph-deploy'.format(tdir=testdir),
        run.Raw('&&'),
        './bootstrap',
    ]
    try:
        args.append(str(config['python_version']))
    except KeyError:
        pass
    ceph_admin.run(args=args)

    try:
        yield
    finally:
        log.info('Removing ceph-deploy ...')
        ceph_admin.run(
            args=[
                'rm',
                '-rf',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
            ],
        )

def is_healthy(ctx, config):
    """Wait until a Ceph cluster is healthy."""
    testdir = teuthology.get_testdir(ctx)
    ceph_admin = teuthology.get_first_mon(ctx, config)
    (remote,) = ctx.cluster.only(ceph_admin).remotes.keys()
    max_tries = 90  # 90 tries * 10 secs --> 15 minutes
    tries = 0
    while True:
        tries += 1
        if tries >= max_tries:
            msg = "ceph health was unable to get 'HEALTH_OK' after waiting 15 minutes"
            remote.run(
                args=[
                    'cd',
                    '{tdir}'.format(tdir=testdir),
                    run.Raw('&&'),
                    'sudo', 'ceph',
                    'report',
                ],
            )
            raise RuntimeError(msg)

        out = remote.sh(
            [
                'cd',
                '{tdir}'.format(tdir=testdir),
                run.Raw('&&'),
                'sudo', 'ceph',
                'health',
            ],
            logger=log.getChild('health'),
        )
        log.info('Ceph health: %s', out.rstrip('\n'))
        if out.split(None, 1)[0] == 'HEALTH_OK':
            break
        time.sleep(10)

def get_nodes_using_role(ctx, target_role):
    """
    Extract the names of nodes that match a given role from a cluster, and modify the
    cluster's service IDs to match the resulting node-based naming scheme that ceph-deploy
    uses, such that if "mon.a" is on host "foo23", it'll be renamed to "mon.foo23".
    """

    # Nodes containing a service of the specified role
    nodes_of_interest = []

    # Prepare a modified version of cluster.remotes with ceph-deploy-ized names
    modified_remotes = {}
    ceph_deploy_mapped = dict()
    for _remote, roles_for_host in ctx.cluster.remotes.items():
        modified_remotes[_remote] = []
        for svc_id in roles_for_host:
            if svc_id.startswith("{0}.".format(target_role)):
                fqdn = str(_remote).split('@')[-1]
                nodename = str(str(_remote).split('.')[0]).split('@')[1]
                if target_role == 'mon':
                    nodes_of_interest.append(fqdn)
                else:
                    nodes_of_interest.append(nodename)
                mapped_role = "{0}.{1}".format(target_role, nodename)
                modified_remotes[_remote].append(mapped_role)
                # keep dict of mapped role for later use by tasks
                # eg. mon.a => mon.node1
                ceph_deploy_mapped[svc_id] = mapped_role
            else:
                modified_remotes[_remote].append(svc_id)

    ctx.cluster.remotes = modified_remotes
    # since the function is called multiple times for target roles
    # append new mapped roles
    if not hasattr(ctx.cluster, 'mapped_role'):
        ctx.cluster.mapped_role = ceph_deploy_mapped
    else:
        ctx.cluster.mapped_role.update(ceph_deploy_mapped)
    log.info("New mapped_role={mr}".format(mr=ctx.cluster.mapped_role))
    return nodes_of_interest

def get_dev_for_osd(ctx, config):
    """Get a list of all osd device names."""
    osd_devs = []
    for remote, roles_for_host in ctx.cluster.remotes.items():
        host = remote.name.split('@')[-1]
        shortname = host.split('.')[0]
        devs = teuthology.get_scratch_devices(remote)
        num_osd_per_host = list(
            teuthology.roles_of_type(
                roles_for_host, 'osd'))
        num_osds = len(num_osd_per_host)
        if config.get('separate_journal_disk') is not None:
            num_devs_reqd = 2 * num_osds
            assert num_devs_reqd <= len(
                devs), 'fewer data and journal disks than required ' + shortname
            for dindex in range(0, num_devs_reqd, 2):
                jd_index = dindex + 1
                dev_short = devs[dindex].split('/')[-1]
                jdev_short = devs[jd_index].split('/')[-1]
                osd_devs.append((shortname, dev_short, jdev_short))
        else:
            assert num_osds <= len(devs), 'fewer disks than osds ' + shortname
            for dev in devs[:num_osds]:
                dev_short = dev.split('/')[-1]
                osd_devs.append((shortname, dev_short))
    return osd_devs

def get_all_nodes(ctx, config):
    """Return a string of node names separated by blanks"""
    nodelist = []
    for t, k in ctx.config['targets'].items():
        host = t.split('@')[-1]
        simple_host = host.split('.')[0]
        nodelist.append(simple_host)
    nodelist = " ".join(nodelist)
    return nodelist

@contextlib.contextmanager
def build_ceph_cluster(ctx, config):
    """Build a ceph cluster"""

    # Expect to find ceph_admin on the first mon by ID, same place that the download task
    # puts it. Remember this here, because subsequently IDs will change from those in
    # the test config to those that ceph-deploy invents.

    (ceph_admin,) = ctx.cluster.only('mon.a').remotes.keys()

    def execute_ceph_deploy(cmd):
        """Remotely execute a ceph_deploy command"""
        return ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(cmd),
            ],
            check_status=False,
        ).exitstatus

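    # The helpers below only assemble ceph-deploy command strings and hand them
    # to execute_ceph_deploy(), which returns an exit status rather than raising
    # (check_status=False). A command built by ceph_disk_osd_create would look
    # roughly like './ceph-deploy osd create --dmcrypt node1:sdb:sdc'
    # (node and device names here are illustrative).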
    def ceph_disk_osd_create(ctx, config):
        node_dev_list = get_dev_for_osd(ctx, config)
        no_of_osds = 0
        for d in node_dev_list:
            node = d[0]
            for disk in d[1:]:
                zap = './ceph-deploy disk zap ' + node + ' ' + disk
                estatus = execute_ceph_deploy(zap)
                if estatus != 0:
                    raise RuntimeError("ceph-deploy: Failed to zap osds")
            osd_create_cmd = './ceph-deploy osd create '
            # first check for filestore, default is bluestore with ceph-deploy
            if config.get('filestore') is not None:
                osd_create_cmd += '--filestore '
            elif config.get('bluestore') is not None:
                osd_create_cmd += '--bluestore '
            if config.get('dmcrypt') is not None:
                osd_create_cmd += '--dmcrypt '
            osd_create_cmd += ":".join(d)
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")
        return no_of_osds

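    # ceph-volume path: one 'osd create' call per OSD host, with one '--data'
    # argument per OSD role on that host, e.g. roughly
    # './ceph-deploy osd create --debug node1 --data vg_scratch/lv_1'
    # (names are illustrative; devices come from get_scratch_devices()).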
    def ceph_volume_osd_create(ctx, config):
        osds = ctx.cluster.only(teuthology.is_type('osd'))
        no_of_osds = 0
        for remote in osds.remotes.keys():
            # all devs should be lvm
            osd_create_cmd = './ceph-deploy osd create --debug ' + remote.shortname + ' '
            # default is bluestore so we just need config item for filestore
            roles = ctx.cluster.remotes[remote]
            dev_needed = len([role for role in roles
                              if role.startswith('osd')])
            all_devs = teuthology.get_scratch_devices(remote)
            log.info("node={n}, need_devs={d}, available={a}".format(
                n=remote.shortname,
                d=dev_needed,
                a=all_devs,
            ))
            devs = all_devs[0:dev_needed]
            # rest of the devices can be used for journal if required
            jdevs = dev_needed
            for device in devs:
                device_split = device.split('/')
                lv_device = device_split[-2] + '/' + device_split[-1]
                if config.get('filestore') is not None:
                    osd_create_cmd += '--filestore --data ' + lv_device + ' '
                    # filestore with ceph-volume also needs journal disk
                    try:
                        jdevice = all_devs.pop(jdevs)
                    except IndexError:
                        raise RuntimeError("No device available for \
                                            journal configuration")
                    jdevice_split = jdevice.split('/')
                    j_lv = jdevice_split[-2] + '/' + jdevice_split[-1]
                    osd_create_cmd += '--journal ' + j_lv
                else:
                    osd_create_cmd += ' --data ' + lv_device
            estatus_osd = execute_ceph_deploy(osd_create_cmd)
            if estatus_osd == 0:
                log.info('successfully created osd')
                no_of_osds += 1
            else:
                raise RuntimeError("ceph-deploy: Failed to create osds")
        return no_of_osds

    try:
        log.info('Building ceph cluster using ceph-deploy...')
        testdir = teuthology.get_testdir(ctx)
        ceph_branch = None
        if config.get('branch') is not None:
            cbranch = config.get('branch')
            for var, val in cbranch.items():
                ceph_branch = '--{var}={val}'.format(var=var, val=val)
        all_nodes = get_all_nodes(ctx, config)
        mds_nodes = get_nodes_using_role(ctx, 'mds')
        mds_nodes = " ".join(mds_nodes)
        mon_node = get_nodes_using_role(ctx, 'mon')
        mon_nodes = " ".join(mon_node)
        # skip mgr based on config item
        # this is needed when test uses latest code to install old ceph
        # versions
        skip_mgr = config.get('skip-mgr', False)
        if not skip_mgr:
            mgr_nodes = get_nodes_using_role(ctx, 'mgr')
            mgr_nodes = " ".join(mgr_nodes)
        new_mon = './ceph-deploy new' + " " + mon_nodes
        if not skip_mgr:
            mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        mon_hostname = mon_nodes.split(' ')[0]
        mon_hostname = str(mon_hostname)
        gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
        deploy_mds = './ceph-deploy mds create' + " " + mds_nodes

        if mon_nodes is None:
            raise RuntimeError("no monitor nodes in the config file")

        estatus_new = execute_ceph_deploy(new_mon)
        if estatus_new != 0:
            raise RuntimeError("ceph-deploy: new command failed")

        log.info('adding config inputs...')
        testdir = teuthology.get_testdir(ctx)
        conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)

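        # For a config block like "conf: {mon: {'debug mon': 20}}" (as shown in
        # the task docstring below), the loop appends to ceph-deploy's ceph.conf:
        #   [mon]
        #   debug mon = 20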
        if config.get('conf') is not None:
            confp = config.get('conf')
            for section, keys in confp.items():
                lines = '[{section}]\n'.format(section=section)
                ceph_admin.sudo_write_file(conf_path, lines, append=True)
                for key, value in keys.items():
                    log.info("[%s] %s = %s" % (section, key, value))
                    lines = '{key} = {value}\n'.format(key=key, value=value)
                    ceph_admin.sudo_write_file(conf_path, lines, append=True)

        # install ceph
        dev_branch = ctx.config['branch']
        branch = '--dev={branch}'.format(branch=dev_branch)
        if ceph_branch:
            option = ceph_branch
        else:
            option = branch
        install_nodes = './ceph-deploy install ' + option + " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph")
        # install ceph-test package too
        install_nodes2 = './ceph-deploy install --tests ' + option + \
                         " " + all_nodes
        estatus_install = execute_ceph_deploy(install_nodes2)
        if estatus_install != 0:
            raise RuntimeError("ceph-deploy: Failed to install ceph-test")

        mon_create_nodes = './ceph-deploy mon create-initial'
        # If the following fails, it is OK, it might just be that the monitors
        # are taking way more than a minute/monitor to form quorum, so let's
        # try the next block which will wait up to 15 minutes to gatherkeys.
        execute_ceph_deploy(mon_create_nodes)

        estatus_gather = execute_ceph_deploy(gather_keys)
        if estatus_gather != 0:
            raise RuntimeError("ceph-deploy: Failed during gather keys")

        # install admin key on mons (ceph-create-keys doesn't do this any more)
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote in mons.remotes.keys():
            execute_ceph_deploy('./ceph-deploy admin ' + remote.shortname)

        # create OSDs
        if config.get('use-ceph-volume', False):
            no_of_osds = ceph_volume_osd_create(ctx, config)
        else:
            # this method will only work with ceph-deploy v1.5.39 or older
            no_of_osds = ceph_disk_osd_create(ctx, config)

        if not skip_mgr:
            execute_ceph_deploy(mgr_create)

        if mds_nodes:
            estatus_mds = execute_ceph_deploy(deploy_mds)
            if estatus_mds != 0:
                raise RuntimeError("ceph-deploy: Failed to deploy mds")

        if config.get('test_mon_destroy') is not None:
            for d in range(1, len(mon_node)):
                mon_destroy_nodes = './ceph-deploy mon destroy' + \
                    " " + mon_node[d]
                estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
                if estatus_mon_d != 0:
                    raise RuntimeError("ceph-deploy: Failed to delete monitor")

        if config.get('wait-for-healthy', True) and no_of_osds >= 2:
            is_healthy(ctx=ctx, config=None)

            log.info('Setting up client nodes...')
            conf_path = '/etc/ceph/ceph.conf'
            admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
            first_mon = teuthology.get_first_mon(ctx, config)
            (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
            conf_data = mon0_remote.read_file(conf_path, sudo=True)
            admin_keyring = mon0_remote.read_file(admin_keyring_path, sudo=True)

            clients = ctx.cluster.only(teuthology.is_type('client'))
            for remote, roles_for_host in clients.remotes.items():
                for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
                    client_keyring = \
                        '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
                    mon0_remote.run(
                        args=[
                            'cd',
                            '{tdir}'.format(tdir=testdir),
                            run.Raw('&&'),
                            'sudo', 'bash', '-c',
                            run.Raw('"'), 'ceph',
                            'auth',
                            'get-or-create',
                            'client.{id}'.format(id=id_),
                            'mds', 'allow',
                            'mon', 'allow *',
                            'osd', 'allow *',
                            run.Raw('>'),
                            client_keyring,
                            run.Raw('"'),
                        ],
                    )
                    key_data = mon0_remote.read_file(
                        path=client_keyring,
                        sudo=True,
                    )
                    remote.sudo_write_file(
                        path=client_keyring,
                        data=key_data,
                        mode='0644'
                    )
                    remote.sudo_write_file(
                        path=admin_keyring_path,
                        data=admin_keyring,
                        mode='0644'
                    )
                    remote.sudo_write_file(
                        path=conf_path,
                        data=conf_data,
                        mode='0644'
                    )

            if mds_nodes:
                log.info('Configuring CephFS...')
                Filesystem(ctx, create=True)
        elif not config.get('only_mon'):
            raise RuntimeError(
                "The cluster is NOT operational due to insufficient OSDs")
        # create rbd pool
        ceph_admin.run(
            args=[
                'sudo', 'ceph', '--cluster', 'ceph',
                'osd', 'pool', 'create', 'rbd', '128', '128'],
            check_status=False)
        ceph_admin.run(
            args=[
                'sudo', 'ceph', '--cluster', 'ceph',
                'osd', 'pool', 'application', 'enable',
                'rbd', 'rbd', '--yes-i-really-mean-it'
            ],
            check_status=False)
        yield

    except Exception:
        log.info(
            "Error encountered, logging exception before tearing down ceph-deploy")
        log.info(traceback.format_exc())
        raise
    finally:
        if config.get('keep_running'):
            return
        log.info('Stopping ceph...')
        ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'],
                        check_status=False)
        time.sleep(4)

        # and now just check for the processes themselves, as if upstart/sysvinit
        # is lying to us. Ignore errors if the grep fails
        ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
                              'grep', '-v', 'grep', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)
        ctx.cluster.run(args=['sudo', 'systemctl', run.Raw('|'),
                              'grep', 'ceph'], check_status=False)

        if ctx.archive is not None:
            # archive mon data, too
            log.info('Archiving mon data...')
            path = os.path.join(ctx.archive, 'data')
            os.makedirs(path)
            mons = ctx.cluster.only(teuthology.is_type('mon'))
            for remote, roles in mons.remotes.items():
                for role in roles:
                    if role.startswith('mon.'):
                        teuthology.pull_directory_tarball(
                            remote,
                            '/var/lib/ceph/mon',
                            path + '/' + role + '.tgz')

            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            os.makedirs(path)
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.shortname)
                os.makedirs(sub)
                teuthology.pull_directory(remote, '/var/log/ceph',
                                          os.path.join(sub, 'log'))

        # Prevent these from being undefined if the try block fails
        all_nodes = get_all_nodes(ctx, config)
        purge_nodes = './ceph-deploy purge' + " " + all_nodes
        purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes

        log.info('Purging package...')
        execute_ceph_deploy(purge_nodes)
        log.info('Purging data...')
        execute_ceph_deploy(purgedata_nodes)

@contextlib.contextmanager
def cli_test(ctx, config):
    """
    Exercise the most commonly used ceph-deploy CLI commands, make sure they
    all work, and start the cluster services via the init system.
    """
    log.info('Ceph-deploy Test')
    if config is None:
        config = {}
    test_branch = ''
    conf_dir = teuthology.get_testdir(ctx) + "/cdtest"

    def execute_cdeploy(admin, cmd, path):
        """Execute a ceph-deploy command, using either the git path or the repo path."""
        args = ['cd', conf_dir, run.Raw(';')]
        if path:
            args.append('{path}/ceph-deploy/ceph-deploy'.format(path=path))
        else:
            args.append('ceph-deploy')
        args.append(run.Raw(cmd))
        ec = admin.run(args=args, check_status=False).exitstatus
        if ec != 0:
            raise RuntimeError(
                "failed during ceph-deploy cmd: {cmd} , ec={ec}".format(cmd=cmd, ec=ec))

    if config.get('rhbuild'):
        path = None
    else:
        path = teuthology.get_testdir(ctx)
        # test on branch from config eg: wip-* , master or next etc
        # packages for all distro's should exist for wip*
        if ctx.config.get('branch'):
            branch = ctx.config.get('branch')
            test_branch = ' --dev={branch} '.format(branch=branch)
    mons = ctx.cluster.only(teuthology.is_type('mon'))
    for node, role in mons.remotes.items():
        admin = node
        admin.run(args=['mkdir', conf_dir], check_status=False)
        nodename = admin.shortname
    system_type = teuthology.get_system_type(admin)
    if config.get('rhbuild'):
        admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y'])
    log.info('system type is %s', system_type)
    osds = ctx.cluster.only(teuthology.is_type('osd'))

    for remote, roles in osds.remotes.items():
        devs = teuthology.get_scratch_devices(remote)
        log.info("roles %s", roles)
        if (len(devs) < 3):
            log.error(
                'Test needs minimum of 3 devices, only found %s',
                str(devs))
            raise RuntimeError("Needs minimum of 3 devices ")

    conf_path = '{conf_dir}/ceph.conf'.format(conf_dir=conf_dir)
    new_cmd = 'new ' + nodename
    execute_cdeploy(admin, new_cmd, path)
    if config.get('conf') is not None:
        confp = config.get('conf')
        for section, keys in confp.items():
            lines = '[{section}]\n'.format(section=section)
            admin.sudo_write_file(conf_path, lines, append=True)
            for key, value in keys.items():
                log.info("[%s] %s = %s" % (section, key, value))
                lines = '{key} = {value}\n'.format(key=key, value=value)
                admin.sudo_write_file(conf_path, lines, append=True)
    new_mon_install = 'install {branch} --mon '.format(
        branch=test_branch) + nodename
    new_mgr_install = 'install {branch} --mgr '.format(
        branch=test_branch) + nodename
    new_osd_install = 'install {branch} --osd '.format(
        branch=test_branch) + nodename
    new_admin = 'install {branch} --cli '.format(branch=test_branch) + nodename
    create_initial = 'mon create-initial '
    mgr_create = 'mgr create ' + nodename
    # either use create-keys or push command
    push_keys = 'admin ' + nodename
    execute_cdeploy(admin, new_mon_install, path)
    execute_cdeploy(admin, new_mgr_install, path)
    execute_cdeploy(admin, new_osd_install, path)
    execute_cdeploy(admin, new_admin, path)
    execute_cdeploy(admin, create_initial, path)
    execute_cdeploy(admin, mgr_create, path)
    execute_cdeploy(admin, push_keys, path)

    for i in range(3):
        zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i])
        prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i])
        execute_cdeploy(admin, zap_disk, path)
        execute_cdeploy(admin, prepare, path)

    log.info("list files for debugging purpose to check file permissions")
    admin.run(args=['ls', run.Raw('-lt'), conf_dir])
    remote.run(args=['sudo', 'ceph', '-s'], check_status=False)
    out = remote.sh('sudo ceph health')
    log.info('Ceph health: %s', out.rstrip('\n'))
    log.info("Waiting for cluster to become healthy")
    with contextutil.safe_while(sleep=10, tries=6,
                                action='check health') as proceed:
        while proceed():
            out = remote.sh('sudo ceph health')
            if (out.split(None, 1)[0] == 'HEALTH_OK'):
                break
    rgw_install = 'install {branch} --rgw {node}'.format(
        branch=test_branch,
        node=nodename,
    )
    rgw_create = 'rgw create ' + nodename
    execute_cdeploy(admin, rgw_install, path)
    execute_cdeploy(admin, rgw_create, path)
    log.info('All ceph-deploy cli tests passed')
    try:
        yield
    finally:
        log.info("cleaning up")
        ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'],
                        check_status=False)
        time.sleep(4)
        for i in range(3):
            umount_dev = "{d}1".format(d=devs[i])
            remote.run(args=['sudo', 'umount', run.Raw(umount_dev)])
        cmd = 'purge ' + nodename
        execute_cdeploy(admin, cmd, path)
        cmd = 'purgedata ' + nodename
        execute_cdeploy(admin, cmd, path)
        log.info("Removing temporary dir")
        admin.run(
            args=[
                'rm',
                run.Raw('-rf'),
                run.Raw(conf_dir)],
            check_status=False)
        if config.get('rhbuild'):
            admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y'])

@contextlib.contextmanager
def single_node_test(ctx, config):
    """
    - ceph-deploy.single_node_test: null

    #rhbuild testing
    - ceph-deploy.single_node_test:
        rhbuild: 1.2.3

    """
    log.info("Testing ceph-deploy on single node")
    if config is None:
        config = {}
    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))

    if config.get('rhbuild'):
        log.info("RH Build, Skip Download")
        with contextutil.nested(
                lambda: cli_test(ctx=ctx, config=config),
        ):
            yield
    else:
        with contextutil.nested(
                lambda: install_fn.ship_utilities(ctx=ctx, config=None),
                lambda: download_ceph_deploy(ctx=ctx, config=config),
                lambda: cli_test(ctx=ctx, config=config),
        ):
            yield

@contextlib.contextmanager
def upgrade(ctx, config):
    """
    Upgrade using ceph-deploy
    eg:
      ceph-deploy.upgrade:
        # to upgrade to specific branch, use
        branch:
          stable: jewel
        # to setup mgr node, use
        setup-mgr-node: True
        # to wait for cluster to be healthy after the upgrade, use
        wait-for-healthy: True
        roles: (upgrades the below roles serially)
          mon.a
          mon.b
          osd.0
    """
    roles = config.get('roles')
    # get the roles that are mapped as per ceph-deploy
    # roles are mapped for mon/mds eg: mon.a => mon.host_short_name
    mapped_role = ctx.cluster.mapped_role
    log.info("roles={r}, mapped_roles={mr}".format(r=roles, mr=mapped_role))
    if config.get('branch'):
        branch = config.get('branch')
        (var, val) = list(branch.items())[0]
        ceph_branch = '--{var}={val}'.format(var=var, val=val)
    else:
        # default to wip-branch under test
        dev_branch = ctx.config['branch']
        ceph_branch = '--dev={branch}'.format(branch=dev_branch)
    # get the node used for initial deployment which is mon.a
    mon_a = mapped_role.get('mon.a')
    (ceph_admin,) = ctx.cluster.only(mon_a).remotes.keys()
    testdir = teuthology.get_testdir(ctx)
    cmd = './ceph-deploy install ' + ceph_branch
    for role in roles:
        # check if this role is mapped (mon or mds)
        if mapped_role.get(role):
            role = mapped_role.get(role)
        remotes_and_roles = ctx.cluster.only(role).remotes
        for remote, roles in remotes_and_roles.items():
            nodename = remote.shortname
            cmd = cmd + ' ' + nodename
            log.info("Upgrading ceph on %s", nodename)
            ceph_admin.run(
                args=[
                    'cd',
                    '{tdir}/ceph-deploy'.format(tdir=testdir),
                    run.Raw('&&'),
                    run.Raw(cmd),
                ],
            )
            # restart all ceph services; ideally the upgrade would do this,
            # but it does not
            remote.run(
                args=[
                    'sudo', 'systemctl', 'restart', 'ceph.target'
                ]
            )
            ceph_admin.run(args=['sudo', 'ceph', '-s'])

    # workaround for http://tracker.ceph.com/issues/20950
    # write the correct mgr key to disk
    if config.get('setup-mgr-node', None):
        mons = ctx.cluster.only(teuthology.is_type('mon'))
        for remote, roles in mons.remotes.items():
            remote.run(
                args=[
                    run.Raw('sudo ceph auth get client.bootstrap-mgr'),
                    run.Raw('|'),
                    run.Raw('sudo tee'),
                    run.Raw('/var/lib/ceph/bootstrap-mgr/ceph.keyring')
                ]
            )

    if config.get('setup-mgr-node', None):
        mgr_nodes = get_nodes_using_role(ctx, 'mgr')
        mgr_nodes = " ".join(mgr_nodes)
        mgr_install = './ceph-deploy install --mgr ' + ceph_branch + " " + mgr_nodes
        mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
        # install mgr
        ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(mgr_install),
            ],
        )
        # create mgr
        ceph_admin.run(
            args=[
                'cd',
                '{tdir}/ceph-deploy'.format(tdir=testdir),
                run.Raw('&&'),
                run.Raw(mgr_create),
            ],
        )
        ceph_admin.run(args=['sudo', 'ceph', '-s'])
    if config.get('wait-for-healthy', None):
        wait_until_healthy(ctx, ceph_admin, use_sudo=True)
    yield

@contextlib.contextmanager
def task(ctx, config):
    """
    Set up and tear down a Ceph cluster.

    For example::

        tasks:
        - install:
            extras: yes
        - ssh_keys:
        - ceph-deploy:
            branch:
                stable: bobtail
            mon_initial_members: 1
            ceph-deploy-branch: my-ceph-deploy-branch
            only_mon: true
            keep_running: true
            # either choose bluestore or filestore, default is bluestore
            bluestore: True
            # or
            filestore: True
            # skip install of mgr for old release using below flag
            skip-mgr: True  (default is False)
            # to use ceph-volume instead of ceph-disk
            # ceph-disk can only be used with old ceph-deploy release from pypi
            use-ceph-volume: true

        tasks:
        - install:
            extras: yes
        - ssh_keys:
        - ceph-deploy:
            branch:
                dev: master
            conf:
                mon:
                    debug mon = 20

        tasks:
        - install:
            extras: yes
        - ssh_keys:
        - ceph-deploy:
            branch:
                testing:
            dmcrypt: yes
            separate_journal_disk: yes

    """
    if config is None:
        config = {}

    assert isinstance(config, dict), \
        "task ceph-deploy only supports a dictionary for configuration"

    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))

    if config.get('branch') is not None:
        assert isinstance(
            config['branch'], dict), 'branch must be a dictionary'

    log.info('task ceph-deploy with config ' + str(config))

    # we need to use 1.5.39-stable for testing jewel or master branch with
    # ceph-disk
    if config.get('use-ceph-volume', False) is False:
        # check we are not testing specific branch
        if config.get('ceph-deploy-branch', False) is False:
            config['ceph-deploy-branch'] = '1.5.39-stable'

    with contextutil.nested(
            lambda: install_fn.ship_utilities(ctx=ctx, config=None),
            lambda: download_ceph_deploy(ctx=ctx, config=config),
            lambda: build_ceph_cluster(ctx=ctx, config=config),
    ):
        yield