"""
Test if we can recover the monitors' leveldb stores from the OSDs after all
of them have been corrupted
"""

import logging
import os.path
import shutil
import tempfile

import ceph_manager
from teuthology import misc as teuthology

log = logging.getLogger(__name__)


def push_directory(path, remote, remote_dir):
    """
    local_temp_path=`mktemp`
    tar czf $local_temp_path $path
    ssh remote mkdir -p remote_dir
    remote_temp_path=`mktemp`
    scp $local_temp_path $remote_temp_path
    rm $local_temp_path
    tar xzf $remote_temp_path -C $remote_dir
    ssh remote rm $remote_temp_path
    """
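    # pack the contents of the local directory into a tarball; "-C path -- ."
    # keeps the archived paths relative to the directory itself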
    fd, local_temp_path = tempfile.mkstemp(suffix='.tgz',
                                           prefix='rebuild_mondb-')
    os.close(fd)
    cmd = ' '.join(['tar', 'cz',
                    '-f', local_temp_path,
                    '-C', path,
                    '--', '.'])
    teuthology.sh(cmd)
    _, fname = os.path.split(local_temp_path)
    fd, remote_temp_path = tempfile.mkstemp(suffix='.tgz',
                                            prefix='rebuild_mondb-')
    os.close(fd)
    remote.put_file(local_temp_path, remote_temp_path)
    os.remove(local_temp_path)
    remote.run(args=['sudo',
                     'tar', 'xz',
                     '-C', remote_dir,
                     '-f', remote_temp_path])
    remote.run(args=['sudo', 'rm', '-fr', remote_temp_path])


def task(ctx, config):
    """
    Test monitor recovery from OSD
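
    The task accepts an optional ``keyring_path``, the keyring handed to
    ceph-monstore-tool when rebuilding the store; it defaults to
    /etc/ceph/{cluster}.keyring. A hypothetical suite snippet (the
    surrounding tasks are only an illustration):

        tasks:
        - install:
        - ceph:
        - rebuild_mondb:
            keyring_path: /etc/ceph/ceph.keyring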
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'))

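    # kill every monitor and wipe its store; the first monitor's data dir is
    # kept so that only its store.db needs to be replaced with the one
    # rebuilt from the OSDs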
    mons = ctx.cluster.only(teuthology.is_type('mon'))
    assert mons
    # note down the first cluster_name and mon_id
    # we will recover it later on
    cluster_name = None
    mon_id = None
    for remote, roles in mons.remotes.iteritems():
        is_mon = teuthology.is_type('mon')
        for role in roles:
            if not is_mon(role):
                continue
            cluster, _, m = teuthology.split_role(role)
            if cluster_name is None:
                cluster_name = cluster
                mon_id = m
            assert cluster_name == cluster
            log.info('killing {cluster}:mon.{mon}'.format(
                cluster=cluster,
                mon=m))
            manager.kill_mon(m)
            mon_data = os.path.join('/var/lib/ceph/mon/',
                                    '{0}-{1}'.format(cluster_name, m))
            if m == mon_id:
                # we only need to recreate the store.db for the first mon;
                # that is easier than running mkfs on it and then replacing
                # its store.db with the recovered one
                store_dir = os.path.join(mon_data, 'store.db')
                remote.run(args=['sudo', 'rm', '-r', store_dir])
            else:
                remote.run(args=['sudo', 'rm', '-r', mon_data])

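    # the rebuilt mon store is accumulated locally: it is pushed to each OSD
    # host in turn, updated there with that OSD's maps, and pulled back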
    local_mstore = tempfile.mkdtemp()

    # collect the maps from all OSDs
    osds = ctx.cluster.only(teuthology.is_type('osd'))
    assert osds
    for osd, roles in osds.remotes.iteritems():
        is_osd = teuthology.is_type('osd')
        for role in roles:
            if not is_osd(role):
                continue
            cluster, _, osd_id = teuthology.split_role(role)
            assert cluster_name == cluster
            log.info('collecting maps from {cluster}:osd.{osd}'.format(
                cluster=cluster,
                osd=osd_id))
            # push leveldb to OSD
            osd_mstore = os.path.join(teuthology.get_testdir(ctx), 'mon-store')
            osd.run(args=['sudo', 'mkdir', '-m', 'o+x', '-p', osd_mstore])

            push_directory(local_mstore, osd, osd_mstore)
            log.info('rm -rf {0}'.format(local_mstore))
            shutil.rmtree(local_mstore)
            # update leveldb with OSD data
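            # ceph-objectstore-tool --op update-mon-db reads the osdmaps kept
            # on this OSD and merges them into the store at osd_mstore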
            options = '--op update-mon-db --mon-store-path {0}'
            log.info('cot {0}'.format(osd_mstore))
            manager.objectstore_tool(pool=None,
                                     options=options.format(osd_mstore),
                                     args='',
                                     osd=osd_id,
                                     do_revive=False)
            # pull the updated mon db
            local_mstore = tempfile.mkdtemp()
            log.info('pull dir {0} -> {1}'.format(osd_mstore, local_mstore))
            teuthology.pull_directory(osd, osd_mstore, local_mstore)
            log.info('rm -rf osd:{0}'.format(osd_mstore))
            osd.run(args=['sudo', 'rm', '-fr', osd_mstore])

    # recover the first_mon with the re-built mon db:
    # push the recovered leveldb from the test node to the first mon
    mon_store_dir = os.path.join('/var/lib/ceph/mon',
                                 '{0}-{1}'.format(cluster_name, mon_id))
    push_directory(local_mstore, mon, mon_store_dir)
    mon.run(args=['sudo', 'chown', '-R', 'ceph:ceph', mon_store_dir])
    shutil.rmtree(local_mstore)
    default_keyring = '/etc/ceph/{cluster}.keyring'.format(
        cluster=cluster_name)
    keyring_path = config.get('keyring_path', default_keyring)
    # fill up the caps in the keyring file
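    # auth entries cannot be recovered from the OSDs, so the caps of mon. and
    # client.admin are re-asserted in the keyring that ceph-monstore-tool
    # will import into the rebuilt store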
    mon.run(args=['sudo',
                  'ceph-authtool', keyring_path,
                  '-n', 'mon.',
                  '--cap', 'mon', 'allow *'])
    mon.run(args=['sudo',
                  'ceph-authtool', keyring_path,
                  '-n', 'client.admin',
                  '--cap', 'mon', 'allow *',
                  '--cap', 'osd', 'allow *',
                  '--cap', 'mds', 'allow *'])
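    # rebuild the monitor store from the collected osdmaps, seeding the auth
    # database from the keyring prepared above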
    mon.run(args=['sudo', '-u', 'ceph',
                  'ceph-monstore-tool', mon_store_dir,
                  'rebuild', '--', '--keyring',
                  keyring_path])

    # revive monitors
    # the initial monmap is in the ceph.conf, so we are good.
    n_mons = 0
    for remote, roles in mons.remotes.iteritems():
        is_mon = teuthology.is_type('mon')
        for role in roles:
            if not is_mon(role):
                continue
            cluster, _, m = teuthology.split_role(role)
            assert cluster_name == cluster
            if mon_id != m:
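                # every monitor other than the recovered one is recreated
                # from scratch; it will sync the rebuilt store from the
                # recovered monitor once it joins the quorum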
                log.info('running mkfs on {cluster}:mon.{mon}'.format(
                    cluster=cluster,
                    mon=m))
                remote.run(
                    args=[
                        'sudo',
                        'ceph-mon',
                        '--cluster', cluster,
                        '--mkfs',
                        '-i', m,
                        '--keyring', keyring_path])
            manager.revive_mon(m)
            n_mons += 1

    manager.wait_for_mon_quorum_size(n_mons, timeout=30)
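    # with the monitors back in quorum, bring the OSDs back up as well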
    for osd, roles in osds.remotes.iteritems():
        is_osd = teuthology.is_type('osd')
        for role in roles:
            if not is_osd(role):
                continue
            _, _, osd_id = teuthology.split_role(role)
            log.info('reviving osd.{0}'.format(osd_id))
            manager.revive_osd(osd_id)