]> git.proxmox.com Git - ceph.git/blob - ceph/qa/tasks/rebuild_mondb.py
update sources to v12.1.4
[ceph.git] / ceph / qa / tasks / rebuild_mondb.py
1 """
Test if we can recover the monitors' leveldb stores from the OSDs after
all copies of the store have been corrupted
4 """
5
6 import logging
7 import os.path
8 import shutil
9 import tempfile
10
11 import ceph_manager
12 from teuthology import misc as teuthology
13
14 log = logging.getLogger(__name__)
15
16
def _push_directory(path, remote, remote_dir):
    """
    Push the contents of local directory ``path`` into ``remote_dir`` on
    ``remote``. Equivalent shell pseudo-code:

        local_temp_path=`mktemp`
        tar czf $local_temp_path -C $path .
        remote_temp_path=`mktemp`
        scp $local_temp_path remote:$remote_temp_path
        rm $local_temp_path $remote_temp_path
        ssh remote sudo tar xzf $remote_temp_path -C $remote_dir
        ssh remote sudo rm -fr $remote_temp_path
    """
    fd, local_temp_path = tempfile.mkstemp(suffix='.tgz',
                                           prefix='rebuild_mondb-')
    os.close(fd)
    cmd = ' '.join(['tar', 'cz',
                    '-f', local_temp_path,
                    '-C', path,
                    '--', '.'])
    teuthology.sh(cmd)
    # mkstemp() is only used here to pick a unique name for the remote
    # tarball, but it also creates an empty placeholder file locally;
    # remember to remove it once the name has served its purpose.
    fd, remote_temp_path = tempfile.mkstemp(suffix='.tgz',
                                            prefix='rebuild_mondb-')
    os.close(fd)
    remote.put_file(local_temp_path, remote_temp_path)
    os.remove(local_temp_path)
    # remove the local placeholder created by mkstemp() above; only the
    # copy on the remote host is needed from here on
    os.remove(remote_temp_path)
    remote.run(args=['sudo',
                     'tar', 'xz',
                     '-C', remote_dir,
                     '-f', remote_temp_path])
    remote.run(args=['sudo', 'rm', '-fr', remote_temp_path])
47
48
def _nuke_mons(manager, mons, mon_id):
    """
    Kill all monitors and wipe out their stores.

    Every mon's data dir is removed entirely, except for ``mon_id``: for
    that one only its store.db is removed, so later we only need to drop
    in the recovered store.db instead of running mkfs on it and then
    replacing its store.db with the recovered one.

    :param manager: CephManager instance used to kill the mon daemons
    :param mons: cluster subset holding the mon remotes/roles
    :param mon_id: id of the mon that will be recovered first
    """
    assert mons
    is_mon = teuthology.is_type('mon')
    # .items() (not the py2-only .iteritems()) keeps this py2/py3 portable
    for remote, roles in mons.remotes.items():
        for role in roles:
            if not is_mon(role):
                continue
            cluster, _, m = teuthology.split_role(role)
            log.info('killing {cluster}:mon.{mon}'.format(
                cluster=cluster,
                mon=m))
            manager.kill_mon(m)
            mon_data = os.path.join('/var/lib/ceph/mon/',
                                    '{0}-{1}'.format(cluster, m))
            if m == mon_id:
                # so we will only need to recreate the store.db for the
                # first mon: easier than mkfs on it and then replacing
                # its store.db with the recovered one
                store_dir = os.path.join(mon_data, 'store.db')
                remote.run(args=['sudo', 'rm', '-r', store_dir])
            else:
                remote.run(args=['sudo', 'rm', '-r', mon_data])
71
72
def _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path):
    """
    Rebuild the mon store from the OSDs and install it on the first mon.

    Visits every OSD in turn, pushing the accumulated mon store to it and
    running ``ceph-objectstore-tool --op update-mon-db`` so the OSD folds
    its maps into the store, then pulls the updated store back. Finally
    the rebuilt store is pushed into ``mon_id``'s data dir, the caps are
    filled in on ``keyring_path``, and ``ceph-monstore-tool rebuild`` is
    run against it.

    :param ctx: teuthology run context
    :param manager: CephManager used to drive ceph-objectstore-tool
    :param cluster_name: name of the cluster being recovered
    :param mon: remote hosting the first mon
    :param mon_id: id of the mon whose store is being recreated
    :param keyring_path: path (on ``mon``) of the keyring to rebuild with
    """
    local_mstore = tempfile.mkdtemp()

    # collect the maps from all OSDs
    is_osd = teuthology.is_type('osd')
    osds = ctx.cluster.only(is_osd)
    assert osds
    # .items() (not the py2-only .iteritems()) keeps this py2/py3 portable
    for osd, roles in osds.remotes.items():
        for role in roles:
            if not is_osd(role):
                continue
            cluster, _, osd_id = teuthology.split_role(role)
            assert cluster_name == cluster
            log.info('collecting maps from {cluster}:osd.{osd}'.format(
                cluster=cluster,
                osd=osd_id))
            # push leveldb to OSD
            osd_mstore = os.path.join(teuthology.get_testdir(ctx), 'mon-store')
            osd.run(args=['sudo', 'mkdir', '-m', 'o+x', '-p', osd_mstore])

            _push_directory(local_mstore, osd, osd_mstore)
            log.info('rm -rf {0}'.format(local_mstore))
            shutil.rmtree(local_mstore)
            # update leveldb with OSD data
            options = '--op update-mon-db --mon-store-path {0}'
            log.info('cot {0}'.format(osd_mstore))
            manager.objectstore_tool(pool=None,
                                     options=options.format(osd_mstore),
                                     args='',
                                     osd=osd_id,
                                     do_revive=False)
            # pull the updated mon db back to a fresh local temp dir; the
            # previous one was consumed (rmtree'd) after being pushed
            log.info('pull dir {0} -> {1}'.format(osd_mstore, local_mstore))
            local_mstore = tempfile.mkdtemp()
            teuthology.pull_directory(osd, osd_mstore, local_mstore)
            log.info('rm -rf osd:{0}'.format(osd_mstore))
            osd.run(args=['sudo', 'rm', '-fr', osd_mstore])

    # recover the first_mon with re-built mon db
    # pull from recovered leveldb from client
    mon_store_dir = os.path.join('/var/lib/ceph/mon',
                                 '{0}-{1}'.format(cluster_name, mon_id))
    _push_directory(local_mstore, mon, mon_store_dir)
    mon.run(args=['sudo', 'chown', '-R', 'ceph:ceph', mon_store_dir])
    shutil.rmtree(local_mstore)

    # fill up the caps in the keyring file
    mon.run(args=['sudo',
                  'ceph-authtool', keyring_path,
                  '-n', 'mon.',
                  '--cap', 'mon', 'allow *'])
    mon.run(args=['sudo',
                  'ceph-authtool', keyring_path,
                  '-n', 'client.admin',
                  '--cap', 'mon', 'allow *',
                  '--cap', 'osd', 'allow *',
                  '--cap', 'mds', 'allow *',
                  '--cap', 'mgr', 'allow *'])
    mon.run(args=['sudo', '-u', 'ceph',
                  'ceph-monstore-tool', mon_store_dir,
                  'rebuild', '--', '--keyring',
                  keyring_path])
135
136
def _revive_mons(manager, mons, recovered, keyring_path):
    """
    mkfs and revive all monitors, then wait for them to form a quorum.

    ``recovered`` is the mon whose store was already rebuilt from the
    OSDs, so it is revived without a mkfs. The initial monmap is in the
    ceph.conf, so the freshly mkfs'ed mons can find their peers.

    :param manager: CephManager used to revive the mon daemons
    :param mons: cluster subset holding the mon remotes/roles
    :param recovered: id of the mon that already has a rebuilt store
    :param keyring_path: keyring passed to ``ceph-mon --mkfs``
    """
    n_mons = 0
    is_mon = teuthology.is_type('mon')
    # .items() (not the py2-only .iteritems()) keeps this py2/py3 portable
    for remote, roles in mons.remotes.items():
        for role in roles:
            if not is_mon(role):
                continue
            cluster, _, m = teuthology.split_role(role)
            if recovered != m:
                log.info('running mkfs on {cluster}:mon.{mon}'.format(
                    cluster=cluster,
                    mon=m))
                remote.run(
                    args=[
                        'sudo',
                        'ceph-mon',
                        '--cluster', cluster,
                        '--mkfs',
                        '-i', m,
                        '--keyring', keyring_path])
            log.info('reviving mon.{0}'.format(m))
            manager.revive_mon(m)
            n_mons += 1
    manager.wait_for_mon_quorum_size(n_mons, timeout=30)
163
164
def _revive_mgrs(ctx, manager):
    """
    Restart every mgr daemon in the cluster.

    :param ctx: teuthology run context (provides the cluster roles)
    :param manager: CephManager used to revive the mgr daemons
    """
    is_mgr = teuthology.is_type('mgr')
    mgrs = ctx.cluster.only(is_mgr)
    # .items() (not the py2-only .iteritems()) keeps this py2/py3 portable
    for _, roles in mgrs.remotes.items():
        for role in roles:
            if not is_mgr(role):
                continue
            _, _, mgr_id = teuthology.split_role(role)
            log.info('reviving mgr.{0}'.format(mgr_id))
            manager.revive_mgr(mgr_id)
175
176
def _revive_osds(ctx, manager):
    """
    Restart every OSD daemon in the cluster.

    :param ctx: teuthology run context (provides the cluster roles)
    :param manager: CephManager used to revive the OSD daemons
    """
    is_osd = teuthology.is_type('osd')
    osds = ctx.cluster.only(is_osd)
    # .items() (not the py2-only .iteritems()) keeps this py2/py3 portable
    for _, roles in osds.remotes.items():
        for role in roles:
            if not is_osd(role):
                continue
            _, _, osd_id = teuthology.split_role(role)
            log.info('reviving osd.{0}'.format(osd_id))
            manager.revive_osd(osd_id)
187
188
def task(ctx, config):
    """
    Test monitor recovery from OSD.

    Kills all mons and wipes their stores, rebuilds the mon store of the
    first mon from the maps held by the OSDs, then revives the mons,
    mgrs, and OSDs.

    :param ctx: teuthology run context
    :param config: optional dict; supports ``keyring_path`` to override
                   the default ``/etc/ceph/{cluster}.keyring``
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'

    first_mon = teuthology.get_first_mon(ctx, config)
    # .keys() (not the py2-only .iterkeys()) unpacks the single remote on
    # both py2 and py3
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()
    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'))

    mons = ctx.cluster.only(teuthology.is_type('mon'))
    # note down the first cluster_name and mon_id
    # we will recover it later on
    cluster_name, _, mon_id = teuthology.split_role(first_mon)
    _nuke_mons(manager, mons, mon_id)
    default_keyring = '/etc/ceph/{cluster}.keyring'.format(
        cluster=cluster_name)
    keyring_path = config.get('keyring_path', default_keyring)
    _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path)
    _revive_mons(manager, mons, mon_id, keyring_path)
    _revive_mgrs(ctx, manager)
    _revive_osds(ctx, manager)