"""
Test if we can recover the mon leveldb from the OSDs after all of the mons'
leveldbs have been corrupted.
"""
import logging
import os.path
import shutil
import tempfile

from tasks import ceph_manager
from teuthology import misc as teuthology

log = logging.getLogger(__name__)


def _push_directory(path, remote, remote_dir):
    """
    Push the local directory <path> to <remote_dir> on <remote>, roughly:

    local_temp_path=`mktemp`
    tar czf $local_temp_path $path
    ssh remote mkdir -p remote_dir
    remote_temp_path=`mktemp`
    scp $local_temp_path $remote_temp_path
    rm $local_temp_path
    tar xzf $remote_temp_path -C $remote_dir
    ssh remote rm $remote_temp_path
    """
    fd, local_temp_path = tempfile.mkstemp(suffix='.tgz',
                                           prefix='rebuild_mondb-')
    os.close(fd)
    # create the tarball locally
    cmd = ' '.join(['tar', 'cz',
                    '-f', local_temp_path,
                    '-C', path,
                    '--', '.'])
    teuthology.sh(cmd)
    _, fname = os.path.split(local_temp_path)
    fd, remote_temp_path = tempfile.mkstemp(suffix='.tgz',
                                            prefix='rebuild_mondb-')
    os.close(fd)
    # copy the tarball over and unpack it into remote_dir
    remote.put_file(local_temp_path, remote_temp_path)
    os.remove(local_temp_path)
    remote.run(args=['sudo',
                     'tar', 'xz',
                     '-C', remote_dir,
                     '-f', remote_temp_path])
    remote.run(args=['sudo', 'rm', '-fr', remote_temp_path])


def _nuke_mons(manager, mons, mon_id):
    is_mon = teuthology.is_type('mon')
    for remote, roles in mons.remotes.items():
        for role in roles:
            if not is_mon(role):
                continue
            cluster, _, m = teuthology.split_role(role)
            log.info('killing {cluster}:mon.{mon}'.format(
                cluster=cluster,
                mon=m))
            manager.kill_mon(m)
            mon_data = os.path.join('/var/lib/ceph/mon/',
                                    '{0}-{1}'.format(cluster, m))
            if m == mon_id:
                # we only need to recreate the store.db for the first mon;
                # that is easier than running mkfs on it and then replacing
                # its store.db with the recovered one
                store_dir = os.path.join(mon_data, 'store.db')
                remote.run(args=['sudo', 'rm', '-r', store_dir])
            else:
                remote.run(args=['sudo', 'rm', '-r', mon_data])


def _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path):
    local_mstore = tempfile.mkdtemp()

    # collect the maps from all OSDs
    is_osd = teuthology.is_type('osd')
    osds = ctx.cluster.only(is_osd)
    for osd, roles in osds.remotes.items():
        for role in roles:
            if not is_osd(role):
                continue
            cluster, _, osd_id = teuthology.split_role(role)
            assert cluster_name == cluster
            log.info('collecting maps from {cluster}:osd.{osd}'.format(
                cluster=cluster,
                osd=osd_id))
            # push the mon store collected so far to the OSD node
            osd_mstore = os.path.join(teuthology.get_testdir(ctx), 'mon-store')
            osd.run(args=['sudo', 'mkdir', '-m', 'o+x', '-p', osd_mstore])

            _push_directory(local_mstore, osd, osd_mstore)
            log.info('rm -rf {0}'.format(local_mstore))
            shutil.rmtree(local_mstore)
            # update leveldb with OSD data
            options = '--no-mon-config --op update-mon-db --mon-store-path {0}'
            log.info('cot {0}'.format(osd_mstore))
            manager.objectstore_tool(pool=None,
                                     options=options.format(osd_mstore),
                                     args='',
                                     osd=osd_id,
                                     do_revive=False)
            # pull the updated mon db back to the test node
            log.info('pull dir {0} -> {1}'.format(osd_mstore, local_mstore))
            local_mstore = tempfile.mkdtemp()
            teuthology.pull_directory(osd, osd_mstore, local_mstore)
            log.info('rm -rf osd:{0}'.format(osd_mstore))
            osd.run(args=['sudo', 'rm', '-fr', osd_mstore])

    # recover the first mon: push the mon db we just rebuilt from the OSDs
    # into its mon data directory
    mon_store_dir = os.path.join('/var/lib/ceph/mon',
                                 '{0}-{1}'.format(cluster_name, mon_id))
    _push_directory(local_mstore, mon, mon_store_dir)
    mon.run(args=['sudo', 'chown', '-R', 'ceph:ceph', mon_store_dir])
    shutil.rmtree(local_mstore)

    # fill in the caps in the keyring file
    mon.run(args=['sudo',
                  'ceph-authtool', keyring_path,
                  '-n', 'mon.',
                  '--cap', 'mon', 'allow *'])
    mon.run(args=['sudo',
                  'ceph-authtool', keyring_path,
                  '-n', 'client.admin',
                  '--cap', 'mon', 'allow *',
                  '--cap', 'osd', 'allow *',
                  '--cap', 'mds', 'allow *',
                  '--cap', 'mgr', 'allow *'])
    # rebuild the mon store from the collected maps and the keyring
    mon.run(args=['sudo', '-u', 'ceph',
                  'CEPH_ARGS=--no-mon-config',
                  'ceph-monstore-tool', mon_store_dir,
                  'rebuild', '--',
                  '--keyring', keyring_path,
                  '--monmap', '/tmp/monmap'])
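
# For reference, the two external commands driven by the helpers above are,
# in spirit (the data path, store path, and keyring shown here are
# illustrative assumptions, not the values this task computes at run time):
#
#   ceph-objectstore-tool --data-path /var/lib/ceph/osd/ceph-0 \
#       --no-mon-config --op update-mon-db --mon-store-path /tmp/mon-store
#   ceph-monstore-tool /tmp/mon-store rebuild -- \
#       --keyring /etc/ceph/ceph.keyring --monmap /tmp/monmap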


def _revive_mons(manager, mons, recovered, keyring_path):
    # revive the monitors;
    # the initial monmap is in the ceph.conf, so we are good.
    n_mons = 0
    is_mon = teuthology.is_type('mon')
    for remote, roles in mons.remotes.items():
        for role in roles:
            if not is_mon(role):
                continue
            cluster, _, m = teuthology.split_role(role)
            if recovered != m:
                # the recovered mon already has its store; mkfs the others
                log.info('running mkfs on {cluster}:mon.{mon}'.format(
                    cluster=cluster,
                    mon=m))
                remote.run(
                    args=[
                        'sudo',
                        'ceph-mon',
                        '--cluster', cluster,
                        '--mkfs',
                        '-i', m,
                        '--keyring', keyring_path,
                        '--monmap', '/tmp/monmap'])
            log.info('reviving mon.{0}'.format(m))
            manager.revive_mon(m)
            n_mons += 1
    manager.wait_for_mon_quorum_size(n_mons, timeout=30)


def _revive_mgrs(ctx, manager):
    is_mgr = teuthology.is_type('mgr')
    mgrs = ctx.cluster.only(is_mgr)
    for _, roles in mgrs.remotes.items():
        for role in roles:
            if not is_mgr(role):
                continue
            _, _, mgr_id = teuthology.split_role(role)
            log.info('reviving mgr.{0}'.format(mgr_id))
            manager.revive_mgr(mgr_id)


def _revive_osds(ctx, manager):
    is_osd = teuthology.is_type('osd')
    osds = ctx.cluster.only(is_osd)
    for _, roles in osds.remotes.items():
        for role in roles:
            if not is_osd(role):
                continue
            _, _, osd_id = teuthology.split_role(role)
            log.info('reviving osd.{0}'.format(osd_id))
            manager.revive_osd(osd_id)
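

# A minimal sketch of how this task might be referenced from a teuthology job.
# The yaml fragment below is illustrative only (the surrounding `install` and
# `ceph` entries are assumptions); the task itself reads just the optional
# `keyring_path` key from its config dict:
#
#   tasks:
#   - install:
#   - ceph:
#   - rebuild_mondb:
#       keyring_path: /etc/ceph/ceph.keyring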


def task(ctx, config):
    """
    Test monitor recovery from the OSDs: wipe every mon store, rebuild one
    from the maps kept on the OSDs, then bring the cluster back up.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    # stash a monmap for later
    mon.run(args=['ceph', 'mon', 'getmap', '-o', '/tmp/monmap'])

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'))

    mons = ctx.cluster.only(teuthology.is_type('mon'))
    # note down the first cluster_name and mon_id;
    # we will recover that mon later on
    cluster_name, _, mon_id = teuthology.split_role(first_mon)
    _nuke_mons(manager, mons, mon_id)
    default_keyring = '/etc/ceph/{cluster}.keyring'.format(
        cluster=cluster_name)
    keyring_path = config.get('keyring_path', default_keyring)
    _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path)
    _revive_mons(manager, mons, mon_id, keyring_path)
    _revive_mgrs(ctx, manager)
    _revive_osds(ctx, manager)